This commit is contained in:
Simon Petit 2024-11-24 17:10:33 +01:00
commit 4bfda4690e
13 changed files with 324 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
_build

4
bin/dune Normal file
View File

@ -0,0 +1,4 @@
; Command-line entry point: builds bin/main.ml and exposes it publicly as
; `ocaml_sql`.
(executable
 (name main)
 (public_name ocaml_sql)
 (libraries parser lexer ast))

9
bin/main.ml Normal file
View File

@ -0,0 +1,9 @@
open Ast
open Printf
(* Smoke test: parse a fixed query and print "ok" when the resulting AST is the
   expected one, "not ok" otherwise.

   The expected select list is written in reverse source order ("ab" before
   "aa") because the parser's [select_sublist] rule conses each new column onto
   the front of the list it has built so far. *)
let () =
  let expected =
    Query (Select ([ Column "ab"; Column "aa" ], [ Table "b1" ]))
  in
  let parsed =
    "SELECT aa, ab FROM b1"
    |> Lexing.from_string
    |> Parser.main Lexer.read_token
  in
  if parsed = expected then printf "ok" else printf "not ok"

28
dune-project Normal file
View File

@ -0,0 +1,28 @@
(lang dune 3.16)

(name ocaml_sql_parser)

; Regenerate ocaml_sql_parser.opam from the (package) stanza below.
(generate_opam_files true)

; NOTE(review): source, authors, maintainers, license, documentation, synopsis,
; description and tags still hold template placeholder values - replace them
; before publishing the package.
(source
 (github username/reponame))

(authors "Author Name")

(maintainers "Maintainer Name")

(license LICENSE)

(documentation https://url/to/documentation)

; Enables the (menhir ...) stanza used in parser/dune.
(using menhir 3.0)

(package
 (name ocaml_sql_parser)
 (synopsis "A short synopsis")
 (description "A longer description")
 ; menhir is required at build time to generate the parser from
 ; parser/parser.mly, so it has to be a declared dependency.
 (depends ocaml dune menhir)
 (tags
  (topics "to describe" your project)))

; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project

6
lexer/dune Normal file
View File

@ -0,0 +1,6 @@
; Generate lexer.ml from lexer.mll with ocamllex.
(ocamllex lexer)

; The lexer library. It depends on `parser` because lexer.mll opens the Parser
; module to get the token constructors.
(library
 (name lexer)
 (libraries parser)
 (modules lexer))

27
lexer/lexer.mll Normal file
View File

@ -0,0 +1,27 @@
{
(* Token constructors (SELECT, IDENT, EOF, ...) come from the Menhir-generated
   Parser module. *)
open Parser
}

let whitespace = [' ' '\t']+
let newline = "\r\n" | '\r' | '\n'
let digit = ['0'-'9']
let alpha = ['a'-'z' 'A'-'Z' '_']
let alphanumeric = (alpha|digit)

(* [read_token lexbuf] returns the next token of the input.

   - Blanks and line breaks are skipped; line breaks also update the line
     counter kept in [lexbuf].
   - Keywords are recognised in upper case only. Because ocamllex prefers the
     longest match, a longer word such as SELECTED is still an IDENT; the
     keyword rules must stay above the identifier rule so that they win when
     both match the same text.
   - AS, OR, AND, NOT and the `<` / `>` operators are declared by the parser
     but no grammar rule reachable from its entry point uses them, so they are
     not lexed here yet.

   Raises [Failure] on a character that cannot start any token. *)
rule read_token = parse
  | whitespace { read_token lexbuf }
  | newline { Lexing.new_line lexbuf; read_token lexbuf }
  | "SELECT" { SELECT }
  | "ALL" { ALL }
  | "DISTINCT" { DISTINCT }
  | "FROM" { FROM }
  | "WHERE" { WHERE }
  | "INNER" { INNER }
  | "LEFT" { LEFT }
  | "RIGHT" { RIGHT }
  | "FULL" { FULL }
  | "OUTER" { OUTER }
  | "CROSS" { CROSS }
  | "NATURAL" { NATURAL }
  | "UNION" { UNION }
  | "JOIN" { JOIN }
  | "ON" { ON }
  | "*" { ASTERISK }
  | "." { DOT }
  | "," { COMMA }
  | "=" { EQUALS_OPERATOR }
  | alpha alphanumeric* as ident { IDENT ident }
  | eof { EOF }
  | _ as c
    { failwith
        (Printf.sprintf "lexer: unexpected character %C at offset %d" c
           (Lexing.lexeme_start lexbuf)) }

18
lib/ast.ml Normal file
View File

@ -0,0 +1,18 @@
(* Abstract syntax tree produced by the SQL parser. *)

(* Top-level parse result: a single SELECT statement. *)
type query = Query of select_stmt

(* [Select (columns, tables)]: the select list and the table references of the
   FROM clause. *)
and select_stmt =
  | Select of column list * table list

(* One entry of the select list. *)
and column =
  | Asterisk (* the `*` wildcard *)
  | Column of string (* a column referred to by its name *)

(* One table reference of the FROM clause. *)
and table =
  | Table of string (* a table referred to by its name *)
  | Join of table * join_type * table
      (* [Join (left, kind, right)]; no join condition is stored in the AST *)

(* Kind of join carried by [Join]. *)
and join_type =
  | Inner
  | Left
  | Right
  | Full
  | Cross
  | Union
  | Natural

3
lib/dune Normal file
View File

@ -0,0 +1,3 @@
; AST type definitions (lib/ast.ml), used by the parser, the executable and the
; tests.
(library
 (name ast)
 (modules ast))

31
ocaml_sql_parser.opam Normal file
View File

@ -0,0 +1,31 @@
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
synopsis: "A short synopsis"
description: "A longer description"
maintainer: ["Maintainer Name"]
authors: ["Author Name"]
license: "LICENSE"
tags: ["topics" "to describe" "your" "project"]
homepage: "https://github.com/username/reponame"
doc: "https://url/to/documentation"
bug-reports: "https://github.com/username/reponame/issues"
depends: [
"ocaml"
"dune" {>= "3.16"}
"odoc" {with-doc}
]
build: [
["dune" "subst"] {dev}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"@install"
"@runtest" {with-test}
"@doc" {with-doc}
]
]
dev-repo: "git+https://github.com/username/reponame.git"

6
parser/dune Normal file
View File

@ -0,0 +1,6 @@
; Run Menhir on parser.mly to produce the Parser module.
(menhir
 (modules parser))

; The parser library; its semantic actions build values from `ast`.
(library
 (name parser)
 (libraries ast))

177
parser/parser.mly Normal file
View File

@ -0,0 +1,177 @@
%{
(* Header: bring the AST constructors (Query, Select, Column, Table, Join, ...)
   into scope for the semantic actions. *)
open Ast
%}

(* Keywords *)
%token SELECT ALL DISTINCT FROM WHERE
%token LEFT RIGHT FULL INNER OUTER
%token CROSS NATURAL UNION JOIN

(* Comparison operators *)
%token GREATER_THAN_OPERATOR LESS_THAN_OPERATOR EQUALS_OPERATOR

(* Identifiers carry their text *)
%token <string> IDENT

(* Punctuation *)
%token COMMA DOT
%token ASTERISK

%token AS ON
%token OR AND NOT
%token EOF

%start main

(* The type is written fully qualified: the header above (and its [open Ast])
   is not part of the generated interface, so an unqualified [query] would only
   resolve there if the tool rewrites it from inferred types. *)
%type <Ast.query> main
%%
(* Entry point: one SELECT statement followed by end of input. *)
main:
  | stmt = select_stmt EOF { Query stmt }

(* SELECT [ALL | DISTINCT] <select list> <table expression>.
   NOTE(review): the ALL / DISTINCT quantifier is parsed but not recorded in
   the AST. *)
select_stmt:
  | SELECT cols = select_list tables = table_expression
      { Select (cols, tables) }
  | SELECT set_identifier cols = select_list tables = table_expression
      { Select (cols, tables) }

set_identifier:
  | ALL { () }
  | DISTINCT { () }

select_list:
  | ASTERISK { [ Asterisk ] }
  | cols = select_sublist { cols }

(* Comma-separated column names.
   NOTE(review): each new column is consed onto the front, so the resulting
   list is in reverse source order (`a, b` yields [Column "b"; Column "a"]). *)
select_sublist:
  | name = IDENT { [ Column name ] }
  | earlier = select_sublist COMMA name = IDENT { Column name :: earlier }

(* FROM clause with an optional WHERE clause; the WHERE clause contributes
   nothing to the semantic value. *)
table_expression:
  | tables = from_clause { tables }
  | tables = from_clause where_clause { tables }

from_clause:
  | FROM tables = table_reference_list { tables }

(* Comma-separated table references; like the select list, the result is in
   reverse source order. *)
table_reference_list:
  | tbl = table_reference { [ tbl ] }
  | earlier = table_reference_list COMMA tbl = table_reference
      { tbl :: earlier }
(* A table reference is either a plain table or a join.
   Not supported yet (kept from the original as a reminder):
   | table_primary_or_joined_table sample_clause *)
table_reference:
  | tbl = table_primary_or_joined_table { tbl }

table_primary_or_joined_table:
  | tbl = table_primary { tbl }
  | tbl = joined_table { tbl }

table_primary:
  | tbl = table_or_query_name { tbl }

table_or_query_name:
  | tbl = table_name { tbl }

joined_table:
  | j = cross_join { j }
  | j = qualified_join { j }
  | j = natural_join { j }
  | j = union_join { j }

cross_join:
  | lhs = table_reference CROSS JOIN rhs = table_primary
      { Join (lhs, Cross, rhs) }

(* Joins with an ON condition. The condition is parsed but not stored.
   NOTE(review): a JOIN without an explicit join type is recorded as [Left];
   in standard SQL a bare JOIN is an inner join. test/SQL_parser.ml asserts
   the current [Left] result, so both must be changed together. *)
qualified_join:
  | lhs = table_reference JOIN rhs = table_reference join_specification
      { Join (lhs, Left, rhs) }
  | lhs = table_reference kind = join_type JOIN rhs = table_reference
    join_specification
      { Join (lhs, kind, rhs) }

join_specification:
  | join_condition { () }

join_condition:
  | ON search_condition { () }

(* NOTE(review): in the second alternative the explicit join type is parsed
   and then dropped; the result is [Natural] in both cases. *)
natural_join:
  | lhs = table_reference NATURAL JOIN rhs = table_primary
      { Join (lhs, Natural, rhs) }
  | lhs = table_reference NATURAL join_type JOIN rhs = table_primary
      { Join (lhs, Natural, rhs) }

union_join:
  | lhs = table_reference UNION JOIN rhs = table_primary
      { Join (lhs, Union, rhs) }

table_name:
  | name = IDENT { Table name }

(* INNER, or LEFT / RIGHT / FULL with an optional OUTER keyword. *)
join_type:
  | INNER { Inner }
  | kind = outer_join_type { kind }
  | kind = outer_join_type OUTER { kind }

outer_join_type:
  | LEFT { Left }
  | RIGHT { Right }
  | FULL { Full }
(* WHERE clause: the condition is parsed but its semantic value is unit, so
   nothing about it reaches the AST. *)
where_clause :
  | WHERE search_condition { }

(* The only condition accepted so far: `ident = ident`. *)
search_condition:
  | IDENT EQUALS_OPERATOR IDENT {}

(* NOTE(review): none of the rules below is referenced from search_condition or
   from any rule above, so they are currently unreachable from [main]. They
   look like scaffolding for a fuller boolean / comparison grammar - confirm
   the intent before wiring them in. All of their actions return unit. *)

(* OR-separated boolean terms. *)
boolean_value_expression:
  | boolean_term {}
  | boolean_value_expression OR boolean_term {}

(* AND-separated boolean factors. *)
boolean_term:
  | boolean_factor {}
  | boolean_term AND boolean_factor {}

(* A boolean test, optionally negated with NOT. *)
boolean_factor:
  | boolean_test {}
  | NOT boolean_test {}

boolean_test:
  | boolean_primary {}

boolean_primary :
  | predicate {}
  | boolean_predicand {}

predicate :
  | comparison_predicate {}

(* <operand> <comparison operator> <operand> *)
comparison_predicate :
  | row_value_predicand comparison_predicate_part2 {}

comparison_predicate_part2:
  | comp_op row_value_predicand {}

(* Comparison operators; the two-character ones are assembled from the
   single-character operator tokens by the three rules that follow. *)
comp_op :
  | EQUALS_OPERATOR {}
  | not_equals_operator {}
  | LESS_THAN_OPERATOR {}
  | GREATER_THAN_OPERATOR {}
  | less_than_or_equals_operator {}
  | greater_than_or_equals_operator {}

(* `<` followed by `>` *)
not_equals_operator :
  | LESS_THAN_OPERATOR GREATER_THAN_OPERATOR {}

(* `<` followed by `=` *)
less_than_or_equals_operator:
  | LESS_THAN_OPERATOR EQUALS_OPERATOR {}

(* `>` followed by `=` *)
greater_than_or_equals_operator:
  | GREATER_THAN_OPERATOR EQUALS_OPERATOR {}

(* The operand chain below bottoms out in identifier_chain. *)
row_value_predicand:
  | row_value_special_case {}

row_value_special_case :
  | nonparenthesized_value_expression_primary {}

nonparenthesized_value_expression_primary:
  | column_reference {}

column_reference:
  | basic_identifier_chain {}

basic_identifier_chain:
  | identifier_chain {}

(* One or more identifiers separated by dots, e.g. `t.col`. *)
identifier_chain:
  | IDENT {}
  | identifier_chain DOT IDENT {}

boolean_predicand:
  | nonparenthesized_value_expression_primary {}

11
test/SQL_parser.ml Normal file
View File

@ -0,0 +1,11 @@
open Ast
(* Lex and parse [query], returning the AST built by [Parser.main]. *)
let parse query =
  query |> Lexing.from_string |> Parser.main Lexer.read_token
(* Parser assertions; the first failing one aborts the run with
   [Assert_failure]. *)
let () =
  let select_all tables = Query (Select ([ Asterisk ], tables)) in
  let t1 = Table "t1" and t2 = Table "t2" in
  (* single named column *)
  assert (
    parse "SELECT ab FROM b1"
    = Query (Select ([ Column "ab" ], [ Table "b1" ])));
  (* wildcard select list *)
  assert (parse "SELECT * FROM b1" = select_all [ Table "b1" ]);
  (* cross join *)
  assert (
    parse "SELECT * FROM t1 CROSS JOIN t2"
    = select_all [ Join (t1, Cross, t2) ]);
  (* NOTE(review): a bare JOIN is expected as [Left] here because that is what
     the parser's qualified_join rule currently builds; standard SQL treats it
     as an inner join - confirm which is intended. *)
  assert (
    parse "SELECT * FROM t1 JOIN t2 ON a = b"
    = select_all [ Join (t1, Left, t2) ])

3
test/dune Normal file
View File

@ -0,0 +1,3 @@
; Assertion-based parser tests (test/SQL_parser.ml), run by `dune runtest`.
(test
 (libraries parser lexer ast)
 (name SQL_parser))