From 4bfda4690e449f87a70407d8df68ae75491b84ec Mon Sep 17 00:00:00 2001
From: Simon Petit
Date: Sun, 24 Nov 2024 17:10:33 +0100
Subject: [PATCH] wip

---
Review note: the blob ids on the "index" lines below predate the review
fixes and no longer match the file contents.

 .gitignore            |   1 +
 bin/dune              |   4 +
 bin/main.ml           |  11 +++
 dune-project          |  28 +++++++
 lexer/dune            |   6 ++
 lexer/lexer.mll       |  42 ++++++++++
 lib/ast.ml            |  19 +++++
 lib/dune              |   3 +
 ocaml_sql_parser.opam |  32 ++++++++
 parser/dune           |   6 ++
 parser/parser.mly     | 182 ++++++++++++++++++++++++++++++++++++++++++
 test/SQL_parser.ml    |  14 ++++
 test/dune             |   3 +
 13 files changed, 351 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 bin/dune
 create mode 100644 bin/main.ml
 create mode 100644 dune-project
 create mode 100644 lexer/dune
 create mode 100644 lexer/lexer.mll
 create mode 100644 lib/ast.ml
 create mode 100644 lib/dune
 create mode 100644 ocaml_sql_parser.opam
 create mode 100644 parser/dune
 create mode 100644 parser/parser.mly
 create mode 100644 test/SQL_parser.ml
 create mode 100644 test/dune

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e35d885
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+_build
diff --git a/bin/dune b/bin/dune
new file mode 100644
index 0000000..86df72f
--- /dev/null
+++ b/bin/dune
@@ -0,0 +1,4 @@
+(executable
+ (public_name ocaml_sql)
+ (name main)
+ (libraries parser lexer ast))
diff --git a/bin/main.ml b/bin/main.ml
new file mode 100644
index 0000000..a312490
--- /dev/null
+++ b/bin/main.ml
@@ -0,0 +1,11 @@
+open Ast
+open Printf
+
+(* Smoke test: parse a fixed query and check the resulting AST. *)
+let () =
+  let input = "SELECT aa, ab FROM b1" in
+  let lexbuf = Lexing.from_string input in
+  match Parser.main Lexer.read_token lexbuf with
+  (* Columns are expected in the order they were written in the query. *)
+  | Query (Select ([Column "aa"; Column "ab"], [Table "b1"])) -> printf "ok"
+  | _ -> printf "not ok"
diff --git a/dune-project b/dune-project
new file mode 100644
index 0000000..8e16b3c
--- /dev/null
+++ b/dune-project
@@ -0,0 +1,28 @@
+(lang dune 3.16)
+
+(name ocaml_sql_parser)
+
+(generate_opam_files true)
+
+(source
+ (github username/reponame))
+
+(authors "Author Name")
+
+(maintainers "Maintainer Name")
+
+(license LICENSE)
+
+(documentation https://url/to/documentation)
+
+(using menhir 3.0)
+
+(package
+ (name ocaml_sql_parser)
+ (synopsis "A short synopsis")
+ (description "A longer description")
+ (depends ocaml dune menhir)
+ (tags
+  (topics "to describe" your project)))
+
+; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project
diff --git a/lexer/dune b/lexer/dune
new file mode 100644
index 0000000..52aeb33
--- /dev/null
+++ b/lexer/dune
@@ -0,0 +1,6 @@
+(library
+ (name lexer)
+ (modules lexer)
+ (libraries parser))
+
+(ocamllex lexer)
diff --git a/lexer/lexer.mll b/lexer/lexer.mll
new file mode 100644
index 0000000..61505db
--- /dev/null
+++ b/lexer/lexer.mll
@@ -0,0 +1,42 @@
+{
+open Parser
+}
+
+(* Newlines count as whitespace so multi-line queries can be lexed. *)
+let whitespace = [' ' '\t' '\r' '\n']+
+let digit = ['0'-'9']
+let alpha = ['a'-'z' 'A'-'Z' '_']
+let alphanumeric = (alpha|digit)
+
+(* Turns the next lexeme of [lexbuf] into a parser token. Keywords are matched
+   in upper case only and must be listed before the identifier rule: on a tie
+   in match length ocamllex picks the first rule. *)
+rule read_token = parse
+  | whitespace { read_token lexbuf }
+  | "SELECT" { SELECT }
+  | "ALL" { ALL }
+  | "DISTINCT" { DISTINCT }
+  | "FROM" { FROM }
+  | "WHERE" { WHERE }
+  | "INNER" { INNER }
+  | "OUTER" { OUTER }
+  | "LEFT" { LEFT }
+  | "RIGHT" { RIGHT }
+  | "FULL" { FULL }
+  | "CROSS" { CROSS }
+  | "NATURAL" { NATURAL }
+  | "UNION" { UNION }
+  | "JOIN" { JOIN }
+  | "ON" { ON }
+  | "AS" { AS }
+  | "OR" { OR }
+  | "AND" { AND }
+  | "NOT" { NOT }
+  | "*" { ASTERISK }
+  | "." { DOT }
+  | "," { COMMA }
+  | "=" { EQUALS_OPERATOR }
+  | "<" { LESS_THAN_OPERATOR }
+  | ">" { GREATER_THAN_OPERATOR }
+  | alpha alphanumeric* as ident { IDENT ident }
+  | eof { EOF }
diff --git a/lib/ast.ml b/lib/ast.ml
new file mode 100644
index 0000000..3f37dc3
--- /dev/null
+++ b/lib/ast.ml
@@ -0,0 +1,19 @@
+(* AST for the supported SQL subset; values are built by parser/parser.mly. *)
+type query = Query of select_stmt
+and select_stmt =
+  | Select of column list * table list
+and column =
+  | Asterisk
+  | Column of string
+and table =
+  | Table of string
+  | Join of table * join_type * table
+and join_type =
+  | Inner
+  | Left
+  | Right
+  | Full
+  | Cross
+  | Union
+  | Natural
+
diff --git a/lib/dune b/lib/dune
new file mode 100644
index 0000000..84b7fb2
--- /dev/null
+++ b/lib/dune
@@ -0,0 +1,3 @@
+(library
+ (modules ast)
+ (name ast))
diff --git a/ocaml_sql_parser.opam b/ocaml_sql_parser.opam
new file mode 100644
index 0000000..b296c11
--- /dev/null
+++ b/ocaml_sql_parser.opam
@@ -0,0 +1,32 @@
+# This file is generated by dune, edit dune-project instead
+opam-version: "2.0"
+synopsis: "A short synopsis"
+description: "A longer description"
+maintainer: ["Maintainer Name"]
+authors: ["Author Name"]
+license: "LICENSE"
+tags: ["topics" "to describe" "your" "project"]
+homepage: "https://github.com/username/reponame"
+doc: "https://url/to/documentation"
+bug-reports: "https://github.com/username/reponame/issues"
+depends: [
+  "ocaml"
+  "dune" {>= "3.16"}
+  "menhir"
+  "odoc" {with-doc}
+]
+build: [
+  ["dune" "subst"] {dev}
+  [
+    "dune"
+    "build"
+    "-p"
+    name
+    "-j"
+    jobs
+    "@install"
+    "@runtest" {with-test}
+    "@doc" {with-doc}
+  ]
+]
+dev-repo: "git+https://github.com/username/reponame.git"
diff --git a/parser/dune b/parser/dune
new file mode 100644
index 0000000..a05e733
--- /dev/null
+++ b/parser/dune
@@ -0,0 +1,6 @@
+(library
+ (name parser)
+ (libraries ast))
+
+(menhir
+ (modules parser))
diff --git a/parser/parser.mly b/parser/parser.mly
new file mode 100644
index 0000000..2f2afc9
--- /dev/null
+++ b/parser/parser.mly
@@ -0,0 +1,182 @@
+%{
+(* Header: bring the AST constructors from lib/ast.ml into scope. *)
+open Ast
+%}
+
+%token SELECT ALL DISTINCT FROM WHERE
+%token LEFT RIGHT FULL INNER OUTER
+%token CROSS NATURAL UNION JOIN
+%token GREATER_THAN_OPERATOR LESS_THAN_OPERATOR EQUALS_OPERATOR
+%token <string> IDENT
+%token COMMA DOT
+%token ASTERISK
+%token AS ON
+%token OR AND NOT
+%token EOF
+%start main
+%type <Ast.query> main
+
+%%
+
+main:
+  | select_stmt EOF { Query($1) }
+
+select_stmt :
+  | SELECT select_list table_expression { Select($2, $3) }
+  | SELECT set_identifier select_list table_expression { Select($3, $4) }
+
+set_identifier :
+  | ALL {}
+  | DISTINCT {}
+
+select_list :
+  | ASTERISK { [Asterisk] }
+  | select_sublist { List.rev $1 }
+
+(* Built in reverse (newest column first); select_list restores query order. *)
+select_sublist :
+  | IDENT { [Column($1)] }
+  | select_sublist COMMA IDENT { Column($3)::$1 }
+
+table_expression:
+  | from_clause { $1 }
+  | from_clause where_clause { $1 }
+
+from_clause :
+  | FROM table_reference_list { List.rev $2 }
+
+(* Built in reverse as well; from_clause restores query order. *)
+table_reference_list :
+  | table_reference { [$1] }
+  | table_reference_list COMMA table_reference { $3::$1 }
+
+table_reference :
+  | table_primary_or_joined_table { $1 }
+(* | table_primary_or_joined_table sample_clause { $1 } *)
+
+table_primary_or_joined_table:
+  (* | table_primary { Table($1) }*)
+  | table_primary { $1 }
+  | joined_table { $1 }
+
+table_primary :
+  | table_or_query_name { $1 }
+
+table_or_query_name:
+  | table_name { $1 }
+
+joined_table :
+  | cross_join { $1 }
+  | qualified_join { $1 }
+  | natural_join { $1 }
+  | union_join { $1 }
+
+cross_join:
+  | table_reference CROSS JOIN table_primary { Join($1, Cross, $4) }
+
+(* A bare JOIN is an inner join, as in standard SQL. The ON condition is parsed but dropped. *)
+qualified_join:
+  | table_reference JOIN table_reference join_specification { Join($1, Inner, $3) }
+  | table_reference join_type JOIN table_reference join_specification { Join($1, $2, $4) }
+
+join_specification:
+  | join_condition {}
+
+join_condition:
+  | ON search_condition {}
+
+(* The AST has no slot for the join type of a NATURAL join, so it is dropped. *)
+natural_join:
+  | table_reference NATURAL JOIN table_primary { Join($1, Natural, $4) }
+  | table_reference NATURAL join_type JOIN table_primary { Join($1, Natural, $5) }
+
+union_join:
+  | table_reference UNION JOIN table_primary { Join($1,Union, $4) }
+
+table_name :
+  | IDENT { Table($1) }
+
+join_type:
+  | INNER { Inner }
+  | outer_join_type { $1 }
+  | outer_join_type OUTER { $1 }
+
+outer_join_type:
+  | LEFT { Left }
+  | RIGHT { Right }
+  | FULL { Full }
+
+
+(* The WHERE condition is only validated; it is not stored in the AST yet. *)
+where_clause :
+  | WHERE search_condition { }
+
+search_condition:
+  | IDENT EQUALS_OPERATOR IDENT {}
+
+boolean_value_expression:
+  | boolean_term {}
+  | boolean_value_expression OR boolean_term {}
+
+boolean_term:
+  | boolean_factor {}
+  | boolean_term AND boolean_factor {}
+
+boolean_factor:
+  | boolean_test {}
+  | NOT boolean_test {}
+
+boolean_test:
+  | boolean_primary {}
+
+boolean_primary :
+  | predicate {}
+  | boolean_predicand {}
+
+predicate :
+  | comparison_predicate {}
+
+comparison_predicate :
+  | row_value_predicand comparison_predicate_part2 {}
+
+comparison_predicate_part2:
+  | comp_op row_value_predicand {}
+
+comp_op :
+  | EQUALS_OPERATOR {}
+  | not_equals_operator {}
+  | LESS_THAN_OPERATOR {}
+  | GREATER_THAN_OPERATOR {}
+  | less_than_or_equals_operator {}
+  | greater_than_or_equals_operator {}
+
+not_equals_operator :
+  | LESS_THAN_OPERATOR GREATER_THAN_OPERATOR {}
+
+less_than_or_equals_operator:
+  | LESS_THAN_OPERATOR EQUALS_OPERATOR {}
+
+greater_than_or_equals_operator:
+  | GREATER_THAN_OPERATOR EQUALS_OPERATOR {}
+
+row_value_predicand:
+  | row_value_special_case {}
+
+row_value_special_case :
+  | nonparenthesized_value_expression_primary {}
+
+nonparenthesized_value_expression_primary:
+  | column_reference {}
+
+column_reference:
+  | basic_identifier_chain {}
+
+basic_identifier_chain:
+  | identifier_chain {}
+
+identifier_chain:
+  | IDENT {}
+  | identifier_chain DOT IDENT {}
+
+boolean_predicand:
+  | nonparenthesized_value_expression_primary {}
diff --git a/test/SQL_parser.ml b/test/SQL_parser.ml
new file mode 100644
index 0000000..a09f19c
--- /dev/null
+++ b/test/SQL_parser.ml
@@ -0,0 +1,14 @@
+open Ast
+
+(* Parses [query] with the generated lexer and parser and returns its AST. *)
+let parse query =
+  let lexbuf = Lexing.from_string query in
+  Parser.main Lexer.read_token lexbuf
+
+let () =
+  assert(parse "SELECT ab FROM b1" = Query(Select([Column("ab")], [Table "b1"])));
+  assert(parse "SELECT aa, ab FROM b1, b2" = Query(Select([Column("aa"); Column("ab")], [Table "b1"; Table "b2"])));
+  assert(parse "SELECT * FROM b1" = Query(Select([Asterisk], [Table "b1"])));
+  assert(parse "SELECT * FROM t1 CROSS JOIN t2" = Query(Select([Asterisk], [Join(Table("t1"), Cross, Table("t2"))])));
+  assert(parse "SELECT * FROM t1 JOIN t2 ON a = b" = Query(Select([Asterisk], [Join(Table("t1"), Inner, Table("t2"))])));
+  assert(parse "SELECT * FROM t1 LEFT OUTER JOIN t2 ON a = b" = Query(Select([Asterisk], [Join(Table("t1"), Left, Table("t2"))])));
diff --git a/test/dune b/test/dune
new file mode 100644
index 0000000..20beef1
--- /dev/null
+++ b/test/dune
@@ -0,0 +1,3 @@
+(test
+ (name SQL_parser)
+ (libraries parser lexer ast))