diff --git a/lexer/lexer.mll b/lexer/lexer.mll index 61505db..4116cb7 100644 --- a/lexer/lexer.mll +++ b/lexer/lexer.mll @@ -9,6 +9,11 @@ let alphanumeric = (alpha|digit) rule read_token = parse | "SELECT" { SELECT } + | "AVG" { AVG } + | "MAX" { MAX } + | "MIN" { MIN } + | "SUM" { SUM } + | "COUNT" { COUNT } | "DISTINCT" { DISTINCT } | "FROM" { FROM } | "LEFT" { LEFT } @@ -17,11 +22,15 @@ rule read_token = parse | "UNION" { UNION } | "JOIN" { JOIN } | "ON" { ON } + | "GROUP" { GROUP } + | "BY" { BY } | "*" { ASTERISK } | "." { DOT } | "=" { EQUALS_OPERATOR } + | "(" { LEFT_PAREN } + | ")" { RIGHT_PAREN } + | "," { COMMA } | whitespace { read_token lexbuf } | "WHERE" { WHERE } | alpha alphanumeric* as ident { IDENT ident } - | "," { COMMA } | eof { EOF } diff --git a/lib/ast.ml b/lib/ast.ml index 3f37dc3..5debe2f 100644 --- a/lib/ast.ml +++ b/lib/ast.ml @@ -6,7 +6,7 @@ and column = | Column of string and table = | Table of string - | Join of table * join_type * table + | Join of table * join_type * table * condition option and join_type = | Inner | Left @@ -15,4 +15,19 @@ and join_type = | Cross | Union | Natural - +and condition = + | Condition of string * comparison + | And of condition * condition + | Or of condition * condition + | Not of condition +and comparison = + | Comparison of operator * string +and operator = + | Equals + | NotEquals + | LessThan + | GreaterThan + | LessEquals + | GreaterEquals +and search_condition = + | Search of string diff --git a/lib/csv.ml b/lib/csv.ml new file mode 100644 index 0000000..1e512ee --- /dev/null +++ b/lib/csv.ml @@ -0,0 +1,11 @@ +let load path delimiter header filter = + let ic = open_in path in + let rec aux acc = + try + let line = input_line ic + + let rows = [] + let line = read_line ic in + let columns = String.split_on_char delimiter line in + + diff --git a/lib/dune b/lib/dune index 84b7fb2..699c6d1 100644 --- a/lib/dune +++ b/lib/dune @@ -1,3 +1,8 @@ (library (modules ast) (name ast)) + +(library + (modules logical_plan) + (libraries ast) + (name logical_plan)) diff --git a/lib/files.ml b/lib/files.ml new file mode 100644 index 0000000..e69de29 diff --git a/lib/logical_plan.ml b/lib/logical_plan.ml new file mode 100644 index 0000000..af0ea1b --- /dev/null +++ b/lib/logical_plan.ml @@ -0,0 +1,27 @@ +type logical_plan = + | Scan of string (* Table name *) + (*| Filter of logical_plan * condition*) + | Join of logical_plan * Ast.join_type * logical_plan + +let rec generate_logical_plan ast = + match ast with + | Ast.Query(Select(_, tables)) -> + let base_plan = generate_from_clause tables in + base_plan + +and generate_from_clause tables = + match tables with + | [Table(name)] -> Scan(name) + | [Ast.Join(left, j_type, right, _)] -> + Join( + generate_from_clause [left], + j_type, + generate_from_clause [right] + ) + | _ -> failwith "Unsupported table structure" + + +let evaluate_plan plan = + match plan with + | Scan(table) -> + | _ -> failwith "Unsupported plan" diff --git a/lib/physical_plan.ml b/lib/physical_plan.ml new file mode 100644 index 0000000..e69de29 diff --git a/parser/parser.mly b/parser/parser.mly index 6cf6930..c72b6d4 100644 --- a/parser/parser.mly +++ b/parser/parser.mly @@ -7,10 +7,12 @@ open Ast %token LEFT RIGHT FULL INNER OUTER %token CROSS NATURAL UNION JOIN %token GREATER_THAN_OPERATOR LESS_THAN_OPERATOR EQUALS_OPERATOR +%token MAX MIN SUM COUNT AVG %token IDENT %token COMMA DOT +%token LEFT_PAREN RIGHT_PAREN %token ASTERISK -%token AS ON +%token AS ON GROUP BY FILTER %token OR AND NOT %token EOF %start main @@ -23,9 +25,9 @@ main: select_stmt : | SELECT select_list table_expression { Select($2, $3) } - | SELECT set_identifier select_list table_expression { Select($3, $4) } + | SELECT set_quantifier select_list table_expression { Select($3, $4) } -set_identifier : +set_quantifier : | ALL {} | DISTINCT {} @@ -37,6 +39,33 @@ select_sublist : | IDENT { [Column($1)] } | select_sublist COMMA IDENT { Column($3)::$1 } +derived_column: + | value_expression {} + | value_expression as_clause {} + +as_clause : + | AS column_name {} + | column_name {} + +column_name : + | IDENT {} + +value_expression: + | common_value_expression {} + +common_value_expression: + | reference_value_expression {} + +reference_value_expression: + | value_expression_primary {} + +value_expression_primary: + | parenthesized_value_expression {} + | nonparenthesized_value_expression_primary {} + +parenthesized_value_expression: + | LEFT_PAREN value_expression RIGHT_PAREN {} + table_expression: | from_clause { $1 } | from_clause where_clause { $1 } @@ -53,7 +82,6 @@ table_reference : (* | table_primary_or_joined_table sample_clause { $1 } *) table_primary_or_joined_table: - (* | table_primary { Table($1) }*) | table_primary { $1 } | joined_table { $1 } @@ -70,24 +98,24 @@ joined_table : | union_join { $1 } cross_join: - | table_reference CROSS JOIN table_primary { Join($1, Cross, $4) } + | table_reference CROSS JOIN table_primary { Join($1, Cross, $4, None) } qualified_join: - | table_reference JOIN table_reference join_specification { Join($1, Left, $3) } - | table_reference join_type JOIN table_reference join_specification { Join($1, $2, $4) } + | table_reference JOIN table_reference join_specification { Join($1, Left, $3, $4) } + | table_reference join_type JOIN table_reference join_specification { Join($1, $2, $4, $5) } join_specification: - | join_condition {} + | join_condition { $1 } join_condition: - | ON search_condition {} + | ON search_condition { Some($2) } natural_join: - | table_reference NATURAL JOIN table_primary { Join($1, Natural, $4) } - | table_reference NATURAL join_type JOIN table_primary { Join($1, Natural, $5) } + | table_reference NATURAL JOIN table_primary { Join($1, Natural, $4, None) } + | table_reference NATURAL join_type JOIN table_primary { Join($1, Natural, $5, None) } union_join: - | table_reference UNION JOIN table_primary { Join($1,Union, $4) } + | table_reference UNION JOIN table_primary { Join($1, Union, $4, None) } table_name : | IDENT { Table($1) } @@ -107,43 +135,44 @@ where_clause : | WHERE search_condition { } search_condition: - | boolean_value_expression {} + (*| IDENT EQUALS_OPERATOR IDENT {}*) + | boolean_value_expression { $1 } boolean_value_expression: - | boolean_term {} - | boolean_value_expression OR boolean_term {} + | boolean_term { $1 } + | boolean_value_expression OR boolean_term { Or($1, $3) } boolean_term: - | boolean_factor {} - | boolean_term AND boolean_factor {} + | boolean_factor { $1 } + | boolean_term AND boolean_factor { And($1, $3) } boolean_factor: - | boolean_test {} - | NOT boolean_test {} + | boolean_test { $1 } + | NOT boolean_test { Not($2) } boolean_test: - | boolean_primary {} + | boolean_primary { $1 } boolean_primary : - | predicate {} - | boolean_predicand {} + | predicate { $1 } + (*| boolean_predicand {}*) predicate : - | comparison_predicate {} + | comparison_predicate { $1 } comparison_predicate : - | row_value_predicand comparison_predicate_part2 {} + | row_value_predicand comparison_predicate_part2 { Condition($1, $2) } comparison_predicate_part2: - | comp_op row_value_predicand {} + | comp_op row_value_predicand { Comparison($1, $2) } comp_op : - | EQUALS_OPERATOR {} - | not_equals_operator {} - | LESS_THAN_OPERATOR {} - | GREATER_THAN_OPERATOR {} - | less_than_or_equals_operator {} - | greater_than_or_equals_operator {} + | EQUALS_OPERATOR { Equals } + | not_equals_operator { NotEquals } + | LESS_THAN_OPERATOR { LessThan } + | GREATER_THAN_OPERATOR { GreaterThan } + | less_than_or_equals_operator { LessEquals } + | greater_than_or_equals_operator { GreaterEquals } not_equals_operator : | LESS_THAN_OPERATOR GREATER_THAN_OPERATOR {} @@ -155,23 +184,68 @@ greater_than_or_equals_operator: | GREATER_THAN_OPERATOR EQUALS_OPERATOR {} row_value_predicand: - | row_value_special_case {} + | row_value_special_case { $1 } row_value_special_case : - | nonparenthesized_value_expression_primary {} + | nonparenthesized_value_expression_primary { $1 } nonparenthesized_value_expression_primary: - | column_reference {} + | column_reference { $1 } +(* | set_function_specification { $1 }*) + +set_function_specification: + | aggregate_function { $1 } + +aggregate_function: + | COUNT LEFT_PAREN ASTERISK RIGHT_PAREN { Asterisk } + | COUNT LEFT_PAREN ASTERISK RIGHT_PAREN filter_clause { Asterisk } + | general_set_function { $1 } + | general_set_function filter_clause { $1 } + +general_set_function: + | set_function_type LEFT_PAREN value_expression RIGHT_PAREN { $3 } + | set_function_type LEFT_PAREN set_quantifier value_expression RIGHT_PAREN { $4 } + +set_function_type: + | computationnal_operation {} + +computationnal_operation: + | AVG {} + | MAX {} + | MIN {} + | SUM {} + | COUNT {} + +filter_clause : + | FILTER LEFT_PAREN WHERE search_condition RIGHT_PAREN {} column_reference: - | basic_identifier_chain {} + | basic_identifier_chain { $1 } basic_identifier_chain: - | identifier_chain {} + | identifier_chain { $1 } identifier_chain: - | IDENT {} - | identifier_chain DOT IDENT {} + | IDENT { $1 } + (*| identifier_chain DOT IDENT {}*) boolean_predicand: | nonparenthesized_value_expression_primary {} + +group_by_clause: + | GROUP BY grouping_element_list {} + | GROUP BY set_quantifier grouping_element_list {} + +grouping_element_list : + | grouping_element {} + | grouping_element_list COMMA grouping_element_list {} + +grouping_element: + | ordinary_grouping_set {} + +ordinary_grouping_set : + | grouping_column_reference {} + +grouping_column_reference: + | column_reference {} + (*| column_reference collate_clause {}*) diff --git a/test/SQL_parser.ml b/test/SQL_parser.ml index a09f19c..a2afa92 100644 --- a/test/SQL_parser.ml +++ b/test/SQL_parser.ml @@ -6,6 +6,49 @@ let parse query = let () = assert(parse "SELECT ab FROM b1" = Query(Select([Column("ab")], [Table "b1"]))); + assert(parse "SELECT ab FROM test" = Query(Select([Column("ab")], [Table "test"]))); assert(parse "SELECT * FROM b1" = Query(Select([Asterisk], [Table "b1"]))); - assert(parse "SELECT * FROM t1 CROSS JOIN t2" = Query(Select([Asterisk], [Join(Table("t1"), Cross, Table("t2"))]))); - assert(parse "SELECT * FROM t1 JOIN t2 ON a = b" = Query(Select([Asterisk], [Join(Table("t1"), Left, Table("t2"))]))); + assert(parse "SELECT * FROM t1 CROSS JOIN t2" = Query(Select([Asterisk], [Join(Table("t1"), Cross, Table("t2"), None)]))); + assert(parse "SELECT * FROM t1 JOIN t2 ON a = b" = Query( + Select([Asterisk], [ + Join( + Table("t1"), + Left, + Table("t2"), + Some( + Condition( + "a", + Comparison(Equals, "b") + ) + ) + ) + ] + ) + )); + assert(parse "SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON c = d" = Query( + Select([Asterisk], [ + Join( + Join( + Table("t1"), + Left, + Table("t2"), + Some( + Condition( + "a", + Comparison(Equals, "b") + ) + ) + ), + Left, + Table("t3"), + Some( + Condition( + "c", + Comparison(Equals, "d") + ) + ) + ) + ] + ) + ) + ); diff --git a/test/dune b/test/dune index 20beef1..96751a9 100644 --- a/test/dune +++ b/test/dune @@ -1,3 +1,7 @@ (test (name SQL_parser) (libraries parser lexer ast)) + +(test + (name logical_plan_test) + (libraries ast logical_plan)) diff --git a/test/logical_plan_test.ml b/test/logical_plan_test.ml new file mode 100644 index 0000000..9e1ffea --- /dev/null +++ b/test/logical_plan_test.ml @@ -0,0 +1,14 @@ +open Ast + +let () = + let ast1 = Query(Select([Column("ab")], [Table "b1"])) in + assert( Logical_plan.generate_logical_plan ast1 = Logical_plan.Scan("b1")); + let ast2 = Query(Select([Asterisk], [Join(Table("t1"), Cross, Table("t2"), None)])) in + assert(Logical_plan.generate_logical_plan ast2 = + Logical_plan.Join( + Logical_plan.Scan("t1"), + Cross, + Logical_plan.Scan("t2") + ) + ); +