# Patch: aggregate/GROUP BY tokens and grammar, plus a first logical-plan module.
#
# Review changes folded into this patch:
#   * lib/logical_plan.ml: evaluate_plan had an empty match arm (syntax error);
#     AST constructors are now qualified because Ast is not opened.
#   * lib/files.ml: load_csv had no body (syntax error); now an explicit stub.
#   * parser/parser.mly: grouping_element_list was ambiguous (list COMMA list);
#     it is now left-recursive. Fixed the "computationnal" typo.
#   * lexer/lexer.mll: added the FILTER keyword that the parser declares.
#   * Dropped the Vim swap files (lib/.files.ml.swp, lib/.logical_plan.ml.swp).
#
# NOTE(review): indentation of context lines is reconstructed; if hunks fail to
# match, apply with `git apply --ignore-whitespace`. The `index` post-image
# hashes are carried over from the original patch and do not reflect the edits.
# NOTE(review): `%token IDENT` / `%start main` are shown without type
# annotations; the actions use IDENT's value as a string, so the real file
# presumably declares `%token <string> IDENT` -- confirm against the tree.

diff --git a/lexer/lexer.mll b/lexer/lexer.mll
index 61505db..4116cb7 100644
--- a/lexer/lexer.mll
+++ b/lexer/lexer.mll
@@ -9,6 +9,11 @@ let alphanumeric = (alpha|digit)
 
 rule read_token = parse
   | "SELECT" { SELECT }
+  | "AVG" { AVG }
+  | "MAX" { MAX }
+  | "MIN" { MIN }
+  | "SUM" { SUM }
+  | "COUNT" { COUNT }
   | "DISTINCT" { DISTINCT }
   | "FROM" { FROM }
   | "LEFT" { LEFT }
@@ -17,11 +22,16 @@ rule read_token = parse
   | "UNION" { UNION }
   | "JOIN" { JOIN }
   | "ON" { ON }
+  | "GROUP" { GROUP }
+  | "BY" { BY }
+  | "FILTER" { FILTER }
   | "*" { ASTERISK }
   | "." { DOT }
   | "=" { EQUALS_OPERATOR }
+  | "(" { LEFT_PAREN }
+  | ")" { RIGHT_PAREN }
+  | "," { COMMA }
   | whitespace { read_token lexbuf }
   | "WHERE" { WHERE }
   | alpha alphanumeric* as ident { IDENT ident }
-  | "," { COMMA }
   | eof { EOF }
diff --git a/lib/dune b/lib/dune
index 84b7fb2..699c6d1 100644
--- a/lib/dune
+++ b/lib/dune
@@ -1,3 +1,8 @@
 (library
  (modules ast)
  (name ast))
+
+(library
+ (modules logical_plan)
+ (libraries ast)
+ (name logical_plan))
diff --git a/lib/files.ml b/lib/files.ml
new file mode 100644
index 0000000..10a1994
--- /dev/null
+++ b/lib/files.ml
@@ -0,0 +1,3 @@
+(* Placeholder for the CSV loader; always raises Failure until implemented. *)
+let load_csv file_path =
+  failwith (Printf.sprintf "load_csv %S: not implemented" file_path)
diff --git a/lib/logical_plan.ml b/lib/logical_plan.ml
new file mode 100644
index 0000000..af0ea1b
--- /dev/null
+++ b/lib/logical_plan.ml
@@ -0,0 +1,28 @@
+(* Logical query plan: a tree of relational operators derived from the AST. *)
+type logical_plan =
+  | Scan of string (* Table name *)
+  (*| Filter of logical_plan * condition*)
+  | Join of logical_plan * Ast.join_type * logical_plan
+
+(* Builds the plan for the table list of a query; the select list is
+   ignored for now. Raises Failure on unsupported table structures. *)
+let rec generate_logical_plan ast =
+  match ast with
+  | Ast.Query (Ast.Select (_, tables)) -> generate_from_clause tables
+
+(* A single table becomes a Scan; a single join becomes a Join of its
+   recursively planned operands (the fourth field of Ast.Join is ignored). *)
+and generate_from_clause tables =
+  match tables with
+  | [ Ast.Table name ] -> Scan name
+  | [ Ast.Join (left, j_type, right, _) ] ->
+    Join (generate_from_clause [ left ], j_type, generate_from_clause [ right ])
+  | _ -> failwith "Unsupported table structure"
+
+(* Placeholder: plan execution is not implemented yet, so every plan raises
+   Failure. Each constructor is listed so that new ones are flagged by the
+   exhaustiveness check. *)
+let evaluate_plan plan =
+  match plan with
+  | Scan table -> failwith ("Scan of table " ^ table ^ " is not implemented")
+  | Join _ -> failwith "Unsupported plan"
diff --git a/lib/physical_plan.ml b/lib/physical_plan.ml
new file mode 100644
index 0000000..e69de29
diff --git a/parser/parser.mly b/parser/parser.mly
index 184c70b..c72b6d4 100644
--- a/parser/parser.mly
+++ b/parser/parser.mly
@@ -7,10 +7,12 @@ open Ast
 %token LEFT RIGHT FULL INNER OUTER
 %token CROSS NATURAL UNION JOIN
 %token GREATER_THAN_OPERATOR LESS_THAN_OPERATOR EQUALS_OPERATOR
+%token MAX MIN SUM COUNT AVG
 %token IDENT
 %token COMMA DOT
+%token LEFT_PAREN RIGHT_PAREN
 %token ASTERISK
-%token AS ON
+%token AS ON GROUP BY FILTER
 %token OR AND NOT
 %token EOF
 %start main
@@ -23,9 +25,9 @@ main:
 
 select_stmt :
   | SELECT select_list table_expression { Select($2, $3) }
-  | SELECT set_identifier select_list table_expression { Select($3, $4) }
+  | SELECT set_quantifier select_list table_expression { Select($3, $4) }
 
-set_identifier :
+set_quantifier :
   | ALL {}
   | DISTINCT {}
 
@@ -37,6 +39,33 @@ select_sublist :
   | IDENT { [Column($1)] }
   | select_sublist COMMA IDENT { Column($3)::$1 }
 
+derived_column:
+  | value_expression {}
+  | value_expression as_clause {}
+
+as_clause :
+  | AS column_name {}
+  | column_name {}
+
+column_name :
+  | IDENT {}
+
+value_expression:
+  | common_value_expression {}
+
+common_value_expression:
+  | reference_value_expression {}
+
+reference_value_expression:
+  | value_expression_primary {}
+
+value_expression_primary:
+  | parenthesized_value_expression {}
+  | nonparenthesized_value_expression_primary {}
+
+parenthesized_value_expression:
+  | LEFT_PAREN value_expression RIGHT_PAREN {}
+
 table_expression:
   | from_clause { $1 }
   | from_clause where_clause { $1 }
@@ -162,6 +191,33 @@ row_value_special_case :
 
 nonparenthesized_value_expression_primary:
   | column_reference { $1 }
+(* | set_function_specification { $1 }*)
+
+set_function_specification:
+  | aggregate_function { $1 }
+
+aggregate_function:
+  | COUNT LEFT_PAREN ASTERISK RIGHT_PAREN { Asterisk }
+  | COUNT LEFT_PAREN ASTERISK RIGHT_PAREN filter_clause { Asterisk }
+  | general_set_function { $1 }
+  | general_set_function filter_clause { $1 }
+
+general_set_function:
+  | set_function_type LEFT_PAREN value_expression RIGHT_PAREN { $3 }
+  | set_function_type LEFT_PAREN set_quantifier value_expression RIGHT_PAREN { $4 }
+
+set_function_type:
+  | computational_operation {}
+
+computational_operation:
+  | AVG {}
+  | MAX {}
+  | MIN {}
+  | SUM {}
+  | COUNT {}
+
+filter_clause :
+  | FILTER LEFT_PAREN WHERE search_condition RIGHT_PAREN {}
 
 column_reference:
   | basic_identifier_chain { $1 }
@@ -175,3 +231,21 @@ identifier_chain:
 
 boolean_predicand:
   | nonparenthesized_value_expression_primary {}
+
+group_by_clause:
+  | GROUP BY grouping_element_list {}
+  | GROUP BY set_quantifier grouping_element_list {}
+
+grouping_element_list :
+  | grouping_element {}
+  | grouping_element_list COMMA grouping_element {}
+
+grouping_element:
+  | ordinary_grouping_set {}
+
+ordinary_grouping_set :
+  | grouping_column_reference {}
+
+grouping_column_reference:
+  | column_reference {}
+  (*| column_reference collate_clause {}*)
diff --git a/test/SQL_parser.ml b/test/SQL_parser.ml
index eb1d4c3..a2afa92 100644
--- a/test/SQL_parser.ml
+++ b/test/SQL_parser.ml
@@ -6,6 +6,7 @@ let parse query =
 
 let () =
   assert(parse "SELECT ab FROM b1" = Query(Select([Column("ab")], [Table "b1"])));
+  assert(parse "SELECT ab FROM test" = Query(Select([Column("ab")], [Table "test"])));
   assert(parse "SELECT * FROM b1" = Query(Select([Asterisk], [Table "b1"])));
   assert(parse "SELECT * FROM t1 CROSS JOIN t2" = Query(Select([Asterisk], [Join(Table("t1"), Cross, Table("t2"), None)])));
   assert(parse "SELECT * FROM t1 JOIN t2 ON a = b" = Query(
diff --git a/test/dune b/test/dune
index 20beef1..96751a9 100644
--- a/test/dune
+++ b/test/dune
@@ -1,3 +1,7 @@
 (test
  (name SQL_parser)
  (libraries parser lexer ast))
+
+(test
+ (name logical_plan_test)
+ (libraries ast logical_plan))
diff --git a/test/logical_plan_test.ml b/test/logical_plan_test.ml
new file mode 100644
index 0000000..9e1ffea
--- /dev/null
+++ b/test/logical_plan_test.ml
@@ -0,0 +1,14 @@
+open Ast
+
+let () =
+  let ast1 = Query(Select([Column("ab")], [Table "b1"])) in
+  assert( Logical_plan.generate_logical_plan ast1 = Logical_plan.Scan("b1"));
+  let ast2 = Query(Select([Asterisk], [Join(Table("t1"), Cross, Table("t2"), None)])) in
+  assert(Logical_plan.generate_logical_plan ast2 =
+    Logical_plan.Join(
+      Logical_plan.Scan("t1"),
+      Cross,
+      Logical_plan.Scan("t2")
+    )
+  );
+