This commit is contained in:
Simon Petit 2024-11-25 20:30:18 +01:00
commit 8eb8d86d97
11 changed files with 245 additions and 43 deletions

View File

@ -9,6 +9,11 @@ let alphanumeric = (alpha|digit)
rule read_token = parse rule read_token = parse
| "SELECT" { SELECT } | "SELECT" { SELECT }
| "AVG" { AVG }
| "MAX" { MAX }
| "MIN" { MIN }
| "SUM" { SUM }
| "COUNT" { COUNT }
| "DISTINCT" { DISTINCT } | "DISTINCT" { DISTINCT }
| "FROM" { FROM } | "FROM" { FROM }
| "LEFT" { LEFT } | "LEFT" { LEFT }
@ -17,11 +22,15 @@ rule read_token = parse
| "UNION" { UNION } | "UNION" { UNION }
| "JOIN" { JOIN } | "JOIN" { JOIN }
| "ON" { ON } | "ON" { ON }
| "GROUP" { GROUP }
| "BY" { BY }
| "*" { ASTERISK } | "*" { ASTERISK }
| "." { DOT } | "." { DOT }
| "=" { EQUALS_OPERATOR } | "=" { EQUALS_OPERATOR }
| "(" { LEFT_PAREN }
| ")" { RIGHT_PAREN }
| "," { COMMA }
| whitespace { read_token lexbuf } | whitespace { read_token lexbuf }
| "WHERE" { WHERE } | "WHERE" { WHERE }
| alpha alphanumeric* as ident { IDENT ident } | alpha alphanumeric* as ident { IDENT ident }
| "," { COMMA }
| eof { EOF } | eof { EOF }

View File

@ -6,7 +6,7 @@ and column =
| Column of string | Column of string
and table = and table =
| Table of string | Table of string
| Join of table * join_type * table | Join of table * join_type * table * condition option
and join_type = and join_type =
| Inner | Inner
| Left | Left
@ -15,4 +15,19 @@ and join_type =
| Cross | Cross
| Union | Union
| Natural | Natural
and condition =
| Condition of string * comparison
| And of condition * condition
| Or of condition * condition
| Not of condition
and comparison =
| Comparison of operator * string
and operator =
| Equals
| NotEquals
| LessThan
| GreaterThan
| LessEquals
| GreaterEquals
and search_condition =
| Search of string

11
lib/csv.ml Normal file
View File

@ -0,0 +1,11 @@
(* [load path delimiter header filter] opens the file at [path] for reading and
   starts splitting its lines into columns on [delimiter] (a [char], since it
   is handed to [String.split_on_char]).

   NOTE(review): this definition is unfinished and does not parse as written:
   - the [try] below has no matching [with];
   - [let line = input_line ic] and [let rows = []] are missing their [in];
   - [read_line] in Stdlib has type [unit -> string] and reads from stdin, so
     [read_line ic] will not type-check; [input_line ic] is presumably what
     was meant (TODO confirm);
   - the last [let ... in] has no body, and [aux] is never called.
   [header], [filter], [acc] and [rows] are not used in the visible lines, so
   their intended meaning cannot be told from here.  The channel [ic] is not
   closed anywhere in the visible lines. *)
let load path delimiter header filter =
let ic = open_in path in
(* [aux acc] is presumably the line-reading loop, with [acc] the rows
   collected so far; TODO confirm once the body is completed. *)
let rec aux acc =
try
let line = input_line ic
let rows = []
let line = read_line ic in
(* One string per delimiter-separated field of [line]. *)
let columns = String.split_on_char delimiter line in

View File

@ -1,3 +1,8 @@
(library (library
(modules ast) (modules ast)
(name ast)) (name ast))
(library
(modules logical_plan)
(libraries ast)
(name logical_plan))

0
lib/files.ml Normal file
View File

27
lib/logical_plan.ml Normal file
View File

@ -0,0 +1,27 @@
(* Logical query plan: the tree that [generate_logical_plan] builds from the
   parsed query's table list. *)
type logical_plan =
(* Leaf node reading a table; the payload is the table name. *)
| Scan of string (* Table name *)
(* Filter node, currently disabled (kept commented out). *)
(*| Filter of logical_plan * condition*)
(* Join node: left input plan, join type taken from the AST, right input plan.
   No join condition is stored here. *)
| Join of logical_plan * Ast.join_type * logical_plan
(** [generate_from_clause tables] lowers the table list of a query into a
    logical plan.

    Only a one-element list is handled: a plain table becomes a [Scan] of its
    name, and an AST join becomes a [Join] whose two sides are lowered
    recursively. The join condition carried by the AST node is ignored.

    @raise Failure ["Unsupported table structure"] for any other list shape
    (empty list, several entries, ...). *)
let rec generate_from_clause tables =
  match tables with
  | [ Ast.Table name ] -> Scan name
  | [ Ast.Join (left, j_type, right, _) ] ->
    (* Each side is wrapped back into a singleton list so the same entry
       point handles nested joins. *)
    let left_plan = generate_from_clause [ left ] in
    let right_plan = generate_from_clause [ right ] in
    Join (left_plan, j_type, right_plan)
  | _ -> failwith "Unsupported table structure"

(** [generate_logical_plan ast] builds the logical plan of a parsed query.
    The select list is ignored; the plan is derived from the table list alone.

    @raise Failure when the table list is not supported by
    [generate_from_clause]. *)
let generate_logical_plan ast =
  match ast with
  | Ast.Query (Ast.Select (_, tables)) -> generate_from_clause tables
(** [evaluate_plan plan] is the entry point for executing a logical plan.

    Execution is not implemented yet, so every plan raises. The [Scan] arm
    previously had no expression after its arrow, which made the whole module
    fail to parse; it now fails explicitly until table scanning is written.

    @raise Failure for every plan: scans are not implemented and joins are
    unsupported. *)
let evaluate_plan plan =
  match plan with
  | Scan _table ->
    (* Placeholder: reading the table named [_table] still has to be done. *)
    failwith "Scan evaluation is not implemented"
  | Join _ -> failwith "Unsupported plan"

0
lib/physical_plan.ml Normal file
View File

View File

@ -7,10 +7,12 @@ open Ast
%token LEFT RIGHT FULL INNER OUTER %token LEFT RIGHT FULL INNER OUTER
%token CROSS NATURAL UNION JOIN %token CROSS NATURAL UNION JOIN
%token GREATER_THAN_OPERATOR LESS_THAN_OPERATOR EQUALS_OPERATOR %token GREATER_THAN_OPERATOR LESS_THAN_OPERATOR EQUALS_OPERATOR
%token MAX MIN SUM COUNT AVG
%token <string> IDENT %token <string> IDENT
%token COMMA DOT %token COMMA DOT
%token LEFT_PAREN RIGHT_PAREN
%token ASTERISK %token ASTERISK
%token AS ON %token AS ON GROUP BY FILTER
%token OR AND NOT %token OR AND NOT
%token EOF %token EOF
%start main %start main
@ -23,9 +25,9 @@ main:
select_stmt : select_stmt :
| SELECT select_list table_expression { Select($2, $3) } | SELECT select_list table_expression { Select($2, $3) }
| SELECT set_identifier select_list table_expression { Select($3, $4) } | SELECT set_quantifier select_list table_expression { Select($3, $4) }
set_identifier : set_quantifier :
| ALL {} | ALL {}
| DISTINCT {} | DISTINCT {}
@ -37,6 +39,33 @@ select_sublist :
| IDENT { [Column($1)] } | IDENT { [Column($1)] }
| select_sublist COMMA IDENT { Column($3)::$1 } | select_sublist COMMA IDENT { Column($3)::$1 }
derived_column:
| value_expression {}
| value_expression as_clause {}
as_clause :
| AS column_name {}
| column_name {}
column_name :
| IDENT {}
value_expression:
| common_value_expression {}
common_value_expression:
| reference_value_expression {}
reference_value_expression:
| value_expression_primary {}
value_expression_primary:
| parenthesized_value_expression {}
| nonparenthesized_value_expression_primary {}
parenthesized_value_expression:
| LEFT_PAREN value_expression RIGHT_PAREN {}
table_expression: table_expression:
| from_clause { $1 } | from_clause { $1 }
| from_clause where_clause { $1 } | from_clause where_clause { $1 }
@ -53,7 +82,6 @@ table_reference :
(* | table_primary_or_joined_table sample_clause { $1 } *) (* | table_primary_or_joined_table sample_clause { $1 } *)
table_primary_or_joined_table: table_primary_or_joined_table:
(* | table_primary { Table($1) }*)
| table_primary { $1 } | table_primary { $1 }
| joined_table { $1 } | joined_table { $1 }
@ -70,24 +98,24 @@ joined_table :
| union_join { $1 } | union_join { $1 }
cross_join: cross_join:
| table_reference CROSS JOIN table_primary { Join($1, Cross, $4) } | table_reference CROSS JOIN table_primary { Join($1, Cross, $4, None) }
qualified_join: qualified_join:
| table_reference JOIN table_reference join_specification { Join($1, Left, $3) } | table_reference JOIN table_reference join_specification { Join($1, Left, $3, $4) }
| table_reference join_type JOIN table_reference join_specification { Join($1, $2, $4) } | table_reference join_type JOIN table_reference join_specification { Join($1, $2, $4, $5) }
join_specification: join_specification:
| join_condition {} | join_condition { $1 }
join_condition: join_condition:
| ON search_condition {} | ON search_condition { Some($2) }
natural_join: natural_join:
| table_reference NATURAL JOIN table_primary { Join($1, Natural, $4) } | table_reference NATURAL JOIN table_primary { Join($1, Natural, $4, None) }
| table_reference NATURAL join_type JOIN table_primary { Join($1, Natural, $5) } | table_reference NATURAL join_type JOIN table_primary { Join($1, Natural, $5, None) }
union_join: union_join:
| table_reference UNION JOIN table_primary { Join($1,Union, $4) } | table_reference UNION JOIN table_primary { Join($1, Union, $4, None) }
table_name : table_name :
| IDENT { Table($1) } | IDENT { Table($1) }
@ -107,43 +135,44 @@ where_clause :
| WHERE search_condition { } | WHERE search_condition { }
search_condition: search_condition:
| boolean_value_expression {} (*| IDENT EQUALS_OPERATOR IDENT {}*)
| boolean_value_expression { $1 }
boolean_value_expression: boolean_value_expression:
| boolean_term {} | boolean_term { $1 }
| boolean_value_expression OR boolean_term {} | boolean_value_expression OR boolean_term { Or($1, $3) }
boolean_term: boolean_term:
| boolean_factor {} | boolean_factor { $1 }
| boolean_term AND boolean_factor {} | boolean_term AND boolean_factor { And($1, $3) }
boolean_factor: boolean_factor:
| boolean_test {} | boolean_test { $1 }
| NOT boolean_test {} | NOT boolean_test { Not($2) }
boolean_test: boolean_test:
| boolean_primary {} | boolean_primary { $1 }
boolean_primary : boolean_primary :
| predicate {} | predicate { $1 }
| boolean_predicand {} (*| boolean_predicand {}*)
predicate : predicate :
| comparison_predicate {} | comparison_predicate { $1 }
comparison_predicate : comparison_predicate :
| row_value_predicand comparison_predicate_part2 {} | row_value_predicand comparison_predicate_part2 { Condition($1, $2) }
comparison_predicate_part2: comparison_predicate_part2:
| comp_op row_value_predicand {} | comp_op row_value_predicand { Comparison($1, $2) }
comp_op : comp_op :
| EQUALS_OPERATOR {} | EQUALS_OPERATOR { Equals }
| not_equals_operator {} | not_equals_operator { NotEquals }
| LESS_THAN_OPERATOR {} | LESS_THAN_OPERATOR { LessThan }
| GREATER_THAN_OPERATOR {} | GREATER_THAN_OPERATOR { GreaterThan }
| less_than_or_equals_operator {} | less_than_or_equals_operator { LessEquals }
| greater_than_or_equals_operator {} | greater_than_or_equals_operator { GreaterEquals }
not_equals_operator : not_equals_operator :
| LESS_THAN_OPERATOR GREATER_THAN_OPERATOR {} | LESS_THAN_OPERATOR GREATER_THAN_OPERATOR {}
@ -155,23 +184,68 @@ greater_than_or_equals_operator:
| GREATER_THAN_OPERATOR EQUALS_OPERATOR {} | GREATER_THAN_OPERATOR EQUALS_OPERATOR {}
row_value_predicand: row_value_predicand:
| row_value_special_case {} | row_value_special_case { $1 }
row_value_special_case : row_value_special_case :
| nonparenthesized_value_expression_primary {} | nonparenthesized_value_expression_primary { $1 }
nonparenthesized_value_expression_primary: nonparenthesized_value_expression_primary:
| column_reference {} | column_reference { $1 }
(* | set_function_specification { $1 }*)
set_function_specification:
| aggregate_function { $1 }
(* Aggregate call: either COUNT applied to `*`, or a general set function,
   each optionally followed by a filter clause (whose value is dropped).
   NOTE(review): the COUNT arms yield [Asterisk], while the other two arms
   forward the value of general_set_function, which is the value of a
   value_expression, and that rule's action is empty.  The arms therefore do
   not appear to share one semantic type; confirm and settle on a single AST
   node before wiring this rule into
   nonparenthesized_value_expression_primary. *)
aggregate_function:
| COUNT LEFT_PAREN ASTERISK RIGHT_PAREN { Asterisk }
| COUNT LEFT_PAREN ASTERISK RIGHT_PAREN filter_clause { Asterisk }
| general_set_function { $1 }
| general_set_function filter_clause { $1 }
general_set_function:
| set_function_type LEFT_PAREN value_expression RIGHT_PAREN { $3 }
| set_function_type LEFT_PAREN set_quantifier value_expression RIGHT_PAREN { $4 }
set_function_type:
| computationnal_operation {}
computationnal_operation:
| AVG {}
| MAX {}
| MIN {}
| SUM {}
| COUNT {}
filter_clause :
| FILTER LEFT_PAREN WHERE search_condition RIGHT_PAREN {}
column_reference: column_reference:
| basic_identifier_chain {} | basic_identifier_chain { $1 }
basic_identifier_chain: basic_identifier_chain:
| identifier_chain {} | identifier_chain { $1 }
identifier_chain: identifier_chain:
| IDENT {} | IDENT { $1 }
| identifier_chain DOT IDENT {} (*| identifier_chain DOT IDENT {}*)
boolean_predicand: boolean_predicand:
| nonparenthesized_value_expression_primary {} | nonparenthesized_value_expression_primary {}
group_by_clause:
| GROUP BY grouping_element_list {}
| GROUP BY set_quantifier grouping_element_list {}
(* Comma-separated, non-empty list of grouping elements.
   The recursive case is left-recursive with a single grouping_element on the
   right.  Recursing on the list on both sides of the comma makes the rule
   ambiguous (`a, b, c` can be grouped two ways), which Menhir reports as a
   conflict. *)
grouping_element_list :
| grouping_element {}
| grouping_element_list COMMA grouping_element {}
grouping_element:
| ordinary_grouping_set {}
ordinary_grouping_set :
| grouping_column_reference {}
grouping_column_reference:
| column_reference {}
(*| column_reference collate_clause {}*)

View File

@ -6,6 +6,49 @@ let parse query =
let () = let () =
assert(parse "SELECT ab FROM b1" = Query(Select([Column("ab")], [Table "b1"]))); assert(parse "SELECT ab FROM b1" = Query(Select([Column("ab")], [Table "b1"])));
assert(parse "SELECT ab FROM test" = Query(Select([Column("ab")], [Table "test"])));
assert(parse "SELECT * FROM b1" = Query(Select([Asterisk], [Table "b1"]))); assert(parse "SELECT * FROM b1" = Query(Select([Asterisk], [Table "b1"])));
assert(parse "SELECT * FROM t1 CROSS JOIN t2" = Query(Select([Asterisk], [Join(Table("t1"), Cross, Table("t2"))]))); assert(parse "SELECT * FROM t1 CROSS JOIN t2" = Query(Select([Asterisk], [Join(Table("t1"), Cross, Table("t2"), None)])));
assert(parse "SELECT * FROM t1 JOIN t2 ON a = b" = Query(Select([Asterisk], [Join(Table("t1"), Left, Table("t2"))]))); assert(parse "SELECT * FROM t1 JOIN t2 ON a = b" = Query(
Select([Asterisk], [
Join(
Table("t1"),
Left,
Table("t2"),
Some(
Condition(
"a",
Comparison(Equals, "b")
)
)
)
]
)
));
assert(parse "SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON c = d" = Query(
Select([Asterisk], [
Join(
Join(
Table("t1"),
Left,
Table("t2"),
Some(
Condition(
"a",
Comparison(Equals, "b")
)
)
),
Left,
Table("t3"),
Some(
Condition(
"c",
Comparison(Equals, "d")
)
)
)
]
)
)
);

View File

@ -1,3 +1,7 @@
(test (test
(name SQL_parser) (name SQL_parser)
(libraries parser lexer ast)) (libraries parser lexer ast))
(test
(name logical_plan_test)
(libraries ast logical_plan))

14
test/logical_plan_test.ml Normal file
View File

@ -0,0 +1,14 @@
open Ast
(* Checks the AST-to-logical-plan lowering on a single table and on a cross
   join of two tables. *)
let () =
  (* Builds the query AST from a select list and a table list, then lowers it. *)
  let plan_of select_list tables =
    Logical_plan.generate_logical_plan (Query (Select (select_list, tables)))
  in
  (* A single table lowers to a bare scan of that table. *)
  let scan_plan = plan_of [ Column "ab" ] [ Table "b1" ] in
  assert (scan_plan = Logical_plan.Scan "b1");
  (* A cross join lowers to a join node over two scans. *)
  let cross_join = Join (Table "t1", Cross, Table "t2", None) in
  let join_plan = plan_of [ Asterisk ] [ cross_join ] in
  let expected_join =
    Logical_plan.Join (Logical_plan.Scan "t1", Cross, Logical_plan.Scan "t2")
  in
  assert (join_plan = expected_join)