Compare commits

...

5 Commits

Author SHA1 Message Date
334cd6af9d enhancing tests 2024-12-10 18:01:33 +01:00
55a81cfe8a adding literals + aggregate + functions 2024-12-10 18:01:22 +01:00
319f2f9b43 adding functions 2024-12-10 18:00:47 +01:00
4d62ebe2a3 adding tokens 2024-12-10 18:00:26 +01:00
87959fae33 ignoring this for now 2024-12-10 18:00:07 +01:00
6 changed files with 420 additions and 186 deletions

View File

@ -1,9 +1,4 @@
open Ast
open Printf open Printf
let () = let () =
let input = "SELECT aa, ab FROM b1" in printf "hello"
let lexbuf = Lexing.from_string input in
match Parser.main Lexer.read_token lexbuf with
| Query(Select (Column("ab")::Column("aa")::[], [Table "b1"])) -> printf("ok")
| _ -> printf("not ok")

View File

@ -8,44 +8,48 @@ let alpha = ['a'-'z' 'A'-'Z' '_']
let alphanumeric = (alpha|digit) let alphanumeric = (alpha|digit)
rule read_token = parse rule read_token = parse
| "SELECT" { SELECT } | "SELECT" { SELECT }
| "AVG" { AVG } | "AS" { AS }
| "MAX" { MAX } | "DATE" { DATE }
| "MIN" { MIN } | "TIME" { TIME }
| "SUM" { SUM } | "AVG" { AVG }
| "COUNT" { COUNT } | "MAX" { MAX }
| "DISTINCT" { DISTINCT } | "MIN" { MIN }
| "FROM" { FROM } | "SUM" { SUM }
| "INNER" { INNER } | "COUNT" { COUNT }
| "LEFT" { LEFT } | "DISTINCT" { DISTINCT }
| "RIGHT" { RIGHT } | "FROM" { FROM }
| "CROSS" { CROSS } | "INNER" { INNER }
| "UNION" { UNION } | "LEFT" { LEFT }
| "NATURAL" { NATURAL } | "RIGHT" { RIGHT }
| "JOIN" { JOIN } | "CROSS" { CROSS }
| "ON" { ON } | "UNION" { UNION }
| "OR" { OR } | "NATURAL" { NATURAL }
| "AND" { AND } | "JOIN" { JOIN }
| "WHERE" { WHERE } | "ON" { ON }
| "HAVING" { HAVING } | "OR" { OR }
| "BETWEEN" { BETWEEN } | "AND" { AND }
| "IN" { IN } | "WHERE" { WHERE }
| "LIKE" { LIKE } | "HAVING" { HAVING }
| "GROUP" { GROUP } | "BETWEEN" { BETWEEN }
| "BY" { BY } | "IN" { IN }
| "*" { ASTERISK } | "LIKE" { LIKE }
| "." { DOT } | "GROUP" { GROUP }
| "=" { EQUALS_OPERATOR } | "BY" { BY }
| "(" { LEFT_PAREN } | "*" { ASTERISK }
| ")" { RIGHT_PAREN } | "." { DOT }
| "," { COMMA } | "=" { EQUALS_OPERATOR }
| "|" { PIPE } | "(" { LEFT_PAREN }
| "'" { QUOTE } | ")" { RIGHT_PAREN }
| "+" { PLUS_SIGN } | ',' { COMMA }
| "-" { MINUS_SIGN } | "|" { PIPE }
| whitespace { read_token lexbuf } | "'" { QUOTE }
| digit+ { INTEGER } | "+" { PLUS_SIGN }
| digit+"."digit+ { FLOAT } | "-" { MINUS_SIGN }
| '.'digit+ { FLOAT } | ':' { COLON }
| alpha alphanumeric* as ident { IDENT ident } | whitespace { read_token lexbuf }
| eof { EOF } | digit+ as integer { INTEGER (int_of_string integer) }
| digit+"."digit+ { FLOAT }
| '.'digit+ { FLOAT }
| alpha alphanumeric* as ident { IDENT ident }
| eof { EOF }

View File

@ -1,9 +1,12 @@
type query = Query of select_stmt type query = Query of select_stmt
and select_stmt = and select_stmt =
| Select of column list * table list * filter option | Select of column list * table list option * filter option
and column = and column =
| Asterisk | Asterisk
| Column of string (* | Column of string *)
| Column of expression * as_clause option
and as_clause =
| As of string
and table = and table =
| Table of string | Table of string
| Join of table * join_type * table * condition option | Join of table * join_type * table * condition option
@ -20,8 +23,7 @@ and condition =
| And of condition * condition | And of condition * condition
| Or of condition * condition | Or of condition * condition
| Not of condition | Not of condition
and predicand = and predicand = expression
| Ref of string
and predicate = and predicate =
| Comparison of operator * predicand | Comparison of operator * predicand
| Between of predicand * predicand | Between of predicand * predicand
@ -39,8 +41,24 @@ and operator =
| GreaterEquals | GreaterEquals
and filter = and filter =
| Filter of condition | Filter of condition
and search_condition = and aggregate =
| Search of string | Aggregate of func * filter option
and func =
| Function of function_type * quantifier option * expression
and function_type =
| Avg
| Max
| Min
| Sum
| Count
and quantifier =
| All
| Distinct
and expression =
| Ref of string
| StringLiteral of string
| DateLiteral of string
| TimeLiteral of string
let rec pp_query fmt ast = let rec pp_query fmt ast =
match ast with match ast with
@ -58,8 +76,9 @@ and pp_columns cols =
and pp_column col = and pp_column col =
match col with match col with
| Column(name) -> name | Column(Ref(name),_) -> name
| Asterisk -> "*" | Asterisk -> "*"
| _ -> failwith "not supported"
and pp_tables tables = and pp_tables tables =
match tables with match tables with

View File

@ -19,9 +19,10 @@ let rec generate_from_clause tables =
let generate_logical_plan ast = let generate_logical_plan ast =
match ast with match ast with
| Ast.Query(Select(_, tables, _)) -> | Ast.Query(Select(_, Some(tables), _)) ->
let base_plan = generate_from_clause tables in let base_plan = generate_from_clause tables in
base_plan base_plan
| Ast.Query(Select(_, None, _)) -> failwith "not supported"
(*let evaluate_plan plan = (*let evaluate_plan plan =
match plan with match plan with

View File

@ -19,8 +19,11 @@ open Ast
%token AS ON GROUP BY FILTER %token AS ON GROUP BY FILTER
%token OR AND NOT %token OR AND NOT
%token TRUE FALSE UNKNOWN %token TRUE FALSE UNKNOWN
%token PLUS_SIGN MINUS_SIGN INTEGER FLOAT %token PLUS_SIGN MINUS_SIGN
%token UNDERSCORE QUOTE %token DATE TIME TIMESTAMP
%token <int> INTEGER
%token FLOAT
%token UNDERSCORE QUOTE COLON
%token EOF %token EOF
%start main %start main
%type <query> main %type <query> main
@ -48,18 +51,19 @@ greater_than_or_equals_operator:
(* 5.3 LITERAL *) (* 5.3 LITERAL *)
literal : literal :
| signed_numeric_literal {} | signed_numeric_literal { $1 }
| general_literal {} | general_literal { $1 }
unsigned_literal: unsigned_literal:
(* | unsigned_numeric_literal {}*) (* | unsigned_numeric_literal {}*)
| general_literal {} | general_literal { $1 }
general_literal: general_literal:
| character_string_literal {} | character_string_literal { $1 }
| datetime_literal { $1 }
character_string_literal: character_string_literal:
| QUOTE IDENT QUOTE {} | QUOTE IDENT QUOTE { StringLiteral($2) }
introducer: introducer:
| UNDERSCORE { } | UNDERSCORE { }
@ -78,12 +82,65 @@ sign:
| PLUS_SIGN {} | PLUS_SIGN {}
| MINUS_SIGN {} | MINUS_SIGN {}
unsigned_integer:
| INTEGER { $1 }
datetime_literal:
| date_literal { $1 }
| time_literal { $1 }
(*| timestamp_literal {}*)
date_literal:
| DATE date_string { DateLiteral($2) }
time_literal:
| TIME time_string { TimeLiteral($2) }
date_string:
| QUOTE unquoted_date_string QUOTE { $2 }
time_string:
| QUOTE unquoted_time_string QUOTE { $2 }
date_value:
| years_value MINUS_SIGN months_value MINUS_SIGN days_value { String.concat "-" [$1;$3;$5] }
time_value:
| hours_value COLON minutes_value COLON seconds_value { String.concat ":" [$1;$3;$5] }
unquoted_date_string:
| date_value { $1 }
unquoted_time_string:
| time_value { $1 }
years_value :
| datetime_value { $1 }
months_value :
| datetime_value { $1 }
days_value :
| datetime_value { $1 }
hours_value :
| datetime_value { $1 }
minutes_value :
| datetime_value { $1 }
seconds_value:
| datetime_value { $1 }
datetime_value :
| unsigned_integer { string_of_int $1 }
(***********) (***********)
(* 5.4 NAMES AND IDENTIFIER *) (* 5.4 NAMES AND IDENTIFIER *)
column_name : column_name :
| IDENT {} | IDENT { $1 }
table_name : table_name :
| IDENT { Table($1) } | IDENT { Table($1) }
@ -110,6 +167,7 @@ parenthesized_value_expression:
| LEFT_PAREN value_expression RIGHT_PAREN { $2 } | LEFT_PAREN value_expression RIGHT_PAREN { $2 }
nonparenthesized_value_expression_primary: nonparenthesized_value_expression_primary:
| unsigned_value_specification { $1 }
| column_reference { Ref($1) } | column_reference { Ref($1) }
(*| set_function_specification { Ref("function") } *) (*| set_function_specification { Ref("function") } *)
@ -118,8 +176,8 @@ nonparenthesized_value_expression_primary:
(* 6.4 VALUE SPECIFCATION / TARGET SPECIFICATION *) (* 6.4 VALUE SPECIFCATION / TARGET SPECIFICATION *)
unsigned_value_specification: unsigned_value_specification:
| unsigned_literal {} | unsigned_literal { $1 }
| general_value_specification {} (*| general_value_specification {}*)
general_value_specification: general_value_specification:
| {} | {}
@ -155,7 +213,7 @@ set_function_specification:
(* 6.25 VALUE EXPRESSION *) (* 6.25 VALUE EXPRESSION *)
value_expression: value_expression:
| common_value_expression {} | common_value_expression { $1 }
common_value_expression: common_value_expression:
(*| numeric_value_expression {}*) (*| numeric_value_expression {}*)
@ -170,7 +228,7 @@ reference_value_expression:
(* 6.28 STRING VALUE EXPRESSION *) (* 6.28 STRING VALUE EXPRESSION *)
string_value_expression : string_value_expression :
| character_value_expression {} | character_value_expression { $1 }
(* | blob_value_expression {} *) (* | blob_value_expression {} *)
character_primary: character_primary:
@ -264,8 +322,9 @@ row_value_special_case :
(* 7.4 TABLE EXPRESSION *) (* 7.4 TABLE EXPRESSION *)
table_expression: table_expression:
| from_clause { $1 } | { None }
| from_clause where_clause { $1 } | from_clause { Some($1) }
| from_clause where_clause { Some($1) }
(************************) (************************)
@ -395,15 +454,21 @@ query_specification :
select_list : select_list :
| ASTERISK { [Asterisk] } | ASTERISK { [Asterisk] }
| select_sublist {$1} | select_sublist { $1 }
(*
select_sublist : select_sublist :
| IDENT { [Column($1)] } | IDENT { [Column($1)] }
| select_sublist COMMA IDENT { Column($3)::$1 } | select_sublist COMMA IDENT { Column($3)::$1 }
*)
select_sublist :
| select_sublist_element { [$1] }
| select_sublist COMMA select_sublist_element { $3::$1 }
select_sublist_element : select_sublist_element :
| derived_column {} | derived_column { $1 }
| qualified_asterisk {} (* | qualified_asterisk {} *)
qualified_asterisk: qualified_asterisk:
| asterisked_identifier_chain {} | asterisked_identifier_chain {}
@ -416,12 +481,12 @@ asterisked_identifier :
| IDENT {} | IDENT {}
derived_column: derived_column:
| value_expression {} | value_expression { Column($1, None) }
| value_expression as_clause {} | value_expression as_clause { Column($1, Some($2)) }
as_clause : as_clause :
| AS column_name {} | AS column_name { As($2) }
| column_name {} (*| column_name { As($1) }*)
(****************************) (****************************)
@ -537,31 +602,31 @@ search_condition:
(* 10.9 AGGREGATE FUNCTION *) (* 10.9 AGGREGATE FUNCTION *)
aggregate_function: aggregate_function:
| COUNT LEFT_PAREN ASTERISK RIGHT_PAREN { } | COUNT LEFT_PAREN ASTERISK RIGHT_PAREN { Count }
| COUNT LEFT_PAREN ASTERISK RIGHT_PAREN filter_clause { } | COUNT LEFT_PAREN ASTERISK RIGHT_PAREN filter_clause { Count }
| general_set_function { } | general_set_function { Aggregate($1) }
| general_set_function filter_clause { } | general_set_function filter_clause { Aggregate($1, $2) }
general_set_function: general_set_function:
| set_function_type LEFT_PAREN value_expression RIGHT_PAREN { } | set_function_type LEFT_PAREN value_expression RIGHT_PAREN { Function($1, None, $3) }
| set_function_type LEFT_PAREN set_quantifier value_expression RIGHT_PAREN { } | set_function_type LEFT_PAREN set_quantifier value_expression RIGHT_PAREN { Function($1, $3, $4) }
set_function_type: set_function_type:
| computationnal_operation {} | computationnal_operation { $1 }
set_quantifier : set_quantifier :
| ALL {} | ALL { All }
| DISTINCT {} | DISTINCT { Distinct }
filter_clause : filter_clause :
| FILTER LEFT_PAREN WHERE search_condition RIGHT_PAREN {} | FILTER LEFT_PAREN WHERE search_condition RIGHT_PAREN { Filter($4) }
computationnal_operation: computationnal_operation:
| AVG {} | AVG { Avg }
| MAX {} | MAX { Max }
| MIN {} | MIN { Min }
| SUM {} | SUM { Sum }
| COUNT {} | COUNT { Count }
(***************************) (***************************)

View File

@ -11,70 +11,164 @@ let query_testable =
Alcotest.testable Ast.pp_query equal_ast Alcotest.testable Ast.pp_query equal_ast
let test_simple_select () = let test_simple_select () =
let q1 = parse "SELECT a FROM t" in let query = "SELECT a FROM t" in
let ast1 = Query(Select([Column("a")], [Table("t")], None)) in let q1 = parse query in
Alcotest.(check query_testable) "Ok" q1 ast1;
let q2 = parse "SELECT * FROM t" in
let ast2 = Query(Select([Asterisk], [Table("t")], None)) in
Alcotest.(check query_testable) "Ok2" q2 ast2
let test_default_join () =
let q1 = parse "SELECT a FROM t1 JOIN t2 ON a = b" in
let ast1 = Query( let ast1 = Query(
Select( Select(
[Column("a")], [
[Join( Column(
Table("t1"), Ref("a"),
Left, None
Table("t2"), )
Some( ],
Condition( Some(
Ref("a"), [
Comparison(Equals, Ref("b")) Table("t")
]
),
None
)
) in
Alcotest.(check query_testable) query q1 ast1;
let query = "SELECT * FROM t" in
let q2 = parse query in
let ast2 = Query(
Select(
[
Asterisk
],
Some(
[
Table("t")
]
t ),
None
)
) in
Alcotest.(check query_testable) query q2 ast2 ;
let query = "SELECT 'a'" in
let q3 = parse query in
let ast3 = Query(
Select(
[
Column(
StringLiteral("a"),
None
)
],
None,
None
)
) in
Alcotest.(check query_testable) query q3 ast3;
let q4 = parse "SELECT DATE '2024-12-25' AS date" in
let ast4 = Query(
Select(
[
Column(
DateLiteral("2024-12-25"),
Some(
As("date")
) )
) )
)], ],
None,
None
)
) in
Alcotest.(check query_testable) "OK" q4 ast4
let test_default_join () =
let q1 = parse "SELECT a FROM t1 JOIN t2 ON b = c" in
let ast1 = Query(
Select(
[
Column(
Ref("a"),
None
)
],
Some(
[
Join(
Table("t1"),
Left,
Table("t2"),
Some(
Condition(
Ref("b"),
Comparison(
Equals,
Ref("c"))
)
)
)
]
),
None None
) )
) in ) in
Alcotest.(check query_testable) "Ok" q1 ast1 Alcotest.(check query_testable) "Ok" q1 ast1
let test_left_join () = let test_left_join () =
let q1 = parse "SELECT a FROM t1 LEFT JOIN t2 ON a = b" in let query = "SELECT a FROM t1 LEFT JOIN t2 ON a = b" in
let q1 = parse query in
let ast1 = Query( let ast1 = Query(
Select([Column("a")], Select(
[Join( [
Table("t1"), Column(
Left, Ref("a"),
Table("t2"), None
Some(
Condition(
Ref("a"),
Comparison(Equals, Ref("b"))
)
) )
)], ],
Some(
[
Join(
Table("t1"),
Left,
Table("t2"),
Some(
Condition(
Ref("a"),
Comparison(Equals, Ref("b"))
)
)
)
]
),
None None
) )
) in ) in
Alcotest.(check query_testable) "Ok" q1 ast1 Alcotest.(check query_testable) query q1 ast1
let test_right_join () = let test_right_join () =
let q1 = parse "SELECT a FROM t1 RIGHT JOIN t2 ON a = b" in let q1 = parse "SELECT a FROM t1 RIGHT JOIN t2 ON a = b" in
let ast1 = Query( let ast1 = Query(
Select([Column("a")], Select(
[Join( [
Table("t1"), Column(
Right, Ref("a"),
Table("t2"), None
Some(
Condition(
Ref("a"),
Comparison(Equals, Ref("b"))
)
) )
)], ],
Some(
[
Join(
Table("t1"),
Right,
Table("t2"),
Some(
Condition(
Ref("a"),
Comparison(Equals, Ref("b"))
)
)
)
]
),
None None
) )
) in ) in
@ -83,18 +177,28 @@ let test_right_join () =
let test_inner_join () = let test_inner_join () =
let q1 = parse "SELECT a FROM t1 INNER JOIN t2 ON a = b" in let q1 = parse "SELECT a FROM t1 INNER JOIN t2 ON a = b" in
let ast1 = Query( let ast1 = Query(
Select([Column("a")], Select(
[Join( [
Table("t1"), Column(
Inner, Ref("a"),
Table("t2"), None
Some(
Condition(
Ref("a"),
Comparison(Equals, Ref("b"))
)
) )
)], ],
Some(
[
Join(
Table("t1"),
Inner,
Table("t2"),
Some(
Condition(
Ref("a"),
Comparison(Equals, Ref("b"))
)
)
)
]
),
None None
) )
) in ) in
@ -103,13 +207,23 @@ let test_inner_join () =
let test_union_join () = let test_union_join () =
let q1 = parse "SELECT a FROM t1 UNION JOIN t2" in let q1 = parse "SELECT a FROM t1 UNION JOIN t2" in
let ast1 = Query( let ast1 = Query(
Select([Column("a")], Select(
[Join( [
Table("t1"), Column(
Union, Ref("a"),
Table("t2"), None
None )
)], ],
Some(
[
Join(
Table("t1"),
Union,
Table("t2"),
None
)
]
),
None None
) )
) in ) in
@ -118,13 +232,23 @@ let test_union_join () =
let test_cross_join () = let test_cross_join () =
let q1 = parse "SELECT a FROM t1 CROSS JOIN t2" in let q1 = parse "SELECT a FROM t1 CROSS JOIN t2" in
let ast1 = Query( let ast1 = Query(
Select([Column("a")], Select(
[Join( [
Table("t1"), Column(
Cross, Ref("a"),
Table("t2"), None
None )
)], ],
Some(
[
Join(
Table("t1"),
Cross,
Table("t2"),
None
)
]
),
None None
) )
) in ) in
@ -133,13 +257,23 @@ let test_cross_join () =
let test_natural_join () = let test_natural_join () =
let q1 = parse "SELECT a FROM t1 NATURAL JOIN t2" in let q1 = parse "SELECT a FROM t1 NATURAL JOIN t2" in
let ast1 = Query( let ast1 = Query(
Select([Column("a")], Select(
[Join( [
Table("t1"), Column(
Natural, Ref("a"),
Table("t2"), None
None )
)], ],
Some(
[
Join(
Table("t1"),
Natural,
Table("t2"),
None
)
]
),
None None
) )
) in ) in
@ -148,30 +282,37 @@ let test_natural_join () =
let test_join_join () = let test_join_join () =
let q1 = parse "SELECT a FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c" in let q1 = parse "SELECT a FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c" in
let ast1 = Query( let ast1 = Query(
Select([Column("a")], [ Select(
Join( [
Column(
Ref("a"),
None
)
],
Some([
Join( Join(
Table("t1"), Join(
Table("t1"),
Left,
Table("t2"),
Some(
Condition(
Ref("a"),
Comparison(Equals, Ref("b"))
)
)
),
Left, Left,
Table("t2"), Table("t3"),
Some( Some(
Condition( Condition(
Ref("a"), Ref("a"),
Comparison(Equals, Ref("b")) Comparison(Equals, Ref("c"))
) )
) )
),
Left,
Table("t3"),
Some(
Condition(
Ref("a"),
Comparison(Equals, Ref("c"))
)
) )
) ]),
], None
None
) )
) in ) in
Alcotest.(check query_testable) "Ok" q1 ast1 Alcotest.(check query_testable) "Ok" q1 ast1
@ -180,8 +321,17 @@ let test_where_equals () =
let q1 = parse "SELECT a FROM t1 WHERE a = a OR a = b" in let q1 = parse "SELECT a FROM t1 WHERE a = a OR a = b" in
let ast1 = Query( let ast1 = Query(
Select( Select(
[Column("a")], [
[Table("t1")], Column(
Ref("a"),
None
)
],
Some(
[
Table("t1")
]
),
None None
) )
) in ) in