Compare commits

...

3 Commits

3 changed files with 153 additions and 58 deletions

View File

@ -18,6 +18,7 @@ rule read_token = parse
| "SUM" { SUM } | "SUM" { SUM }
| "COUNT" { COUNT } | "COUNT" { COUNT }
| "DISTINCT" { DISTINCT } | "DISTINCT" { DISTINCT }
| "SUBSTRING" { SUBSTRING }
| "FROM" { FROM } | "FROM" { FROM }
| "INNER" { INNER } | "INNER" { INNER }
| "LEFT" { LEFT } | "LEFT" { LEFT }
@ -31,13 +32,13 @@ rule read_token = parse
| "AND" { AND } | "AND" { AND }
| "WHERE" { WHERE } | "WHERE" { WHERE }
| "HAVING" { HAVING } | "HAVING" { HAVING }
| "NOT" { NOT }
| "BETWEEN" { BETWEEN } | "BETWEEN" { BETWEEN }
| "IN" { IN } | "IN" { IN }
| "LIKE" { LIKE } | "LIKE" { LIKE }
| "GROUP" { GROUP } | "GROUP" { GROUP }
| "BY" { BY } | "BY" { BY }
| "*" { ASTERISK } | "*" { ASTERISK }
| "." { DOT }
| "=" { EQUALS_OPERATOR } | "=" { EQUALS_OPERATOR }
| "(" { LEFT_PAREN } | "(" { LEFT_PAREN }
| ")" { RIGHT_PAREN } | ")" { RIGHT_PAREN }
@ -49,7 +50,7 @@ rule read_token = parse
| ':' { COLON } | ':' { COLON }
| whitespace { read_token lexbuf } | whitespace { read_token lexbuf }
| digit+ as integer { INTEGER (int_of_string integer) } | digit+ as integer { INTEGER (int_of_string integer) }
| digit+"."digit+ { FLOAT } | digit+"."digit+ as number { FLOAT (float_of_string number) }
| '.'digit+ { FLOAT } | '.'digit+ as number { FLOAT (float_of_string number) }
| alpha alphanumeric* as ident { IDENT ident } | alpha alphanumeric* as ident { IDENT ident }
| eof { EOF } | eof { EOF }

View File

@ -2,13 +2,16 @@ type query =
(*| Select of column list * table list option * filter option*) (*| Select of column list * table list option * filter option*)
| Select of column list * table_expression | Select of column list * table_expression
| CreateSchema of string | CreateSchema of string
| CreateTable of table | CreateTable of table_scope option * table
| DropSchema of string | DropSchema of string
| DropTable of table | DropTable of table
| DropColumn of string
(* Scope qualifier of a temporary table. Carried as the optional first
   component of CreateTable; None there means no scope was written. *)
and table_scope =
| Global
| Local
and column = and column =
| Asterisk | Asterisk
(* | Column of string *) | Column of expression_primary * as_clause option
| Column of expression * as_clause option
and as_clause = and as_clause =
| As of string | As of string
and table_expression = and table_expression =
@ -25,19 +28,18 @@ and join_type =
| Union | Union
| Natural | Natural
and condition = and condition =
| Condition of predicand * predicate | Condition of expression_primary * predicate
| And of condition * condition | And of condition * condition
| Or of condition * condition | Or of condition * condition
| Not of condition | Not of condition
and predicand = expression
and predicate = and predicate =
| Comparison of operator * predicand | Comparison of operator * expression_primary
| Between of predicand * predicand | Between of expression_primary * expression_primary
| NotBetween of predicand * predicand | NotBetween of expression_primary * expression_primary
| In of predicand list | In of expression_primary list
| NotIn of predicand list | NotIn of expression_primary list
| Like of predicand | Like of expression_primary
| NotLike of predicand | NotLike of expression_primary
and operator = and operator =
| Equals | Equals
| NotEquals | NotEquals
@ -47,11 +49,11 @@ and operator =
| GreaterEquals | GreaterEquals
and filter = condition and filter = condition
and group = and group =
| Group of quantifier option * expression list option | Group of quantifier option * expression_primary list option
and aggregate = and aggregate =
| Aggregate of func * filter option | Aggregate of func * filter option
and func = and func =
| Function of function_type * quantifier option * expression | Function of function_type * quantifier option * expression_primary
and function_type = and function_type =
| Avg | Avg
| Max | Max
@ -61,11 +63,22 @@ and function_type =
and quantifier = and quantifier =
| All | All
| Distinct | Distinct
and expression = and expression_primary =
| Ref of string | Ref of string
| StringLiteral of string | StringLiteral of string
| DateLiteral of string | DateLiteral of string
| TimeLiteral of string | TimeLiteral of string
| TimestampLiteral of string
| Concatenation of expression_primary * expression_primary
| Numeric of expression_primary * sign * expression_primary
| Signed of sign * expression_primary
| Substring of expression_primary * expression_primary
(* Operator tag shared by Numeric (binary form) and Signed (unary prefix form).
   NOTE(review): Times and Divide are arithmetic operators rather than signs;
   in the visible grammar the `sign` rule only ever produces Plus or Minus,
   while Times and Divide appear only inside Numeric nodes. *)
and sign =
| Plus
| Minus
| Times
| Divide
let rec pp_query fmt ast = let rec pp_query fmt ast =
match ast with match ast with
@ -86,9 +99,11 @@ and pp_column col =
and pp_expression exp = and pp_expression exp =
match exp with match exp with
| Ref(name) -> name | Ref(name) -> name
| StringLiteral(name) -> "'"^name^"'" | StringLiteral(s) -> "'" ^ s ^ "'"
| DateLiteral(name) -> "'"^name^"'" | DateLiteral(d) -> "'" ^ d ^ "'"
| TimeLiteral(name) -> "'"^name^"'" | TimeLiteral(t) -> "'" ^ t ^ "'"
| TimestampLiteral(ts) -> "'" ^ ts ^ "'"
| _ -> "Expression not yet supported"
and pp_table_expression table_exp = and pp_table_expression table_exp =
match table_exp with match table_exp with
@ -137,8 +152,8 @@ and pp_predicate pred =
| Comparison(op, exp) -> pp_operator op ^ pp_expression exp | Comparison(op, exp) -> pp_operator op ^ pp_expression exp
| Between(exp1, exp2) -> "BETWEEN " ^ pp_expression exp1 ^ " AND " ^pp_expression exp2 | Between(exp1, exp2) -> "BETWEEN " ^ pp_expression exp1 ^ " AND " ^pp_expression exp2
| NotBetween(exp1, exp2) -> "NOT BETWEEN " ^ pp_expression exp1 ^ " AND " ^pp_expression exp2 | NotBetween(exp1, exp2) -> "NOT BETWEEN " ^ pp_expression exp1 ^ " AND " ^pp_expression exp2
| Like(exp) -> "LIKE " ^ pp_expression exp (*| Like(exp) -> "LIKE " ^ pp_expression exp
| NotLike(exp) -> " NOT LIKE " ^ pp_expression exp | NotLike(exp) -> " NOT LIKE " ^ pp_expression exp*)
| _ -> failwith "Predicate not supported" | _ -> failwith "Predicate not supported"
and pp_operator op = and pp_operator op =

View File

@ -11,18 +11,19 @@ open Ast
%token CROSS NATURAL UNION JOIN %token CROSS NATURAL UNION JOIN
%token GREATER_THAN_OPERATOR LESS_THAN_OPERATOR EQUALS_OPERATOR %token GREATER_THAN_OPERATOR LESS_THAN_OPERATOR EQUALS_OPERATOR
%token MAX MIN SUM COUNT AVG %token MAX MIN SUM COUNT AVG
%token SUBSTRING
%token ASYMMETRIC SYMMETRIC %token ASYMMETRIC SYMMETRIC
%token <string> IDENT %token <string> IDENT
%token COMMA DOT PIPE %token COMMA PIPE
%token LEFT_PAREN RIGHT_PAREN %token LEFT_PAREN RIGHT_PAREN
%token ASTERISK %token ASTERISK SOLIDUS
%token AS ON GROUP BY FILTER %token AS ON GROUP BY FILTER
%token OR AND NOT %token OR AND NOT
%token TRUE FALSE UNKNOWN %token TRUE FALSE UNKNOWN
%token PLUS_SIGN MINUS_SIGN %token PLUS_SIGN MINUS_SIGN
%token DATE TIME TIMESTAMP %token DATE TIME TIMESTAMP
%token <int> INTEGER %token <int> INTEGER
%token FLOAT %token <float> FLOAT
%token UNDERSCORE QUOTE COLON %token UNDERSCORE QUOTE COLON
%token EOF %token EOF
%start main %start main
@ -36,6 +37,7 @@ main:
| table_definition EOF { $1 } | table_definition EOF { $1 }
| drop_schema_statement EOF { $1 } | drop_schema_statement EOF { $1 }
| drop_table_statement EOF { $1 } | drop_table_statement EOF { $1 }
| drop_column_definition EOF { $1 }
(* 5.2 TOKEN / SEPARATOR *) (* 5.2 TOKEN / SEPARATOR *)
@ -59,7 +61,7 @@ literal :
| general_literal { $1 } | general_literal { $1 }
unsigned_literal: unsigned_literal:
(* | unsigned_numeric_literal {}*) (* | unsigned_numeric_literal { $1 }*)
| general_literal { $1 } | general_literal { $1 }
general_literal: general_literal:
@ -77,14 +79,15 @@ signed_numeric_literal:
| unsigned_numeric_literal {} | unsigned_numeric_literal {}
unsigned_numeric_literal : unsigned_numeric_literal :
| exact_numeric_literal {} | exact_numeric_literal { $1 }
exact_numeric_literal: exact_numeric_literal:
| FLOAT {} | FLOAT { }
| INTEGER {}
sign: sign:
| PLUS_SIGN {} | PLUS_SIGN { Plus }
| MINUS_SIGN {} | MINUS_SIGN { Minus }
unsigned_integer: unsigned_integer:
| INTEGER { $1 } | INTEGER { $1 }
@ -92,7 +95,7 @@ unsigned_integer:
datetime_literal: datetime_literal:
| date_literal { $1 } | date_literal { $1 }
| time_literal { $1 } | time_literal { $1 }
(*| timestamp_literal {}*) | timestamp_literal { $1 }
date_literal: date_literal:
| DATE date_string { DateLiteral($2) } | DATE date_string { DateLiteral($2) }
@ -100,12 +103,18 @@ date_literal:
time_literal: time_literal:
| TIME time_string { TimeLiteral($2) } | TIME time_string { TimeLiteral($2) }
(* TIMESTAMP keyword followed by a quoted timestamp string; the string is
   wrapped in a TimestampLiteral node. *)
timestamp_literal:
| TIMESTAMP timestamp_string { TimestampLiteral($2) }
date_string: date_string:
| QUOTE unquoted_date_string QUOTE { $2 } | QUOTE unquoted_date_string QUOTE { $2 }
time_string: time_string:
| QUOTE unquoted_time_string QUOTE { $2 } | QUOTE unquoted_time_string QUOTE { $2 }
(* A timestamp body enclosed in QUOTE tokens; returns the body without the
   surrounding quotes. *)
timestamp_string:
| QUOTE unquoted_timestamp_string QUOTE { $2 }
date_value: date_value:
| years_value MINUS_SIGN months_value MINUS_SIGN days_value { String.concat "-" [$1;$3;$5] } | years_value MINUS_SIGN months_value MINUS_SIGN days_value { String.concat "-" [$1;$3;$5] }
@ -118,6 +127,9 @@ unquoted_date_string:
unquoted_time_string: unquoted_time_string:
| time_value { $1 } | time_value { $1 }
(* Date part immediately followed by time part; the two strings are joined
   with a single space. *)
unquoted_timestamp_string:
| unquoted_date_string unquoted_time_string { $1 ^ " " ^$2 }
years_value : years_value :
| datetime_value { $1 } | datetime_value { $1 }
@ -154,10 +166,6 @@ schema_name:
(****************************) (****************************)
character_value_expression:
(* | concatenation {} *)
| character_factor { $1 }
(* 6. SCALAR EXPRESSION *) (* 6. SCALAR EXPRESSION *)
@ -171,6 +179,7 @@ parenthesized_value_expression:
| LEFT_PAREN value_expression RIGHT_PAREN { $2 } | LEFT_PAREN value_expression RIGHT_PAREN { $2 }
nonparenthesized_value_expression_primary: nonparenthesized_value_expression_primary:
(* Return StringLiteral, TimeLiteral, DateLiteral, TimestampLiteral *)
| unsigned_value_specification { $1 } | unsigned_value_specification { $1 }
| column_reference { Ref($1) } | column_reference { Ref($1) }
| set_function_specification { Ref("function") } | set_function_specification { Ref("function") }
@ -218,29 +227,70 @@ set_function_specification:
value_expression: value_expression:
| common_value_expression { $1 } | common_value_expression { $1 }
(*| boolean_value_expression { $1 }*)
(* To avoid cycles, and therefore reduce/reduce conflicts, the
*_value_expression rules below are not listed side by side.
Instead they are cascaded:
numeric_value_expression -> string_value_expression
-> datetime_value_expression -> reference_value_expression
Each rule refers to the next one in its last step, at the point
where it would otherwise refer to value_expression_primary.
This way the parser still covers all of them every time, and
reduce/reduce conflicts are prevented. *)
common_value_expression: common_value_expression:
(*| numeric_value_expression {}*) | numeric_value_expression { $1 }
(*| string_value_expression {} *) (* | string_value_expression { $1 } *)
| reference_value_expression { $1 } (* | datetime_value_expression { $1 } *)
(* | reference_value_expression { $1 }*)
reference_value_expression: reference_value_expression:
| value_expression_primary { $1 } | value_expression_primary { $1 }
(*************************) (*************************)
(* 6.26 NUMERIC VALUE EXPRESSION *)
(* Additive level of numeric expressions. Left-recursive: each PLUS_SIGN or
   MINUS_SIGN wraps the expression parsed so far and the following term in a
   Numeric node tagged Plus or Minus. A lone term is passed through unchanged. *)
numeric_value_expression:
| term { $1 }
| numeric_value_expression PLUS_SIGN term { Numeric($1, Plus, $3) }
| numeric_value_expression MINUS_SIGN term { Numeric($1, Minus, $3) }
(* Multiplicative level, one step below numeric_value_expression.
   ASTERISK builds a Numeric node tagged Times and SOLIDUS one tagged Divide;
   a lone factor is passed through unchanged. *)
term:
| factor { $1 }
| term ASTERISK factor { Numeric($1, Times, $3) }
| term SOLIDUS factor { Numeric($1, Divide, $3) }
(* A numeric_primary, optionally preceded by a sign. The signed form is
   wrapped in Signed together with the value returned by the sign rule. *)
factor:
| numeric_primary { $1 }
| sign numeric_primary { Signed($1, $2) }
(* Delegates to string_value_expression instead of value_expression_primary
   (that alternative is kept below, commented out).
   NOTE(review): presumably this indirection exists to avoid reduce/reduce
   conflicts between the *_value_expression rules -- confirm against the
   parser generator conflict report. *)
numeric_primary:
| string_value_expression { $1 }
(* | value_expression_primary { $1 }*)
(*********************************)
(* 6.28 STRING VALUE EXPRESSION *) (* 6.28 STRING VALUE EXPRESSION *)
string_value_expression : string_value_expression :
| character_value_expression { $1 } | character_value_expression { $1 }
(* | blob_value_expression {} *) (* | blob_value_expression {} *)
(* Either a concatenation or a single character_factor; both alternatives
   forward the semantic value of the sub-rule unchanged. *)
character_value_expression:
(* Yields the Concatenation node built by the concatenation rule
   (Concatenation of expression_primary * expression_primary). *)
| concatenation { $1 }
(* Yields whatever character_factor produces.
   NOTE(review): no Char constructor is visible in the AST, so this is not
   a dedicated character node -- confirm. *)
| character_factor { $1 }
character_primary: character_primary:
| value_expression_primary { $1 } | datetime_value_expression { $1 }
(* | string_value_function {} *) (* | value_expression_primary { $1 }*)
| string_value_function { $1 }
concatenation: concatenation:
| character_value_expression concatenation_operator character_factor {} | character_value_expression concatenation_operator character_factor { Concatenation($1, $3) }
character_factor : character_factor :
| character_primary { $1 } | character_primary { $1 }
@ -250,13 +300,39 @@ character_factor :
(* 6.29 STRING VALUE FUNCTION *) (* 6.29 STRING VALUE FUNCTION *)
string_value_function: string_value_function:
| character_value_function {} | character_value_function { $1 }
character_value_function : character_value_function :
| {} | character_substring_function { $1 }
(* SUBSTRING ( <string> FROM <start> ): builds Substring(string, start).
   Only the FROM form is accepted by this rule; no length argument is parsed. *)
character_substring_function :
| SUBSTRING LEFT_PAREN character_value_expression FROM start_position RIGHT_PAREN { Substring($3, $5) }
(* Start offset of a substring, parsed as an ordinary
   numeric_value_expression and returned unchanged. *)
start_position:
| numeric_value_expression { $1 }
(*************************) (*************************)
(* 6.30 DATETIME VALUE EXPRESSION *)
(* Entry point for datetime expressions; currently just forwards the value
   of datetime_term. *)
datetime_value_expression:
| datetime_term { $1 }
(* Pass-through to datetime_factor; this rule has no other alternative. *)
datetime_term :
| datetime_factor { $1 }
(* Pass-through to datetime_primary; this rule has no other alternative. *)
datetime_factor:
| datetime_primary { $1 }
(* Bottom of the datetime chain: falls through to reference_value_expression.
   The datetime_value_function alternative is commented out below. *)
datetime_primary:
| reference_value_expression { $1 }
(* | datetime_value_function { $1 }*)
(**********************************)
(* 6.31 DATETIME VALUE FUNCTION *)
(*******************************)
(* 6.34 BOOLEAN VALUE EXPRESSION *) (* 6.34 BOOLEAN VALUE EXPRESSION *)
boolean_value_expression: boolean_value_expression:
@ -330,11 +406,11 @@ table_expression:
| from_clause { TableExpression(Some($1), None, None) } | from_clause { TableExpression(Some($1), None, None) }
| from_clause where_clause { TableExpression(Some($1), Some($2), None) } | from_clause where_clause { TableExpression(Some($1), Some($2), None) }
| from_clause where_clause group_by_clause { TableExpression(Some($1), Some($2), Some($3)) } | from_clause where_clause group_by_clause { TableExpression(Some($1), Some($2), Some($3)) }
(* | from_clause where_clause group_by_clause having_clause { Some($1) } | from_clause where_clause group_by_clause having_clause { TableExpression(Some($1), Some($2), Some($3)) }
| from_clause group_by_clause { Some($1) } | from_clause where_clause having_clause { TableExpression(Some($1), Some($2), None) }
| from_clause group_by_clause having_clause { Some($1) } | from_clause group_by_clause { TableExpression(Some($1), None, Some($2)) }
| from_clause having_clause { Some($1) } | from_clause group_by_clause having_clause { TableExpression(Some($1), None, Some($2)) }
*) | from_clause having_clause { TableExpression(Some($1), None, None) }
(************************) (************************)
@ -484,15 +560,17 @@ select_sublist_element :
| derived_column { $1 } | derived_column { $1 }
(* | qualified_asterisk {} *) (* | qualified_asterisk {} *)
(*
qualified_asterisk: qualified_asterisk:
| asterisked_identifier_chain {} | asterisked_identifier_chain DOT ASTERISK {}
asterisked_identifier_chain: asterisked_identifier_chain:
| asterisked_identifier {} | asterisked_identifier {}
| asterisked_identifier_chain DOT asterisked_identifier {} | asterisked_identifier_chain DOT asterisked_identifier {}
asterisked_identifier : asterisked_identifier :
| IDENT {} | ASTERISK {}
*)
derived_column: derived_column:
| value_expression { Column($1, None) } | value_expression { Column($1, None) }
@ -666,22 +744,23 @@ drop_schema_statement:
(* 11.3 TABLE DEFINITION *) (* 11.3 TABLE DEFINITION *)
table_definition : table_definition :
| CREATE TABLE table_name { CreateTable($3) } | CREATE TABLE table_name { CreateTable(None, $3) }
| CREATE table_scope TABLE table_name { CreateTable(Some($2), $4) }
table_scope : table_scope :
| global_or_local TEMPORARY {} | global_or_local TEMPORARY { $1 }
global_or_local : global_or_local :
| GLOBAL {} | GLOBAL { Global }
| LOCAL {} | LOCAL { Local }
(*************************) (*************************)
(* 11.18 DROP COLUMN DEFINITION *) (* 11.18 DROP COLUMN DEFINITION *)
drop_column_definition: drop_column_definition:
| DROP column_name {} | DROP column_name { DropColumn($2) }
| DROP COLUMN column_name {} | DROP COLUMN column_name { DropColumn($3) }
(********************************) (********************************)