ocaml_sql_parser/parser/parser.mly
2024-12-16 16:04:37 +00:00

774 lines
17 KiB
OCaml

%{
(* Header: open the Ast module so the semantic actions below can use its
   constructors without qualification. *)
open Ast
%}
/* Keywords used by the schema, table and column statements */
%token CREATE SCHEMA TABLE COLUMN
%token DROP
%token LOCAL GLOBAL TEMPORARY
/* Keywords used by queries and predicates */
%token SELECT ALL DISTINCT FROM WHERE HAVING BETWEEN IN LIKE IS
/* Join keywords */
%token LEFT RIGHT FULL INNER OUTER
%token CROSS NATURAL UNION JOIN
/* Comparison symbols; compound operators are assembled from them in the grammar */
%token GREATER_THAN_OPERATOR LESS_THAN_OPERATOR EQUALS_OPERATOR
/* Set function names */
%token MAX MIN SUM COUNT AVG
%token SUBSTRING
%token ASYMMETRIC SYMMETRIC
%token <string> IDENT
/* Two consecutive PIPE tokens form the concatenation operator */
%token COMMA PIPE
%token LEFT_PAREN RIGHT_PAREN
%token ASTERISK SOLIDUS
%token AS ON GROUP BY FILTER
%token OR AND NOT
%token TRUE FALSE UNKNOWN
%token PLUS_SIGN MINUS_SIGN
%token DATE TIME TIMESTAMP
%token <int> INTEGER
%token <float> FLOAT
/* QUOTE delimits string and datetime literals; COLON separates time fields */
%token QUOTE COLON
%token EOF
/* Entry point: main yields a value of type query */
%start main
%type <query> main
%%
(* Entry point: exactly one statement followed by the end of input.
   The value built by the statement rule is returned unchanged. *)
main:
    query_specification EOF
      { $1 }
  | schema_definition EOF
      { $1 }
  | table_definition EOF
      { $1 }
  | drop_schema_statement EOF
      { $1 }
  | drop_table_statement EOF
      { $1 }
  | drop_column_definition EOF
      { $1 }
(* 5.2 TOKEN / SEPARATOR *)
(* Compound operators are recognised as two consecutive tokens and carry
   no semantic value (unit). *)
concatenation_operator:
    PIPE PIPE
      { () }
not_equals_operator:
    LESS_THAN_OPERATOR GREATER_THAN_OPERATOR
      { () }
less_than_or_equals_operator:
    LESS_THAN_OPERATOR EQUALS_OPERATOR
      { () }
greater_than_or_equals_operator:
    GREATER_THAN_OPERATOR EQUALS_OPERATOR
      { () }
(*********************)
(* 5.3 LITERAL *)
(* NOTE(review): literal is not referenced by any other rule visible in this
   file; with the signed alternative disabled it only accepts general
   literals. *)
literal :
| general_literal { $1 }
(* | signed_numeric_literal { $1 }*)
(* Literals without a leading sign: numbers, strings and datetimes *)
unsigned_literal:
| unsigned_numeric_literal { $1 }
| general_literal { $1 }
general_literal:
| character_string_literal { $1 }
| datetime_literal { $1 }
(* A string literal is a single IDENT token between two QUOTE tokens *)
character_string_literal:
| QUOTE IDENT QUOTE { StringLiteral($2) }
(*
signed_numeric_literal:
| sign unsigned_numeric_literal { Signed($1,$2) }
| unsigned_numeric_literal { $1 }
*)
unsigned_numeric_literal :
| exact_numeric_literal { $1 }
(* Wraps the payload of a FLOAT or INTEGER token in the matching constructor *)
exact_numeric_literal:
| FLOAT { FloatLiteral($1) }
| INTEGER { IntegerLiteral($1) }
(* Sign tokens mapped to the Plus and Minus constructors *)
sign:
| PLUS_SIGN { Plus }
| MINUS_SIGN { Minus }
(* Raw int carried by the INTEGER token, used for datetime fields *)
unsigned_integer:
| INTEGER { $1 }
(* Datetime literals: a DATE, TIME or TIMESTAMP keyword followed by a quoted
   string whose fields are INTEGER tokens. *)
datetime_literal:
    date_literal
      { $1 }
  | time_literal
      { $1 }
  | timestamp_literal
      { $1 }
date_literal:
    DATE date_string
      { DateLiteral $2 }
time_literal:
    TIME time_string
      { TimeLiteral $2 }
timestamp_literal:
    TIMESTAMP timestamp_string
      { TimestampLiteral $2 }
(* The quoted forms strip the surrounding QUOTE tokens *)
date_string:
    QUOTE unquoted_date_string QUOTE
      { $2 }
time_string:
    QUOTE unquoted_time_string QUOTE
      { $2 }
timestamp_string:
    QUOTE unquoted_timestamp_string QUOTE
      { $2 }
(* Three fields separated by MINUS_SIGN, rendered with dashes *)
date_value:
    years_value MINUS_SIGN months_value MINUS_SIGN days_value
      { $1 ^ "-" ^ $3 ^ "-" ^ $5 }
(* Three fields separated by COLON, rendered with colons *)
time_value:
    hours_value COLON minutes_value COLON seconds_value
      { $1 ^ ":" ^ $3 ^ ":" ^ $5 }
unquoted_date_string:
    date_value
      { $1 }
unquoted_time_string:
    time_value
      { $1 }
(* Date part and time part joined by a single space *)
unquoted_timestamp_string:
    unquoted_date_string unquoted_time_string
      { String.concat " " [$1; $2] }
years_value:
    datetime_value
      { $1 }
months_value:
    datetime_value
      { $1 }
days_value:
    datetime_value
      { $1 }
hours_value:
    datetime_value
      { $1 }
minutes_value:
    datetime_value
      { $1 }
seconds_value:
    datetime_value
      { $1 }
(* Every field is an int printed with string_of_int, hence without zero
   padding. NOTE(review): confirm that losing leading zeros is acceptable. *)
datetime_value:
    unsigned_integer
      { string_of_int $1 }
(***********)
(* 5.4 NAMES AND IDENTIFIER *)
(* All names are a single IDENT token; only table names are wrapped in a
   constructor, the others stay plain strings. *)
column_name:
    IDENT
      { $1 }
table_name:
    IDENT
      { Table $1 }
schema_name:
    IDENT
      { $1 }
(****************************)
(* 6. SCALAR EXPRESSION *)
(* 6.3 VALUE EXPRESSION PRIMARY *)
(* A primary is either a parenthesized value_expression or one of the
   atomic forms listed in nonparenthesized_value_expression_primary. *)
value_expression_primary:
| parenthesized_value_expression { $1 }
| nonparenthesized_value_expression_primary { $1 }
(* The parentheses are dropped; only the inner expression is returned *)
parenthesized_value_expression:
| LEFT_PAREN value_expression RIGHT_PAREN { $2 }
nonparenthesized_value_expression_primary:
(* Literal built by unsigned_literal: numeric, string or datetime *)
| unsigned_value_specification { $1 }
(* Column name wrapped in Ref *)
| column_reference { Ref($1) }
(* NOTE(review): the value built by set_function_specification is discarded
   here and replaced by a fixed placeholder reference - confirm whether the
   aggregate should be propagated instead. *)
| set_function_specification { Ref("function") }
(***************************)
(* 6.4 VALUE SPECIFICATION / TARGET SPECIFICATION *)
unsigned_value_specification:
| unsigned_literal { $1 }
(*********************************************)
(* 6.6 IDENTIFIER CHAIN *)
(* Only single-part identifiers are accepted; the dotted form is disabled. *)
basic_identifier_chain:
    identifier_chain
      { $1 }
identifier_chain:
    IDENT
      { $1 }
(*| identifier_chain DOT IDENT {}*)
(************************)
(* 6.7 COLUMN REFERENCE *)
column_reference:
    basic_identifier_chain
      { $1 }
(************************)
(* 6.9 SET FUNCTION SPECIFICATION *)
set_function_specification:
    aggregate_function
      { $1 }
(*********************************)
(* 6.25 VALUE EXPRESSION *)
value_expression:
| common_value_expression { $1 }
(*| boolean_value_expression { $1 }*)
(* To avoid cycles, and therefore reduce/reduce conflicts, the
   *_value_expression nonterminals are not all listed as alternatives of
   common_value_expression. Instead they are cascaded:
     numeric_value_expression -> string_value_expression
       -> datetime_value_expression -> reference_value_expression
   Each level falls through to the next one at the point where it would
   otherwise use value_expression_primary. This way the parser still covers
   all of them from numeric_value_expression, without reduce/reduce
   conflicts. *)
common_value_expression:
| numeric_value_expression { $1 }
(* | string_value_expression { $1 } *)
(* | datetime_value_expression { $1 } *)
(* | reference_value_expression { $1 }*)
(* Bottom of the cascade: the only active rule that uses
   value_expression_primary. *)
reference_value_expression:
| value_expression_primary { $1 }
(*************************)
(* 6.26 NUMERIC VALUE EXPRESSION *)
(* Precedence is encoded by the rule layering: terms (times, divide) are
   built below sums (plus, minus). Both levels are left-recursive, so
   operators of the same level nest to the left. *)
numeric_value_expression:
| term { $1 }
| numeric_value_expression PLUS_SIGN term { Numeric($1, Plus, $3) }
| numeric_value_expression MINUS_SIGN term { Numeric($1, Minus, $3) }
term:
| factor { $1 }
| term ASTERISK factor { Numeric($1, Times, $3) }
| term SOLIDUS factor { Numeric($1, Divide, $3) }
factor:
| numeric_primary { $1 }
(* Optional leading sign, recorded with the Signed constructor *)
| sign numeric_primary { Signed($1, $2) }
numeric_primary:
(* Falls through to the string level of the cascade instead of using
   value_expression_primary directly. *)
| string_value_expression { $1 }
(* | value_expression_primary { $1 }*)
(*********************************)
(* 6.28 STRING VALUE EXPRESSION *)
string_value_expression :
| character_value_expression { $1 }
(* | blob_value_expression {} *)
character_value_expression:
(* Concatenation of the expression on the left with the factor on the right *)
| concatenation { $1 }
(* Otherwise the value of the single factor is passed through unchanged *)
| character_factor { $1 }
character_primary:
(* Falls through to the datetime level of the cascade instead of using
   value_expression_primary directly. *)
| datetime_value_expression { $1 }
(* | value_expression_primary { $1 }*)
| string_value_function { $1 }
(* Left-recursive, so a chain of concatenations nests to the left *)
concatenation:
| character_value_expression concatenation_operator character_factor { Concatenation($1, $3) }
character_factor :
| character_primary { $1 }
(********************************)
(* 6.29 STRING VALUE FUNCTION *)
(* SUBSTRING is the only string function defined by these rules. *)
string_value_function:
    character_value_function
      { $1 }
character_value_function:
    character_substring_function
      { $1 }
(* SUBSTRING ( source FROM start ): builds Substring from the source
   expression and the start position. *)
character_substring_function:
    SUBSTRING LEFT_PAREN character_value_expression FROM start_position RIGHT_PAREN
      { Substring ($3, $5) }
start_position:
    numeric_value_expression
      { $1 }
(*************************)
(* 6.30 DATETIME VALUE EXPRESSION *)
(* Pass-through chain: no datetime arithmetic is defined here, every level
   returns the value of the level below. *)
datetime_value_expression:
| datetime_term { $1 }
datetime_term :
| datetime_factor { $1 }
datetime_factor:
| datetime_primary { $1 }
datetime_primary:
(* Last step of the cascade: reaches value_expression_primary through
   reference_value_expression. *)
| reference_value_expression { $1 }
(* | datetime_value_function { $1 }*)
(**********************************)
(* 6.31 DATETIME VALUE FUNCTION *)
(*******************************)
(* 6.34 BOOLEAN VALUE EXPRESSION *)
(* Precedence is encoded by the rule layering: OR is built above AND, which
   is built above NOT. OR and AND are left-recursive and nest to the left. *)
boolean_value_expression:
| boolean_term { $1 }
| boolean_value_expression OR boolean_term { Or($1, $3) }
boolean_term:
| boolean_factor { $1 }
| boolean_term AND boolean_factor { And($1, $3) }
boolean_factor:
| boolean_test { $1 }
| NOT boolean_test { Not($2) }
boolean_test:
| boolean_primary { $1 }
(* NOTE(review): both IS forms return the operand unchanged; neither the
   truth value nor the NOT is recorded in the result. *)
| boolean_primary IS truth_value { $1 }
| boolean_primary IS NOT truth_value { $1 }
(* Recognised only; carries no semantic value *)
truth_value:
| TRUE {}
| FALSE {}
| UNKNOWN {}
boolean_primary :
| predicate { $1 }
| boolean_predicand { $1 }
(* Only the parenthesized form is active, so a bare column or literal is not
   accepted as a boolean operand. *)
boolean_predicand:
| parenthesized_boolean_value_expression { $1 }
(* | nonparenthesized_value_expression_primary { $1 }*)
parenthesized_boolean_value_expression:
| LEFT_PAREN boolean_value_expression RIGHT_PAREN { $2 }
(****************************)
(************************)
(* 7. QUERY EXPRESSION *)
(* 7.1 ROW VALUE CONSTRUCTOR *)
(* NOTE(review): not referenced by any active rule visible here; its only
   use is in a disabled alternative of row_value_predicand. *)
row_value_constructor_predicand:
| common_value_expression { $1 }
(*************************)
(* 7.2 ROW VALUE EXPRESSION *)
row_value_expression :
| row_value_special_case { $1 }
row_value_predicand:
| row_value_special_case { $1 }
(* | row_value_constructor_predicand { $1 } *)
(* Operands of predicates are limited to literals, column references and
   set functions; parenthesized or arithmetic expressions are not accepted. *)
row_value_special_case :
| nonparenthesized_value_expression_primary { $1 }
(***********************)
(* 7.3 TABLE VALUE CONSTRUCTOR *)
(*******************************)
(* 7.4 TABLE EXPRESSION *)
(* Builds TableExpression from three optional parts, in this order:
   from clause, where clause, group by clause. Every legal combination of
   the optional clauses is spelled out as its own alternative.
   NOTE(review): a having_clause is parsed in four alternatives but its
   value is never stored; those alternatives build the same node as the
   corresponding form without HAVING. *)
table_expression:
(* Empty alternative: a query with no FROM clause at all *)
| { TableExpression(None, None, None) }
| from_clause { TableExpression(Some($1), None, None) }
| from_clause where_clause { TableExpression(Some($1), Some($2), None) }
| from_clause where_clause group_by_clause { TableExpression(Some($1), Some($2), Some($3)) }
| from_clause where_clause group_by_clause having_clause { TableExpression(Some($1), Some($2), Some($3)) }
| from_clause where_clause having_clause { TableExpression(Some($1), Some($2), None) }
| from_clause group_by_clause { TableExpression(Some($1), None, Some($2)) }
| from_clause group_by_clause having_clause { TableExpression(Some($1), None, Some($2)) }
| from_clause having_clause { TableExpression(Some($1), None, None) }
(************************)
(* 7.5 FROM CLAUSE *)
from_clause :
| FROM table_reference_list { $2 }
(* Comma-separated table references. Each new reference is consed onto the
   front, so the resulting list is in reverse source order (last table
   first). NOTE(review): confirm that consumers expect this order. *)
table_reference_list :
| table_reference { [$1] }
| table_reference_list COMMA table_reference { $3::$1 }
(*******************)
(* 7.6 TABLE REFERENCE *)
(* A table reference is either a plain table name or a joined table; the
   value of the chosen alternative is passed through unchanged. *)
table_reference:
    table_primary_or_joined_table
      { $1 }
(* | table_primary_or_joined_table sample_clause { $1 } *)
table_primary_or_joined_table:
    table_primary
      { $1 }
  | joined_table
      { $1 }
table_primary:
    table_or_query_name
      { $1 }
table_or_query_name:
    table_name
      { $1 }
(***********************)
(* 7.7 JOINED TABLE *)
(* Dispatches to the four supported join forms *)
joined_table :
| cross_join { $1 }
| qualified_join { $1 }
| natural_join { $1 }
| union_join { $1 }
(* Join arguments are: left table, join kind, right table, optional
   condition. A cross join has no condition. *)
cross_join:
| table_reference CROSS JOIN table_primary { Join($1, Cross, $4, None) }
(* Join with an explicit ON condition, with or without a join type.
   Builds Join from: left table, join kind, right table, Some condition. *)
qualified_join:
(* A bare JOIN is an inner join: SQL makes INNER implicit when no join type
   is written, so it must not be recorded as a left outer join. *)
| table_reference JOIN table_reference join_specification { Join($1, Inner, $3, Some($4)) }
(* Explicit join type: the kind comes from join_type *)
| table_reference join_type JOIN table_reference join_specification { Join($1, $2, $4, Some($5)) }
(* Natural and union joins carry no condition *)
natural_join:
| table_reference NATURAL JOIN table_primary { Join($1, Natural, $4, None) }
(* NOTE(review): the join_type written after NATURAL is parsed but not
   recorded; the result is the same as for a plain NATURAL JOIN. *)
| table_reference NATURAL join_type JOIN table_primary { Join($1, Natural, $5, None) }
union_join:
| table_reference UNION JOIN table_primary { Join($1, Union, $4, None) }
(* Only the ON form is active; the USING form is disabled *)
join_specification:
| join_condition { $1 }
(* | named_columns_join {} *)
join_condition:
| ON search_condition { $2 }
join_type:
| INNER { Inner }
| outer_join_type { $1 }
(* The OUTER keyword is optional and does not change the result *)
| outer_join_type OUTER { $1 }
outer_join_type:
| LEFT { Left }
| RIGHT { Right }
| FULL { Full }
(*
named_columns_join:
| USING LEFT_PAREN join_column_list RIGHT_PAREN {}
join_column_list :
| column_name_list {}
*)
(********************)
(* 7.8 WHERE CLAUSE *)
(* Returns the search condition that follows the WHERE keyword *)
where_clause :
| WHERE search_condition { $2 }
(********************)
(* 7.9 GROUP BY CLAUSE *)
(* Builds Group from an optional set quantifier and the grouping elements *)
group_by_clause:
| GROUP BY grouping_element_list { Group(None, Some($3)) }
| GROUP BY set_quantifier grouping_element_list { Group(Some($3), Some($4)) }
(* Each new element is consed onto the front, so the list is in reverse
   source order. *)
grouping_element_list :
| grouping_element { [$1] }
| grouping_element_list COMMA grouping_element { $3::$1 }
grouping_element:
| ordinary_grouping_set { $1 }
(* Only a single column reference is accepted; the parenthesized list form
   is disabled. *)
ordinary_grouping_set :
| grouping_column_reference { $1 }
(*| LEFT_PAREN grouping_column_reference_list RIGHT_PAREN { $2 }*)
grouping_column_reference:
| column_reference { Ref($1) }
(*| column_reference collate_clause {}*)
(*
grouping_column_reference_list :
| grouping_column_reference { [
*)
(***********************)
(* 7.10 HAVING CLAUSE *)
(* Returns the search condition that follows the HAVING keyword *)
having_clause :
| HAVING search_condition { $2 }
(**********************)
(* 7.11 WINDOW CLAUSE *)
(**********************)
(* 7.12 QUERY SPECIFICATION *)
(* Builds Select from the select list and the table expression *)
query_specification :
| SELECT select_list table_expression { Select($2, $3) }
(* NOTE(review): the set_quantifier (ALL or DISTINCT) is parsed but not
   stored in the Select node. *)
| SELECT set_quantifier select_list table_expression { Select($3, $4) }
(* Either a lone asterisk or a comma-separated list of derived columns *)
select_list :
| ASTERISK { [Asterisk] }
| select_sublist { $1 }
(*
select_sublist :
| IDENT { [Column($1)] }
| select_sublist COMMA IDENT { Column($3)::$1 }
*)
(* Each new element is consed onto the front, so the list is in reverse
   source order (last selected column first). *)
select_sublist :
| select_sublist_element { [$1] }
| select_sublist COMMA select_sublist_element { $3::$1 }
select_sublist_element :
| derived_column { $1 }
(* | qualified_asterisk {} *)
(*
qualified_asterisk:
| asterisked_identifier_chain DOT ASTERISK {}
asterisked_identifier_chain:
| asterisked_identifier {}
| asterisked_identifier_chain DOT asterisked_identifier {}
asterisked_identifier :
| ASTERISK {}
*)
(* A selected expression with an optional alias *)
derived_column:
| value_expression { Column($1, None) }
| value_expression as_clause { Column($1, Some($2)) }
(* The AS keyword is mandatory; the form without AS is disabled *)
as_clause :
| AS column_name { As($2) }
(*| column_name { As($1) }*)
(****************************)
(* 7.13 QUERY EXPRESSION *)
(*************************)
(* 7.14 SEARCH OR CYCLE CLAUSE *)
(*******************************)
(* 7.15 SUBQUERY *)
(*****************)
(***********************)
(* 8. PREDICATES *)
(* 8.1 PREDICATE *)
(* The four supported predicate families; the value of the matching
   alternative is passed through unchanged. *)
predicate:
    comparison_predicate
      { $1 }
  | in_predicate
      { $1 }
  | between_predicate
      { $1 }
  | like_predicate
      { $1 }
(*****************)
(* 8.2 COMPARISON PREDICATE *)
(* Condition pairs the left operand with the operator and right operand *)
comparison_predicate:
    row_value_predicand comparison_predicate_part2
      { Condition ($1, $2) }
comparison_predicate_part2:
    comp_op row_value_predicand
      { Comparison ($1, $2) }
(* Single-token and two-token comparison operators *)
comp_op:
    EQUALS_OPERATOR
      { Equals }
  | not_equals_operator
      { NotEquals }
  | LESS_THAN_OPERATOR
      { LessThan }
  | GREATER_THAN_OPERATOR
      { GreaterThan }
  | less_than_or_equals_operator
      { LessEquals }
  | greater_than_or_equals_operator
      { GreaterEquals }
(****************************)
(* 8.3 BETWEEN PREDICATE *)
(* Condition pairs the tested operand with the Between or NotBetween bounds *)
between_predicate :
| row_value_predicand between_predicate_part2 { Condition($1, $2) }
between_predicate_part2 :
| BETWEEN between_symetry row_value_predicand AND row_value_predicand { Between($3, $5) }
| NOT BETWEEN between_symetry row_value_predicand AND row_value_predicand {NotBetween($4, $6) }
(* Optional ASYMMETRIC or SYMMETRIC keyword. It carries no semantic value,
   so the choice is not recorded in the result. *)
between_symetry :
| {}
| ASYMMETRIC {}
| SYMMETRIC {}
(*************************)
(* 8.4 IN PREDICATE *)
in_predicate :
| row_value_predicand in_predicate_part2 { Condition($1, $2) }
in_predicate_part2:
| IN in_predicate_value { In($2) }
| NOT IN in_predicate_value { NotIn($3) }
(* Only a parenthesized value list is accepted *)
in_predicate_value:
| LEFT_PAREN in_value_list RIGHT_PAREN { $2 }
(* Each new value is consed onto the front, so the list is in reverse
   source order. *)
in_value_list:
| row_value_expression { [$1] }
| in_value_list COMMA row_value_expression { $3::$1 }
(********************)
(* 8.5 LIKE PREDICATE *)
like_predicate :
| character_like_predicate { $1 }
character_like_predicate :
| row_value_predicand character_like_predicate_part2 { Condition($1, $2) }
character_like_predicate_part2:
| LIKE character_pattern { Like($2) }
| NOT LIKE character_pattern { NotLike($3) }
(* The pattern is any character value expression *)
character_pattern :
| character_value_expression { $1 }
(**********************)
(* 8.6 SIMILAR PREDICATE *)
(*************************)
(* 8.19 SEARCH CONDITION *)
(* A search condition is a boolean value expression, passed through as is *)
search_condition:
| boolean_value_expression { $1 }
(*************************)
(*****************)
(* 10 ADDITIONAL COMMON ELEMENTS *)
(* 10.9 AGGREGATE FUNCTION *)
(* Builds Aggregate from a Function node and an optional filter condition.
   COUNT applied to an asterisk is handled separately: its argument is a Ref
   to the asterisk and it has no set quantifier. *)
aggregate_function:
| COUNT LEFT_PAREN ASTERISK RIGHT_PAREN { Aggregate(Function(Count, None, Ref("*")), None) }
| COUNT LEFT_PAREN ASTERISK RIGHT_PAREN filter_clause { Aggregate(Function(Count, None, Ref("*")), Some($5)) }
| general_set_function { Aggregate($1, None) }
| general_set_function filter_clause { Aggregate($1, Some($2)) }
(* Function arguments are: operation, optional set quantifier, argument *)
general_set_function:
| set_function_type LEFT_PAREN value_expression RIGHT_PAREN { Function($1, None, $3) }
| set_function_type LEFT_PAREN set_quantifier value_expression RIGHT_PAREN { Function($1, Some($3), $4) }
set_function_type:
| computationnal_operation { $1 }
set_quantifier :
| ALL { All }
| DISTINCT { Distinct }
(* FILTER ( WHERE condition ): returns the condition only *)
filter_clause :
| FILTER LEFT_PAREN WHERE search_condition RIGHT_PAREN { $4 }
(* Maps each set function keyword to its constructor *)
computationnal_operation:
| AVG { Avg }
| MAX { Max }
| MIN { Min }
| SUM { Sum }
| COUNT { Count }
(***************************)
(*********************************)
(* 11.1 SCHEMA DEFINITION *)
(* CREATE SCHEMA name *)
schema_definition:
    CREATE SCHEMA schema_name_clause
      { CreateSchema $3 }
schema_name_clause:
    schema_name
      { $1 }
(**************************)
(* 11.2 DROP SCHEMA STATEMENT *)
drop_schema_statement:
    DROP SCHEMA schema_name
      { DropSchema $3 }
(******************************)
(* 11.3 TABLE DEFINITION *)
(* CREATE TABLE name, optionally with a GLOBAL or LOCAL TEMPORARY scope
   placed between CREATE and TABLE. *)
table_definition:
    CREATE TABLE table_name
      { CreateTable (None, $3) }
  | CREATE table_scope TABLE table_name
      { CreateTable (Some $2, $4) }
table_scope:
    global_or_local TEMPORARY
      { $1 }
global_or_local:
    GLOBAL
      { Global }
  | LOCAL
      { Local }
(*************************)
(* 11.18 DROP COLUMN DEFINITION *)
(* The COLUMN keyword is optional *)
drop_column_definition:
    DROP column_name
      { DropColumn $2 }
  | DROP COLUMN column_name
      { DropColumn $3 }
(********************************)
(* 11.21 DROP TABLE STATEMENT *)
drop_table_statement:
    DROP TABLE table_name
      { DropTable $3 }
(******************************)
(* 14 DATA MANIPULATION *)
(* 14.9 MERGE STATEMENT *)
(************************)
(************************)