%{
(* Header: Define the AST type *)
open Ast
%}

(* Token declarations. Tokens carrying a payload declare its OCaml type. *)

(* Data-definition keywords *)
%token CREATE SCHEMA TABLE COLUMN
%token DROP
%token LOCAL GLOBAL TEMPORARY
(* Query and predicate keywords *)
%token SELECT ALL DISTINCT FROM WHERE HAVING BETWEEN IN LIKE IS
(* Join keywords *)
%token LEFT RIGHT FULL INNER OUTER
%token CROSS NATURAL UNION JOIN
(* Single-character comparison operators; two-character operators are
   assembled from these in the rules section *)
%token GREATER_THAN_OPERATOR LESS_THAN_OPERATOR EQUALS_OPERATOR
(* Set functions *)
%token MAX MIN SUM COUNT AVG
%token SUBSTRING
%token ASYMMETRIC SYMMETRIC
%token <string> IDENT
%token COMMA PIPE
%token LEFT_PAREN RIGHT_PAREN
%token ASTERISK SOLIDUS
%token AS ON GROUP BY FILTER
%token OR AND NOT
%token TRUE FALSE UNKNOWN
%token PLUS_SIGN MINUS_SIGN
%token DATE TIME TIMESTAMP
%token <int> INTEGER
%token <float> FLOAT
%token QUOTE COLON
%token EOF
%start main
(* The header section (and therefore its open directive) is not copied into
   the generated interface file, so the type of the start symbol has to be
   written fully qualified. *)
%type <Ast.query> main

%%

(* Entry point of the parser: exactly one statement followed by EOF.
   The semantic value of the matched statement is returned unchanged. *)
main:
  | query_specification EOF { $1 }
  | schema_definition EOF { $1 }
  | table_definition EOF { $1 }
  | drop_schema_statement EOF { $1 }
  | drop_table_statement EOF { $1 }
  | drop_column_definition EOF { $1 }

(* 5.2 TOKEN / SEPARATOR *)

(* Two-character operators, each assembled from two single-character tokens.
   All four actions return unit; callers only use them as markers. *)

(* || : two consecutive PIPE tokens *)
concatenation_operator:
  | PIPE PIPE {}

(* <> : LESS_THAN_OPERATOR followed by GREATER_THAN_OPERATOR *)
not_equals_operator :
  | LESS_THAN_OPERATOR GREATER_THAN_OPERATOR {}

(* <= : LESS_THAN_OPERATOR followed by EQUALS_OPERATOR *)
less_than_or_equals_operator:
  | LESS_THAN_OPERATOR EQUALS_OPERATOR {}

(* >= : GREATER_THAN_OPERATOR followed by EQUALS_OPERATOR *)
greater_than_or_equals_operator:
  | GREATER_THAN_OPERATOR EQUALS_OPERATOR {}

(*********************)

(* 5.3 LITERAL *)
(* Any literal: an optionally signed number, or a general literal.
   NOTE(review): no other rule in this file references literal. *)
literal :
  | signed_numeric_literal { $1 }
  | general_literal { $1 }

(* A literal without a leading sign. *)
unsigned_literal:
  | unsigned_numeric_literal { $1 }
  | general_literal { $1 }

(* Non-numeric literals: character strings and date/time values. *)
general_literal:
  | character_string_literal { $1 }
  | datetime_literal { $1 }

(* A quoted string. The text between the two QUOTE tokens must arrive as a
   single IDENT token; its payload is wrapped in StringLiteral. *)
character_string_literal:
  | QUOTE IDENT QUOTE { StringLiteral($2) }


(* A numeric literal with an optional leading sign; a signed one is wrapped
   in Signed(sign, literal). *)
signed_numeric_literal:
  | sign unsigned_numeric_literal { Signed($1,$2) }
  | unsigned_numeric_literal { $1 }

(* Only exact numerics are supported; the value is passed through. *)
unsigned_numeric_literal :
  | exact_numeric_literal { $1 }

(* Wraps the payload of a FLOAT or INTEGER token in the matching constructor. *)
exact_numeric_literal:
  | FLOAT { FloatLiteral($1) }
  | INTEGER { IntegerLiteral($1) }

(* Maps a sign token to the Plus / Minus constructor. *)
sign:
  | PLUS_SIGN { Plus }
  | MINUS_SIGN { Minus }

(* Raw int payload of an INTEGER token (no AST wrapper). *)
unsigned_integer:
  | INTEGER { $1 }

(* A DATE, TIME or TIMESTAMP literal. *)
datetime_literal:
  | date_literal { $1 }
  | time_literal { $1 }
  | timestamp_literal { $1 }

(* DATE keyword followed by a quoted date; yields DateLiteral of the string. *)
date_literal:
  | DATE date_string { DateLiteral($2) }

(* TIME keyword followed by a quoted time; yields TimeLiteral of the string. *)
time_literal:
  | TIME time_string { TimeLiteral($2) }

(* TIMESTAMP keyword followed by a quoted timestamp; yields TimestampLiteral. *)
timestamp_literal:
  | TIMESTAMP timestamp_string { TimestampLiteral($2) }

(* The three *_string rules strip the surrounding QUOTE tokens. *)
date_string:
  | QUOTE unquoted_date_string QUOTE { $2 }

time_string:
  | QUOTE unquoted_time_string QUOTE { $2 }

timestamp_string:
  | QUOTE unquoted_timestamp_string QUOTE { $2 }

(* year - month - day, rebuilt as a string joined with dashes. *)
date_value:
  | years_value MINUS_SIGN months_value MINUS_SIGN days_value { String.concat "-" [$1;$3;$5] }

(* hour : minute : second, rebuilt as a string joined with colons. *)
time_value:
  | hours_value COLON minutes_value COLON seconds_value { String.concat ":" [$1;$3;$5] }

unquoted_date_string:
  | date_value { $1 }

unquoted_time_string:
  | time_value { $1 }

(* A date immediately followed by a time; the two strings are joined with a
   single space. *)
unquoted_timestamp_string:
  | unquoted_date_string unquoted_time_string { $1 ^ " " ^$2 }

(* Every date/time component is a plain unsigned integer; the rules below
   only give each position a descriptive name. *)
years_value :
  | datetime_value { $1 }

months_value :
  | datetime_value { $1 }

days_value :
  | datetime_value { $1 }

hours_value :
  | datetime_value { $1 }

minutes_value :
  | datetime_value { $1 }

seconds_value:
  | datetime_value { $1 }

(* Converts the integer component to text with string_of_int, so the result
   carries no zero padding. *)
datetime_value :
  | unsigned_integer { string_of_int $1 }

(***********)

(* 5.4 NAMES AND IDENTIFIER *)

(* A column name: the raw identifier string. *)
column_name :
  | IDENT { $1 }

(* A table name: the identifier wrapped in the Table constructor. *)
table_name :
  | IDENT { Table($1) }

(* A schema name: the raw identifier string. *)
schema_name:
  | IDENT { $1 }

(****************************)


(* 6. SCALAR EXPRESSION *)

(* 6.3 VALUE EXPRESSION PRIMARY *)

(* A primary value expression, with or without surrounding parentheses. *)
value_expression_primary:
  | parenthesized_value_expression { $1 }
  | nonparenthesized_value_expression_primary { $1 }

(* Parentheses are dropped; only the inner expression is kept. *)
parenthesized_value_expression:
  | LEFT_PAREN value_expression RIGHT_PAREN { $2 }

(* A literal, a column reference (wrapped in Ref) or a set function.
   NOTE(review): the set-function alternative discards the parsed aggregate
   and returns a placeholder Ref whose name is the fixed text function. *)
nonparenthesized_value_expression_primary:
  (* Return StringLiteral, TimeLiteral, DateLiteral, TimestampLiteral *)
  | unsigned_value_specification { $1 } 
  | column_reference { Ref($1) }
  | set_function_specification { Ref("function") } 

(***************************)

(* 6.4 VALUE SPECIFICATION / TARGET SPECIFICATION *)

(* An unsigned value specification is just an unsigned literal here. *)
unsigned_value_specification:
  | unsigned_literal { $1 }

(*********************************************)

(* 6.6 IDENTIFIER CHAIN *)

(* Alias for identifier_chain. *)
basic_identifier_chain:
  | identifier_chain { $1 }

(* Currently a single identifier; the dotted form is commented out below,
   so qualified names are not accepted. *)
identifier_chain:
  | IDENT { $1 }
  (*| identifier_chain DOT IDENT  {}*)


(************************)

(* 6.7 COLUMN REFERENCE *)

(* A column reference: yields the identifier string of the chain. *)
column_reference:
  | basic_identifier_chain { $1 }

(************************)

(* 6.9 SET FUNCTION SPECIFICATION *)

(* A set function is an aggregate function; its value is passed through. *)
set_function_specification:
  | aggregate_function { $1 }

(*********************************)

(* 6.25 VALUE EXPRESSION *)

(* A value expression. Only the common (non-boolean) form is enabled. *)
value_expression:
  | common_value_expression { $1 }
  (*| boolean_value_expression { $1 }*)

(* Listing every *_value_expression as an alternative here would create
   cycles in the grammar and therefore reduce/reduce conflicts.
   Instead the expression kinds are cascaded, each one delegating to the
   next in its innermost rule where value_expression_primary would
   otherwise appear:
     numeric_value_expression -> string_value_expression
       -> datetime_value_expression -> reference_value_expression
   Entering at numeric_value_expression therefore covers all of them. *)
common_value_expression:
  | numeric_value_expression { $1 }
(*  | string_value_expression { $1 } *)
(*  | datetime_value_expression { $1 } *)
(*  | reference_value_expression { $1 }*)

(* Last link of the cascade: a primary value expression. *)
reference_value_expression:
  | value_expression_primary { $1 }

(*************************)

(* 6.26 NUMERIC VALUE EXPRESSION *)

(* Additive level. Left recursion makes + and - left associative; the result
   is Numeric(left, operator, right). *)
numeric_value_expression:
  | term { $1 }
  | numeric_value_expression PLUS_SIGN term { Numeric($1, Plus, $3) }
  | numeric_value_expression MINUS_SIGN term { Numeric($1, Minus, $3) }

(* Multiplicative level; binds tighter than the additive level and is also
   left associative. *)
term:
  | factor { $1 }
  | term ASTERISK factor { Numeric($1, Times, $3) }
  | term SOLIDUS factor { Numeric($1, Divide, $3) }

(* An operand with an optional unary sign, wrapped in Signed when present. *)
factor:
  | numeric_primary { $1 }
  | sign numeric_primary { Signed($1, $2) }

(* Delegates to the string level of the expression cascade instead of going
   straight to value_expression_primary. *)
numeric_primary:
  | string_value_expression { $1 }
(*  | value_expression_primary { $1 }*)

(*********************************)

(* 6.28 STRING VALUE EXPRESSION *)

(* String expressions; only the character form is enabled. *)
string_value_expression :
  | character_value_expression { $1 } 
  (* | blob_value_expression {} *)

(* Either a concatenation or a single character factor. *)
character_value_expression:
(* Return Concatenation of char_expression * expression_primary *)
  | concatenation { $1 } 
(* Return Char of string *)
  | character_factor { $1 }

(* Delegates to the datetime level of the expression cascade, or matches a
   string value function. *)
character_primary:
  | datetime_value_expression { $1 }
(*  | value_expression_primary { $1 }*)
  | string_value_function { $1 } 

(* Left-associative concatenation: Concatenation(left, right). *)
concatenation:
  | character_value_expression concatenation_operator character_factor { Concatenation($1, $3) }

character_factor :
  | character_primary { $1 }

(********************************)

(* 6.29 STRING VALUE FUNCTION *)

(* String-valued functions; only SUBSTRING is supported. *)
string_value_function:
  | character_value_function { $1 } 

character_value_function :
  | character_substring_function { $1 }

(* SUBSTRING ( expression FROM start ): yields Substring(expression, start).
   No length (FOR) part is accepted. *)
character_substring_function :
  | SUBSTRING LEFT_PAREN character_value_expression FROM start_position RIGHT_PAREN { Substring($3, $5) }

(* The start position is any numeric value expression. *)
start_position:
  | numeric_value_expression { $1 }
(*************************)

(* 6.30 DATETIME VALUE EXPRESSION *)

(* Datetime level of the expression cascade. Each rule has a single
   pass-through alternative; no datetime arithmetic is parsed. *)
datetime_value_expression:
  | datetime_term { $1 }

datetime_term :
  | datetime_factor { $1 }

datetime_factor:
  | datetime_primary { $1 }

(* Delegates to the last level of the cascade. *)
datetime_primary:
  | reference_value_expression { $1 }
(*  | datetime_value_function { $1 }*)

(**********************************)

(* 6.31 DATETIME VALUE FUNCTION *)

(*******************************)

(* 6.34 BOOLEAN VALUE EXPRESSION *)

(* Disjunction level: left-associative OR, yielding Or(left, right). *)
boolean_value_expression:
  | boolean_term { $1 }
  | boolean_value_expression OR boolean_term { Or($1, $3) }

(* Conjunction level: left-associative AND, binding tighter than OR. *)
boolean_term:
  | boolean_factor { $1 }
  | boolean_term AND boolean_factor { And($1, $3) }

(* Optional negation, wrapped in Not. *)
boolean_factor:
  | boolean_test { $1 }
  | NOT boolean_test { Not($2) }

(* NOTE(review): the IS [NOT] truth_value suffix is accepted syntactically,
   but both actions return the primary unchanged, so the test itself is not
   represented in the result. *)
boolean_test:
  | boolean_primary { $1 }
  | boolean_primary IS truth_value { $1 }
  | boolean_primary IS NOT truth_value { $1 }

(* Truth values carry no semantic value (unit). *)
truth_value:
  | TRUE {}
  | FALSE {}
  | UNKNOWN {}

(* A predicate or a parenthesized boolean expression. *)
boolean_primary :
  | predicate { $1 }
  | boolean_predicand { $1 }

boolean_predicand:
  | parenthesized_boolean_value_expression { $1 }
(*  | nonparenthesized_value_expression_primary { $1 }*)

(* Parentheses are dropped; only the inner expression is kept. *)
parenthesized_boolean_value_expression:
  | LEFT_PAREN boolean_value_expression RIGHT_PAREN { $2 }

(****************************)

(************************)

(* 7. QUERY EXPRESSION *)

(* 7.1 ROW VALUE CONSTRUCTOR *)

(* NOTE(review): its only use, in row_value_predicand, is commented out, so
   no active rule in this file references this nonterminal. *)
row_value_constructor_predicand:
  | common_value_expression { $1 }

(*************************)

(* 7.2 ROW VALUE EXPRESSION *)

(* Row values are restricted to the special case below. *)
row_value_expression :
  | row_value_special_case { $1 }

(* Operand of a predicate. With the second alternative commented out, only
   literals, column references and set functions are accepted. *)
row_value_predicand: 
  | row_value_special_case { $1 }
(*  | row_value_constructor_predicand { $1 } *)

row_value_special_case :
  | nonparenthesized_value_expression_primary { $1 }

(***********************)

(* 7.3 TABLE VALUE CONSTRUCTOR *)

(*******************************)

(* 7.4 TABLE EXPRESSION *)

(* Optional FROM / WHERE / GROUP BY / HAVING clauses, enumerated explicitly.
   The result is TableExpression(from, where, group_by), each an option.
   NOTE(review): TableExpression has no slot for HAVING; every alternative
   that contains a having_clause parses it and then drops its value. *)
table_expression:
  | { TableExpression(None, None, None) }
  | from_clause { TableExpression(Some($1), None, None) }
  | from_clause where_clause { TableExpression(Some($1), Some($2), None) }
  | from_clause where_clause group_by_clause { TableExpression(Some($1), Some($2), Some($3)) }
  | from_clause where_clause group_by_clause having_clause { TableExpression(Some($1), Some($2), Some($3)) }
  | from_clause where_clause having_clause { TableExpression(Some($1), Some($2), None) }
  | from_clause group_by_clause { TableExpression(Some($1), None, Some($2)) }
  | from_clause group_by_clause having_clause { TableExpression(Some($1), None, Some($2)) }
  | from_clause having_clause { TableExpression(Some($1), None, None) }

(************************)

(* 7.5 FROM CLAUSE *)

(* FROM followed by a comma-separated list of table references. *)
from_clause :
  | FROM table_reference_list { $2 }

(* Each new reference is consed onto the front of the accumulated list, so
   the resulting list is in reverse source order. *)
table_reference_list :
  | table_reference { [$1] }
  | table_reference_list COMMA table_reference { $3::$1 }

(*******************)

(* 7.6 TABLE REFERENCE *)

(* A table reference; the sample clause variant is disabled. *)
table_reference :
  | table_primary_or_joined_table { $1 }
(*  | table_primary_or_joined_table sample_clause { $1 } *)

(* Either a plain table or a join. *)
table_primary_or_joined_table:
  | table_primary { $1 }
  | joined_table { $1 }

(* A plain table, given by name only (no alias, no subquery). *)
table_primary :
  | table_or_query_name { $1 }

table_or_query_name:
  | table_name { $1 }

(***********************)

(* 7.7 JOINED TABLE *)

(* The four supported join forms. *)
joined_table :
  | cross_join { $1 }
  | qualified_join { $1 } 
  | natural_join { $1 }
  | union_join { $1 }

(* left CROSS JOIN right: Join(left, Cross, right, None). *)
cross_join:
  | table_reference CROSS JOIN table_primary { Join($1, Cross, $4, None) }

(* A join with an explicit condition: Join(left, kind, right, Some condition).
   When no join type is written, the join is an inner join, as the SQL
   standard specifies for an unqualified JOIN. *)
qualified_join:
  | table_reference JOIN table_reference join_specification { Join($1, Inner, $3, Some($4)) }
  | table_reference join_type JOIN table_reference join_specification { Join($1, $2, $4, Some($5)) }

(* left NATURAL [join_type] JOIN right: Join(left, Natural, right, None).
   NOTE(review): in the second alternative the join_type is parsed but its
   value is not used in the result. *)
natural_join:
  | table_reference NATURAL JOIN table_primary { Join($1, Natural, $4, None) }
  | table_reference NATURAL join_type JOIN table_primary { Join($1, Natural, $5, None) }

(* left UNION JOIN right: Join(left, Union, right, None). *)
union_join:
  | table_reference UNION JOIN table_primary { Join($1, Union, $4, None) }

(* Only the ON form is supported; the USING form is disabled. *)
join_specification:
  | join_condition { $1 }
  (* | named_columns_join {} *)

(* ON followed by a search condition; yields the condition. *)
join_condition:
  | ON search_condition { $2 }

(* INNER, or an outer join type with an optional OUTER keyword. *)
join_type:
  | INNER { Inner }
  | outer_join_type { $1 }
  | outer_join_type OUTER { $1 }

outer_join_type:
  | LEFT { Left }
  | RIGHT { Right }
  | FULL { Full }

(*
named_columns_join:
  | USING LEFT_PAREN join_column_list RIGHT_PAREN {}

join_column_list :
  | column_name_list {}
*)

(********************)

(* 7.8 WHERE CLAUSE *)

(* WHERE followed by a search condition; yields the condition. *)
where_clause : 
  | WHERE search_condition { $2 }

(********************)

(* 7.9 GROUP BY CLAUSE *)

(* GROUP BY [ALL | DISTINCT] elements:
   Group(optional set quantifier, Some element list). *)
group_by_clause:
  | GROUP BY grouping_element_list { Group(None, Some($3)) }
  | GROUP BY set_quantifier grouping_element_list { Group(Some($3), Some($4)) }

(* Each new element is consed onto the front of the accumulated list, so
   the resulting list is in reverse source order. *)
grouping_element_list :
  | grouping_element { [$1] }
  | grouping_element_list COMMA grouping_element { $3::$1 }

grouping_element:
  | ordinary_grouping_set { $1 }

(* Only a single column reference; the parenthesized list is disabled. *)
ordinary_grouping_set :
  | grouping_column_reference { $1 }
  (*| LEFT_PAREN grouping_column_reference_list RIGHT_PAREN { $2 }*)

(* A column reference wrapped in Ref. *)
grouping_column_reference:
  | column_reference { Ref($1) }
  (*| column_reference collate_clause {}*)
(*
grouping_column_reference_list :
  | grouping_column_reference { [
*)
(***********************)

(* 7.10 HAVING CLAUSE *)

(* HAVING followed by a search condition; yields the condition. *)
having_clause :
  | HAVING search_condition { $2 }

(**********************)

(* 7.11 WINDOW CLAUSE *)

(**********************)

(* 7.12 QUERY SPECIFICATION *)

(* SELECT [ALL | DISTINCT] select_list table_expression:
   Select(select list, table expression).
   NOTE(review): in the second alternative the set quantifier is parsed but
   its value is not used in the result. *)
query_specification :
  | SELECT select_list table_expression { Select($2, $3) }
  | SELECT set_quantifier select_list table_expression { Select($3, $4) }

(* Either a lone asterisk, yielding the one-element list [Asterisk], or a
   list of derived columns. *)
select_list :
  | ASTERISK { [Asterisk] }
  | select_sublist { $1 }

(*
select_sublist :
  | IDENT { [Column($1)] }
  | select_sublist COMMA IDENT { Column($3)::$1 }
*)

(* Comma-separated select items. Each new item is consed onto the front of
   the accumulated list, so the resulting list is in reverse source order. *)
select_sublist :
  | select_sublist_element { [$1] }
  | select_sublist COMMA select_sublist_element { $3::$1 }

(* Only derived columns; the qualified asterisk form is disabled. *)
select_sublist_element :
  | derived_column { $1 }
(*  | qualified_asterisk {} *)

(*
qualified_asterisk:
  | asterisked_identifier_chain DOT ASTERISK {}

asterisked_identifier_chain:
  | asterisked_identifier {}
  | asterisked_identifier_chain DOT asterisked_identifier {}

asterisked_identifier :
  | ASTERISK {}
*)

(* A selected expression with an optional alias:
   Column(expression, optional alias). *)
derived_column:
  | value_expression { Column($1, None) }
  | value_expression as_clause { Column($1, Some($2)) }

(* The AS keyword is mandatory; the form without AS is commented out. *)
as_clause :
  | AS column_name { As($2) }
  (*| column_name { As($1) }*)

(****************************)

(* 7.13 QUERY EXPRESSION *)

(*************************)

(* 7.14 SEARCH OR CYCLE CLAUSE *)

(*******************************)

(* 7.15 SUBQUERY *)
(*****************)

(***********************)

(* 8. PREDICATES *)

(* 8.1 PREDICATE *)

(* The supported predicate kinds. All four begin with a row_value_predicand
   and are told apart by the tokens that follow it. *)
predicate :
  | comparison_predicate { $1 }
  | in_predicate { $1 } 
  | between_predicate { $1 }
  | like_predicate { $1 }

(*****************)

(* 8.2 COMPARISON PREDICATE *)

(* left op right: Condition(left, Comparison(op, right)). *)
comparison_predicate :
  | row_value_predicand comparison_predicate_part2 { Condition($1, $2) }

comparison_predicate_part2:
  | comp_op row_value_predicand { Comparison($1, $2) }

(* Maps an operator, single token or two-token sequence, to its constructor. *)
comp_op :
  | EQUALS_OPERATOR { Equals }
  | not_equals_operator { NotEquals }
  | LESS_THAN_OPERATOR { LessThan }
  | GREATER_THAN_OPERATOR { GreaterThan }
  | less_than_or_equals_operator { LessEquals }
  | greater_than_or_equals_operator { GreaterEquals }

(****************************)

(* 8.3 BETWEEN PREDICATE *)

(* value [NOT] BETWEEN low AND high:
   Condition(value, Between(low, high)) or the NotBetween equivalent. *)
between_predicate :
  | row_value_predicand between_predicate_part2 { Condition($1, $2) }

between_predicate_part2 :
  | BETWEEN between_symetry row_value_predicand AND row_value_predicand { Between($3, $5) }
  | NOT BETWEEN between_symetry row_value_predicand AND row_value_predicand {NotBetween($4, $6) }

(* Optional ASYMMETRIC / SYMMETRIC keyword. It yields unit, and the actions
   above do not use it, so the keyword is not recorded in the result. *)
between_symetry :
  | {} 
  | ASYMMETRIC {}
  | SYMMETRIC {}

(*************************)

(* 8.4 IN PREDICATE *)

(* value [NOT] IN ( list ):
   Condition(value, In(list)) or the NotIn equivalent. *)
in_predicate :
  | row_value_predicand in_predicate_part2 { Condition($1, $2) }

in_predicate_part2:
  | IN in_predicate_value { In($2) }
  | NOT IN in_predicate_value { NotIn($3) }

(* A parenthesized value list; subqueries are not accepted. *)
in_predicate_value:
  | LEFT_PAREN in_value_list RIGHT_PAREN { $2 }

(* Each new value is consed onto the front of the accumulated list, so the
   resulting list is in reverse source order. *)
in_value_list:
  | row_value_expression { [$1] }
  | in_value_list COMMA row_value_expression { $3::$1 }

(********************)

(* 8.5 LIKE PREDICATE *)

(* Only the character form of LIKE is supported. *)
like_predicate :
  | character_like_predicate { $1 }

(* value [NOT] LIKE pattern:
   Condition(value, Like(pattern)) or the NotLike equivalent.
   No ESCAPE part is accepted. *)
character_like_predicate :
  | row_value_predicand character_like_predicate_part2 { Condition($1, $2) }

character_like_predicate_part2:
  | LIKE character_pattern { Like($2) } 
  | NOT LIKE character_pattern { NotLike($3) }

(* The pattern is any character value expression. *)
character_pattern :
  | character_value_expression { $1 }

(**********************)

(* 8.6 SIMILAR PREDICATE *)

(*************************)

(* 8.19 SEARCH CONDITION *)

(* A search condition is a boolean value expression, passed through. *)
search_condition:
  | boolean_value_expression { $1 }

(*************************)

(*****************)

(* 10 ADDITIONAL COMMON ELEMENTS *)

(* 10.9 AGGREGATE FUNCTION *)

(* An aggregate call with an optional FILTER clause:
   Aggregate(function, optional filter condition).
   COUNT applied to a bare asterisk is represented with the argument Ref of
   the asterisk character. *)
aggregate_function:
  | COUNT LEFT_PAREN ASTERISK RIGHT_PAREN { Aggregate(Function(Count, None, Ref("*")), None) }
  | COUNT LEFT_PAREN ASTERISK RIGHT_PAREN filter_clause { Aggregate(Function(Count, None, Ref("*")), Some($5)) }
  | general_set_function { Aggregate($1, None) }
  | general_set_function filter_clause { Aggregate($1, Some($2)) }

(* function ( [ALL | DISTINCT] expression ):
   Function(operation, optional set quantifier, argument).
   COUNT is spelled out here rather than being derived through
   set_function_type. Otherwise, after reading COUNT with LEFT_PAREN as the
   lookahead, the parser would have to choose between shifting (asterisk
   form above) and reducing COUNT to an operation; that shift/reduce
   conflict made COUNT over an expression unparsable. *)
general_set_function:
  | set_function_type LEFT_PAREN value_expression RIGHT_PAREN { Function($1, None, $3) }
  | set_function_type LEFT_PAREN set_quantifier value_expression RIGHT_PAREN { Function($1, Some($3), $4) }
  | COUNT LEFT_PAREN value_expression RIGHT_PAREN { Function(Count, None, $3) }
  | COUNT LEFT_PAREN set_quantifier value_expression RIGHT_PAREN { Function(Count, Some($3), $4) }

(* The operation of a general set function (every one except COUNT, which is
   handled directly in general_set_function). *)
set_function_type:
  | computationnal_operation { $1 }

(* ALL or DISTINCT. *)
set_quantifier :
  | ALL { All }
  | DISTINCT { Distinct }

(* FILTER ( WHERE condition ): yields the condition. *)
filter_clause :
  | FILTER LEFT_PAREN WHERE search_condition RIGHT_PAREN { $4 }

(* Maps an aggregate keyword to its constructor. COUNT is deliberately absent;
   see general_set_function. *)
computationnal_operation:
  | AVG { Avg }
  | MAX { Max }
  | MIN { Min }
  | SUM { Sum }

(***************************)

(*********************************)

(* 11.1 SCHEMA DEFINITION *)

(* CREATE SCHEMA name: CreateSchema(name). *)
schema_definition:
  | CREATE SCHEMA schema_name_clause { CreateSchema($3) } 

(* Only a bare schema name is accepted. *)
schema_name_clause :
  | schema_name { $1 }

(**************************)

(* 11.2 DROP SCHEMA STATEMENT *)

(* DROP SCHEMA name: DropSchema(name). No drop behavior keyword is accepted. *)
drop_schema_statement:
  | DROP SCHEMA schema_name { DropSchema($3) }

(******************************)

(* 11.3 TABLE DEFINITION *)

(* CREATE [GLOBAL | LOCAL TEMPORARY] TABLE name:
   CreateTable(optional scope, table). No column list is parsed. *)
table_definition :
  | CREATE TABLE table_name  { CreateTable(None, $3) }
  | CREATE table_scope TABLE table_name  { CreateTable(Some($2), $4) }

(* GLOBAL or LOCAL, followed by the mandatory TEMPORARY keyword. *)
table_scope :
  | global_or_local TEMPORARY { $1 }

global_or_local :
  | GLOBAL { Global }
  | LOCAL { Local }

(*************************)

(* 11.18 DROP COLUMN DEFINITION *)

(* DROP [COLUMN] name: DropColumn(name). The COLUMN keyword is optional. *)
drop_column_definition:
  | DROP column_name { DropColumn($2) }
  | DROP COLUMN column_name { DropColumn($3) }

(********************************)

(* 11.21 DROP TABLE STATEMENT *)

(* DROP TABLE name: DropTable(table). No drop behavior keyword is accepted. *)
drop_table_statement:
  | DROP TABLE table_name { DropTable($3) }

(******************************)

(* 14 DATA MANIPULATION *)
(* 14.9 MERGE STATEMENT *)

(************************)
(************************)