sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
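
# A minimal illustration (not from the original module) of what the builders
# above produce. build_mod parenthesizes binary operands so the node keeps the
# right precedence once MOD is rendered as the `%` operator:
#
#     import sqlglot
#     sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql()  # 'SELECT (a + 1) % 7'
#
# Similarly, build_like maps a two-argument LIKE() call onto exp.Like with the
# arguments reversed, adding an exp.Escape wrapper when a third argument exists.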


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "MOD": build_mod,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
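
    # A sketch (not part of the class) of the dispatch FUNCTIONS enables: the
    # parser upper-cases a function name and looks up its builder, so LOG(2, 8)
    # reaches build_logarithm:
    #
    #     builder = Parser.FUNCTIONS["LOG"]  # build_logarithm
    #     # builder([exp.Literal.number(2), exp.Literal.number(8)], dialect)
    #     # -> exp.Log(this=2, expression=8) when dialect.LOG_BASE_FIRST is True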

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }
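
    # Illustration (not part of the class): because these keywords are also in
    # ID_VAR_TOKENS, a query like `SELECT 1 AS filter` parses cleanly, while
    # TABLE_ALIAS_TOKENS deliberately drops the JOIN-related keywords so that
    # `FROM t LEFT JOIN u` never treats LEFT as an alias of t.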

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
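
    # A sketch (not part of the class) of COLUMN_OPERATORS in action, assuming
    # a Postgres-style dialect:
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT c::INT", read="postgres")    # DCOLON -> exp.Cast
    #     sqlglot.parse_one("SELECT j -> 'k'", read="postgres")  # ARROW -> exp.JSONExtract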

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }
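
    # Illustration (not part of the class): _parse_statement (defined below)
    # consults STATEMENT_PARSERS for the first token of a statement, so
    # `DELETE FROM t WHERE x > 1` is routed to _parse_delete and yields an
    # exp.Delete tree; tokens with no entry fall through to expression parsing.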

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
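
    # Illustration (not part of the class): RANGE_PARSERS handles infix
    # predicates after the left operand is parsed. Entries built with the
    # module-level binary_range_parser also consume a trailing ESCAPE clause,
    # so `x LIKE 'a#%' ESCAPE '#'` becomes exp.Escape wrapping an exp.Like.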

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
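
    # Illustration (not part of the class): PROPERTY_PARSERS keys on the word
    # that introduces a DDL property, e.g. `ENGINE=InnoDB` in MySQL-style DDL
    # is handled by the "ENGINE" entry and becomes an exp.EngineProperty node.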

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var(any_token=True)
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }
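
    # Illustration (not part of the class): CONSTRAINT_PARSERS is consulted
    # while parsing column definitions, so
    #
    #     CREATE TABLE t (id INT NOT NULL DEFAULT 0 PRIMARY KEY)
    #
    # attaches NotNullColumnConstraint, DefaultColumnConstraint and a primary
    # key constraint to the `id` column definition.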
"LIKE": lambda self: self._parse_create_like(), 942 "NOT": lambda self: self._parse_not_constraint(), 943 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 944 "ON": lambda self: ( 945 self._match(TokenType.UPDATE) 946 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 947 ) 948 or self.expression(exp.OnProperty, this=self._parse_id_var()), 949 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 950 "PERIOD": lambda self: self._parse_period_for_system_time(), 951 "PRIMARY KEY": lambda self: self._parse_primary_key(), 952 "REFERENCES": lambda self: self._parse_references(match=False), 953 "TITLE": lambda self: self.expression( 954 exp.TitleColumnConstraint, this=self._parse_var_or_string() 955 ), 956 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 957 "UNIQUE": lambda self: self._parse_unique(), 958 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 959 "WITH": lambda self: self.expression( 960 exp.Properties, expressions=self._parse_wrapped_properties() 961 ), 962 } 963 964 ALTER_PARSERS = { 965 "ADD": lambda self: self._parse_alter_table_add(), 966 "ALTER": lambda self: self._parse_alter_table_alter(), 967 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 968 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 969 "DROP": lambda self: self._parse_alter_table_drop(), 970 "RENAME": lambda self: self._parse_alter_table_rename(), 971 "SET": lambda self: self._parse_alter_table_set(), 972 } 973 974 ALTER_ALTER_PARSERS = { 975 "DISTKEY": lambda self: self._parse_alter_diststyle(), 976 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 977 "SORTKEY": lambda self: self._parse_alter_sortkey(), 978 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 979 } 980 981 SCHEMA_UNNAMED_CONSTRAINTS = { 982 "CHECK", 983 "EXCLUDE", 984 "FOREIGN KEY", 985 "LIKE", 986 "PERIOD", 987 "PRIMARY KEY", 988 "UNIQUE", 989 } 990 991 NO_PAREN_FUNCTION_PARSERS = { 992 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 993 "CASE": lambda self: self._parse_case(), 994 "IF": lambda self: self._parse_if(), 995 "NEXT": lambda self: self._parse_next_value_for(), 996 } 997 998 INVALID_FUNC_NAME_TOKENS = { 999 TokenType.IDENTIFIER, 1000 TokenType.STRING, 1001 } 1002 1003 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1004 1005 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1006 1007 FUNCTION_PARSERS = { 1008 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1009 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1010 "DECODE": lambda self: self._parse_decode(), 1011 "EXTRACT": lambda self: self._parse_extract(), 1012 "GAP_FILL": lambda self: self._parse_gap_fill(), 1013 "JSON_OBJECT": lambda self: self._parse_json_object(), 1014 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1015 "JSON_TABLE": lambda self: self._parse_json_table(), 1016 "MATCH": lambda self: self._parse_match_against(), 1017 "OPENJSON": lambda self: self._parse_open_json(), 1018 "POSITION": lambda self: self._parse_position(), 1019 "PREDICT": lambda self: self._parse_predict(), 1020 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1021 "STRING_AGG": lambda self: self._parse_string_agg(), 1022 "SUBSTRING": lambda self: self._parse_substring(), 1023 "TRIM": lambda self: self._parse_trim(), 1024 "TRY_CAST": lambda self: self._parse_cast(False, 

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }
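
    # Illustration (not part of the class): each QUERY_MODIFIER_PARSERS entry
    # returns an (arg_name, node) pair that is attached to the current query:
    #
    #     import sqlglot
    #     ast = sqlglot.parse_one("SELECT a FROM t WHERE b > 0 LIMIT 5")
    #     ast.args["where"], ast.args["limit"]  # (exp.Where, exp.Limit)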

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a JSON document
    COLON_IS_JSON_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
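
    # A sketch (hypothetical subclass) of how dialects tune these class-level
    # flags; dialect parsers override them rather than passing options around:
    #
    #     class MyDialectParser(Parser):
    #         LOG_DEFAULTS_TO_LN = True  # single-argument LOG() means LN()
    #         STRING_ALIASES = True      # allow SELECT COUNT(*) 'count'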

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
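
    # A minimal usage sketch (assuming the default dialect): parse() takes the
    # token stream produced by the dialect's tokenizer and yields one tree per
    # statement, splitting on semicolons:
    #
    #     from sqlglot.dialects import Dialect
    #     dialect = Dialect.get_or_raise(None)
    #     sql = "SELECT 1; SELECT 2"
    #     trees = dialect.parser().parse(dialect.tokenize(sql), sql)  # two trees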
1293 """ 1294 errors = [] 1295 for expression_type in ensure_list(expression_types): 1296 parser = self.EXPRESSION_PARSERS.get(expression_type) 1297 if not parser: 1298 raise TypeError(f"No parser registered for {expression_type}") 1299 1300 try: 1301 return self._parse(parser, raw_tokens, sql) 1302 except ParseError as e: 1303 e.errors[0]["into_expression"] = expression_type 1304 errors.append(e) 1305 1306 raise ParseError( 1307 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1308 errors=merge_errors(errors), 1309 ) from errors[-1] 1310 1311 def _parse( 1312 self, 1313 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1314 raw_tokens: t.List[Token], 1315 sql: t.Optional[str] = None, 1316 ) -> t.List[t.Optional[exp.Expression]]: 1317 self.reset() 1318 self.sql = sql or "" 1319 1320 total = len(raw_tokens) 1321 chunks: t.List[t.List[Token]] = [[]] 1322 1323 for i, token in enumerate(raw_tokens): 1324 if token.token_type == TokenType.SEMICOLON: 1325 if token.comments: 1326 chunks.append([token]) 1327 1328 if i < total - 1: 1329 chunks.append([]) 1330 else: 1331 chunks[-1].append(token) 1332 1333 expressions = [] 1334 1335 for tokens in chunks: 1336 self._index = -1 1337 self._tokens = tokens 1338 self._advance() 1339 1340 expressions.append(parse_method(self)) 1341 1342 if self._index < len(self._tokens): 1343 self.raise_error("Invalid expression / Unexpected token") 1344 1345 self.check_errors() 1346 1347 return expressions 1348 1349 def check_errors(self) -> None: 1350 """Logs or raises any found errors, depending on the chosen error level setting.""" 1351 if self.error_level == ErrorLevel.WARN: 1352 for error in self.errors: 1353 logger.error(str(error)) 1354 elif self.error_level == ErrorLevel.RAISE and self.errors: 1355 raise ParseError( 1356 concat_messages(self.errors, self.max_errors), 1357 errors=merge_errors(self.errors), 1358 ) 1359 1360 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1361 """ 1362 Appends an error in the list of recorded errors or raises it, depending on the chosen 1363 error level setting. 1364 """ 1365 token = token or self._curr or self._prev or Token.string("") 1366 start = token.start 1367 end = token.end + 1 1368 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1369 highlight = self.sql[start:end] 1370 end_context = self.sql[end : end + self.error_message_context] 1371 1372 error = ParseError.new( 1373 f"{message}. Line {token.line}, Col: {token.col}.\n" 1374 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1375 description=message, 1376 line=token.line, 1377 col=token.col, 1378 start_context=start_context, 1379 highlight=highlight, 1380 end_context=end_context, 1381 ) 1382 1383 if self.error_level == ErrorLevel.IMMEDIATE: 1384 raise error 1385 1386 self.errors.append(error) 1387 1388 def expression( 1389 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1390 ) -> E: 1391 """ 1392 Creates a new, validated Expression. 1393 1394 Args: 1395 exp_class: The expression class to instantiate. 1396 comments: An optional list of comments to attach to the expression. 1397 kwargs: The arguments to set for the expression along with their respective values. 1398 1399 Returns: 1400 The target expression. 
1401 """ 1402 instance = exp_class(**kwargs) 1403 instance.add_comments(comments) if comments else self._add_comments(instance) 1404 return self.validate_expression(instance) 1405 1406 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1407 if expression and self._prev_comments: 1408 expression.add_comments(self._prev_comments) 1409 self._prev_comments = None 1410 1411 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1412 """ 1413 Validates an Expression, making sure that all its mandatory arguments are set. 1414 1415 Args: 1416 expression: The expression to validate. 1417 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1418 1419 Returns: 1420 The validated expression. 1421 """ 1422 if self.error_level != ErrorLevel.IGNORE: 1423 for error_message in expression.error_messages(args): 1424 self.raise_error(error_message) 1425 1426 return expression 1427 1428 def _find_sql(self, start: Token, end: Token) -> str: 1429 return self.sql[start.start : end.end + 1] 1430 1431 def _is_connected(self) -> bool: 1432 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1433 1434 def _advance(self, times: int = 1) -> None: 1435 self._index += times 1436 self._curr = seq_get(self._tokens, self._index) 1437 self._next = seq_get(self._tokens, self._index + 1) 1438 1439 if self._index > 0: 1440 self._prev = self._tokens[self._index - 1] 1441 self._prev_comments = self._prev.comments 1442 else: 1443 self._prev = None 1444 self._prev_comments = None 1445 1446 def _retreat(self, index: int) -> None: 1447 if index != self._index: 1448 self._advance(index - self._index) 1449 1450 def _warn_unsupported(self) -> None: 1451 if len(self._tokens) <= 1: 1452 return 1453 1454 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1455 # interested in emitting a warning for the one being currently processed. 1456 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1457 1458 logger.warning( 1459 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1460 ) 1461 1462 def _parse_command(self) -> exp.Command: 1463 self._warn_unsupported() 1464 return self.expression( 1465 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1466 ) 1467 1468 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1469 """ 1470 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can differ depending on the user-set ErrorLevel, so _try_parse aims
        to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this
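
    # Illustration (not part of the class): _advance/_retreat implement the
    # cursor that speculative parsing relies on, typically in this pattern:
    #
    #     index = self._index             # remember the position
    #     node = self._parse_something()  # hypothetical sub-parser
    #     if not node:
    #         self._retreat(index)        # backtrack and try another production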

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(self) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
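
    # Illustration (not part of the class): _parse_drop shows the recurring
    # fallback strategy: anything that doesn't match a known CREATABLE degrades
    # to a generic exp.Command via _parse_as_command, while
    #
    #     DROP TABLE IF EXISTS db.t CASCADE
    #
    # produces exp.Drop(kind='TABLE', exists=True, cascade=True, ...).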
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1683 expression = self._parse_string() 1684 extend_props(self._parse_properties()) 1685 else: 1686 expression = self._parse_statement() 1687 1688 end = self._match_text_seq("END") 1689 1690 if return_: 1691 expression = self.expression(exp.Return, this=expression) 1692 elif create_token.token_type == TokenType.INDEX: 1693 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1694 if not self._match(TokenType.ON): 1695 index = self._parse_id_var() 1696 anonymous = False 1697 else: 1698 index = None 1699 anonymous = True 1700 1701 this = self._parse_index(index=index, anonymous=anonymous) 1702 elif create_token.token_type in self.DB_CREATABLES: 1703 table_parts = self._parse_table_parts( 1704 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1705 ) 1706 1707 # exp.Properties.Location.POST_NAME 1708 self._match(TokenType.COMMA) 1709 extend_props(self._parse_properties(before=True)) 1710 1711 this = self._parse_schema(this=table_parts) 1712 1713 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1714 extend_props(self._parse_properties()) 1715 1716 self._match(TokenType.ALIAS) 1717 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1718 # exp.Properties.Location.POST_ALIAS 1719 extend_props(self._parse_properties()) 1720 1721 if create_token.token_type == TokenType.SEQUENCE: 1722 expression = self._parse_types() 1723 extend_props(self._parse_properties()) 1724 else: 1725 expression = self._parse_ddl_select() 1726 1727 if create_token.token_type == TokenType.TABLE: 1728 # exp.Properties.Location.POST_EXPRESSION 1729 extend_props(self._parse_properties()) 1730 1731 indexes = [] 1732 while True: 1733 index = self._parse_index() 1734 1735 # exp.Properties.Location.POST_INDEX 1736 extend_props(self._parse_properties()) 1737 1738 if not index: 1739 break 1740 else: 1741 self._match(TokenType.COMMA) 1742 indexes.append(index) 1743 elif create_token.token_type == TokenType.VIEW: 1744 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1745 no_schema_binding = True 1746 1747 shallow = self._match_text_seq("SHALLOW") 1748 1749 if self._match_texts(self.CLONE_KEYWORDS): 1750 copy = self._prev.text.lower() == "copy" 1751 clone = self.expression( 1752 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1753 ) 1754 1755 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1756 return self._parse_as_command(start) 1757 1758 return self.expression( 1759 exp.Create, 1760 comments=comments, 1761 this=this, 1762 kind=create_token.text.upper(), 1763 replace=replace, 1764 unique=unique, 1765 expression=expression, 1766 exists=exists, 1767 properties=properties, 1768 indexes=indexes, 1769 no_schema_binding=no_schema_binding, 1770 begin=begin, 1771 end=end, 1772 clone=clone, 1773 ) 1774 1775 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1776 seq = exp.SequenceProperties() 1777 1778 options = [] 1779 index = self._index 1780 1781 while self._curr: 1782 self._match(TokenType.COMMA) 1783 if self._match_text_seq("INCREMENT"): 1784 self._match_text_seq("BY") 1785 self._match_text_seq("=") 1786 seq.set("increment", self._parse_term()) 1787 elif self._match_text_seq("MINVALUE"): 1788 seq.set("minvalue", self._parse_term()) 1789 elif self._match_text_seq("MAXVALUE"): 1790 seq.set("maxvalue", self._parse_term()) 1791 elif self._match(TokenType.START_WITH) or 
self._match_text_seq("START"): 1792 self._match_text_seq("=") 1793 seq.set("start", self._parse_term()) 1794 elif self._match_text_seq("CACHE"): 1795 # T-SQL allows empty CACHE which is initialized dynamically 1796 seq.set("cache", self._parse_number() or True) 1797 elif self._match_text_seq("OWNED", "BY"): 1798 # "OWNED BY NONE" is the default 1799 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1800 else: 1801 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1802 if opt: 1803 options.append(opt) 1804 else: 1805 break 1806 1807 seq.set("options", options if options else None) 1808 return None if self._index == index else seq 1809 1810 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1811 # only used for teradata currently 1812 self._match(TokenType.COMMA) 1813 1814 kwargs = { 1815 "no": self._match_text_seq("NO"), 1816 "dual": self._match_text_seq("DUAL"), 1817 "before": self._match_text_seq("BEFORE"), 1818 "default": self._match_text_seq("DEFAULT"), 1819 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1820 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1821 "after": self._match_text_seq("AFTER"), 1822 "minimum": self._match_texts(("MIN", "MINIMUM")), 1823 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1824 } 1825 1826 if self._match_texts(self.PROPERTY_PARSERS): 1827 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1828 try: 1829 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1830 except TypeError: 1831 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1832 1833 return None 1834 1835 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1836 return self._parse_wrapped_csv(self._parse_property) 1837 1838 def _parse_property(self) -> t.Optional[exp.Expression]: 1839 if self._match_texts(self.PROPERTY_PARSERS): 1840 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1841 1842 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1843 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1844 1845 if self._match_text_seq("COMPOUND", "SORTKEY"): 1846 return self._parse_sortkey(compound=True) 1847 1848 if self._match_text_seq("SQL", "SECURITY"): 1849 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1850 1851 index = self._index 1852 key = self._parse_column() 1853 1854 if not self._match(TokenType.EQ): 1855 self._retreat(index) 1856 return self._parse_sequence_properties() 1857 1858 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1859 if isinstance(key, exp.Column): 1860 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1861 1862 value = self._parse_bitwise() or self._parse_var(any_token=True) 1863 1864 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1865 if isinstance(value, exp.Column): 1866 value = exp.var(value.name) 1867 1868 return self.expression(exp.Property, this=key, value=value) 1869 1870 def _parse_stored(self) -> exp.FileFormatProperty: 1871 self._match(TokenType.ALIAS) 1872 1873 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1874 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1875 1876 return self.expression( 1877 exp.FileFormatProperty, 1878 this=( 1879 self.expression( 1880 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1881 ) 1882 if 
input_format or output_format 1883 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1884 ), 1885 ) 1886 1887 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1888 field = self._parse_field() 1889 if isinstance(field, exp.Identifier) and not field.quoted: 1890 field = exp.var(field) 1891 1892 return field 1893 1894 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1895 self._match(TokenType.EQ) 1896 self._match(TokenType.ALIAS) 1897 1898 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1899 1900 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1901 properties = [] 1902 while True: 1903 if before: 1904 prop = self._parse_property_before() 1905 else: 1906 prop = self._parse_property() 1907 if not prop: 1908 break 1909 for p in ensure_list(prop): 1910 properties.append(p) 1911 1912 if properties: 1913 return self.expression(exp.Properties, expressions=properties) 1914 1915 return None 1916 1917 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1918 return self.expression( 1919 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1920 ) 1921 1922 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1923 if self._index >= 2: 1924 pre_volatile_token = self._tokens[self._index - 2] 1925 else: 1926 pre_volatile_token = None 1927 1928 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1929 return exp.VolatileProperty() 1930 1931 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1932 1933 def _parse_retention_period(self) -> exp.Var: 1934 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1935 number = self._parse_number() 1936 number_str = f"{number} " if number else "" 1937 unit = self._parse_var(any_token=True) 1938 return exp.var(f"{number_str}{unit}") 1939 1940 def _parse_system_versioning_property( 1941 self, with_: bool = False 1942 ) -> exp.WithSystemVersioningProperty: 1943 self._match(TokenType.EQ) 1944 prop = self.expression( 1945 exp.WithSystemVersioningProperty, 1946 **{ # type: ignore 1947 "on": True, 1948 "with": with_, 1949 }, 1950 ) 1951 1952 if self._match_text_seq("OFF"): 1953 prop.set("on", False) 1954 return prop 1955 1956 self._match(TokenType.ON) 1957 if self._match(TokenType.L_PAREN): 1958 while self._curr and not self._match(TokenType.R_PAREN): 1959 if self._match_text_seq("HISTORY_TABLE", "="): 1960 prop.set("this", self._parse_table_parts()) 1961 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1962 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1963 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1964 prop.set("retention_period", self._parse_retention_period()) 1965 1966 self._match(TokenType.COMMA) 1967 1968 return prop 1969 1970 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1971 self._match(TokenType.EQ) 1972 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1973 prop = self.expression(exp.DataDeletionProperty, on=on) 1974 1975 if self._match(TokenType.L_PAREN): 1976 while self._curr and not self._match(TokenType.R_PAREN): 1977 if self._match_text_seq("FILTER_COLUMN", "="): 1978 prop.set("filter_column", self._parse_column()) 1979 elif self._match_text_seq("RETENTION_PERIOD", "="): 1980 prop.set("retention_period", self._parse_retention_period()) 1981 1982 
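# Illustrative note (not part of the original source): RETENTION_PERIOD values
# reuse _parse_retention_period above, the same helper used for T-SQL's
# HISTORY_RETENTION_PERIOD, which accepts INFINITE or "<number> <unit>". A
# hedged sketch, assuming sqlglot's T-SQL system-versioning support:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one(
#   ...     "CREATE TABLE t (a INT) WITH (SYSTEM_VERSIONING = ON (HISTORY_RETENTION_PERIOD = 3 MONTHS))",
#   ...     read="tsql",
#   ... )
#   # the resulting exp.Create carries a WithSystemVersioningProperty whose
#   # retention_period arg is the exp.Var "3 MONTHS"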
self._match(TokenType.COMMA) 1983 1984 return prop 1985 1986 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1987 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1988 prop = self._parse_system_versioning_property(with_=True) 1989 self._match_r_paren() 1990 return prop 1991 1992 if self._match(TokenType.L_PAREN, advance=False): 1993 return self._parse_wrapped_properties() 1994 1995 if self._match_text_seq("JOURNAL"): 1996 return self._parse_withjournaltable() 1997 1998 if self._match_texts(self.VIEW_ATTRIBUTES): 1999 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2000 2001 if self._match_text_seq("DATA"): 2002 return self._parse_withdata(no=False) 2003 elif self._match_text_seq("NO", "DATA"): 2004 return self._parse_withdata(no=True) 2005 2006 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2007 return self._parse_serde_properties(with_=True) 2008 2009 if not self._next: 2010 return None 2011 2012 return self._parse_withisolatedloading() 2013 2014 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2015 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2016 self._match(TokenType.EQ) 2017 2018 user = self._parse_id_var() 2019 self._match(TokenType.PARAMETER) 2020 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2021 2022 if not user or not host: 2023 return None 2024 2025 return exp.DefinerProperty(this=f"{user}@{host}") 2026 2027 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2028 self._match(TokenType.TABLE) 2029 self._match(TokenType.EQ) 2030 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2031 2032 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2033 return self.expression(exp.LogProperty, no=no) 2034 2035 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2036 return self.expression(exp.JournalProperty, **kwargs) 2037 2038 def _parse_checksum(self) -> exp.ChecksumProperty: 2039 self._match(TokenType.EQ) 2040 2041 on = None 2042 if self._match(TokenType.ON): 2043 on = True 2044 elif self._match_text_seq("OFF"): 2045 on = False 2046 2047 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2048 2049 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2050 return self.expression( 2051 exp.Cluster, 2052 expressions=( 2053 self._parse_wrapped_csv(self._parse_ordered) 2054 if wrapped 2055 else self._parse_csv(self._parse_ordered) 2056 ), 2057 ) 2058 2059 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2060 self._match_text_seq("BY") 2061 2062 self._match_l_paren() 2063 expressions = self._parse_csv(self._parse_column) 2064 self._match_r_paren() 2065 2066 if self._match_text_seq("SORTED", "BY"): 2067 self._match_l_paren() 2068 sorted_by = self._parse_csv(self._parse_ordered) 2069 self._match_r_paren() 2070 else: 2071 sorted_by = None 2072 2073 self._match(TokenType.INTO) 2074 buckets = self._parse_number() 2075 self._match_text_seq("BUCKETS") 2076 2077 return self.expression( 2078 exp.ClusteredByProperty, 2079 expressions=expressions, 2080 sorted_by=sorted_by, 2081 buckets=buckets, 2082 ) 2083 2084 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2085 if not self._match_text_seq("GRANTS"): 2086 self._retreat(self._index - 1) 2087 return None 2088 2089 return self.expression(exp.CopyGrantsProperty) 2090 2091 def _parse_freespace(self) -> exp.FreespaceProperty: 2092 self._match(TokenType.EQ) 2093 return 
self.expression( 2094 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2095 ) 2096 2097 def _parse_mergeblockratio( 2098 self, no: bool = False, default: bool = False 2099 ) -> exp.MergeBlockRatioProperty: 2100 if self._match(TokenType.EQ): 2101 return self.expression( 2102 exp.MergeBlockRatioProperty, 2103 this=self._parse_number(), 2104 percent=self._match(TokenType.PERCENT), 2105 ) 2106 2107 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2108 2109 def _parse_datablocksize( 2110 self, 2111 default: t.Optional[bool] = None, 2112 minimum: t.Optional[bool] = None, 2113 maximum: t.Optional[bool] = None, 2114 ) -> exp.DataBlocksizeProperty: 2115 self._match(TokenType.EQ) 2116 size = self._parse_number() 2117 2118 units = None 2119 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2120 units = self._prev.text 2121 2122 return self.expression( 2123 exp.DataBlocksizeProperty, 2124 size=size, 2125 units=units, 2126 default=default, 2127 minimum=minimum, 2128 maximum=maximum, 2129 ) 2130 2131 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2132 self._match(TokenType.EQ) 2133 always = self._match_text_seq("ALWAYS") 2134 manual = self._match_text_seq("MANUAL") 2135 never = self._match_text_seq("NEVER") 2136 default = self._match_text_seq("DEFAULT") 2137 2138 autotemp = None 2139 if self._match_text_seq("AUTOTEMP"): 2140 autotemp = self._parse_schema() 2141 2142 return self.expression( 2143 exp.BlockCompressionProperty, 2144 always=always, 2145 manual=manual, 2146 never=never, 2147 default=default, 2148 autotemp=autotemp, 2149 ) 2150 2151 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2152 index = self._index 2153 no = self._match_text_seq("NO") 2154 concurrent = self._match_text_seq("CONCURRENT") 2155 2156 if not self._match_text_seq("ISOLATED", "LOADING"): 2157 self._retreat(index) 2158 return None 2159 2160 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2161 return self.expression( 2162 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2163 ) 2164 2165 def _parse_locking(self) -> exp.LockingProperty: 2166 if self._match(TokenType.TABLE): 2167 kind = "TABLE" 2168 elif self._match(TokenType.VIEW): 2169 kind = "VIEW" 2170 elif self._match(TokenType.ROW): 2171 kind = "ROW" 2172 elif self._match_text_seq("DATABASE"): 2173 kind = "DATABASE" 2174 else: 2175 kind = None 2176 2177 if kind in ("DATABASE", "TABLE", "VIEW"): 2178 this = self._parse_table_parts() 2179 else: 2180 this = None 2181 2182 if self._match(TokenType.FOR): 2183 for_or_in = "FOR" 2184 elif self._match(TokenType.IN): 2185 for_or_in = "IN" 2186 else: 2187 for_or_in = None 2188 2189 if self._match_text_seq("ACCESS"): 2190 lock_type = "ACCESS" 2191 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2192 lock_type = "EXCLUSIVE" 2193 elif self._match_text_seq("SHARE"): 2194 lock_type = "SHARE" 2195 elif self._match_text_seq("READ"): 2196 lock_type = "READ" 2197 elif self._match_text_seq("WRITE"): 2198 lock_type = "WRITE" 2199 elif self._match_text_seq("CHECKSUM"): 2200 lock_type = "CHECKSUM" 2201 else: 2202 lock_type = None 2203 2204 override = self._match_text_seq("OVERRIDE") 2205 2206 return self.expression( 2207 exp.LockingProperty, 2208 this=this, 2209 kind=kind, 2210 for_or_in=for_or_in, 2211 lock_type=lock_type, 2212 override=override, 2213 ) 2214 2215 def _parse_partition_by(self) -> t.List[exp.Expression]: 2216 if 
self._match(TokenType.PARTITION_BY): 2217 return self._parse_csv(self._parse_assignment) 2218 return [] 2219 2220 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2221 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2222 if self._match_text_seq("MINVALUE"): 2223 return exp.var("MINVALUE") 2224 if self._match_text_seq("MAXVALUE"): 2225 return exp.var("MAXVALUE") 2226 return self._parse_bitwise() 2227 2228 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2229 expression = None 2230 from_expressions = None 2231 to_expressions = None 2232 2233 if self._match(TokenType.IN): 2234 this = self._parse_wrapped_csv(self._parse_bitwise) 2235 elif self._match(TokenType.FROM): 2236 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2237 self._match_text_seq("TO") 2238 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2239 elif self._match_text_seq("WITH", "(", "MODULUS"): 2240 this = self._parse_number() 2241 self._match_text_seq(",", "REMAINDER") 2242 expression = self._parse_number() 2243 self._match_r_paren() 2244 else: 2245 self.raise_error("Failed to parse partition bound spec.") 2246 2247 return self.expression( 2248 exp.PartitionBoundSpec, 2249 this=this, 2250 expression=expression, 2251 from_expressions=from_expressions, 2252 to_expressions=to_expressions, 2253 ) 2254 2255 # https://www.postgresql.org/docs/current/sql-createtable.html 2256 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2257 if not self._match_text_seq("OF"): 2258 self._retreat(self._index - 1) 2259 return None 2260 2261 this = self._parse_table(schema=True) 2262 2263 if self._match(TokenType.DEFAULT): 2264 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2265 elif self._match_text_seq("FOR", "VALUES"): 2266 expression = self._parse_partition_bound_spec() 2267 else: 2268 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2269 2270 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2271 2272 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2273 self._match(TokenType.EQ) 2274 return self.expression( 2275 exp.PartitionedByProperty, 2276 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2277 ) 2278 2279 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2280 if self._match_text_seq("AND", "STATISTICS"): 2281 statistics = True 2282 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2283 statistics = False 2284 else: 2285 statistics = None 2286 2287 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2288 2289 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2290 if self._match_text_seq("SQL"): 2291 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2292 return None 2293 2294 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2295 if self._match_text_seq("SQL", "DATA"): 2296 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2297 return None 2298 2299 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2300 if self._match_text_seq("PRIMARY", "INDEX"): 2301 return exp.NoPrimaryIndexProperty() 2302 if self._match_text_seq("SQL"): 2303 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2304 return None 2305 2306 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2307 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2308 return exp.OnCommitProperty() 2309 
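# Illustrative note (not part of the original source): these two branches
# cover the temporary-table clauses ON COMMIT PRESERVE ROWS and ON COMMIT
# DELETE ROWS; any other ON clause falls through to a generic exp.OnProperty.
# A minimal sketch, assuming the public sqlglot.parse_one helper:
#
#   >>> import sqlglot
#   >>> create = sqlglot.parse_one(
#   ...     "CREATE TEMPORARY TABLE t (a INT) ON COMMIT PRESERVE ROWS"
#   ... )
#   # the Create's properties now include an exp.OnCommitProperty node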
if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2310 return exp.OnCommitProperty(delete=True) 2311 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2312 2313 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2314 if self._match_text_seq("SQL", "DATA"): 2315 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2316 return None 2317 2318 def _parse_distkey(self) -> exp.DistKeyProperty: 2319 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2320 2321 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2322 table = self._parse_table(schema=True) 2323 2324 options = [] 2325 while self._match_texts(("INCLUDING", "EXCLUDING")): 2326 this = self._prev.text.upper() 2327 2328 id_var = self._parse_id_var() 2329 if not id_var: 2330 return None 2331 2332 options.append( 2333 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2334 ) 2335 2336 return self.expression(exp.LikeProperty, this=table, expressions=options) 2337 2338 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2339 return self.expression( 2340 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2341 ) 2342 2343 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2344 self._match(TokenType.EQ) 2345 return self.expression( 2346 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2347 ) 2348 2349 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2350 self._match_text_seq("WITH", "CONNECTION") 2351 return self.expression( 2352 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2353 ) 2354 2355 def _parse_returns(self) -> exp.ReturnsProperty: 2356 value: t.Optional[exp.Expression] 2357 null = None 2358 is_table = self._match(TokenType.TABLE) 2359 2360 if is_table: 2361 if self._match(TokenType.LT): 2362 value = self.expression( 2363 exp.Schema, 2364 this="TABLE", 2365 expressions=self._parse_csv(self._parse_struct_types), 2366 ) 2367 if not self._match(TokenType.GT): 2368 self.raise_error("Expecting >") 2369 else: 2370 value = self._parse_schema(exp.var("TABLE")) 2371 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2372 null = True 2373 value = None 2374 else: 2375 value = self._parse_types() 2376 2377 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2378 2379 def _parse_describe(self) -> exp.Describe: 2380 kind = self._match_set(self.CREATABLES) and self._prev.text 2381 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2382 if self._match(TokenType.DOT): 2383 style = None 2384 self._retreat(self._index - 2) 2385 this = self._parse_table(schema=True) 2386 properties = self._parse_properties() 2387 expressions = properties.expressions if properties else None 2388 return self.expression( 2389 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2390 ) 2391 2392 def _parse_insert(self) -> exp.Insert: 2393 comments = ensure_list(self._prev_comments) 2394 hint = self._parse_hint() 2395 overwrite = self._match(TokenType.OVERWRITE) 2396 ignore = self._match(TokenType.IGNORE) 2397 local = self._match_text_seq("LOCAL") 2398 alternative = None 2399 is_function = None 2400 2401 if self._match_text_seq("DIRECTORY"): 2402 this: t.Optional[exp.Expression] = self.expression( 2403 exp.Directory, 2404 this=self._parse_var_or_string(), 2405 
local=local, 2406 row_format=self._parse_row_format(match_row=True), 2407 ) 2408 else: 2409 if self._match(TokenType.OR): 2410 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2411 2412 self._match(TokenType.INTO) 2413 comments += ensure_list(self._prev_comments) 2414 self._match(TokenType.TABLE) 2415 is_function = self._match(TokenType.FUNCTION) 2416 2417 this = ( 2418 self._parse_table(schema=True, parse_partition=True) 2419 if not is_function 2420 else self._parse_function() 2421 ) 2422 2423 returning = self._parse_returning() 2424 2425 return self.expression( 2426 exp.Insert, 2427 comments=comments, 2428 hint=hint, 2429 is_function=is_function, 2430 this=this, 2431 stored=self._match_text_seq("STORED") and self._parse_stored(), 2432 by_name=self._match_text_seq("BY", "NAME"), 2433 exists=self._parse_exists(), 2434 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2435 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2436 conflict=self._parse_on_conflict(), 2437 returning=returning or self._parse_returning(), 2438 overwrite=overwrite, 2439 alternative=alternative, 2440 ignore=ignore, 2441 ) 2442 2443 def _parse_kill(self) -> exp.Kill: 2444 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2445 2446 return self.expression( 2447 exp.Kill, 2448 this=self._parse_primary(), 2449 kind=kind, 2450 ) 2451 2452 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2453 conflict = self._match_text_seq("ON", "CONFLICT") 2454 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2455 2456 if not conflict and not duplicate: 2457 return None 2458 2459 conflict_keys = None 2460 constraint = None 2461 2462 if conflict: 2463 if self._match_text_seq("ON", "CONSTRAINT"): 2464 constraint = self._parse_id_var() 2465 elif self._match(TokenType.L_PAREN): 2466 conflict_keys = self._parse_csv(self._parse_id_var) 2467 self._match_r_paren() 2468 2469 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2470 if self._prev.token_type == TokenType.UPDATE: 2471 self._match(TokenType.SET) 2472 expressions = self._parse_csv(self._parse_equality) 2473 else: 2474 expressions = None 2475 2476 return self.expression( 2477 exp.OnConflict, 2478 duplicate=duplicate, 2479 expressions=expressions, 2480 action=action, 2481 conflict_keys=conflict_keys, 2482 constraint=constraint, 2483 ) 2484 2485 def _parse_returning(self) -> t.Optional[exp.Returning]: 2486 if not self._match(TokenType.RETURNING): 2487 return None 2488 return self.expression( 2489 exp.Returning, 2490 expressions=self._parse_csv(self._parse_expression), 2491 into=self._match(TokenType.INTO) and self._parse_table_part(), 2492 ) 2493 2494 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2495 if not self._match(TokenType.FORMAT): 2496 return None 2497 return self._parse_row_format() 2498 2499 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2500 index = self._index 2501 with_ = with_ or self._match_text_seq("WITH") 2502 2503 if not self._match(TokenType.SERDE_PROPERTIES): 2504 self._retreat(index) 2505 return None 2506 return self.expression( 2507 exp.SerdeProperties, 2508 **{ # type: ignore 2509 "expressions": self._parse_wrapped_properties(), 2510 "with": with_, 2511 }, 2512 ) 2513 2514 def _parse_row_format( 2515 self, match_row: bool = False 2516 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2517 
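# Illustrative note (not part of the original source): ROW FORMAT appears in
# Hive-style DDL either as ROW FORMAT SERDE '<class>' or as ROW FORMAT
# DELIMITED with the FIELDS/COLLECTION ITEMS/MAP KEYS/LINES options consumed
# below. A hedged sketch, assuming sqlglot's Hive dialect:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one(
#   ...     "CREATE TABLE t (a STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','",
#   ...     read="hive",
#   ... )
#   # the Create's properties include an exp.RowFormatDelimitedProperty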
if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2518 return None 2519 2520 if self._match_text_seq("SERDE"): 2521 this = self._parse_string() 2522 2523 serde_properties = self._parse_serde_properties() 2524 2525 return self.expression( 2526 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2527 ) 2528 2529 self._match_text_seq("DELIMITED") 2530 2531 kwargs = {} 2532 2533 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2534 kwargs["fields"] = self._parse_string() 2535 if self._match_text_seq("ESCAPED", "BY"): 2536 kwargs["escaped"] = self._parse_string() 2537 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2538 kwargs["collection_items"] = self._parse_string() 2539 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2540 kwargs["map_keys"] = self._parse_string() 2541 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2542 kwargs["lines"] = self._parse_string() 2543 if self._match_text_seq("NULL", "DEFINED", "AS"): 2544 kwargs["null"] = self._parse_string() 2545 2546 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2547 2548 def _parse_load(self) -> exp.LoadData | exp.Command: 2549 if self._match_text_seq("DATA"): 2550 local = self._match_text_seq("LOCAL") 2551 self._match_text_seq("INPATH") 2552 inpath = self._parse_string() 2553 overwrite = self._match(TokenType.OVERWRITE) 2554 self._match_pair(TokenType.INTO, TokenType.TABLE) 2555 2556 return self.expression( 2557 exp.LoadData, 2558 this=self._parse_table(schema=True), 2559 local=local, 2560 overwrite=overwrite, 2561 inpath=inpath, 2562 partition=self._parse_partition(), 2563 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2564 serde=self._match_text_seq("SERDE") and self._parse_string(), 2565 ) 2566 return self._parse_as_command(self._prev) 2567 2568 def _parse_delete(self) -> exp.Delete: 2569 # This handles MySQL's "Multiple-Table Syntax" 2570 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2571 tables = None 2572 comments = self._prev_comments 2573 if not self._match(TokenType.FROM, advance=False): 2574 tables = self._parse_csv(self._parse_table) or None 2575 2576 returning = self._parse_returning() 2577 2578 return self.expression( 2579 exp.Delete, 2580 comments=comments, 2581 tables=tables, 2582 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2583 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2584 where=self._parse_where(), 2585 returning=returning or self._parse_returning(), 2586 limit=self._parse_limit(), 2587 ) 2588 2589 def _parse_update(self) -> exp.Update: 2590 comments = self._prev_comments 2591 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2592 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2593 returning = self._parse_returning() 2594 return self.expression( 2595 exp.Update, 2596 comments=comments, 2597 **{ # type: ignore 2598 "this": this, 2599 "expressions": expressions, 2600 "from": self._parse_from(joins=True), 2601 "where": self._parse_where(), 2602 "returning": returning or self._parse_returning(), 2603 "order": self._parse_order(), 2604 "limit": self._parse_limit(), 2605 }, 2606 ) 2607 2608 def _parse_uncache(self) -> exp.Uncache: 2609 if not self._match(TokenType.TABLE): 2610 self.raise_error("Expecting TABLE after UNCACHE") 2611 2612 return self.expression( 2613 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2614 ) 2615 2616 def 
_parse_cache(self) -> exp.Cache: 2617 lazy = self._match_text_seq("LAZY") 2618 self._match(TokenType.TABLE) 2619 table = self._parse_table(schema=True) 2620 2621 options = [] 2622 if self._match_text_seq("OPTIONS"): 2623 self._match_l_paren() 2624 k = self._parse_string() 2625 self._match(TokenType.EQ) 2626 v = self._parse_string() 2627 options = [k, v] 2628 self._match_r_paren() 2629 2630 self._match(TokenType.ALIAS) 2631 return self.expression( 2632 exp.Cache, 2633 this=table, 2634 lazy=lazy, 2635 options=options, 2636 expression=self._parse_select(nested=True), 2637 ) 2638 2639 def _parse_partition(self) -> t.Optional[exp.Partition]: 2640 if not self._match(TokenType.PARTITION): 2641 return None 2642 2643 return self.expression( 2644 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2645 ) 2646 2647 def _parse_value(self) -> t.Optional[exp.Tuple]: 2648 if self._match(TokenType.L_PAREN): 2649 expressions = self._parse_csv(self._parse_expression) 2650 self._match_r_paren() 2651 return self.expression(exp.Tuple, expressions=expressions) 2652 2653 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2654 expression = self._parse_expression() 2655 if expression: 2656 return self.expression(exp.Tuple, expressions=[expression]) 2657 return None 2658 2659 def _parse_projections(self) -> t.List[exp.Expression]: 2660 return self._parse_expressions() 2661 2662 def _parse_select( 2663 self, 2664 nested: bool = False, 2665 table: bool = False, 2666 parse_subquery_alias: bool = True, 2667 parse_set_operation: bool = True, 2668 ) -> t.Optional[exp.Expression]: 2669 cte = self._parse_with() 2670 2671 if cte: 2672 this = self._parse_statement() 2673 2674 if not this: 2675 self.raise_error("Failed to parse any statement following CTE") 2676 return cte 2677 2678 if "with" in this.arg_types: 2679 this.set("with", cte) 2680 else: 2681 self.raise_error(f"{this.key} does not support CTE") 2682 this = cte 2683 2684 return this 2685 2686 # duckdb supports leading with FROM x 2687 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2688 2689 if self._match(TokenType.SELECT): 2690 comments = self._prev_comments 2691 2692 hint = self._parse_hint() 2693 all_ = self._match(TokenType.ALL) 2694 distinct = self._match_set(self.DISTINCT_TOKENS) 2695 2696 kind = ( 2697 self._match(TokenType.ALIAS) 2698 and self._match_texts(("STRUCT", "VALUE")) 2699 and self._prev.text.upper() 2700 ) 2701 2702 if distinct: 2703 distinct = self.expression( 2704 exp.Distinct, 2705 on=self._parse_value() if self._match(TokenType.ON) else None, 2706 ) 2707 2708 if all_ and distinct: 2709 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2710 2711 limit = self._parse_limit(top=True) 2712 projections = self._parse_projections() 2713 2714 this = self.expression( 2715 exp.Select, 2716 kind=kind, 2717 hint=hint, 2718 distinct=distinct, 2719 expressions=projections, 2720 limit=limit, 2721 ) 2722 this.comments = comments 2723 2724 into = self._parse_into() 2725 if into: 2726 this.set("into", into) 2727 2728 if not from_: 2729 from_ = self._parse_from() 2730 2731 if from_: 2732 this.set("from", from_) 2733 2734 this = self._parse_query_modifiers(this) 2735 elif (table or nested) and self._match(TokenType.L_PAREN): 2736 if self._match(TokenType.PIVOT): 2737 this = self._parse_simplified_pivot() 2738 elif self._match(TokenType.FROM): 2739 this = exp.select("*").from_( 2740 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2741 ) 2742 else: 2743 
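# Illustrative note (not part of the original source): this branch handles a
# parenthesized source that is either a table reference or a nested SELECT;
# the FROM branches above and below handle DuckDB's FROM-first syntax. A
# hedged sketch of the FROM-first form, assuming sqlglot's DuckDB dialect:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("FROM tbl", read="duckdb").sql()
#   'SELECT * FROM tbl'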
this = ( 2744 self._parse_table() 2745 if table 2746 else self._parse_select(nested=True, parse_set_operation=False) 2747 ) 2748 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2749 2750 self._match_r_paren() 2751 2752 # We return early here so that the UNION isn't attached to the subquery by the 2753 # following call to _parse_set_operations, but instead becomes the parent node 2754 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2755 elif self._match(TokenType.VALUES, advance=False): 2756 this = self._parse_derived_table_values() 2757 elif from_: 2758 this = exp.select("*").from_(from_.this, copy=False) 2759 else: 2760 this = None 2761 2762 if parse_set_operation: 2763 return self._parse_set_operations(this) 2764 return this 2765 2766 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2767 if not skip_with_token and not self._match(TokenType.WITH): 2768 return None 2769 2770 comments = self._prev_comments 2771 recursive = self._match(TokenType.RECURSIVE) 2772 2773 expressions = [] 2774 while True: 2775 expressions.append(self._parse_cte()) 2776 2777 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2778 break 2779 else: 2780 self._match(TokenType.WITH) 2781 2782 return self.expression( 2783 exp.With, comments=comments, expressions=expressions, recursive=recursive 2784 ) 2785 2786 def _parse_cte(self) -> exp.CTE: 2787 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2788 if not alias or not alias.this: 2789 self.raise_error("Expected CTE to have alias") 2790 2791 self._match(TokenType.ALIAS) 2792 2793 if self._match_text_seq("NOT", "MATERIALIZED"): 2794 materialized = False 2795 elif self._match_text_seq("MATERIALIZED"): 2796 materialized = True 2797 else: 2798 materialized = None 2799 2800 return self.expression( 2801 exp.CTE, 2802 this=self._parse_wrapped(self._parse_statement), 2803 alias=alias, 2804 materialized=materialized, 2805 ) 2806 2807 def _parse_table_alias( 2808 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2809 ) -> t.Optional[exp.TableAlias]: 2810 any_token = self._match(TokenType.ALIAS) 2811 alias = ( 2812 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2813 or self._parse_string_as_identifier() 2814 ) 2815 2816 index = self._index 2817 if self._match(TokenType.L_PAREN): 2818 columns = self._parse_csv(self._parse_function_parameter) 2819 self._match_r_paren() if columns else self._retreat(index) 2820 else: 2821 columns = None 2822 2823 if not alias and not columns: 2824 return None 2825 2826 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2827 2828 # We bubble up comments from the Identifier to the TableAlias 2829 if isinstance(alias, exp.Identifier): 2830 table_alias.add_comments(alias.pop_comments()) 2831 2832 return table_alias 2833 2834 def _parse_subquery( 2835 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2836 ) -> t.Optional[exp.Subquery]: 2837 if not this: 2838 return None 2839 2840 return self.expression( 2841 exp.Subquery, 2842 this=this, 2843 pivots=self._parse_pivots(), 2844 alias=self._parse_table_alias() if parse_alias else None, 2845 ) 2846 2847 def _implicit_unnests_to_explicit(self, this: E) -> E: 2848 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2849 2850 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2851 for i, join in enumerate(this.args.get("joins") or []): 2852 table = join.this 2853 
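# Illustrative note (not part of the original source): each comma-joined
# source is normalized below so its name can be compared against the names
# collected in refs; when it resolves to a preceding table, the reference is
# rewritten into an explicit UNNEST. A hedged sketch, assuming BigQuery
# (which enables SUPPORTS_IMPLICIT_UNNEST):
#
#   >>> import sqlglot
#   >>> q = sqlglot.parse_one("SELECT c FROM t, t.arr", read="bigquery")
#   >>> q.args["joins"][0].this
#   # an exp.Unnest node rather than a plain exp.Table reference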
normalized_table = table.copy() 2854 normalized_table.meta["maybe_column"] = True 2855 normalized_table = _norm(normalized_table, dialect=self.dialect) 2856 2857 if isinstance(table, exp.Table) and not join.args.get("on"): 2858 if normalized_table.parts[0].name in refs: 2859 table_as_column = table.to_column() 2860 unnest = exp.Unnest(expressions=[table_as_column]) 2861 2862 # Table.to_column creates a parent Alias node that we want to convert to 2863 # a TableAlias and attach to the Unnest, so it matches the parser's output 2864 if isinstance(table.args.get("alias"), exp.TableAlias): 2865 table_as_column.replace(table_as_column.this) 2866 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2867 2868 table.replace(unnest) 2869 2870 refs.add(normalized_table.alias_or_name) 2871 2872 return this 2873 2874 def _parse_query_modifiers( 2875 self, this: t.Optional[exp.Expression] 2876 ) -> t.Optional[exp.Expression]: 2877 if isinstance(this, (exp.Query, exp.Table)): 2878 for join in self._parse_joins(): 2879 this.append("joins", join) 2880 for lateral in iter(self._parse_lateral, None): 2881 this.append("laterals", lateral) 2882 2883 while True: 2884 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2885 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2886 key, expression = parser(self) 2887 2888 if expression: 2889 this.set(key, expression) 2890 if key == "limit": 2891 offset = expression.args.pop("offset", None) 2892 2893 if offset: 2894 offset = exp.Offset(expression=offset) 2895 this.set("offset", offset) 2896 2897 limit_by_expressions = expression.expressions 2898 expression.set("expressions", None) 2899 offset.set("expressions", limit_by_expressions) 2900 continue 2901 break 2902 2903 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2904 this = self._implicit_unnests_to_explicit(this) 2905 2906 return this 2907 2908 def _parse_hint(self) -> t.Optional[exp.Hint]: 2909 if self._match(TokenType.HINT): 2910 hints = [] 2911 for hint in iter( 2912 lambda: self._parse_csv( 2913 lambda: self._parse_function() or self._parse_var(upper=True) 2914 ), 2915 [], 2916 ): 2917 hints.extend(hint) 2918 2919 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2920 self.raise_error("Expected */ after HINT") 2921 2922 return self.expression(exp.Hint, expressions=hints) 2923 2924 return None 2925 2926 def _parse_into(self) -> t.Optional[exp.Into]: 2927 if not self._match(TokenType.INTO): 2928 return None 2929 2930 temp = self._match(TokenType.TEMPORARY) 2931 unlogged = self._match_text_seq("UNLOGGED") 2932 self._match(TokenType.TABLE) 2933 2934 return self.expression( 2935 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2936 ) 2937 2938 def _parse_from( 2939 self, joins: bool = False, skip_from_token: bool = False 2940 ) -> t.Optional[exp.From]: 2941 if not skip_from_token and not self._match(TokenType.FROM): 2942 return None 2943 2944 return self.expression( 2945 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2946 ) 2947 2948 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2949 return self.expression( 2950 exp.MatchRecognizeMeasure, 2951 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2952 this=self._parse_expression(), 2953 ) 2954 2955 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2956 if not self._match(TokenType.MATCH_RECOGNIZE): 2957 return None 2958 2959 self._match_l_paren() 2960 2961 partition = 
self._parse_partition_by() 2962 order = self._parse_order() 2963 2964 measures = ( 2965 self._parse_csv(self._parse_match_recognize_measure) 2966 if self._match_text_seq("MEASURES") 2967 else None 2968 ) 2969 2970 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2971 rows = exp.var("ONE ROW PER MATCH") 2972 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2973 text = "ALL ROWS PER MATCH" 2974 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2975 text += " SHOW EMPTY MATCHES" 2976 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2977 text += " OMIT EMPTY MATCHES" 2978 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2979 text += " WITH UNMATCHED ROWS" 2980 rows = exp.var(text) 2981 else: 2982 rows = None 2983 2984 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2985 text = "AFTER MATCH SKIP" 2986 if self._match_text_seq("PAST", "LAST", "ROW"): 2987 text += " PAST LAST ROW" 2988 elif self._match_text_seq("TO", "NEXT", "ROW"): 2989 text += " TO NEXT ROW" 2990 elif self._match_text_seq("TO", "FIRST"): 2991 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2992 elif self._match_text_seq("TO", "LAST"): 2993 text += f" TO LAST {self._advance_any().text}" # type: ignore 2994 after = exp.var(text) 2995 else: 2996 after = None 2997 2998 if self._match_text_seq("PATTERN"): 2999 self._match_l_paren() 3000 3001 if not self._curr: 3002 self.raise_error("Expecting )", self._curr) 3003 3004 paren = 1 3005 start = self._curr 3006 3007 while self._curr and paren > 0: 3008 if self._curr.token_type == TokenType.L_PAREN: 3009 paren += 1 3010 if self._curr.token_type == TokenType.R_PAREN: 3011 paren -= 1 3012 3013 end = self._prev 3014 self._advance() 3015 3016 if paren > 0: 3017 self.raise_error("Expecting )", self._curr) 3018 3019 pattern = exp.var(self._find_sql(start, end)) 3020 else: 3021 pattern = None 3022 3023 define = ( 3024 self._parse_csv(self._parse_name_as_expression) 3025 if self._match_text_seq("DEFINE") 3026 else None 3027 ) 3028 3029 self._match_r_paren() 3030 3031 return self.expression( 3032 exp.MatchRecognize, 3033 partition_by=partition, 3034 order=order, 3035 measures=measures, 3036 rows=rows, 3037 after=after, 3038 pattern=pattern, 3039 define=define, 3040 alias=self._parse_table_alias(), 3041 ) 3042 3043 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3044 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3045 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3046 cross_apply = False 3047 3048 if cross_apply is not None: 3049 this = self._parse_select(table=True) 3050 view = None 3051 outer = None 3052 elif self._match(TokenType.LATERAL): 3053 this = self._parse_select(table=True) 3054 view = self._match(TokenType.VIEW) 3055 outer = self._match(TokenType.OUTER) 3056 else: 3057 return None 3058 3059 if not this: 3060 this = ( 3061 self._parse_unnest() 3062 or self._parse_function() 3063 or self._parse_id_var(any_token=False) 3064 ) 3065 3066 while self._match(TokenType.DOT): 3067 this = exp.Dot( 3068 this=this, 3069 expression=self._parse_function() or self._parse_id_var(any_token=False), 3070 ) 3071 3072 if view: 3073 table = self._parse_id_var(any_token=False) 3074 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3075 table_alias: t.Optional[exp.TableAlias] = self.expression( 3076 exp.TableAlias, this=table, columns=columns 3077 ) 3078 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3079 # We move the alias from the lateral's child node to 
the lateral itself 3080 table_alias = this.args["alias"].pop() 3081 else: 3082 table_alias = self._parse_table_alias() 3083 3084 return self.expression( 3085 exp.Lateral, 3086 this=this, 3087 view=view, 3088 outer=outer, 3089 alias=table_alias, 3090 cross_apply=cross_apply, 3091 ) 3092 3093 def _parse_join_parts( 3094 self, 3095 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3096 return ( 3097 self._match_set(self.JOIN_METHODS) and self._prev, 3098 self._match_set(self.JOIN_SIDES) and self._prev, 3099 self._match_set(self.JOIN_KINDS) and self._prev, 3100 ) 3101 3102 def _parse_join( 3103 self, skip_join_token: bool = False, parse_bracket: bool = False 3104 ) -> t.Optional[exp.Join]: 3105 if self._match(TokenType.COMMA): 3106 return self.expression(exp.Join, this=self._parse_table()) 3107 3108 index = self._index 3109 method, side, kind = self._parse_join_parts() 3110 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3111 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3112 3113 if not skip_join_token and not join: 3114 self._retreat(index) 3115 kind = None 3116 method = None 3117 side = None 3118 3119 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3120 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3121 3122 if not skip_join_token and not join and not outer_apply and not cross_apply: 3123 return None 3124 3125 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3126 3127 if method: 3128 kwargs["method"] = method.text 3129 if side: 3130 kwargs["side"] = side.text 3131 if kind: 3132 kwargs["kind"] = kind.text 3133 if hint: 3134 kwargs["hint"] = hint 3135 3136 if self._match(TokenType.MATCH_CONDITION): 3137 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3138 3139 if self._match(TokenType.ON): 3140 kwargs["on"] = self._parse_assignment() 3141 elif self._match(TokenType.USING): 3142 kwargs["using"] = self._parse_wrapped_id_vars() 3143 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3144 kind and kind.token_type == TokenType.CROSS 3145 ): 3146 index = self._index 3147 joins: t.Optional[list] = list(self._parse_joins()) 3148 3149 if joins and self._match(TokenType.ON): 3150 kwargs["on"] = self._parse_assignment() 3151 elif joins and self._match(TokenType.USING): 3152 kwargs["using"] = self._parse_wrapped_id_vars() 3153 else: 3154 joins = None 3155 self._retreat(index) 3156 3157 kwargs["this"].set("joins", joins if joins else None) 3158 3159 comments = [c for token in (method, side, kind) if token for c in token.comments] 3160 return self.expression(exp.Join, comments=comments, **kwargs) 3161 3162 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3163 this = self._parse_assignment() 3164 3165 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3166 return this 3167 3168 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3169 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3170 3171 return this 3172 3173 def _parse_index_params(self) -> exp.IndexParameters: 3174 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3175 3176 if self._match(TokenType.L_PAREN, advance=False): 3177 columns = self._parse_wrapped_csv(self._parse_with_operator) 3178 else: 3179 columns = None 3180 3181 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3182 partition_by = 
self._parse_partition_by() 3183 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3184 tablespace = ( 3185 self._parse_var(any_token=True) 3186 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3187 else None 3188 ) 3189 where = self._parse_where() 3190 3191 on = self._parse_field() if self._match(TokenType.ON) else None 3192 3193 return self.expression( 3194 exp.IndexParameters, 3195 using=using, 3196 columns=columns, 3197 include=include, 3198 partition_by=partition_by, 3199 where=where, 3200 with_storage=with_storage, 3201 tablespace=tablespace, 3202 on=on, 3203 ) 3204 3205 def _parse_index( 3206 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3207 ) -> t.Optional[exp.Index]: 3208 if index or anonymous: 3209 unique = None 3210 primary = None 3211 amp = None 3212 3213 self._match(TokenType.ON) 3214 self._match(TokenType.TABLE) # hive 3215 table = self._parse_table_parts(schema=True) 3216 else: 3217 unique = self._match(TokenType.UNIQUE) 3218 primary = self._match_text_seq("PRIMARY") 3219 amp = self._match_text_seq("AMP") 3220 3221 if not self._match(TokenType.INDEX): 3222 return None 3223 3224 index = self._parse_id_var() 3225 table = None 3226 3227 params = self._parse_index_params() 3228 3229 return self.expression( 3230 exp.Index, 3231 this=index, 3232 table=table, 3233 unique=unique, 3234 primary=primary, 3235 amp=amp, 3236 params=params, 3237 ) 3238 3239 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3240 hints: t.List[exp.Expression] = [] 3241 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3242 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3243 hints.append( 3244 self.expression( 3245 exp.WithTableHint, 3246 expressions=self._parse_csv( 3247 lambda: self._parse_function() or self._parse_var(any_token=True) 3248 ), 3249 ) 3250 ) 3251 self._match_r_paren() 3252 else: 3253 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3254 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3255 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3256 3257 self._match_set((TokenType.INDEX, TokenType.KEY)) 3258 if self._match(TokenType.FOR): 3259 hint.set("target", self._advance_any() and self._prev.text.upper()) 3260 3261 hint.set("expressions", self._parse_wrapped_id_vars()) 3262 hints.append(hint) 3263 3264 return hints or None 3265 3266 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3267 return ( 3268 (not schema and self._parse_function(optional_parens=False)) 3269 or self._parse_id_var(any_token=False) 3270 or self._parse_string_as_identifier() 3271 or self._parse_placeholder() 3272 ) 3273 3274 def _parse_table_parts( 3275 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3276 ) -> exp.Table: 3277 catalog = None 3278 db = None 3279 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3280 3281 while self._match(TokenType.DOT): 3282 if catalog: 3283 # This allows nesting the table in arbitrarily many dot expressions if needed 3284 table = self.expression( 3285 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3286 ) 3287 else: 3288 catalog = db 3289 db = table 3290 # "" used for tsql FROM a..b case 3291 table = self._parse_table_part(schema=schema) or "" 3292 3293 if ( 3294 wildcard 3295 and self._is_connected() 3296 and (isinstance(table, exp.Identifier) or not table) 3297 and self._match(TokenType.STAR) 3298 ): 3299 if 
isinstance(table, exp.Identifier): 3300 table.args["this"] += "*" 3301 else: 3302 table = exp.Identifier(this="*") 3303 3304 # We bubble up comments from the Identifier to the Table 3305 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3306 3307 if is_db_reference: 3308 catalog = db 3309 db = table 3310 table = None 3311 3312 if not table and not is_db_reference: 3313 self.raise_error(f"Expected table name but got {self._curr}") 3314 if not db and is_db_reference: 3315 self.raise_error(f"Expected database name but got {self._curr}") 3316 3317 return self.expression( 3318 exp.Table, 3319 comments=comments, 3320 this=table, 3321 db=db, 3322 catalog=catalog, 3323 pivots=self._parse_pivots(), 3324 ) 3325 3326 def _parse_table( 3327 self, 3328 schema: bool = False, 3329 joins: bool = False, 3330 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3331 parse_bracket: bool = False, 3332 is_db_reference: bool = False, 3333 parse_partition: bool = False, 3334 ) -> t.Optional[exp.Expression]: 3335 lateral = self._parse_lateral() 3336 if lateral: 3337 return lateral 3338 3339 unnest = self._parse_unnest() 3340 if unnest: 3341 return unnest 3342 3343 values = self._parse_derived_table_values() 3344 if values: 3345 return values 3346 3347 subquery = self._parse_select(table=True) 3348 if subquery: 3349 if not subquery.args.get("pivots"): 3350 subquery.set("pivots", self._parse_pivots()) 3351 return subquery 3352 3353 bracket = parse_bracket and self._parse_bracket(None) 3354 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3355 3356 only = self._match(TokenType.ONLY) 3357 3358 this = t.cast( 3359 exp.Expression, 3360 bracket 3361 or self._parse_bracket( 3362 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3363 ), 3364 ) 3365 3366 if only: 3367 this.set("only", only) 3368 3369 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3370 self._match_text_seq("*") 3371 3372 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3373 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3374 this.set("partition", self._parse_partition()) 3375 3376 if schema: 3377 return self._parse_schema(this=this) 3378 3379 version = self._parse_version() 3380 3381 if version: 3382 this.set("version", version) 3383 3384 if self.dialect.ALIAS_POST_TABLESAMPLE: 3385 table_sample = self._parse_table_sample() 3386 3387 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3388 if alias: 3389 this.set("alias", alias) 3390 3391 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3392 return self.expression( 3393 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3394 ) 3395 3396 this.set("hints", self._parse_table_hints()) 3397 3398 if not this.args.get("pivots"): 3399 this.set("pivots", self._parse_pivots()) 3400 3401 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3402 table_sample = self._parse_table_sample() 3403 3404 if table_sample: 3405 table_sample.set("this", this) 3406 this = table_sample 3407 3408 if joins: 3409 for join in self._parse_joins(): 3410 this.append("joins", join) 3411 3412 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3413 this.set("ordinality", True) 3414 this.set("alias", self._parse_table_alias()) 3415 3416 return this 3417 3418 def _parse_version(self) -> t.Optional[exp.Version]: 3419 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3420 this = "TIMESTAMP" 3421 elif 
self._match(TokenType.VERSION_SNAPSHOT): 3422 this = "VERSION" 3423 else: 3424 return None 3425 3426 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3427 kind = self._prev.text.upper() 3428 start = self._parse_bitwise() 3429 self._match_texts(("TO", "AND")) 3430 end = self._parse_bitwise() 3431 expression: t.Optional[exp.Expression] = self.expression( 3432 exp.Tuple, expressions=[start, end] 3433 ) 3434 elif self._match_text_seq("CONTAINED", "IN"): 3435 kind = "CONTAINED IN" 3436 expression = self.expression( 3437 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3438 ) 3439 elif self._match(TokenType.ALL): 3440 kind = "ALL" 3441 expression = None 3442 else: 3443 self._match_text_seq("AS", "OF") 3444 kind = "AS OF" 3445 expression = self._parse_type() 3446 3447 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3448 3449 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3450 if not self._match(TokenType.UNNEST): 3451 return None 3452 3453 expressions = self._parse_wrapped_csv(self._parse_equality) 3454 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3455 3456 alias = self._parse_table_alias() if with_alias else None 3457 3458 if alias: 3459 if self.dialect.UNNEST_COLUMN_ONLY: 3460 if alias.args.get("columns"): 3461 self.raise_error("Unexpected extra column alias in unnest.") 3462 3463 alias.set("columns", [alias.this]) 3464 alias.set("this", None) 3465 3466 columns = alias.args.get("columns") or [] 3467 if offset and len(expressions) < len(columns): 3468 offset = columns.pop() 3469 3470 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3471 self._match(TokenType.ALIAS) 3472 offset = self._parse_id_var( 3473 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3474 ) or exp.to_identifier("offset") 3475 3476 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3477 3478 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3479 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3480 if not is_derived and not self._match_text_seq("VALUES"): 3481 return None 3482 3483 expressions = self._parse_csv(self._parse_value) 3484 alias = self._parse_table_alias() 3485 3486 if is_derived: 3487 self._match_r_paren() 3488 3489 return self.expression( 3490 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3491 ) 3492 3493 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3494 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3495 as_modifier and self._match_text_seq("USING", "SAMPLE") 3496 ): 3497 return None 3498 3499 bucket_numerator = None 3500 bucket_denominator = None 3501 bucket_field = None 3502 percent = None 3503 size = None 3504 seed = None 3505 3506 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3507 matched_l_paren = self._match(TokenType.L_PAREN) 3508 3509 if self.TABLESAMPLE_CSV: 3510 num = None 3511 expressions = self._parse_csv(self._parse_primary) 3512 else: 3513 expressions = None 3514 num = ( 3515 self._parse_factor() 3516 if self._match(TokenType.NUMBER, advance=False) 3517 else self._parse_primary() or self._parse_placeholder() 3518 ) 3519 3520 if self._match_text_seq("BUCKET"): 3521 bucket_numerator = self._parse_number() 3522 self._match_text_seq("OUT", "OF") 3523 bucket_denominator = self._parse_number() 3524 self._match(TokenType.ON) 3525 bucket_field = self._parse_field() 3526 elif
    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_assignment()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)
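    # Illustrative example (editorial addition, not in the original module):
    # _parse_pivot_in (above) and _parse_pivot (below) cooperate to parse PIVOT
    # clauses. A sketch, assuming Snowflake-style syntax with made-up names:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sql = "SELECT * FROM t PIVOT (SUM(v) FOR k IN ('a', 'b'))"
    #     >>> pivot = sqlglot.parse_one(sql, read="snowflake").find(exp.Pivot)
    #     >>> isinstance(pivot.args["field"], exp.In)
    #     True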
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(
                lambda: None
                if self._match(TokenType.ROLLUP, advance=False)
                else self._parse_assignment()
            )
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore
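    # Illustrative example (editorial addition, not in the original module): the Group
    # node collects plain expressions and ROLLUP/CUBE/GROUPING SETS into separate args:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> group = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)").find(exp.Group)
    #     >>> bool(group.args.get("rollup"))
    #     True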
    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_assignment(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
self._match_text_seq("TO") and self._parse_bitwise(), 3848 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3849 }, 3850 ) 3851 else: 3852 with_fill = None 3853 3854 return self.expression( 3855 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3856 ) 3857 3858 def _parse_limit( 3859 self, 3860 this: t.Optional[exp.Expression] = None, 3861 top: bool = False, 3862 skip_limit_token: bool = False, 3863 ) -> t.Optional[exp.Expression]: 3864 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3865 comments = self._prev_comments 3866 if top: 3867 limit_paren = self._match(TokenType.L_PAREN) 3868 expression = self._parse_term() if limit_paren else self._parse_number() 3869 3870 if limit_paren: 3871 self._match_r_paren() 3872 else: 3873 expression = self._parse_term() 3874 3875 if self._match(TokenType.COMMA): 3876 offset = expression 3877 expression = self._parse_term() 3878 else: 3879 offset = None 3880 3881 limit_exp = self.expression( 3882 exp.Limit, 3883 this=this, 3884 expression=expression, 3885 offset=offset, 3886 comments=comments, 3887 expressions=self._parse_limit_by(), 3888 ) 3889 3890 return limit_exp 3891 3892 if self._match(TokenType.FETCH): 3893 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3894 direction = self._prev.text.upper() if direction else "FIRST" 3895 3896 count = self._parse_field(tokens=self.FETCH_TOKENS) 3897 percent = self._match(TokenType.PERCENT) 3898 3899 self._match_set((TokenType.ROW, TokenType.ROWS)) 3900 3901 only = self._match_text_seq("ONLY") 3902 with_ties = self._match_text_seq("WITH", "TIES") 3903 3904 if only and with_ties: 3905 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3906 3907 return self.expression( 3908 exp.Fetch, 3909 direction=direction, 3910 count=count, 3911 percent=percent, 3912 with_ties=with_ties, 3913 ) 3914 3915 return this 3916 3917 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3918 if not self._match(TokenType.OFFSET): 3919 return this 3920 3921 count = self._parse_term() 3922 self._match_set((TokenType.ROW, TokenType.ROWS)) 3923 3924 return self.expression( 3925 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3926 ) 3927 3928 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3929 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3930 3931 def _parse_locks(self) -> t.List[exp.Lock]: 3932 locks = [] 3933 while True: 3934 if self._match_text_seq("FOR", "UPDATE"): 3935 update = True 3936 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3937 "LOCK", "IN", "SHARE", "MODE" 3938 ): 3939 update = False 3940 else: 3941 break 3942 3943 expressions = None 3944 if self._match_text_seq("OF"): 3945 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3946 3947 wait: t.Optional[bool | exp.Expression] = None 3948 if self._match_text_seq("NOWAIT"): 3949 wait = True 3950 elif self._match_text_seq("WAIT"): 3951 wait = self._parse_primary() 3952 elif self._match_text_seq("SKIP", "LOCKED"): 3953 wait = False 3954 3955 locks.append( 3956 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3957 ) 3958 3959 return locks 3960 3961 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3962 while this and self._match_set(self.SET_OPERATIONS): 3963 token_type = self._prev.token_type 3964 3965 if token_type == TokenType.UNION: 3966 
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()

        while self._match_set(self.ASSIGNMENT):
            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
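    # Editorial note (not in the original module): the chain above implements precedence
    # climbing, where each level delegates to the next tighter-binding one (assignment ->
    # OR -> AND -> equality -> comparison -> range -> bitwise -> ...). For instance,
    # "a OR b AND c" should parse with AND bound tighter than OR:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> disjunction = sqlglot.parse_one("SELECT 1 WHERE a OR b AND c").find(exp.Or)
    #     >>> isinstance(disjunction.expression, exp.And)
    #     True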
    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
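    # Illustrative example (editorial addition, not in the original module): the
    # canonicalization in _parse_interval above splits a combined literal like
    # '5 day' into a value and a unit. A sketch:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ivl = sqlglot.parse_one("SELECT INTERVAL '5 day'").find(exp.Interval)
    #     >>> ivl.this.name, ivl.args["unit"].name
    #     ('5', 'DAY')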
    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
self._match_text_seq("UNSIGNED"): 4438 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4439 if not unsigned_type_token: 4440 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4441 4442 type_token = unsigned_type_token or type_token 4443 4444 this = exp.DataType( 4445 this=exp.DataType.Type[type_token.value], 4446 expressions=expressions, 4447 nested=nested, 4448 values=values, 4449 prefix=prefix, 4450 ) 4451 elif expressions: 4452 this.set("expressions", expressions) 4453 4454 # https://materialize.com/docs/sql/types/list/#type-name 4455 while self._match(TokenType.LIST): 4456 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4457 4458 index = self._index 4459 4460 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4461 matched_array = self._match(TokenType.ARRAY) 4462 4463 while self._curr: 4464 matched_l_bracket = self._match(TokenType.L_BRACKET) 4465 if not matched_l_bracket and not matched_array: 4466 break 4467 4468 matched_array = False 4469 values = self._parse_csv(self._parse_assignment) or None 4470 if values and not schema: 4471 self._retreat(index) 4472 break 4473 4474 this = exp.DataType( 4475 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4476 ) 4477 self._match(TokenType.R_BRACKET) 4478 4479 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4480 converter = self.TYPE_CONVERTERS.get(this.this) 4481 if converter: 4482 this = converter(t.cast(exp.DataType, this)) 4483 4484 return this 4485 4486 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4487 index = self._index 4488 this = ( 4489 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4490 or self._parse_id_var() 4491 ) 4492 self._match(TokenType.COLON) 4493 4494 if ( 4495 type_required 4496 and not isinstance(this, exp.DataType) 4497 and not self._match_set(self.TYPE_TOKENS, advance=False) 4498 ): 4499 self._retreat(index) 4500 return self._parse_types() 4501 4502 return self._parse_column_def(this) 4503 4504 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4505 if not self._match_text_seq("AT", "TIME", "ZONE"): 4506 return this 4507 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4508 4509 def _parse_column(self) -> t.Optional[exp.Expression]: 4510 this = self._parse_column_reference() 4511 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4512 4513 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4514 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4515 4516 return column 4517 4518 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4519 this = self._parse_field() 4520 if ( 4521 not this 4522 and self._match(TokenType.VALUES, advance=False) 4523 and self.VALUES_FOLLOWED_BY_PAREN 4524 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4525 ): 4526 this = self._parse_id_var() 4527 4528 if isinstance(this, exp.Identifier): 4529 # We bubble up comments from the Identifier to the Column 4530 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4531 4532 return this 4533 4534 def _parse_colon_as_json_extract( 4535 self, this: t.Optional[exp.Expression] 4536 ) -> t.Optional[exp.Expression]: 4537 casts = [] 4538 json_path = [] 4539 4540 while self._match(TokenType.COLON): 4541 start_index = self._index 4542 4543 # Snowflake allows reserved keywords as json keys but 
    def _parse_colon_as_json_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()
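    # Illustrative example (editorial addition, not in the original module):
    # _parse_lambda (below) handles "->" lambdas inside function arguments. A sketch
    # with a hypothetical function name:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> lam = sqlglot.parse_one("SELECT some_func(x -> x + 1)").find(exp.Lambda)
    #     >>> [e.name for e in lam.expressions]
    #     ['x']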
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_assignment(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))
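    # Illustrative example (editorial addition, not in the original module):
    # _parse_column_def above produces ColumnDef nodes with a data type kind and a
    # list of constraints:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> cdef = sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL)").find(exp.ColumnDef)
    #     >>> cdef.args["kind"].sql(), len(cdef.args["constraints"])
    #     ('INT', 1)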
    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )
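    # Illustrative example (editorial addition, not in the original module):
    # _parse_generated_as_identity above covers, e.g., Postgres-style identity
    # columns. A sketch:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sql = "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY)"
    #     >>> ident = sqlglot.parse_one(sql, read="postgres").find(exp.GeneratedAsIdentityColumnConstraint)
    #     >>> ident.args["this"]  # False for BY DEFAULT, True for ALWAYS
    #     False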
    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
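    # Illustrative example (editorial addition, not in the original module): REFERENCES
    # clauses parsed by _parse_references carry their ON ... / MATCH ... options as
    # plain strings. A sketch with made-up table names:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sql = "CREATE TABLE t (a INT REFERENCES p (id) ON DELETE CASCADE)"
    #     >>> ref = sqlglot.parse_one(sql).find(exp.Reference)
    #     >>> ref.args["options"]
    #     ['ON DELETE CASCADE']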
    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = self.expression(exp.Array, expressions=expressions)
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return self.expression(constructor_type, expressions=expressions)

            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this
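    # Illustrative example (editorial addition, not in the original module):
    # _parse_bracket above yields Struct literals for braces and index Brackets (with
    # the dialect's INDEX_OFFSET applied) for subscripts. A sketch, assuming DuckDB syntax:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT {'a': 1}", read="duckdb").find(exp.Struct) is not None
    #     True
    #     >>> sqlglot.parse_one("SELECT xs[1]", read="duckdb").find(exp.Bracket) is not None
    #     True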
isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5275 default = exp.column("interval") 5276 else: 5277 self.raise_error("Expected END after CASE", self._prev) 5278 5279 return self.expression( 5280 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5281 ) 5282 5283 def _parse_if(self) -> t.Optional[exp.Expression]: 5284 if self._match(TokenType.L_PAREN): 5285 args = self._parse_csv(self._parse_assignment) 5286 this = self.validate_expression(exp.If.from_arg_list(args), args) 5287 self._match_r_paren() 5288 else: 5289 index = self._index - 1 5290 5291 if self.NO_PAREN_IF_COMMANDS and index == 0: 5292 return self._parse_as_command(self._prev) 5293 5294 condition = self._parse_assignment() 5295 5296 if not condition: 5297 self._retreat(index) 5298 return None 5299 5300 self._match(TokenType.THEN) 5301 true = self._parse_assignment() 5302 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5303 self._match(TokenType.END) 5304 this = self.expression(exp.If, this=condition, true=true, false=false) 5305 5306 return this 5307 5308 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5309 if not self._match_text_seq("VALUE", "FOR"): 5310 self._retreat(self._index - 1) 5311 return None 5312 5313 return self.expression( 5314 exp.NextValueFor, 5315 this=self._parse_column(), 5316 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5317 ) 5318 5319 def _parse_extract(self) -> exp.Extract: 5320 this = self._parse_function() or self._parse_var() or self._parse_type() 5321 5322 if self._match(TokenType.FROM): 5323 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5324 5325 if not self._match(TokenType.COMMA): 5326 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5327 5328 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5329 5330 def _parse_gap_fill(self) -> exp.GapFill: 5331 self._match(TokenType.TABLE) 5332 this = self._parse_table() 5333 5334 self._match(TokenType.COMMA) 5335 args = [this, *self._parse_csv(self._parse_lambda)] 5336 5337 gap_fill = exp.GapFill.from_arg_list(args) 5338 return self.validate_expression(gap_fill, args) 5339 5340 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5341 this = self._parse_assignment() 5342 5343 if not self._match(TokenType.ALIAS): 5344 if self._match(TokenType.COMMA): 5345 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5346 5347 self.raise_error("Expected AS after CAST") 5348 5349 fmt = None 5350 to = self._parse_types() 5351 5352 if self._match(TokenType.FORMAT): 5353 fmt_string = self._parse_string() 5354 fmt = self._parse_at_time_zone(fmt_string) 5355 5356 if not to: 5357 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5358 if to.this in exp.DataType.TEMPORAL_TYPES: 5359 this = self.expression( 5360 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5361 this=this, 5362 format=exp.Literal.string( 5363 format_time( 5364 fmt_string.this if fmt_string else "", 5365 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5366 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5367 ) 5368 ), 5369 ) 5370 5371 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5372 this.set("zone", fmt.args["zone"]) 5373 return this 5374 elif not to: 5375 self.raise_error("Expected TYPE after CAST") 5376 elif isinstance(to, exp.Identifier): 5377 to = exp.DataType.build(to.name, 
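# (Illustrative note, not part of the original source.) An Identifier here means the
# type name was not recognized as a built-in DataType, e.g. CAST(x AS my_custom_type);
# udt=True below marks the resulting DataType as user-defined.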
udt=True) 5378 elif to.this == exp.DataType.Type.CHAR: 5379 if self._match(TokenType.CHARACTER_SET): 5380 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5381 5382 return self.expression( 5383 exp.Cast if strict else exp.TryCast, 5384 this=this, 5385 to=to, 5386 format=fmt, 5387 safe=safe, 5388 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5389 ) 5390 5391 def _parse_string_agg(self) -> exp.Expression: 5392 if self._match(TokenType.DISTINCT): 5393 args: t.List[t.Optional[exp.Expression]] = [ 5394 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5395 ] 5396 if self._match(TokenType.COMMA): 5397 args.extend(self._parse_csv(self._parse_assignment)) 5398 else: 5399 args = self._parse_csv(self._parse_assignment) # type: ignore 5400 5401 index = self._index 5402 if not self._match(TokenType.R_PAREN) and args: 5403 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5404 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5405 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5406 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5407 5408 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5409 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5410 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5411 if not self._match_text_seq("WITHIN", "GROUP"): 5412 self._retreat(index) 5413 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5414 5415 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5416 order = self._parse_order(this=seq_get(args, 0)) 5417 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5418 5419 def _parse_convert( 5420 self, strict: bool, safe: t.Optional[bool] = None 5421 ) -> t.Optional[exp.Expression]: 5422 this = self._parse_bitwise() 5423 5424 if self._match(TokenType.USING): 5425 to: t.Optional[exp.Expression] = self.expression( 5426 exp.CharacterSet, this=self._parse_var() 5427 ) 5428 elif self._match(TokenType.COMMA): 5429 to = self._parse_types() 5430 else: 5431 to = None 5432 5433 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5434 5435 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5436 """ 5437 There are generally two variants of the DECODE function: 5438 5439 - DECODE(bin, charset) 5440 - DECODE(expression, search, result [, search, result] ... [, default]) 5441 5442 The second variant will always be parsed into a CASE expression. Note that NULL 5443 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5444 instead of relying on pattern matching. 
5445 """ 5446 args = self._parse_csv(self._parse_assignment) 5447 5448 if len(args) < 3: 5449 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5450 5451 expression, *expressions = args 5452 if not expression: 5453 return None 5454 5455 ifs = [] 5456 for search, result in zip(expressions[::2], expressions[1::2]): 5457 if not search or not result: 5458 return None 5459 5460 if isinstance(search, exp.Literal): 5461 ifs.append( 5462 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5463 ) 5464 elif isinstance(search, exp.Null): 5465 ifs.append( 5466 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5467 ) 5468 else: 5469 cond = exp.or_( 5470 exp.EQ(this=expression.copy(), expression=search), 5471 exp.and_( 5472 exp.Is(this=expression.copy(), expression=exp.Null()), 5473 exp.Is(this=search.copy(), expression=exp.Null()), 5474 copy=False, 5475 ), 5476 copy=False, 5477 ) 5478 ifs.append(exp.If(this=cond, true=result)) 5479 5480 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5481 5482 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5483 self._match_text_seq("KEY") 5484 key = self._parse_column() 5485 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5486 self._match_text_seq("VALUE") 5487 value = self._parse_bitwise() 5488 5489 if not key and not value: 5490 return None 5491 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5492 5493 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5494 if not this or not self._match_text_seq("FORMAT", "JSON"): 5495 return this 5496 5497 return self.expression(exp.FormatJson, this=this) 5498 5499 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5500 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5501 for value in values: 5502 if self._match_text_seq(value, "ON", on): 5503 return f"{value} ON {on}" 5504 5505 return None 5506 5507 @t.overload 5508 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5509 5510 @t.overload 5511 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
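# (Illustrative sketch, not part of the original source.) The multi-argument DECODE
# form documented above is parsed straight into an exp.Case tree; a minimal check,
# assuming sqlglot's public parse_one API:
#
#     import sqlglot
#     from sqlglot import exp
#
#     tree = sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other') FROM t")
#     assert tree.find(exp.Case) is not None  # DECODE(expr, search, result, default)
#     # NULL search arguments compare via IS NULL rather than =, per the docstring.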
5512 5513 def _parse_json_object(self, agg=False): 5514 star = self._parse_star() 5515 expressions = ( 5516 [star] 5517 if star 5518 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5519 ) 5520 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5521 5522 unique_keys = None 5523 if self._match_text_seq("WITH", "UNIQUE"): 5524 unique_keys = True 5525 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5526 unique_keys = False 5527 5528 self._match_text_seq("KEYS") 5529 5530 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5531 self._parse_type() 5532 ) 5533 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5534 5535 return self.expression( 5536 exp.JSONObjectAgg if agg else exp.JSONObject, 5537 expressions=expressions, 5538 null_handling=null_handling, 5539 unique_keys=unique_keys, 5540 return_type=return_type, 5541 encoding=encoding, 5542 ) 5543 5544 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5545 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5546 if not self._match_text_seq("NESTED"): 5547 this = self._parse_id_var() 5548 kind = self._parse_types(allow_identifiers=False) 5549 nested = None 5550 else: 5551 this = None 5552 kind = None 5553 nested = True 5554 5555 path = self._match_text_seq("PATH") and self._parse_string() 5556 nested_schema = nested and self._parse_json_schema() 5557 5558 return self.expression( 5559 exp.JSONColumnDef, 5560 this=this, 5561 kind=kind, 5562 path=path, 5563 nested_schema=nested_schema, 5564 ) 5565 5566 def _parse_json_schema(self) -> exp.JSONSchema: 5567 self._match_text_seq("COLUMNS") 5568 return self.expression( 5569 exp.JSONSchema, 5570 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5571 ) 5572 5573 def _parse_json_table(self) -> exp.JSONTable: 5574 this = self._parse_format_json(self._parse_bitwise()) 5575 path = self._match(TokenType.COMMA) and self._parse_string() 5576 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5577 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5578 schema = self._parse_json_schema() 5579 5580 return exp.JSONTable( 5581 this=this, 5582 schema=schema, 5583 path=path, 5584 error_handling=error_handling, 5585 empty_handling=empty_handling, 5586 ) 5587 5588 def _parse_match_against(self) -> exp.MatchAgainst: 5589 expressions = self._parse_csv(self._parse_column) 5590 5591 self._match_text_seq(")", "AGAINST", "(") 5592 5593 this = self._parse_string() 5594 5595 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5596 modifier = "IN NATURAL LANGUAGE MODE" 5597 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5598 modifier = f"{modifier} WITH QUERY EXPANSION" 5599 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5600 modifier = "IN BOOLEAN MODE" 5601 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5602 modifier = "WITH QUERY EXPANSION" 5603 else: 5604 modifier = None 5605 5606 return self.expression( 5607 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5608 ) 5609 5610 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5611 def _parse_open_json(self) -> exp.OpenJSON: 5612 this = self._parse_bitwise() 5613 path = self._match(TokenType.COMMA) and self._parse_string() 5614 5615 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5616 this = self._parse_field(any_token=True) 5617 kind = self._parse_types() 5618 path = 
self._parse_string() 5619 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5620 5621 return self.expression( 5622 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5623 ) 5624 5625 expressions = None 5626 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5627 self._match_l_paren() 5628 expressions = self._parse_csv(_parse_open_json_column_def) 5629 5630 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5631 5632 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5633 args = self._parse_csv(self._parse_bitwise) 5634 5635 if self._match(TokenType.IN): 5636 return self.expression( 5637 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5638 ) 5639 5640 if haystack_first: 5641 haystack = seq_get(args, 0) 5642 needle = seq_get(args, 1) 5643 else: 5644 needle = seq_get(args, 0) 5645 haystack = seq_get(args, 1) 5646 5647 return self.expression( 5648 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5649 ) 5650 5651 def _parse_predict(self) -> exp.Predict: 5652 self._match_text_seq("MODEL") 5653 this = self._parse_table() 5654 5655 self._match(TokenType.COMMA) 5656 self._match_text_seq("TABLE") 5657 5658 return self.expression( 5659 exp.Predict, 5660 this=this, 5661 expression=self._parse_table(), 5662 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5663 ) 5664 5665 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5666 args = self._parse_csv(self._parse_table) 5667 return exp.JoinHint(this=func_name.upper(), expressions=args) 5668 5669 def _parse_substring(self) -> exp.Substring: 5670 # Postgres supports the form: substring(string [from int] [for int]) 5671 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5672 5673 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5674 5675 if self._match(TokenType.FROM): 5676 args.append(self._parse_bitwise()) 5677 if self._match(TokenType.FOR): 5678 if len(args) == 1: 5679 args.append(exp.Literal.number(1)) 5680 args.append(self._parse_bitwise()) 5681 5682 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5683 5684 def _parse_trim(self) -> exp.Trim: 5685 # https://www.w3resource.com/sql/character-functions/trim.php 5686 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5687 5688 position = None 5689 collation = None 5690 expression = None 5691 5692 if self._match_texts(self.TRIM_TYPES): 5693 position = self._prev.text.upper() 5694 5695 this = self._parse_bitwise() 5696 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5697 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5698 expression = self._parse_bitwise() 5699 5700 if invert_order: 5701 this, expression = expression, this 5702 5703 if self._match(TokenType.COLLATE): 5704 collation = self._parse_bitwise() 5705 5706 return self.expression( 5707 exp.Trim, this=this, position=position, expression=expression, collation=collation 5708 ) 5709 5710 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5711 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5712 5713 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5714 return self._parse_window(self._parse_id_var(), alias=True) 5715 5716 def _parse_respect_or_ignore_nulls( 5717 self, this: t.Optional[exp.Expression] 5718 ) -> t.Optional[exp.Expression]: 5719 if self._match_text_seq("IGNORE", "NULLS"): 
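# (Illustrative note, not part of the original source.) At this point the parser has
# just consumed IGNORE NULLS, so the branch below wraps whatever was parsed so far;
# e.g. FIRST_VALUE(x) IGNORE NULLS becomes roughly IgnoreNulls(this=FirstValue(...))
# before any OVER clause is parsed.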
5720 return self.expression(exp.IgnoreNulls, this=this) 5721 if self._match_text_seq("RESPECT", "NULLS"): 5722 return self.expression(exp.RespectNulls, this=this) 5723 return this 5724 5725 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5726 if self._match(TokenType.HAVING): 5727 self._match_texts(("MAX", "MIN")) 5728 max = self._prev.text.upper() != "MIN" 5729 return self.expression( 5730 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5731 ) 5732 5733 return this 5734 5735 def _parse_window( 5736 self, this: t.Optional[exp.Expression], alias: bool = False 5737 ) -> t.Optional[exp.Expression]: 5738 func = this 5739 comments = func.comments if isinstance(func, exp.Expression) else None 5740 5741 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5742 self._match(TokenType.WHERE) 5743 this = self.expression( 5744 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5745 ) 5746 self._match_r_paren() 5747 5748 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5749 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5750 if self._match_text_seq("WITHIN", "GROUP"): 5751 order = self._parse_wrapped(self._parse_order) 5752 this = self.expression(exp.WithinGroup, this=this, expression=order) 5753 5754 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER clause; 5755 # some dialects choose to implement it and some do not. 5756 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5757 5758 # There is some code above in _parse_lambda that handles 5759 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5760 5761 # The code below handles 5762 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5763 5764 # Oracle allows both formats 5765 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5766 # and Snowflake chose to do the same for familiarity 5767 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5768 if isinstance(this, exp.AggFunc): 5769 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5770 5771 if ignore_respect and ignore_respect is not this: 5772 ignore_respect.replace(ignore_respect.this) 5773 this = self.expression(ignore_respect.__class__, this=this) 5774 5775 this = self._parse_respect_or_ignore_nulls(this) 5776 5777 # bigquery select from window x AS (partition by ...)
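# (Illustrative note, not part of the original source.) With alias=True this method
# parses the named-window form, e.g. SELECT SUM(x) OVER w FROM t WINDOW w AS
# (PARTITION BY y): _parse_named_window passes the window name as `this`, and the
# branch below consumes AS instead of expecting an OVER token.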
5778 if alias: 5779 over = None 5780 self._match(TokenType.ALIAS) 5781 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5782 return this 5783 else: 5784 over = self._prev.text.upper() 5785 5786 if comments and isinstance(func, exp.Expression): 5787 func.pop_comments() 5788 5789 if not self._match(TokenType.L_PAREN): 5790 return self.expression( 5791 exp.Window, 5792 comments=comments, 5793 this=this, 5794 alias=self._parse_id_var(False), 5795 over=over, 5796 ) 5797 5798 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5799 5800 first = self._match(TokenType.FIRST) 5801 if self._match_text_seq("LAST"): 5802 first = False 5803 5804 partition, order = self._parse_partition_and_order() 5805 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5806 5807 if kind: 5808 self._match(TokenType.BETWEEN) 5809 start = self._parse_window_spec() 5810 self._match(TokenType.AND) 5811 end = self._parse_window_spec() 5812 5813 spec = self.expression( 5814 exp.WindowSpec, 5815 kind=kind, 5816 start=start["value"], 5817 start_side=start["side"], 5818 end=end["value"], 5819 end_side=end["side"], 5820 ) 5821 else: 5822 spec = None 5823 5824 self._match_r_paren() 5825 5826 window = self.expression( 5827 exp.Window, 5828 comments=comments, 5829 this=this, 5830 partition_by=partition, 5831 order=order, 5832 spec=spec, 5833 alias=window_alias, 5834 over=over, 5835 first=first, 5836 ) 5837 5838 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5839 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5840 return self._parse_window(window, alias=alias) 5841 5842 return window 5843 5844 def _parse_partition_and_order( 5845 self, 5846 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5847 return self._parse_partition_by(), self._parse_order() 5848 5849 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5850 self._match(TokenType.BETWEEN) 5851 5852 return { 5853 "value": ( 5854 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5855 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5856 or self._parse_bitwise() 5857 ), 5858 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5859 } 5860 5861 def _parse_alias( 5862 self, this: t.Optional[exp.Expression], explicit: bool = False 5863 ) -> t.Optional[exp.Expression]: 5864 any_token = self._match(TokenType.ALIAS) 5865 comments = self._prev_comments or [] 5866 5867 if explicit and not any_token: 5868 return this 5869 5870 if self._match(TokenType.L_PAREN): 5871 aliases = self.expression( 5872 exp.Aliases, 5873 comments=comments, 5874 this=this, 5875 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5876 ) 5877 self._match_r_paren(aliases) 5878 return aliases 5879 5880 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5881 self.STRING_ALIASES and self._parse_string_as_identifier() 5882 ) 5883 5884 if alias: 5885 comments.extend(alias.pop_comments()) 5886 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5887 column = this.this 5888 5889 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5890 if not this.comments and column and column.comments: 5891 this.comments = column.pop_comments() 5892 5893 return this 5894 5895 def _parse_id_var( 5896 self, 5897 any_token: bool = True, 5898 tokens: t.Optional[t.Collection[TokenType]] = None, 5899 ) -> t.Optional[exp.Expression]: 5900 expression = self._parse_identifier() 5901 if 
not expression and ( 5902 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5903 ): 5904 quoted = self._prev.token_type == TokenType.STRING 5905 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5906 5907 return expression 5908 5909 def _parse_string(self) -> t.Optional[exp.Expression]: 5910 if self._match_set(self.STRING_PARSERS): 5911 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5912 return self._parse_placeholder() 5913 5914 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5915 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5916 5917 def _parse_number(self) -> t.Optional[exp.Expression]: 5918 if self._match_set(self.NUMERIC_PARSERS): 5919 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5920 return self._parse_placeholder() 5921 5922 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5923 if self._match(TokenType.IDENTIFIER): 5924 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5925 return self._parse_placeholder() 5926 5927 def _parse_var( 5928 self, 5929 any_token: bool = False, 5930 tokens: t.Optional[t.Collection[TokenType]] = None, 5931 upper: bool = False, 5932 ) -> t.Optional[exp.Expression]: 5933 if ( 5934 (any_token and self._advance_any()) 5935 or self._match(TokenType.VAR) 5936 or (self._match_set(tokens) if tokens else False) 5937 ): 5938 return self.expression( 5939 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5940 ) 5941 return self._parse_placeholder() 5942 5943 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5944 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5945 self._advance() 5946 return self._prev 5947 return None 5948 5949 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5950 return self._parse_var() or self._parse_string() 5951 5952 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5953 return self._parse_primary() or self._parse_var(any_token=True) 5954 5955 def _parse_null(self) -> t.Optional[exp.Expression]: 5956 if self._match_set(self.NULL_TOKENS): 5957 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5958 return self._parse_placeholder() 5959 5960 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5961 if self._match(TokenType.TRUE): 5962 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5963 if self._match(TokenType.FALSE): 5964 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5965 return self._parse_placeholder() 5966 5967 def _parse_star(self) -> t.Optional[exp.Expression]: 5968 if self._match(TokenType.STAR): 5969 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5970 return self._parse_placeholder() 5971 5972 def _parse_parameter(self) -> exp.Parameter: 5973 this = self._parse_identifier() or self._parse_primary_or_var() 5974 return self.expression(exp.Parameter, this=this) 5975 5976 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5977 if self._match_set(self.PLACEHOLDER_PARSERS): 5978 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5979 if placeholder: 5980 return placeholder 5981 self._advance(-1) 5982 return None 5983 5984 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5985 if not self._match_texts(keywords): 5986 return None 5987 if self._match(TokenType.L_PAREN, advance=False): 5988 return 
self._parse_wrapped_csv(self._parse_expression) 5989 5990 expression = self._parse_expression() 5991 return [expression] if expression else None 5992 5993 def _parse_csv( 5994 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5995 ) -> t.List[exp.Expression]: 5996 parse_result = parse_method() 5997 items = [parse_result] if parse_result is not None else [] 5998 5999 while self._match(sep): 6000 self._add_comments(parse_result) 6001 parse_result = parse_method() 6002 if parse_result is not None: 6003 items.append(parse_result) 6004 6005 return items 6006 6007 def _parse_tokens( 6008 self, parse_method: t.Callable, expressions: t.Dict 6009 ) -> t.Optional[exp.Expression]: 6010 this = parse_method() 6011 6012 while self._match_set(expressions): 6013 this = self.expression( 6014 expressions[self._prev.token_type], 6015 this=this, 6016 comments=self._prev_comments, 6017 expression=parse_method(), 6018 ) 6019 6020 return this 6021 6022 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6023 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6024 6025 def _parse_wrapped_csv( 6026 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6027 ) -> t.List[exp.Expression]: 6028 return self._parse_wrapped( 6029 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6030 ) 6031 6032 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6033 wrapped = self._match(TokenType.L_PAREN) 6034 if not wrapped and not optional: 6035 self.raise_error("Expecting (") 6036 parse_result = parse_method() 6037 if wrapped: 6038 self._match_r_paren() 6039 return parse_result 6040 6041 def _parse_expressions(self) -> t.List[exp.Expression]: 6042 return self._parse_csv(self._parse_expression) 6043 6044 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6045 return self._parse_select() or self._parse_set_operations( 6046 self._parse_expression() if alias else self._parse_assignment() 6047 ) 6048 6049 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6050 return self._parse_query_modifiers( 6051 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6052 ) 6053 6054 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6055 this = None 6056 if self._match_texts(self.TRANSACTION_KIND): 6057 this = self._prev.text 6058 6059 self._match_texts(("TRANSACTION", "WORK")) 6060 6061 modes = [] 6062 while True: 6063 mode = [] 6064 while self._match(TokenType.VAR): 6065 mode.append(self._prev.text) 6066 6067 if mode: 6068 modes.append(" ".join(mode)) 6069 if not self._match(TokenType.COMMA): 6070 break 6071 6072 return self.expression(exp.Transaction, this=this, modes=modes) 6073 6074 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6075 chain = None 6076 savepoint = None 6077 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6078 6079 self._match_texts(("TRANSACTION", "WORK")) 6080 6081 if self._match_text_seq("TO"): 6082 self._match_text_seq("SAVEPOINT") 6083 savepoint = self._parse_id_var() 6084 6085 if self._match(TokenType.AND): 6086 chain = not self._match_text_seq("NO") 6087 self._match_text_seq("CHAIN") 6088 6089 if is_rollback: 6090 return self.expression(exp.Rollback, savepoint=savepoint) 6091 6092 return self.expression(exp.Commit, chain=chain) 6093 6094 def _parse_refresh(self) -> exp.Refresh: 6095 self._match(TokenType.TABLE) 6096 return self.expression(exp.Refresh, 
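# (Illustrative note, not part of the original source.) REFRESH accepts either a
# string literal or a table reference, so e.g. REFRESH TABLE db.tbl parses to
# exp.Refresh over the corresponding exp.Table node.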
this=self._parse_string() or self._parse_table()) 6097 6098 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6099 if not self._match_text_seq("ADD"): 6100 return None 6101 6102 self._match(TokenType.COLUMN) 6103 exists_column = self._parse_exists(not_=True) 6104 expression = self._parse_field_def() 6105 6106 if expression: 6107 expression.set("exists", exists_column) 6108 6109 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6110 if self._match_texts(("FIRST", "AFTER")): 6111 position = self._prev.text 6112 column_position = self.expression( 6113 exp.ColumnPosition, this=self._parse_column(), position=position 6114 ) 6115 expression.set("position", column_position) 6116 6117 return expression 6118 6119 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6120 drop = self._match(TokenType.DROP) and self._parse_drop() 6121 if drop and not isinstance(drop, exp.Command): 6122 drop.set("kind", drop.args.get("kind", "COLUMN")) 6123 return drop 6124 6125 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6126 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6127 return self.expression( 6128 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6129 ) 6130 6131 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6132 index = self._index - 1 6133 6134 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6135 return self._parse_csv( 6136 lambda: self.expression( 6137 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6138 ) 6139 ) 6140 6141 self._retreat(index) 6142 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6143 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6144 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6145 6146 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6147 if self._match_texts(self.ALTER_ALTER_PARSERS): 6148 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6149 6150 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6151 # keyword after ALTER we default to parsing this statement 6152 self._match(TokenType.COLUMN) 6153 column = self._parse_field(any_token=True) 6154 6155 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6156 return self.expression(exp.AlterColumn, this=column, drop=True) 6157 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6158 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6159 if self._match(TokenType.COMMENT): 6160 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6161 if self._match_text_seq("DROP", "NOT", "NULL"): 6162 return self.expression( 6163 exp.AlterColumn, 6164 this=column, 6165 drop=True, 6166 allow_null=True, 6167 ) 6168 if self._match_text_seq("SET", "NOT", "NULL"): 6169 return self.expression( 6170 exp.AlterColumn, 6171 this=column, 6172 allow_null=False, 6173 ) 6174 self._match_text_seq("SET", "DATA") 6175 self._match_text_seq("TYPE") 6176 return self.expression( 6177 exp.AlterColumn, 6178 this=column, 6179 dtype=self._parse_types(), 6180 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6181 using=self._match(TokenType.USING) and self._parse_assignment(), 6182 ) 6183 6184 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6185 if self._match_texts(("ALL", "EVEN", "AUTO")): 6186 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6187 6188 self._match_text_seq("KEY", "DISTKEY") 6189 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6190 6191 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6192 if compound: 6193 self._match_text_seq("SORTKEY") 6194 6195 if self._match(TokenType.L_PAREN, advance=False): 6196 return self.expression( 6197 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6198 ) 6199 6200 self._match_texts(("AUTO", "NONE")) 6201 return self.expression( 6202 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6203 ) 6204 6205 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6206 index = self._index - 1 6207 6208 partition_exists = self._parse_exists() 6209 if self._match(TokenType.PARTITION, advance=False): 6210 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6211 6212 self._retreat(index) 6213 return self._parse_csv(self._parse_drop_column) 6214 6215 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6216 if self._match(TokenType.COLUMN): 6217 exists = self._parse_exists() 6218 old_column = self._parse_column() 6219 to = self._match_text_seq("TO") 6220 new_column = self._parse_column() 6221 6222 if old_column is None or to is None or new_column is None: 6223 return None 6224 6225 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6226 6227 self._match_text_seq("TO") 6228 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6229 6230 def _parse_alter_table_set(self) -> exp.AlterSet: 6231 alter_set = self.expression(exp.AlterSet) 6232 6233 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6234 "TABLE", "PROPERTIES" 6235 ): 6236 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6237 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6238 alter_set.set("expressions", [self._parse_assignment()]) 6239 elif self._match_texts(("LOGGED", "UNLOGGED")): 6240 alter_set.set("option", exp.var(self._prev.text.upper())) 6241 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6242 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6243 elif self._match_text_seq("LOCATION"): 6244 alter_set.set("location", self._parse_field()) 6245 elif self._match_text_seq("ACCESS", "METHOD"): 6246 alter_set.set("access_method", self._parse_field()) 6247 elif self._match_text_seq("TABLESPACE"): 6248 alter_set.set("tablespace", self._parse_field()) 6249 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6250 alter_set.set("file_format", [self._parse_field()]) 6251 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6252 alter_set.set("file_format", self._parse_wrapped_options()) 6253 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6254 alter_set.set("copy_options", self._parse_wrapped_options()) 6255 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6256 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6257 else: 6258 if self._match_text_seq("SERDE"): 6259 alter_set.set("serde", self._parse_field()) 6260 6261 alter_set.set("expressions", [self._parse_properties()]) 6262 6263 return alter_set 6264 6265 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6266 start = self._prev 6267 6268 if not self._match(TokenType.TABLE): 6269 return 
self._parse_as_command(start) 6270 6271 exists = self._parse_exists() 6272 only = self._match_text_seq("ONLY") 6273 this = self._parse_table(schema=True) 6274 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6275 6276 if self._next: 6277 self._advance() 6278 6279 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6280 if parser: 6281 actions = ensure_list(parser(self)) 6282 options = self._parse_csv(self._parse_property) 6283 6284 if not self._curr and actions: 6285 return self.expression( 6286 exp.AlterTable, 6287 this=this, 6288 exists=exists, 6289 actions=actions, 6290 only=only, 6291 options=options, 6292 cluster=cluster, 6293 ) 6294 6295 return self._parse_as_command(start) 6296 6297 def _parse_merge(self) -> exp.Merge: 6298 self._match(TokenType.INTO) 6299 target = self._parse_table() 6300 6301 if target and self._match(TokenType.ALIAS, advance=False): 6302 target.set("alias", self._parse_table_alias()) 6303 6304 self._match(TokenType.USING) 6305 using = self._parse_table() 6306 6307 self._match(TokenType.ON) 6308 on = self._parse_assignment() 6309 6310 return self.expression( 6311 exp.Merge, 6312 this=target, 6313 using=using, 6314 on=on, 6315 expressions=self._parse_when_matched(), 6316 ) 6317 6318 def _parse_when_matched(self) -> t.List[exp.When]: 6319 whens = [] 6320 6321 while self._match(TokenType.WHEN): 6322 matched = not self._match(TokenType.NOT) 6323 self._match_text_seq("MATCHED") 6324 source = ( 6325 False 6326 if self._match_text_seq("BY", "TARGET") 6327 else self._match_text_seq("BY", "SOURCE") 6328 ) 6329 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6330 6331 self._match(TokenType.THEN) 6332 6333 if self._match(TokenType.INSERT): 6334 _this = self._parse_star() 6335 if _this: 6336 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6337 else: 6338 then = self.expression( 6339 exp.Insert, 6340 this=self._parse_value(), 6341 expression=self._match_text_seq("VALUES") and self._parse_value(), 6342 ) 6343 elif self._match(TokenType.UPDATE): 6344 expressions = self._parse_star() 6345 if expressions: 6346 then = self.expression(exp.Update, expressions=expressions) 6347 else: 6348 then = self.expression( 6349 exp.Update, 6350 expressions=self._match(TokenType.SET) 6351 and self._parse_csv(self._parse_equality), 6352 ) 6353 elif self._match(TokenType.DELETE): 6354 then = self.expression(exp.Var, this=self._prev.text) 6355 else: 6356 then = None 6357 6358 whens.append( 6359 self.expression( 6360 exp.When, 6361 matched=matched, 6362 source=source, 6363 condition=condition, 6364 then=then, 6365 ) 6366 ) 6367 return whens 6368 6369 def _parse_show(self) -> t.Optional[exp.Expression]: 6370 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6371 if parser: 6372 return parser(self) 6373 return self._parse_as_command(self._prev) 6374 6375 def _parse_set_item_assignment( 6376 self, kind: t.Optional[str] = None 6377 ) -> t.Optional[exp.Expression]: 6378 index = self._index 6379 6380 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6381 return self._parse_set_transaction(global_=kind == "GLOBAL") 6382 6383 left = self._parse_primary() or self._parse_column() 6384 assignment_delimiter = self._match_texts(("=", "TO")) 6385 6386 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6387 self._retreat(index) 6388 return None 6389 6390 right = self._parse_statement() or self._parse_id_var() 6391 if isinstance(right, 
(exp.Column, exp.Identifier)): 6392 right = exp.var(right.name) 6393 6394 this = self.expression(exp.EQ, this=left, expression=right) 6395 return self.expression(exp.SetItem, this=this, kind=kind) 6396 6397 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6398 self._match_text_seq("TRANSACTION") 6399 characteristics = self._parse_csv( 6400 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6401 ) 6402 return self.expression( 6403 exp.SetItem, 6404 expressions=characteristics, 6405 kind="TRANSACTION", 6406 **{"global": global_}, # type: ignore 6407 ) 6408 6409 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6410 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6411 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6412 6413 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6414 index = self._index 6415 set_ = self.expression( 6416 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6417 ) 6418 6419 if self._curr: 6420 self._retreat(index) 6421 return self._parse_as_command(self._prev) 6422 6423 return set_ 6424 6425 def _parse_var_from_options( 6426 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6427 ) -> t.Optional[exp.Var]: 6428 start = self._curr 6429 if not start: 6430 return None 6431 6432 option = start.text.upper() 6433 continuations = options.get(option) 6434 6435 index = self._index 6436 self._advance() 6437 for keywords in continuations or []: 6438 if isinstance(keywords, str): 6439 keywords = (keywords,) 6440 6441 if self._match_text_seq(*keywords): 6442 option = f"{option} {' '.join(keywords)}" 6443 break 6444 else: 6445 if continuations or continuations is None: 6446 if raise_unmatched: 6447 self.raise_error(f"Unknown option {option}") 6448 6449 self._retreat(index) 6450 return None 6451 6452 return exp.var(option) 6453 6454 def _parse_as_command(self, start: Token) -> exp.Command: 6455 while self._curr: 6456 self._advance() 6457 text = self._find_sql(start, self._prev) 6458 size = len(start.text) 6459 self._warn_unsupported() 6460 return exp.Command(this=text[:size], expression=text[size:]) 6461 6462 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6463 settings = [] 6464 6465 self._match_l_paren() 6466 kind = self._parse_id_var() 6467 6468 if self._match(TokenType.L_PAREN): 6469 while True: 6470 key = self._parse_id_var() 6471 value = self._parse_primary() 6472 6473 if not key and value is None: 6474 break 6475 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6476 self._match(TokenType.R_PAREN) 6477 6478 self._match_r_paren() 6479 6480 return self.expression( 6481 exp.DictProperty, 6482 this=this, 6483 kind=kind.this if kind else None, 6484 settings=settings, 6485 ) 6486 6487 def _parse_dict_range(self, this: str) -> exp.DictRange: 6488 self._match_l_paren() 6489 has_min = self._match_text_seq("MIN") 6490 if has_min: 6491 min = self._parse_var() or self._parse_primary() 6492 self._match_text_seq("MAX") 6493 max = self._parse_var() or self._parse_primary() 6494 else: 6495 max = self._parse_var() or self._parse_primary() 6496 min = exp.Literal.number(0) 6497 self._match_r_paren() 6498 return self.expression(exp.DictRange, this=this, min=min, max=max) 6499 6500 def _parse_comprehension( 6501 self, this: t.Optional[exp.Expression] 6502 ) -> t.Optional[exp.Comprehension]: 6503 index = self._index 6504 expression = self._parse_column() 6505 if not 
self._match(TokenType.IN): 6506 self._retreat(index - 1) 6507 return None 6508 iterator = self._parse_column() 6509 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6510 return self.expression( 6511 exp.Comprehension, 6512 this=this, 6513 expression=expression, 6514 iterator=iterator, 6515 condition=condition, 6516 ) 6517 6518 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6519 if self._match(TokenType.HEREDOC_STRING): 6520 return self.expression(exp.Heredoc, this=self._prev.text) 6521 6522 if not self._match_text_seq("$"): 6523 return None 6524 6525 tags = ["$"] 6526 tag_text = None 6527 6528 if self._is_connected(): 6529 self._advance() 6530 tags.append(self._prev.text.upper()) 6531 else: 6532 self.raise_error("No closing $ found") 6533 6534 if tags[-1] != "$": 6535 if self._is_connected() and self._match_text_seq("$"): 6536 tag_text = tags[-1] 6537 tags.append("$") 6538 else: 6539 self.raise_error("No closing $ found") 6540 6541 heredoc_start = self._curr 6542 6543 while self._curr: 6544 if self._match_text_seq(*tags, advance=False): 6545 this = self._find_sql(heredoc_start, self._prev) 6546 self._advance(len(tags)) 6547 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6548 6549 self._advance() 6550 6551 self.raise_error(f"No closing {''.join(tags)} found") 6552 return None 6553 6554 def _find_parser( 6555 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6556 ) -> t.Optional[t.Callable]: 6557 if not self._curr: 6558 return None 6559 6560 index = self._index 6561 this = [] 6562 while True: 6563 # The current token might be multiple words 6564 curr = self._curr.text.upper() 6565 key = curr.split(" ") 6566 this.append(curr) 6567 6568 self._advance() 6569 result, trie = in_trie(trie, key) 6570 if result == TrieResult.FAILED: 6571 break 6572 6573 if result == TrieResult.EXISTS: 6574 subparser = parsers[" ".join(this)] 6575 return subparser 6576 6577 self._retreat(index) 6578 return None 6579 6580 def _match(self, token_type, advance=True, expression=None): 6581 if not self._curr: 6582 return None 6583 6584 if self._curr.token_type == token_type: 6585 if advance: 6586 self._advance() 6587 self._add_comments(expression) 6588 return True 6589 6590 return None 6591 6592 def _match_set(self, types, advance=True): 6593 if not self._curr: 6594 return None 6595 6596 if self._curr.token_type in types: 6597 if advance: 6598 self._advance() 6599 return True 6600 6601 return None 6602 6603 def _match_pair(self, token_type_a, token_type_b, advance=True): 6604 if not self._curr or not self._next: 6605 return None 6606 6607 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6608 if advance: 6609 self._advance(2) 6610 return True 6611 6612 return None 6613 6614 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6615 if not self._match(TokenType.L_PAREN, expression=expression): 6616 self.raise_error("Expecting (") 6617 6618 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6619 if not self._match(TokenType.R_PAREN, expression=expression): 6620 self.raise_error("Expecting )") 6621 6622 def _match_texts(self, texts, advance=True): 6623 if self._curr and self._curr.text.upper() in texts: 6624 if advance: 6625 self._advance() 6626 return True 6627 return None 6628 6629 def _match_text_seq(self, *texts, advance=True): 6630 index = self._index 6631 for text in texts: 6632 if self._curr and self._curr.text.upper() == text: 6633 self._advance() 6634 else: 6635 
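# (Illustrative note, not part of the original source.) A partially matched keyword
# sequence must not consume any tokens, so the else branch below rewinds the cursor
# to the saved index and reports failure by returning None.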
self._retreat(index) 6636 return None 6637 6638 if not advance: 6639 self._retreat(index) 6640 6641 return True 6642 6643 def _replace_lambda( 6644 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6645 ) -> t.Optional[exp.Expression]: 6646 if not node: 6647 return node 6648 6649 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6650 6651 for column in node.find_all(exp.Column): 6652 typ = lambda_types.get(column.parts[0].name) 6653 if typ is not None: 6654 dot_or_id = column.to_dot() if column.table else column.this 6655 6656 if typ: 6657 dot_or_id = self.expression( 6658 exp.Cast, 6659 this=dot_or_id, 6660 to=typ, 6661 ) 6662 6663 parent = column.parent 6664 6665 while isinstance(parent, exp.Dot): 6666 if not isinstance(parent.parent, exp.Dot): 6667 parent.replace(dot_or_id) 6668 break 6669 parent = parent.parent 6670 else: 6671 if column is node: 6672 node = dot_or_id 6673 else: 6674 column.replace(dot_or_id) 6675 return node 6676 6677 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6678 start = self._prev 6679 6680 # Not to be confused with TRUNCATE(number, decimals) function call 6681 if self._match(TokenType.L_PAREN): 6682 self._retreat(self._index - 2) 6683 return self._parse_function() 6684 6685 # Clickhouse supports TRUNCATE DATABASE as well 6686 is_database = self._match(TokenType.DATABASE) 6687 6688 self._match(TokenType.TABLE) 6689 6690 exists = self._parse_exists(not_=False) 6691 6692 expressions = self._parse_csv( 6693 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6694 ) 6695 6696 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6697 6698 if self._match_text_seq("RESTART", "IDENTITY"): 6699 identity = "RESTART" 6700 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6701 identity = "CONTINUE" 6702 else: 6703 identity = None 6704 6705 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6706 option = self._prev.text 6707 else: 6708 option = None 6709 6710 partition = self._parse_partition() 6711 6712 # Fallback case 6713 if self._curr: 6714 return self._parse_as_command(start) 6715 6716 return self.expression( 6717 exp.TruncateTable, 6718 expressions=expressions, 6719 is_database=is_database, 6720 exists=exists, 6721 cluster=cluster, 6722 identity=identity, 6723 option=option, 6724 partition=partition, 6725 ) 6726 6727 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6728 this = self._parse_ordered(self._parse_opclass) 6729 6730 if not self._match(TokenType.WITH): 6731 return this 6732 6733 op = self._parse_var(any_token=True) 6734 6735 return self.expression(exp.WithOperator, this=this, op=op) 6736 6737 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6738 self._match(TokenType.EQ) 6739 self._match(TokenType.L_PAREN) 6740 6741 opts: t.List[t.Optional[exp.Expression]] = [] 6742 while self._curr and not self._match(TokenType.R_PAREN): 6743 if self._match_text_seq("FORMAT_NAME", "="): 6744 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6745 # so we parse it separately to use _parse_field() 6746 prop = self.expression( 6747 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6748 ) 6749 opts.append(prop) 6750 else: 6751 opts.append(self._parse_property()) 6752 6753 self._match(TokenType.COMMA) 6754 6755 return opts 6756 6757 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6758 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6759 6760 options = [] 6761 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6762 option = self._parse_var(any_token=True) 6763 prev = self._prev.text.upper() 6764 6765 # Different dialects might separate options and values by white space, "=" and "AS" 6766 self._match(TokenType.EQ) 6767 self._match(TokenType.ALIAS) 6768 6769 param = self.expression(exp.CopyParameter, this=option) 6770 6771 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6772 TokenType.L_PAREN, advance=False 6773 ): 6774 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6775 param.set("expressions", self._parse_wrapped_options()) 6776 elif prev == "FILE_FORMAT": 6777 # T-SQL's external file format case 6778 param.set("expression", self._parse_field()) 6779 else: 6780 param.set("expression", self._parse_unquoted_field()) 6781 6782 options.append(param) 6783 self._match(sep) 6784 6785 return options 6786 6787 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6788 expr = self.expression(exp.Credentials) 6789 6790 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6791 expr.set("storage", self._parse_field()) 6792 if self._match_text_seq("CREDENTIALS"): 6793 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6794 creds = ( 6795 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6796 ) 6797 expr.set("credentials", creds) 6798 if self._match_text_seq("ENCRYPTION"): 6799 expr.set("encryption", self._parse_wrapped_options()) 6800 if self._match_text_seq("IAM_ROLE"): 6801 expr.set("iam_role", self._parse_field()) 6802 if self._match_text_seq("REGION"): 6803 expr.set("region", self._parse_field()) 6804 6805 return expr 6806 6807 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6808 return self._parse_field() 6809 6810 def _parse_copy(self) -> exp.Copy | exp.Command: 6811 start = self._prev 6812 6813 self._match(TokenType.INTO) 6814 6815 this = ( 6816 self._parse_select(nested=True, parse_subquery_alias=False) 6817 if self._match(TokenType.L_PAREN, advance=False) 6818 else self._parse_table(schema=True) 6819 ) 6820 6821 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6822 6823 files = self._parse_csv(self._parse_file_location) 6824 credentials = self._parse_credentials() 6825 6826 self._match_text_seq("WITH") 6827 6828 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6829 6830 # Fallback case 6831 if self._curr: 6832 return self._parse_as_command(start) 6833 6834 return self.expression( 6835 exp.Copy, 6836 this=this, 6837 kind=kind, 6838 credentials=credentials, 6839 files=files, 6840 params=params, 6841 )
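# (Illustrative usage sketch, not part of the original source.) The statement-level
# parsers above share a "fallback case": when tokens remain unconsumed, the input is
# kept verbatim as an exp.Command instead of failing. A minimal check, assuming
# sqlglot's public parse_one API:
#
#     import sqlglot
#     from sqlglot import exp
#
#     stmt = sqlglot.parse_one("COPY INTO t FROM @stage/data.csv", read="snowflake")
#     assert isinstance(stmt, (exp.Copy, exp.Command))  # Copy on success, Command fallback
#
#     trunc = sqlglot.parse_one("TRUNCATE TABLE a, b", read="postgres")
#     assert isinstance(trunc, (exp.TruncateTable, exp.Command))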
122class Parser(metaclass=_Parser): 123 """ 124 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 125 126 Args: 127 error_level: The desired error level. 128 Default: ErrorLevel.IMMEDIATE 129 error_message_context: The amount of context to capture from a query string when displaying 130 the error message (in number of characters). 131 Default: 100 132 max_errors: Maximum number of error messages to include in a raised ParseError. 133 This is only relevant if error_level is ErrorLevel.RAISE. 134 Default: 3 135 """ 136 137 FUNCTIONS: t.Dict[str, t.Callable] = { 138 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 139 "CONCAT": lambda args, dialect: exp.Concat( 140 expressions=args, 141 safe=not dialect.STRICT_STRING_CONCAT, 142 coalesce=dialect.CONCAT_COALESCE, 143 ), 144 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 145 expressions=args, 146 safe=not dialect.STRICT_STRING_CONCAT, 147 coalesce=dialect.CONCAT_COALESCE, 148 ), 149 "DATE_TO_DATE_STR": lambda args: exp.Cast( 150 this=seq_get(args, 0), 151 to=exp.DataType(this=exp.DataType.Type.TEXT), 152 ), 153 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 154 "HEX": build_hex, 155 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 156 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 157 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 158 "LIKE": build_like, 159 "LOG": build_logarithm, 160 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 161 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 162 "LOWER": build_lower, 163 "MOD": build_mod, 164 "TIME_TO_TIME_STR": lambda args: exp.Cast( 165 this=seq_get(args, 0), 166 to=exp.DataType(this=exp.DataType.Type.TEXT), 167 ), 168 "TO_HEX": build_hex, 169 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 170 this=exp.Cast( 171 this=seq_get(args, 0), 172 to=exp.DataType(this=exp.DataType.Type.TEXT), 173 ), 174 start=exp.Literal.number(1), 175 length=exp.Literal.number(10), 176 ), 177 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 178 "UPPER": build_upper, 179 "VAR_MAP": build_var_map, 180 } 181 182 NO_PAREN_FUNCTIONS = { 183 TokenType.CURRENT_DATE: exp.CurrentDate, 184 TokenType.CURRENT_DATETIME: exp.CurrentDate, 185 TokenType.CURRENT_TIME: exp.CurrentTime, 186 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 187 TokenType.CURRENT_USER: exp.CurrentUser, 188 } 189 190 STRUCT_TYPE_TOKENS = { 191 TokenType.NESTED, 192 TokenType.OBJECT, 193 TokenType.STRUCT, 194 } 195 196 NESTED_TYPE_TOKENS = { 197 TokenType.ARRAY, 198 TokenType.LIST, 199 TokenType.LOWCARDINALITY, 200 TokenType.MAP, 201 TokenType.NULLABLE, 202 *STRUCT_TYPE_TOKENS, 203 } 204 205 ENUM_TYPE_TOKENS = { 206 TokenType.ENUM, 207 TokenType.ENUM8, 208 TokenType.ENUM16, 209 } 210 211 AGGREGATE_TYPE_TOKENS = { 212 TokenType.AGGREGATEFUNCTION, 213 TokenType.SIMPLEAGGREGATEFUNCTION, 214 } 215 216 TYPE_TOKENS = { 217 TokenType.BIT, 218 TokenType.BOOLEAN, 219 TokenType.TINYINT, 220 TokenType.UTINYINT, 221 TokenType.SMALLINT, 222 TokenType.USMALLINT, 223 TokenType.INT, 224 TokenType.UINT, 225 TokenType.BIGINT, 226 TokenType.UBIGINT, 227 TokenType.INT128, 228 TokenType.UINT128, 229 TokenType.INT256, 230 TokenType.UINT256, 231 TokenType.MEDIUMINT, 232 TokenType.UMEDIUMINT, 233 TokenType.FIXEDSTRING, 234 TokenType.FLOAT, 235 TokenType.DOUBLE, 236 TokenType.CHAR, 237 
TokenType.NCHAR, 238 TokenType.VARCHAR, 239 TokenType.NVARCHAR, 240 TokenType.BPCHAR, 241 TokenType.TEXT, 242 TokenType.MEDIUMTEXT, 243 TokenType.LONGTEXT, 244 TokenType.MEDIUMBLOB, 245 TokenType.LONGBLOB, 246 TokenType.BINARY, 247 TokenType.VARBINARY, 248 TokenType.JSON, 249 TokenType.JSONB, 250 TokenType.INTERVAL, 251 TokenType.TINYBLOB, 252 TokenType.TINYTEXT, 253 TokenType.TIME, 254 TokenType.TIMETZ, 255 TokenType.TIMESTAMP, 256 TokenType.TIMESTAMP_S, 257 TokenType.TIMESTAMP_MS, 258 TokenType.TIMESTAMP_NS, 259 TokenType.TIMESTAMPTZ, 260 TokenType.TIMESTAMPLTZ, 261 TokenType.TIMESTAMPNTZ, 262 TokenType.DATETIME, 263 TokenType.DATETIME64, 264 TokenType.DATE, 265 TokenType.DATE32, 266 TokenType.INT4RANGE, 267 TokenType.INT4MULTIRANGE, 268 TokenType.INT8RANGE, 269 TokenType.INT8MULTIRANGE, 270 TokenType.NUMRANGE, 271 TokenType.NUMMULTIRANGE, 272 TokenType.TSRANGE, 273 TokenType.TSMULTIRANGE, 274 TokenType.TSTZRANGE, 275 TokenType.TSTZMULTIRANGE, 276 TokenType.DATERANGE, 277 TokenType.DATEMULTIRANGE, 278 TokenType.DECIMAL, 279 TokenType.UDECIMAL, 280 TokenType.BIGDECIMAL, 281 TokenType.UUID, 282 TokenType.GEOGRAPHY, 283 TokenType.GEOMETRY, 284 TokenType.HLLSKETCH, 285 TokenType.HSTORE, 286 TokenType.PSEUDO_TYPE, 287 TokenType.SUPER, 288 TokenType.SERIAL, 289 TokenType.SMALLSERIAL, 290 TokenType.BIGSERIAL, 291 TokenType.XML, 292 TokenType.YEAR, 293 TokenType.UNIQUEIDENTIFIER, 294 TokenType.USERDEFINED, 295 TokenType.MONEY, 296 TokenType.SMALLMONEY, 297 TokenType.ROWVERSION, 298 TokenType.IMAGE, 299 TokenType.VARIANT, 300 TokenType.OBJECT, 301 TokenType.OBJECT_IDENTIFIER, 302 TokenType.INET, 303 TokenType.IPADDRESS, 304 TokenType.IPPREFIX, 305 TokenType.IPV4, 306 TokenType.IPV6, 307 TokenType.UNKNOWN, 308 TokenType.NULL, 309 TokenType.NAME, 310 TokenType.TDIGEST, 311 *ENUM_TYPE_TOKENS, 312 *NESTED_TYPE_TOKENS, 313 *AGGREGATE_TYPE_TOKENS, 314 } 315 316 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 317 TokenType.BIGINT: TokenType.UBIGINT, 318 TokenType.INT: TokenType.UINT, 319 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 320 TokenType.SMALLINT: TokenType.USMALLINT, 321 TokenType.TINYINT: TokenType.UTINYINT, 322 TokenType.DECIMAL: TokenType.UDECIMAL, 323 } 324 325 SUBQUERY_PREDICATES = { 326 TokenType.ANY: exp.Any, 327 TokenType.ALL: exp.All, 328 TokenType.EXISTS: exp.Exists, 329 TokenType.SOME: exp.Any, 330 } 331 332 RESERVED_TOKENS = { 333 *Tokenizer.SINGLE_TOKENS.values(), 334 TokenType.SELECT, 335 } - {TokenType.IDENTIFIER} 336 337 DB_CREATABLES = { 338 TokenType.DATABASE, 339 TokenType.DICTIONARY, 340 TokenType.MODEL, 341 TokenType.SCHEMA, 342 TokenType.SEQUENCE, 343 TokenType.STORAGE_INTEGRATION, 344 TokenType.TABLE, 345 TokenType.TAG, 346 TokenType.VIEW, 347 TokenType.WAREHOUSE, 348 TokenType.STREAMLIT, 349 } 350 351 CREATABLES = { 352 TokenType.COLUMN, 353 TokenType.CONSTRAINT, 354 TokenType.FOREIGN_KEY, 355 TokenType.FUNCTION, 356 TokenType.INDEX, 357 TokenType.PROCEDURE, 358 *DB_CREATABLES, 359 } 360 361 # Tokens that can represent identifiers 362 ID_VAR_TOKENS = { 363 TokenType.VAR, 364 TokenType.ANTI, 365 TokenType.APPLY, 366 TokenType.ASC, 367 TokenType.ASOF, 368 TokenType.AUTO_INCREMENT, 369 TokenType.BEGIN, 370 TokenType.BPCHAR, 371 TokenType.CACHE, 372 TokenType.CASE, 373 TokenType.COLLATE, 374 TokenType.COMMAND, 375 TokenType.COMMENT, 376 TokenType.COMMIT, 377 TokenType.CONSTRAINT, 378 TokenType.COPY, 379 TokenType.DEFAULT, 380 TokenType.DELETE, 381 TokenType.DESC, 382 TokenType.DESCRIBE, 383 TokenType.DICTIONARY, 384 TokenType.DIV, 385 TokenType.END, 386 TokenType.EXECUTE, 387 
TokenType.ESCAPE, 388 TokenType.FALSE, 389 TokenType.FIRST, 390 TokenType.FILTER, 391 TokenType.FINAL, 392 TokenType.FORMAT, 393 TokenType.FULL, 394 TokenType.IDENTIFIER, 395 TokenType.IS, 396 TokenType.ISNULL, 397 TokenType.INTERVAL, 398 TokenType.KEEP, 399 TokenType.KILL, 400 TokenType.LEFT, 401 TokenType.LOAD, 402 TokenType.MERGE, 403 TokenType.NATURAL, 404 TokenType.NEXT, 405 TokenType.OFFSET, 406 TokenType.OPERATOR, 407 TokenType.ORDINALITY, 408 TokenType.OVERLAPS, 409 TokenType.OVERWRITE, 410 TokenType.PARTITION, 411 TokenType.PERCENT, 412 TokenType.PIVOT, 413 TokenType.PRAGMA, 414 TokenType.RANGE, 415 TokenType.RECURSIVE, 416 TokenType.REFERENCES, 417 TokenType.REFRESH, 418 TokenType.REPLACE, 419 TokenType.RIGHT, 420 TokenType.ROLLUP, 421 TokenType.ROW, 422 TokenType.ROWS, 423 TokenType.SEMI, 424 TokenType.SET, 425 TokenType.SETTINGS, 426 TokenType.SHOW, 427 TokenType.TEMPORARY, 428 TokenType.TOP, 429 TokenType.TRUE, 430 TokenType.TRUNCATE, 431 TokenType.UNIQUE, 432 TokenType.UNNEST, 433 TokenType.UNPIVOT, 434 TokenType.UPDATE, 435 TokenType.USE, 436 TokenType.VOLATILE, 437 TokenType.WINDOW, 438 *CREATABLES, 439 *SUBQUERY_PREDICATES, 440 *TYPE_TOKENS, 441 *NO_PAREN_FUNCTIONS, 442 } 443 444 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 445 446 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 447 TokenType.ANTI, 448 TokenType.APPLY, 449 TokenType.ASOF, 450 TokenType.FULL, 451 TokenType.LEFT, 452 TokenType.LOCK, 453 TokenType.NATURAL, 454 TokenType.OFFSET, 455 TokenType.RIGHT, 456 TokenType.SEMI, 457 TokenType.WINDOW, 458 } 459 460 ALIAS_TOKENS = ID_VAR_TOKENS 461 462 ARRAY_CONSTRUCTORS = { 463 "ARRAY": exp.Array, 464 "LIST": exp.List, 465 } 466 467 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 468 469 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 470 471 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 472 473 FUNC_TOKENS = { 474 TokenType.COLLATE, 475 TokenType.COMMAND, 476 TokenType.CURRENT_DATE, 477 TokenType.CURRENT_DATETIME, 478 TokenType.CURRENT_TIMESTAMP, 479 TokenType.CURRENT_TIME, 480 TokenType.CURRENT_USER, 481 TokenType.FILTER, 482 TokenType.FIRST, 483 TokenType.FORMAT, 484 TokenType.GLOB, 485 TokenType.IDENTIFIER, 486 TokenType.INDEX, 487 TokenType.ISNULL, 488 TokenType.ILIKE, 489 TokenType.INSERT, 490 TokenType.LIKE, 491 TokenType.MERGE, 492 TokenType.OFFSET, 493 TokenType.PRIMARY_KEY, 494 TokenType.RANGE, 495 TokenType.REPLACE, 496 TokenType.RLIKE, 497 TokenType.ROW, 498 TokenType.UNNEST, 499 TokenType.VAR, 500 TokenType.LEFT, 501 TokenType.RIGHT, 502 TokenType.SEQUENCE, 503 TokenType.DATE, 504 TokenType.DATETIME, 505 TokenType.TABLE, 506 TokenType.TIMESTAMP, 507 TokenType.TIMESTAMPTZ, 508 TokenType.TRUNCATE, 509 TokenType.WINDOW, 510 TokenType.XOR, 511 *TYPE_TOKENS, 512 *SUBQUERY_PREDICATES, 513 } 514 515 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 516 TokenType.AND: exp.And, 517 } 518 519 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 520 TokenType.COLON_EQ: exp.PropertyEQ, 521 } 522 523 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 524 TokenType.OR: exp.Or, 525 } 526 527 EQUALITY = { 528 TokenType.EQ: exp.EQ, 529 TokenType.NEQ: exp.NEQ, 530 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 531 } 532 533 COMPARISON = { 534 TokenType.GT: exp.GT, 535 TokenType.GTE: exp.GTE, 536 TokenType.LT: exp.LT, 537 TokenType.LTE: exp.LTE, 538 } 539 540 BITWISE = { 541 TokenType.AMP: exp.BitwiseAnd, 542 TokenType.CARET: exp.BitwiseXor, 543 TokenType.PIPE: exp.BitwiseOr, 544 } 545 546 TERM = { 547 TokenType.DASH: exp.Sub, 548 
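# Note: TERM (this table) and FACTOR (below) are the additive- and
# multiplicative-precedence tiers of the expression grammar; each maps an
# operator token to the node built while operators are folded left to right.
# A sketch (default dialect assumed): "1 - 2 * 3" parses as
# Sub(this=1, expression=Mul(this=2, expression=3)), since FACTOR binds tighter.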
TokenType.PLUS: exp.Add, 549 TokenType.MOD: exp.Mod, 550 TokenType.COLLATE: exp.Collate, 551 } 552 553 FACTOR = { 554 TokenType.DIV: exp.IntDiv, 555 TokenType.LR_ARROW: exp.Distance, 556 TokenType.SLASH: exp.Div, 557 TokenType.STAR: exp.Mul, 558 } 559 560 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 561 562 TIMES = { 563 TokenType.TIME, 564 TokenType.TIMETZ, 565 } 566 567 TIMESTAMPS = { 568 TokenType.TIMESTAMP, 569 TokenType.TIMESTAMPTZ, 570 TokenType.TIMESTAMPLTZ, 571 *TIMES, 572 } 573 574 SET_OPERATIONS = { 575 TokenType.UNION, 576 TokenType.INTERSECT, 577 TokenType.EXCEPT, 578 } 579 580 JOIN_METHODS = { 581 TokenType.ASOF, 582 TokenType.NATURAL, 583 TokenType.POSITIONAL, 584 } 585 586 JOIN_SIDES = { 587 TokenType.LEFT, 588 TokenType.RIGHT, 589 TokenType.FULL, 590 } 591 592 JOIN_KINDS = { 593 TokenType.ANTI, 594 TokenType.CROSS, 595 TokenType.INNER, 596 TokenType.OUTER, 597 TokenType.SEMI, 598 TokenType.STRAIGHT_JOIN, 599 } 600 601 JOIN_HINTS: t.Set[str] = set() 602 603 LAMBDAS = { 604 TokenType.ARROW: lambda self, expressions: self.expression( 605 exp.Lambda, 606 this=self._replace_lambda( 607 self._parse_assignment(), 608 expressions, 609 ), 610 expressions=expressions, 611 ), 612 TokenType.FARROW: lambda self, expressions: self.expression( 613 exp.Kwarg, 614 this=exp.var(expressions[0].name), 615 expression=self._parse_assignment(), 616 ), 617 } 618 619 COLUMN_OPERATORS = { 620 TokenType.DOT: None, 621 TokenType.DCOLON: lambda self, this, to: self.expression( 622 exp.Cast if self.STRICT_CAST else exp.TryCast, 623 this=this, 624 to=to, 625 ), 626 TokenType.ARROW: lambda self, this, path: self.expression( 627 exp.JSONExtract, 628 this=this, 629 expression=self.dialect.to_json_path(path), 630 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 631 ), 632 TokenType.DARROW: lambda self, this, path: self.expression( 633 exp.JSONExtractScalar, 634 this=this, 635 expression=self.dialect.to_json_path(path), 636 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 637 ), 638 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 639 exp.JSONBExtract, 640 this=this, 641 expression=path, 642 ), 643 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 644 exp.JSONBExtractScalar, 645 this=this, 646 expression=path, 647 ), 648 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 649 exp.JSONBContains, 650 this=this, 651 expression=key, 652 ), 653 } 654 655 EXPRESSION_PARSERS = { 656 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 657 exp.Column: lambda self: self._parse_column(), 658 exp.Condition: lambda self: self._parse_assignment(), 659 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 660 exp.Expression: lambda self: self._parse_expression(), 661 exp.From: lambda self: self._parse_from(joins=True), 662 exp.Group: lambda self: self._parse_group(), 663 exp.Having: lambda self: self._parse_having(), 664 exp.Identifier: lambda self: self._parse_id_var(), 665 exp.Join: lambda self: self._parse_join(), 666 exp.Lambda: lambda self: self._parse_lambda(), 667 exp.Lateral: lambda self: self._parse_lateral(), 668 exp.Limit: lambda self: self._parse_limit(), 669 exp.Offset: lambda self: self._parse_offset(), 670 exp.Order: lambda self: self._parse_order(), 671 exp.Ordered: lambda self: self._parse_ordered(), 672 exp.Properties: lambda self: self._parse_properties(), 673 exp.Qualify: lambda self: self._parse_qualify(), 674 exp.Returning: lambda self: self._parse_returning(), 675 exp.Sort: lambda self: 
self._parse_sort(exp.Sort, TokenType.SORT_BY), 676 exp.Table: lambda self: self._parse_table_parts(), 677 exp.TableAlias: lambda self: self._parse_table_alias(), 678 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 679 exp.Where: lambda self: self._parse_where(), 680 exp.Window: lambda self: self._parse_named_window(), 681 exp.With: lambda self: self._parse_with(), 682 "JOIN_TYPE": lambda self: self._parse_join_parts(), 683 } 684 685 STATEMENT_PARSERS = { 686 TokenType.ALTER: lambda self: self._parse_alter(), 687 TokenType.BEGIN: lambda self: self._parse_transaction(), 688 TokenType.CACHE: lambda self: self._parse_cache(), 689 TokenType.COMMENT: lambda self: self._parse_comment(), 690 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 691 TokenType.COPY: lambda self: self._parse_copy(), 692 TokenType.CREATE: lambda self: self._parse_create(), 693 TokenType.DELETE: lambda self: self._parse_delete(), 694 TokenType.DESC: lambda self: self._parse_describe(), 695 TokenType.DESCRIBE: lambda self: self._parse_describe(), 696 TokenType.DROP: lambda self: self._parse_drop(), 697 TokenType.INSERT: lambda self: self._parse_insert(), 698 TokenType.KILL: lambda self: self._parse_kill(), 699 TokenType.LOAD: lambda self: self._parse_load(), 700 TokenType.MERGE: lambda self: self._parse_merge(), 701 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 702 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 703 TokenType.REFRESH: lambda self: self._parse_refresh(), 704 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 705 TokenType.SET: lambda self: self._parse_set(), 706 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 707 TokenType.UNCACHE: lambda self: self._parse_uncache(), 708 TokenType.UPDATE: lambda self: self._parse_update(), 709 TokenType.USE: lambda self: self.expression( 710 exp.Use, 711 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 712 this=self._parse_table(schema=False), 713 ), 714 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 715 } 716 717 UNARY_PARSERS = { 718 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 719 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 720 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 721 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 722 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 723 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 724 } 725 726 STRING_PARSERS = { 727 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 728 exp.RawString, this=token.text 729 ), 730 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 731 exp.National, this=token.text 732 ), 733 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 734 TokenType.STRING: lambda self, token: self.expression( 735 exp.Literal, this=token.text, is_string=True 736 ), 737 TokenType.UNICODE_STRING: lambda self, token: self.expression( 738 exp.UnicodeString, 739 this=token.text, 740 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 741 ), 742 } 743 744 NUMERIC_PARSERS = { 745 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 746 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 
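# Each parser in STRING_PARSERS/NUMERIC_PARSERS receives the matched Token and
# returns a literal leaf node; they are merged into PRIMARY_PARSERS below, which
# _parse_primary dispatches on the current token type. A doctest-style sketch
# (default dialect assumed):
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT 1.5").find(sqlglot.exp.Literal).is_string
#   False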
747 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 748 TokenType.NUMBER: lambda self, token: self.expression( 749 exp.Literal, this=token.text, is_string=False 750 ), 751 } 752 753 PRIMARY_PARSERS = { 754 **STRING_PARSERS, 755 **NUMERIC_PARSERS, 756 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 757 TokenType.NULL: lambda self, _: self.expression(exp.Null), 758 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 759 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 760 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 761 TokenType.STAR: lambda self, _: self.expression( 762 exp.Star, 763 **{ 764 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 765 "replace": self._parse_star_op("REPLACE"), 766 "rename": self._parse_star_op("RENAME"), 767 }, 768 ), 769 } 770 771 PLACEHOLDER_PARSERS = { 772 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 773 TokenType.PARAMETER: lambda self: self._parse_parameter(), 774 TokenType.COLON: lambda self: ( 775 self.expression(exp.Placeholder, this=self._prev.text) 776 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 777 else None 778 ), 779 } 780 781 RANGE_PARSERS = { 782 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 783 TokenType.GLOB: binary_range_parser(exp.Glob), 784 TokenType.ILIKE: binary_range_parser(exp.ILike), 785 TokenType.IN: lambda self, this: self._parse_in(this), 786 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 787 TokenType.IS: lambda self, this: self._parse_is(this), 788 TokenType.LIKE: binary_range_parser(exp.Like), 789 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 790 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 791 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 792 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 793 } 794 795 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 796 "ALLOWED_VALUES": lambda self: self.expression( 797 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 798 ), 799 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 800 "AUTO": lambda self: self._parse_auto_property(), 801 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 802 "BACKUP": lambda self: self.expression( 803 exp.BackupProperty, this=self._parse_var(any_token=True) 804 ), 805 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 806 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 807 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 808 "CHECKSUM": lambda self: self._parse_checksum(), 809 "CLUSTER BY": lambda self: self._parse_cluster(), 810 "CLUSTERED": lambda self: self._parse_clustered_by(), 811 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 812 exp.CollateProperty, **kwargs 813 ), 814 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 815 "CONTAINS": lambda self: self._parse_contains_property(), 816 "COPY": lambda self: self._parse_copy_property(), 817 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 818 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 819 "DEFINER": lambda self: self._parse_definer(), 820 "DETERMINISTIC": lambda self: self.expression( 821 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 822 ), 823 "DISTKEY": 
lambda self: self._parse_distkey(), 824 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 825 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 826 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 827 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 828 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 829 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 830 "FREESPACE": lambda self: self._parse_freespace(), 831 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 832 "HEAP": lambda self: self.expression(exp.HeapProperty), 833 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 834 "IMMUTABLE": lambda self: self.expression( 835 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 836 ), 837 "INHERITS": lambda self: self.expression( 838 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 839 ), 840 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 841 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 842 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 843 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 844 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 845 "LIKE": lambda self: self._parse_create_like(), 846 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 847 "LOCK": lambda self: self._parse_locking(), 848 "LOCKING": lambda self: self._parse_locking(), 849 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 850 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 851 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 852 "MODIFIES": lambda self: self._parse_modifies_property(), 853 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 854 "NO": lambda self: self._parse_no_property(), 855 "ON": lambda self: self._parse_on_property(), 856 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 857 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 858 "PARTITION": lambda self: self._parse_partitioned_of(), 859 "PARTITION BY": lambda self: self._parse_partitioned_by(), 860 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 861 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 862 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 863 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 864 "READS": lambda self: self._parse_reads_property(), 865 "REMOTE": lambda self: self._parse_remote_with_connection(), 866 "RETURNS": lambda self: self._parse_returns(), 867 "STRICT": lambda self: self.expression(exp.StrictProperty), 868 "ROW": lambda self: self._parse_row(), 869 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 870 "SAMPLE": lambda self: self.expression( 871 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 872 ), 873 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 874 "SETTINGS": lambda self: self.expression( 875 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 876 ), 877 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 878 "SORTKEY": lambda self: self._parse_sortkey(), 879 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 880 
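# PROPERTY_PARSERS is keyed by a property's leading keyword(s): _parse_property
# matches the keyword and delegates to the registered callable. Roughly (a
# sketch, default dialect assumed), the "ENGINE" entry above routes
# "CREATE TABLE t (c INT) ENGINE=InnoDB" through _parse_property_assignment,
# producing an exp.EngineProperty whose `this` is the unquoted value.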
"STABLE": lambda self: self.expression( 881 exp.StabilityProperty, this=exp.Literal.string("STABLE") 882 ), 883 "STORED": lambda self: self._parse_stored(), 884 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 885 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 886 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 887 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 888 "TO": lambda self: self._parse_to_table(), 889 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 890 "TRANSFORM": lambda self: self.expression( 891 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 892 ), 893 "TTL": lambda self: self._parse_ttl(), 894 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 895 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 896 "VOLATILE": lambda self: self._parse_volatile_property(), 897 "WITH": lambda self: self._parse_with_property(), 898 } 899 900 CONSTRAINT_PARSERS = { 901 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 902 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 903 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 904 "CHARACTER SET": lambda self: self.expression( 905 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 906 ), 907 "CHECK": lambda self: self.expression( 908 exp.CheckColumnConstraint, 909 this=self._parse_wrapped(self._parse_assignment), 910 enforced=self._match_text_seq("ENFORCED"), 911 ), 912 "COLLATE": lambda self: self.expression( 913 exp.CollateColumnConstraint, this=self._parse_var(any_token=True) 914 ), 915 "COMMENT": lambda self: self.expression( 916 exp.CommentColumnConstraint, this=self._parse_string() 917 ), 918 "COMPRESS": lambda self: self._parse_compress(), 919 "CLUSTERED": lambda self: self.expression( 920 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 921 ), 922 "NONCLUSTERED": lambda self: self.expression( 923 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 924 ), 925 "DEFAULT": lambda self: self.expression( 926 exp.DefaultColumnConstraint, this=self._parse_bitwise() 927 ), 928 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 929 "EPHEMERAL": lambda self: self.expression( 930 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 931 ), 932 "EXCLUDE": lambda self: self.expression( 933 exp.ExcludeColumnConstraint, this=self._parse_index_params() 934 ), 935 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 936 "FORMAT": lambda self: self.expression( 937 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 938 ), 939 "GENERATED": lambda self: self._parse_generated_as_identity(), 940 "IDENTITY": lambda self: self._parse_auto_increment(), 941 "INLINE": lambda self: self._parse_inline(), 942 "LIKE": lambda self: self._parse_create_like(), 943 "NOT": lambda self: self._parse_not_constraint(), 944 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 945 "ON": lambda self: ( 946 self._match(TokenType.UPDATE) 947 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 948 ) 949 or self.expression(exp.OnProperty, this=self._parse_id_var()), 950 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 951 "PERIOD": lambda self: self._parse_period_for_system_time(), 952 "PRIMARY KEY": lambda self: 
self._parse_primary_key(), 953 "REFERENCES": lambda self: self._parse_references(match=False), 954 "TITLE": lambda self: self.expression( 955 exp.TitleColumnConstraint, this=self._parse_var_or_string() 956 ), 957 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 958 "UNIQUE": lambda self: self._parse_unique(), 959 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 960 "WITH": lambda self: self.expression( 961 exp.Properties, expressions=self._parse_wrapped_properties() 962 ), 963 } 964 965 ALTER_PARSERS = { 966 "ADD": lambda self: self._parse_alter_table_add(), 967 "ALTER": lambda self: self._parse_alter_table_alter(), 968 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 969 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 970 "DROP": lambda self: self._parse_alter_table_drop(), 971 "RENAME": lambda self: self._parse_alter_table_rename(), 972 "SET": lambda self: self._parse_alter_table_set(), 973 } 974 975 ALTER_ALTER_PARSERS = { 976 "DISTKEY": lambda self: self._parse_alter_diststyle(), 977 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 978 "SORTKEY": lambda self: self._parse_alter_sortkey(), 979 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 980 } 981 982 SCHEMA_UNNAMED_CONSTRAINTS = { 983 "CHECK", 984 "EXCLUDE", 985 "FOREIGN KEY", 986 "LIKE", 987 "PERIOD", 988 "PRIMARY KEY", 989 "UNIQUE", 990 } 991 992 NO_PAREN_FUNCTION_PARSERS = { 993 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 994 "CASE": lambda self: self._parse_case(), 995 "IF": lambda self: self._parse_if(), 996 "NEXT": lambda self: self._parse_next_value_for(), 997 } 998 999 INVALID_FUNC_NAME_TOKENS = { 1000 TokenType.IDENTIFIER, 1001 TokenType.STRING, 1002 } 1003 1004 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1005 1006 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1007 1008 FUNCTION_PARSERS = { 1009 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1010 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1011 "DECODE": lambda self: self._parse_decode(), 1012 "EXTRACT": lambda self: self._parse_extract(), 1013 "GAP_FILL": lambda self: self._parse_gap_fill(), 1014 "JSON_OBJECT": lambda self: self._parse_json_object(), 1015 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1016 "JSON_TABLE": lambda self: self._parse_json_table(), 1017 "MATCH": lambda self: self._parse_match_against(), 1018 "OPENJSON": lambda self: self._parse_open_json(), 1019 "POSITION": lambda self: self._parse_position(), 1020 "PREDICT": lambda self: self._parse_predict(), 1021 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1022 "STRING_AGG": lambda self: self._parse_string_agg(), 1023 "SUBSTRING": lambda self: self._parse_substring(), 1024 "TRIM": lambda self: self._parse_trim(), 1025 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1026 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1027 } 1028 1029 QUERY_MODIFIER_PARSERS = { 1030 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1031 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1032 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1033 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1034 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1035 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1036 TokenType.WINDOW: lambda 
self: ("windows", self._parse_window_clause()), 1037 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1038 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1039 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1040 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1041 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1042 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1043 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1044 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1045 TokenType.CLUSTER_BY: lambda self: ( 1046 "cluster", 1047 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1048 ), 1049 TokenType.DISTRIBUTE_BY: lambda self: ( 1050 "distribute", 1051 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1052 ), 1053 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1054 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1055 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1056 } 1057 1058 SET_PARSERS = { 1059 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1060 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1061 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1062 "TRANSACTION": lambda self: self._parse_set_transaction(), 1063 } 1064 1065 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1066 1067 TYPE_LITERAL_PARSERS = { 1068 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1069 } 1070 1071 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1072 1073 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1074 1075 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1076 1077 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1078 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1079 "ISOLATION": ( 1080 ("LEVEL", "REPEATABLE", "READ"), 1081 ("LEVEL", "READ", "COMMITTED"), 1082 ("LEVEL", "READ", "UNCOMMITTED"), 1083 ("LEVEL", "SERIALIZABLE"), 1084 ), 1085 "READ": ("WRITE", "ONLY"), 1086 } 1087 1088 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1089 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1090 ) 1091 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1092 1093 CREATE_SEQUENCE: OPTIONS_TYPE = { 1094 "SCALE": ("EXTEND", "NOEXTEND"), 1095 "SHARD": ("EXTEND", "NOEXTEND"), 1096 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1097 **dict.fromkeys( 1098 ( 1099 "SESSION", 1100 "GLOBAL", 1101 "KEEP", 1102 "NOKEEP", 1103 "ORDER", 1104 "NOORDER", 1105 "NOCACHE", 1106 "CYCLE", 1107 "NOCYCLE", 1108 "NOMINVALUE", 1109 "NOMAXVALUE", 1110 "NOSCALE", 1111 "NOSHARD", 1112 ), 1113 tuple(), 1114 ), 1115 } 1116 1117 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1118 1119 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1120 1121 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1122 1123 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1124 1125 CLONE_KEYWORDS = {"CLONE", "COPY"} 1126 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1127 1128 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1129 1130 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1131 1132 
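# An OPTIONS_TYPE mapping pairs a leading keyword with the keyword sequences
# that may follow it; _parse_var_from_options walks these alternatives. For
# example, TRANSACTION_CHARACTERISTICS above accepts "ISOLATION LEVEL READ
# COMMITTED" (key "ISOLATION" followed by the sequence ("LEVEL", "READ",
# "COMMITTED")), while an empty tuple, as in USABLES, lets the keyword stand alone.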
TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1133 1134 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1135 1136 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1137 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1138 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1139 1140 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1141 1142 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1143 1144 ADD_CONSTRAINT_TOKENS = { 1145 TokenType.CONSTRAINT, 1146 TokenType.FOREIGN_KEY, 1147 TokenType.INDEX, 1148 TokenType.KEY, 1149 TokenType.PRIMARY_KEY, 1150 TokenType.UNIQUE, 1151 } 1152 1153 DISTINCT_TOKENS = {TokenType.DISTINCT} 1154 1155 NULL_TOKENS = {TokenType.NULL} 1156 1157 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1158 1159 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1160 1161 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1162 1163 STRICT_CAST = True 1164 1165 PREFIXED_PIVOT_COLUMNS = False 1166 IDENTIFY_PIVOT_STRINGS = False 1167 1168 LOG_DEFAULTS_TO_LN = False 1169 1170 # Whether ADD is present for each column added by ALTER TABLE 1171 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1172 1173 # Whether the table sample clause expects CSV syntax 1174 TABLESAMPLE_CSV = False 1175 1176 # The default method used for table sampling 1177 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1178 1179 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1180 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1181 1182 # Whether the TRIM function expects the characters to trim as its first argument 1183 TRIM_PATTERN_FIRST = False 1184 1185 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1186 STRING_ALIASES = False 1187 1188 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1189 MODIFIERS_ATTACHED_TO_SET_OP = True 1190 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1191 1192 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1193 NO_PAREN_IF_COMMANDS = True 1194 1195 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1196 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1197 1198 # Whether the `:` operator is used to extract a value from a JSON document 1199 COLON_IS_JSON_EXTRACT = False 1200 1201 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1202 # If this is True and '(' is not found, the keyword will be treated as an identifier 1203 VALUES_FOLLOWED_BY_PAREN = True 1204 1205 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1206 SUPPORTS_IMPLICIT_UNNEST = False 1207 1208 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1209 INTERVAL_SPANS = True 1210 1211 # Whether a PARTITION clause can follow a table reference 1212 SUPPORTS_PARTITION_SELECTION = False 1213 1214 __slots__ = ( 1215 "error_level", 1216 "error_message_context", 1217 "max_errors", 1218 "dialect", 1219 "sql", 1220 "errors", 1221 "_tokens", 1222 "_index", 1223 "_curr", 1224 "_next", 1225 "_prev", 1226 "_prev_comments", 1227 ) 1228 1229 # Autofilled 1230 SHOW_TRIE: t.Dict = {} 1231 SET_TRIE: t.Dict = {} 1232 1233 def __init__( 1234 self, 1235 error_level: t.Optional[ErrorLevel] = None, 1236 error_message_context: int = 100, 1237 max_errors: int = 3, 1238 dialect: DialectType = None, 1239 ): 1240 from sqlglot.dialects import Dialect 1241 1242 self.error_level = error_level or ErrorLevel.IMMEDIATE 1243 self.error_message_context = error_message_context 1244 self.max_errors = max_errors 1245 self.dialect = Dialect.get_or_raise(dialect) 1246 self.reset() 1247 1248 def reset(self): 1249 self.sql = "" 1250 self.errors = [] 1251 self._tokens = [] 1252 self._index = 0 1253 self._curr = None 1254 self._next = None 1255 self._prev = None 1256 self._prev_comments = None 1257 1258 def parse( 1259 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1260 ) -> t.List[t.Optional[exp.Expression]]: 1261 """ 1262 Parses a list of tokens and returns a list of syntax trees, one tree 1263 per parsed SQL statement. 1264 1265 Args: 1266 raw_tokens: The list of tokens. 1267 sql: The original SQL string, used to produce helpful debug messages. 1268 1269 Returns: 1270 The list of the produced syntax trees. 1271 """ 1272 return self._parse( 1273 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1274 ) 1275 1276 def parse_into( 1277 self, 1278 expression_types: exp.IntoType, 1279 raw_tokens: t.List[Token], 1280 sql: t.Optional[str] = None, 1281 ) -> t.List[t.Optional[exp.Expression]]: 1282 """ 1283 Parses a list of tokens into a given Expression type. If a collection of Expression 1284 types is given instead, this method will try to parse the token list into each one 1285 of them, stopping at the first for which the parsing succeeds. 1286 1287 Args: 1288 expression_types: The expression type(s) to try and parse the token list into. 1289 raw_tokens: The list of tokens. 1290 sql: The original SQL string, used to produce helpful debug messages. 1291 1292 Returns: 1293 The target Expression. 
1294 """ 1295 errors = [] 1296 for expression_type in ensure_list(expression_types): 1297 parser = self.EXPRESSION_PARSERS.get(expression_type) 1298 if not parser: 1299 raise TypeError(f"No parser registered for {expression_type}") 1300 1301 try: 1302 return self._parse(parser, raw_tokens, sql) 1303 except ParseError as e: 1304 e.errors[0]["into_expression"] = expression_type 1305 errors.append(e) 1306 1307 raise ParseError( 1308 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1309 errors=merge_errors(errors), 1310 ) from errors[-1] 1311 1312 def _parse( 1313 self, 1314 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1315 raw_tokens: t.List[Token], 1316 sql: t.Optional[str] = None, 1317 ) -> t.List[t.Optional[exp.Expression]]: 1318 self.reset() 1319 self.sql = sql or "" 1320 1321 total = len(raw_tokens) 1322 chunks: t.List[t.List[Token]] = [[]] 1323 1324 for i, token in enumerate(raw_tokens): 1325 if token.token_type == TokenType.SEMICOLON: 1326 if token.comments: 1327 chunks.append([token]) 1328 1329 if i < total - 1: 1330 chunks.append([]) 1331 else: 1332 chunks[-1].append(token) 1333 1334 expressions = [] 1335 1336 for tokens in chunks: 1337 self._index = -1 1338 self._tokens = tokens 1339 self._advance() 1340 1341 expressions.append(parse_method(self)) 1342 1343 if self._index < len(self._tokens): 1344 self.raise_error("Invalid expression / Unexpected token") 1345 1346 self.check_errors() 1347 1348 return expressions 1349 1350 def check_errors(self) -> None: 1351 """Logs or raises any found errors, depending on the chosen error level setting.""" 1352 if self.error_level == ErrorLevel.WARN: 1353 for error in self.errors: 1354 logger.error(str(error)) 1355 elif self.error_level == ErrorLevel.RAISE and self.errors: 1356 raise ParseError( 1357 concat_messages(self.errors, self.max_errors), 1358 errors=merge_errors(self.errors), 1359 ) 1360 1361 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1362 """ 1363 Appends an error to the list of recorded errors or raises it, depending on the chosen 1364 error level setting. 1365 """ 1366 token = token or self._curr or self._prev or Token.string("") 1367 start = token.start 1368 end = token.end + 1 1369 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1370 highlight = self.sql[start:end] 1371 end_context = self.sql[end : end + self.error_message_context] 1372 1373 error = ParseError.new( 1374 f"{message}. Line {token.line}, Col: {token.col}.\n" 1375 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1376 description=message, 1377 line=token.line, 1378 col=token.col, 1379 start_context=start_context, 1380 highlight=highlight, 1381 end_context=end_context, 1382 ) 1383 1384 if self.error_level == ErrorLevel.IMMEDIATE: 1385 raise error 1386 1387 self.errors.append(error) 1388 1389 def expression( 1390 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1391 ) -> E: 1392 """ 1393 Creates a new, validated Expression. 1394 1395 Args: 1396 exp_class: The expression class to instantiate. 1397 comments: An optional list of comments to attach to the expression. 1398 kwargs: The arguments to set for the expression along with their respective values. 1399 1400 Returns: 1401 The target expression. 
1402 """ 1403 instance = exp_class(**kwargs) 1404 instance.add_comments(comments) if comments else self._add_comments(instance) 1405 return self.validate_expression(instance) 1406 1407 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1408 if expression and self._prev_comments: 1409 expression.add_comments(self._prev_comments) 1410 self._prev_comments = None 1411 1412 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1413 """ 1414 Validates an Expression, making sure that all its mandatory arguments are set. 1415 1416 Args: 1417 expression: The expression to validate. 1418 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1419 1420 Returns: 1421 The validated expression. 1422 """ 1423 if self.error_level != ErrorLevel.IGNORE: 1424 for error_message in expression.error_messages(args): 1425 self.raise_error(error_message) 1426 1427 return expression 1428 1429 def _find_sql(self, start: Token, end: Token) -> str: 1430 return self.sql[start.start : end.end + 1] 1431 1432 def _is_connected(self) -> bool: 1433 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1434 1435 def _advance(self, times: int = 1) -> None: 1436 self._index += times 1437 self._curr = seq_get(self._tokens, self._index) 1438 self._next = seq_get(self._tokens, self._index + 1) 1439 1440 if self._index > 0: 1441 self._prev = self._tokens[self._index - 1] 1442 self._prev_comments = self._prev.comments 1443 else: 1444 self._prev = None 1445 self._prev_comments = None 1446 1447 def _retreat(self, index: int) -> None: 1448 if index != self._index: 1449 self._advance(index - self._index) 1450 1451 def _warn_unsupported(self) -> None: 1452 if len(self._tokens) <= 1: 1453 return 1454 1455 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1456 # interested in emitting a warning for the one being currently processed. 1457 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1458 1459 logger.warning( 1460 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1461 ) 1462 1463 def _parse_command(self) -> exp.Command: 1464 self._warn_unsupported() 1465 return self.expression( 1466 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1467 ) 1468 1469 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1470 """ 1471 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
This behavior can 1472 be different depending on the user-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1473 the parser state accordingly. 1474 """ 1475 index = self._index 1476 error_level = self.error_level 1477 1478 self.error_level = ErrorLevel.IMMEDIATE 1479 try: 1480 this = parse_method() 1481 except ParseError: 1482 this = None 1483 finally: 1484 if not this or retreat: 1485 self._retreat(index) 1486 self.error_level = error_level 1487 1488 return this 1489 1490 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1491 start = self._prev 1492 exists = self._parse_exists() if allow_exists else None 1493 1494 self._match(TokenType.ON) 1495 1496 materialized = self._match_text_seq("MATERIALIZED") 1497 kind = self._match_set(self.CREATABLES) and self._prev 1498 if not kind: 1499 return self._parse_as_command(start) 1500 1501 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1502 this = self._parse_user_defined_function(kind=kind.token_type) 1503 elif kind.token_type == TokenType.TABLE: 1504 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1505 elif kind.token_type == TokenType.COLUMN: 1506 this = self._parse_column() 1507 else: 1508 this = self._parse_id_var() 1509 1510 self._match(TokenType.IS) 1511 1512 return self.expression( 1513 exp.Comment, 1514 this=this, 1515 kind=kind.text, 1516 expression=self._parse_string(), 1517 exists=exists, 1518 materialized=materialized, 1519 ) 1520 1521 def _parse_to_table( 1522 self, 1523 ) -> exp.ToTableProperty: 1524 table = self._parse_table_parts(schema=True) 1525 return self.expression(exp.ToTableProperty, this=table) 1526 1527 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1528 def _parse_ttl(self) -> exp.Expression: 1529 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1530 this = self._parse_bitwise() 1531 1532 if self._match_text_seq("DELETE"): 1533 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1534 if self._match_text_seq("RECOMPRESS"): 1535 return self.expression( 1536 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1537 ) 1538 if self._match_text_seq("TO", "DISK"): 1539 return self.expression( 1540 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1541 ) 1542 if self._match_text_seq("TO", "VOLUME"): 1543 return self.expression( 1544 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1545 ) 1546 1547 return this 1548 1549 expressions = self._parse_csv(_parse_ttl_action) 1550 where = self._parse_where() 1551 group = self._parse_group() 1552 1553 aggregates = None 1554 if group and self._match(TokenType.SET): 1555 aggregates = self._parse_csv(self._parse_set_item) 1556 1557 return self.expression( 1558 exp.MergeTreeTTL, 1559 expressions=expressions, 1560 where=where, 1561 group=group, 1562 aggregates=aggregates, 1563 ) 1564 1565 def _parse_statement(self) -> t.Optional[exp.Expression]: 1566 if self._curr is None: 1567 return None 1568 1569 if self._match_set(self.STATEMENT_PARSERS): 1570 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1571 1572 if self._match_set(self.dialect.tokenizer.COMMANDS): 1573 return self._parse_command() 1574 1575 expression = self._parse_expression() 1576 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1577 return self._parse_query_modifiers(expression) 1578 1579 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1580 start = 
self._prev 1581 temporary = self._match(TokenType.TEMPORARY) 1582 materialized = self._match_text_seq("MATERIALIZED") 1583 1584 kind = self._match_set(self.CREATABLES) and self._prev.text 1585 if not kind: 1586 return self._parse_as_command(start) 1587 1588 if_exists = exists or self._parse_exists() 1589 table = self._parse_table_parts( 1590 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1591 ) 1592 1593 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1594 1595 if self._match(TokenType.L_PAREN, advance=False): 1596 expressions = self._parse_wrapped_csv(self._parse_types) 1597 else: 1598 expressions = None 1599 1600 return self.expression( 1601 exp.Drop, 1602 comments=start.comments, 1603 exists=if_exists, 1604 this=table, 1605 expressions=expressions, 1606 kind=kind.upper(), 1607 temporary=temporary, 1608 materialized=materialized, 1609 cascade=self._match_text_seq("CASCADE"), 1610 constraints=self._match_text_seq("CONSTRAINTS"), 1611 purge=self._match_text_seq("PURGE"), 1612 cluster=cluster, 1613 ) 1614 1615 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1616 return ( 1617 self._match_text_seq("IF") 1618 and (not not_ or self._match(TokenType.NOT)) 1619 and self._match(TokenType.EXISTS) 1620 ) 1621 1622 def _parse_create(self) -> exp.Create | exp.Command: 1623 # Note: this can't be None because we've matched a statement parser 1624 start = self._prev 1625 comments = self._prev_comments 1626 1627 replace = ( 1628 start.token_type == TokenType.REPLACE 1629 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1630 or self._match_pair(TokenType.OR, TokenType.ALTER) 1631 ) 1632 1633 unique = self._match(TokenType.UNIQUE) 1634 1635 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1636 self._advance() 1637 1638 properties = None 1639 create_token = self._match_set(self.CREATABLES) and self._prev 1640 1641 if not create_token: 1642 # exp.Properties.Location.POST_CREATE 1643 properties = self._parse_properties() 1644 create_token = self._match_set(self.CREATABLES) and self._prev 1645 1646 if not properties or not create_token: 1647 return self._parse_as_command(start) 1648 1649 exists = self._parse_exists(not_=True) 1650 this = None 1651 expression: t.Optional[exp.Expression] = None 1652 indexes = None 1653 no_schema_binding = None 1654 begin = None 1655 end = None 1656 clone = None 1657 1658 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1659 nonlocal properties 1660 if properties and temp_props: 1661 properties.expressions.extend(temp_props.expressions) 1662 elif temp_props: 1663 properties = temp_props 1664 1665 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1666 this = self._parse_user_defined_function(kind=create_token.token_type) 1667 1668 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1669 extend_props(self._parse_properties()) 1670 1671 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1672 extend_props(self._parse_properties()) 1673 1674 if not expression: 1675 if self._match(TokenType.COMMAND): 1676 expression = self._parse_as_command(self._prev) 1677 else: 1678 begin = self._match(TokenType.BEGIN) 1679 return_ = self._match_text_seq("RETURN") 1680 1681 if self._match(TokenType.STRING, advance=False): 1682 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1683 # 
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1684 expression = self._parse_string() 1685 extend_props(self._parse_properties()) 1686 else: 1687 expression = self._parse_statement() 1688 1689 end = self._match_text_seq("END") 1690 1691 if return_: 1692 expression = self.expression(exp.Return, this=expression) 1693 elif create_token.token_type == TokenType.INDEX: 1694 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1695 if not self._match(TokenType.ON): 1696 index = self._parse_id_var() 1697 anonymous = False 1698 else: 1699 index = None 1700 anonymous = True 1701 1702 this = self._parse_index(index=index, anonymous=anonymous) 1703 elif create_token.token_type in self.DB_CREATABLES: 1704 table_parts = self._parse_table_parts( 1705 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1706 ) 1707 1708 # exp.Properties.Location.POST_NAME 1709 self._match(TokenType.COMMA) 1710 extend_props(self._parse_properties(before=True)) 1711 1712 this = self._parse_schema(this=table_parts) 1713 1714 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1715 extend_props(self._parse_properties()) 1716 1717 self._match(TokenType.ALIAS) 1718 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1719 # exp.Properties.Location.POST_ALIAS 1720 extend_props(self._parse_properties()) 1721 1722 if create_token.token_type == TokenType.SEQUENCE: 1723 expression = self._parse_types() 1724 extend_props(self._parse_properties()) 1725 else: 1726 expression = self._parse_ddl_select() 1727 1728 if create_token.token_type == TokenType.TABLE: 1729 # exp.Properties.Location.POST_EXPRESSION 1730 extend_props(self._parse_properties()) 1731 1732 indexes = [] 1733 while True: 1734 index = self._parse_index() 1735 1736 # exp.Properties.Location.POST_INDEX 1737 extend_props(self._parse_properties()) 1738 1739 if not index: 1740 break 1741 else: 1742 self._match(TokenType.COMMA) 1743 indexes.append(index) 1744 elif create_token.token_type == TokenType.VIEW: 1745 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1746 no_schema_binding = True 1747 1748 shallow = self._match_text_seq("SHALLOW") 1749 1750 if self._match_texts(self.CLONE_KEYWORDS): 1751 copy = self._prev.text.lower() == "copy" 1752 clone = self.expression( 1753 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1754 ) 1755 1756 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1757 return self._parse_as_command(start) 1758 1759 return self.expression( 1760 exp.Create, 1761 comments=comments, 1762 this=this, 1763 kind=create_token.text.upper(), 1764 replace=replace, 1765 unique=unique, 1766 expression=expression, 1767 exists=exists, 1768 properties=properties, 1769 indexes=indexes, 1770 no_schema_binding=no_schema_binding, 1771 begin=begin, 1772 end=end, 1773 clone=clone, 1774 ) 1775 1776 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1777 seq = exp.SequenceProperties() 1778 1779 options = [] 1780 index = self._index 1781 1782 while self._curr: 1783 self._match(TokenType.COMMA) 1784 if self._match_text_seq("INCREMENT"): 1785 self._match_text_seq("BY") 1786 self._match_text_seq("=") 1787 seq.set("increment", self._parse_term()) 1788 elif self._match_text_seq("MINVALUE"): 1789 seq.set("minvalue", self._parse_term()) 1790 elif self._match_text_seq("MAXVALUE"): 1791 seq.set("maxvalue", self._parse_term()) 1792 elif self._match(TokenType.START_WITH) or 
self._match_text_seq("START"): 1793 self._match_text_seq("=") 1794 seq.set("start", self._parse_term()) 1795 elif self._match_text_seq("CACHE"): 1796 # T-SQL allows empty CACHE which is initialized dynamically 1797 seq.set("cache", self._parse_number() or True) 1798 elif self._match_text_seq("OWNED", "BY"): 1799 # "OWNED BY NONE" is the default 1800 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1801 else: 1802 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1803 if opt: 1804 options.append(opt) 1805 else: 1806 break 1807 1808 seq.set("options", options if options else None) 1809 return None if self._index == index else seq 1810 1811 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1812 # only used for teradata currently 1813 self._match(TokenType.COMMA) 1814 1815 kwargs = { 1816 "no": self._match_text_seq("NO"), 1817 "dual": self._match_text_seq("DUAL"), 1818 "before": self._match_text_seq("BEFORE"), 1819 "default": self._match_text_seq("DEFAULT"), 1820 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1821 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1822 "after": self._match_text_seq("AFTER"), 1823 "minimum": self._match_texts(("MIN", "MINIMUM")), 1824 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1825 } 1826 1827 if self._match_texts(self.PROPERTY_PARSERS): 1828 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1829 try: 1830 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1831 except TypeError: 1832 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1833 1834 return None 1835 1836 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1837 return self._parse_wrapped_csv(self._parse_property) 1838 1839 def _parse_property(self) -> t.Optional[exp.Expression]: 1840 if self._match_texts(self.PROPERTY_PARSERS): 1841 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1842 1843 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1844 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1845 1846 if self._match_text_seq("COMPOUND", "SORTKEY"): 1847 return self._parse_sortkey(compound=True) 1848 1849 if self._match_text_seq("SQL", "SECURITY"): 1850 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1851 1852 index = self._index 1853 key = self._parse_column() 1854 1855 if not self._match(TokenType.EQ): 1856 self._retreat(index) 1857 return self._parse_sequence_properties() 1858 1859 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1860 if isinstance(key, exp.Column): 1861 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1862 1863 value = self._parse_bitwise() or self._parse_var(any_token=True) 1864 1865 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1866 if isinstance(value, exp.Column): 1867 value = exp.var(value.name) 1868 1869 return self.expression(exp.Property, this=key, value=value) 1870 1871 def _parse_stored(self) -> exp.FileFormatProperty: 1872 self._match(TokenType.ALIAS) 1873 1874 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1875 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1876 1877 return self.expression( 1878 exp.FileFormatProperty, 1879 this=( 1880 self.expression( 1881 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1882 ) 1883 if 
input_format or output_format 1884 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1885 ), 1886 ) 1887 1888 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1889 field = self._parse_field() 1890 if isinstance(field, exp.Identifier) and not field.quoted: 1891 field = exp.var(field) 1892 1893 return field 1894 1895 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1896 self._match(TokenType.EQ) 1897 self._match(TokenType.ALIAS) 1898 1899 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1900 1901 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1902 properties = [] 1903 while True: 1904 if before: 1905 prop = self._parse_property_before() 1906 else: 1907 prop = self._parse_property() 1908 if not prop: 1909 break 1910 for p in ensure_list(prop): 1911 properties.append(p) 1912 1913 if properties: 1914 return self.expression(exp.Properties, expressions=properties) 1915 1916 return None 1917 1918 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1919 return self.expression( 1920 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1921 ) 1922 1923 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1924 if self._index >= 2: 1925 pre_volatile_token = self._tokens[self._index - 2] 1926 else: 1927 pre_volatile_token = None 1928 1929 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1930 return exp.VolatileProperty() 1931 1932 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1933 1934 def _parse_retention_period(self) -> exp.Var: 1935 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1936 number = self._parse_number() 1937 number_str = f"{number} " if number else "" 1938 unit = self._parse_var(any_token=True) 1939 return exp.var(f"{number_str}{unit}") 1940 1941 def _parse_system_versioning_property( 1942 self, with_: bool = False 1943 ) -> exp.WithSystemVersioningProperty: 1944 self._match(TokenType.EQ) 1945 prop = self.expression( 1946 exp.WithSystemVersioningProperty, 1947 **{ # type: ignore 1948 "on": True, 1949 "with": with_, 1950 }, 1951 ) 1952 1953 if self._match_text_seq("OFF"): 1954 prop.set("on", False) 1955 return prop 1956 1957 self._match(TokenType.ON) 1958 if self._match(TokenType.L_PAREN): 1959 while self._curr and not self._match(TokenType.R_PAREN): 1960 if self._match_text_seq("HISTORY_TABLE", "="): 1961 prop.set("this", self._parse_table_parts()) 1962 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1963 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1964 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1965 prop.set("retention_period", self._parse_retention_period()) 1966 1967 self._match(TokenType.COMMA) 1968 1969 return prop 1970 1971 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1972 self._match(TokenType.EQ) 1973 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1974 prop = self.expression(exp.DataDeletionProperty, on=on) 1975 1976 if self._match(TokenType.L_PAREN): 1977 while self._curr and not self._match(TokenType.R_PAREN): 1978 if self._match_text_seq("FILTER_COLUMN", "="): 1979 prop.set("filter_column", self._parse_column()) 1980 elif self._match_text_seq("RETENTION_PERIOD", "="): 1981 prop.set("retention_period", self._parse_retention_period()) 1982 1983 
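# Illustrative sketch (not part of sqlglot.parser): the property parsers in
# this region all follow the same pattern -- construct a node, then attach
# optional pieces with Expression.set(). A doctest-style example using only
# the public sqlglot API (the "30 DAYS" value is hypothetical; node shapes
# may vary across sqlglot versions):
#
#     >>> from sqlglot import exp
#     >>> prop = exp.DataDeletionProperty(on=True)
#     >>> prop.set("retention_period", exp.var("30 DAYS"))
#     >>> prop.args["on"], prop.args["retention_period"].name
#     (True, '30 DAYS')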
self._match(TokenType.COMMA) 1984 1985 return prop 1986 1987 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1988 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1989 prop = self._parse_system_versioning_property(with_=True) 1990 self._match_r_paren() 1991 return prop 1992 1993 if self._match(TokenType.L_PAREN, advance=False): 1994 return self._parse_wrapped_properties() 1995 1996 if self._match_text_seq("JOURNAL"): 1997 return self._parse_withjournaltable() 1998 1999 if self._match_texts(self.VIEW_ATTRIBUTES): 2000 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2001 2002 if self._match_text_seq("DATA"): 2003 return self._parse_withdata(no=False) 2004 elif self._match_text_seq("NO", "DATA"): 2005 return self._parse_withdata(no=True) 2006 2007 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2008 return self._parse_serde_properties(with_=True) 2009 2010 if not self._next: 2011 return None 2012 2013 return self._parse_withisolatedloading() 2014 2015 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2016 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2017 self._match(TokenType.EQ) 2018 2019 user = self._parse_id_var() 2020 self._match(TokenType.PARAMETER) 2021 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2022 2023 if not user or not host: 2024 return None 2025 2026 return exp.DefinerProperty(this=f"{user}@{host}") 2027 2028 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2029 self._match(TokenType.TABLE) 2030 self._match(TokenType.EQ) 2031 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2032 2033 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2034 return self.expression(exp.LogProperty, no=no) 2035 2036 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2037 return self.expression(exp.JournalProperty, **kwargs) 2038 2039 def _parse_checksum(self) -> exp.ChecksumProperty: 2040 self._match(TokenType.EQ) 2041 2042 on = None 2043 if self._match(TokenType.ON): 2044 on = True 2045 elif self._match_text_seq("OFF"): 2046 on = False 2047 2048 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2049 2050 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2051 return self.expression( 2052 exp.Cluster, 2053 expressions=( 2054 self._parse_wrapped_csv(self._parse_ordered) 2055 if wrapped 2056 else self._parse_csv(self._parse_ordered) 2057 ), 2058 ) 2059 2060 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2061 self._match_text_seq("BY") 2062 2063 self._match_l_paren() 2064 expressions = self._parse_csv(self._parse_column) 2065 self._match_r_paren() 2066 2067 if self._match_text_seq("SORTED", "BY"): 2068 self._match_l_paren() 2069 sorted_by = self._parse_csv(self._parse_ordered) 2070 self._match_r_paren() 2071 else: 2072 sorted_by = None 2073 2074 self._match(TokenType.INTO) 2075 buckets = self._parse_number() 2076 self._match_text_seq("BUCKETS") 2077 2078 return self.expression( 2079 exp.ClusteredByProperty, 2080 expressions=expressions, 2081 sorted_by=sorted_by, 2082 buckets=buckets, 2083 ) 2084 2085 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2086 if not self._match_text_seq("GRANTS"): 2087 self._retreat(self._index - 1) 2088 return None 2089 2090 return self.expression(exp.CopyGrantsProperty) 2091 2092 def _parse_freespace(self) -> exp.FreespaceProperty: 2093 self._match(TokenType.EQ) 2094 return 
self.expression( 2095 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2096 ) 2097 2098 def _parse_mergeblockratio( 2099 self, no: bool = False, default: bool = False 2100 ) -> exp.MergeBlockRatioProperty: 2101 if self._match(TokenType.EQ): 2102 return self.expression( 2103 exp.MergeBlockRatioProperty, 2104 this=self._parse_number(), 2105 percent=self._match(TokenType.PERCENT), 2106 ) 2107 2108 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2109 2110 def _parse_datablocksize( 2111 self, 2112 default: t.Optional[bool] = None, 2113 minimum: t.Optional[bool] = None, 2114 maximum: t.Optional[bool] = None, 2115 ) -> exp.DataBlocksizeProperty: 2116 self._match(TokenType.EQ) 2117 size = self._parse_number() 2118 2119 units = None 2120 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2121 units = self._prev.text 2122 2123 return self.expression( 2124 exp.DataBlocksizeProperty, 2125 size=size, 2126 units=units, 2127 default=default, 2128 minimum=minimum, 2129 maximum=maximum, 2130 ) 2131 2132 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2133 self._match(TokenType.EQ) 2134 always = self._match_text_seq("ALWAYS") 2135 manual = self._match_text_seq("MANUAL") 2136 never = self._match_text_seq("NEVER") 2137 default = self._match_text_seq("DEFAULT") 2138 2139 autotemp = None 2140 if self._match_text_seq("AUTOTEMP"): 2141 autotemp = self._parse_schema() 2142 2143 return self.expression( 2144 exp.BlockCompressionProperty, 2145 always=always, 2146 manual=manual, 2147 never=never, 2148 default=default, 2149 autotemp=autotemp, 2150 ) 2151 2152 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2153 index = self._index 2154 no = self._match_text_seq("NO") 2155 concurrent = self._match_text_seq("CONCURRENT") 2156 2157 if not self._match_text_seq("ISOLATED", "LOADING"): 2158 self._retreat(index) 2159 return None 2160 2161 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2162 return self.expression( 2163 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2164 ) 2165 2166 def _parse_locking(self) -> exp.LockingProperty: 2167 if self._match(TokenType.TABLE): 2168 kind = "TABLE" 2169 elif self._match(TokenType.VIEW): 2170 kind = "VIEW" 2171 elif self._match(TokenType.ROW): 2172 kind = "ROW" 2173 elif self._match_text_seq("DATABASE"): 2174 kind = "DATABASE" 2175 else: 2176 kind = None 2177 2178 if kind in ("DATABASE", "TABLE", "VIEW"): 2179 this = self._parse_table_parts() 2180 else: 2181 this = None 2182 2183 if self._match(TokenType.FOR): 2184 for_or_in = "FOR" 2185 elif self._match(TokenType.IN): 2186 for_or_in = "IN" 2187 else: 2188 for_or_in = None 2189 2190 if self._match_text_seq("ACCESS"): 2191 lock_type = "ACCESS" 2192 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2193 lock_type = "EXCLUSIVE" 2194 elif self._match_text_seq("SHARE"): 2195 lock_type = "SHARE" 2196 elif self._match_text_seq("READ"): 2197 lock_type = "READ" 2198 elif self._match_text_seq("WRITE"): 2199 lock_type = "WRITE" 2200 elif self._match_text_seq("CHECKSUM"): 2201 lock_type = "CHECKSUM" 2202 else: 2203 lock_type = None 2204 2205 override = self._match_text_seq("OVERRIDE") 2206 2207 return self.expression( 2208 exp.LockingProperty, 2209 this=this, 2210 kind=kind, 2211 for_or_in=for_or_in, 2212 lock_type=lock_type, 2213 override=override, 2214 ) 2215 2216 def _parse_partition_by(self) -> t.List[exp.Expression]: 2217 if 
self._match(TokenType.PARTITION_BY): 2218 return self._parse_csv(self._parse_assignment) 2219 return [] 2220 2221 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2222 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2223 if self._match_text_seq("MINVALUE"): 2224 return exp.var("MINVALUE") 2225 if self._match_text_seq("MAXVALUE"): 2226 return exp.var("MAXVALUE") 2227 return self._parse_bitwise() 2228 2229 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2230 expression = None 2231 from_expressions = None 2232 to_expressions = None 2233 2234 if self._match(TokenType.IN): 2235 this = self._parse_wrapped_csv(self._parse_bitwise) 2236 elif self._match(TokenType.FROM): 2237 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2238 self._match_text_seq("TO") 2239 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2240 elif self._match_text_seq("WITH", "(", "MODULUS"): 2241 this = self._parse_number() 2242 self._match_text_seq(",", "REMAINDER") 2243 expression = self._parse_number() 2244 self._match_r_paren() 2245 else: 2246 self.raise_error("Failed to parse partition bound spec.") 2247 2248 return self.expression( 2249 exp.PartitionBoundSpec, 2250 this=this, 2251 expression=expression, 2252 from_expressions=from_expressions, 2253 to_expressions=to_expressions, 2254 ) 2255 2256 # https://www.postgresql.org/docs/current/sql-createtable.html 2257 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2258 if not self._match_text_seq("OF"): 2259 self._retreat(self._index - 1) 2260 return None 2261 2262 this = self._parse_table(schema=True) 2263 2264 if self._match(TokenType.DEFAULT): 2265 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2266 elif self._match_text_seq("FOR", "VALUES"): 2267 expression = self._parse_partition_bound_spec() 2268 else: 2269 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2270 2271 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2272 2273 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2274 self._match(TokenType.EQ) 2275 return self.expression( 2276 exp.PartitionedByProperty, 2277 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2278 ) 2279 2280 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2281 if self._match_text_seq("AND", "STATISTICS"): 2282 statistics = True 2283 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2284 statistics = False 2285 else: 2286 statistics = None 2287 2288 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2289 2290 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2291 if self._match_text_seq("SQL"): 2292 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2293 return None 2294 2295 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2296 if self._match_text_seq("SQL", "DATA"): 2297 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2298 return None 2299 2300 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2301 if self._match_text_seq("PRIMARY", "INDEX"): 2302 return exp.NoPrimaryIndexProperty() 2303 if self._match_text_seq("SQL"): 2304 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2305 return None 2306 2307 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2308 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2309 return exp.OnCommitProperty() 2310 
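# Illustrative sketch (not part of sqlglot.parser): how the ON COMMIT branches
# of _parse_on_property surface through the public API. A doctest-style example
# with the default dialect (tree shapes may vary across sqlglot versions):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> ddl = sqlglot.parse_one(
#     ...     "CREATE TEMPORARY TABLE t (x INT) ON COMMIT PRESERVE ROWS"
#     ... )
#     >>> ddl.find(exp.OnCommitProperty) is not None
#     True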
if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2311 return exp.OnCommitProperty(delete=True) 2312 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2313 2314 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2315 if self._match_text_seq("SQL", "DATA"): 2316 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2317 return None 2318 2319 def _parse_distkey(self) -> exp.DistKeyProperty: 2320 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2321 2322 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2323 table = self._parse_table(schema=True) 2324 2325 options = [] 2326 while self._match_texts(("INCLUDING", "EXCLUDING")): 2327 this = self._prev.text.upper() 2328 2329 id_var = self._parse_id_var() 2330 if not id_var: 2331 return None 2332 2333 options.append( 2334 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2335 ) 2336 2337 return self.expression(exp.LikeProperty, this=table, expressions=options) 2338 2339 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2340 return self.expression( 2341 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2342 ) 2343 2344 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2345 self._match(TokenType.EQ) 2346 return self.expression( 2347 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2348 ) 2349 2350 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2351 self._match_text_seq("WITH", "CONNECTION") 2352 return self.expression( 2353 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2354 ) 2355 2356 def _parse_returns(self) -> exp.ReturnsProperty: 2357 value: t.Optional[exp.Expression] 2358 null = None 2359 is_table = self._match(TokenType.TABLE) 2360 2361 if is_table: 2362 if self._match(TokenType.LT): 2363 value = self.expression( 2364 exp.Schema, 2365 this="TABLE", 2366 expressions=self._parse_csv(self._parse_struct_types), 2367 ) 2368 if not self._match(TokenType.GT): 2369 self.raise_error("Expecting >") 2370 else: 2371 value = self._parse_schema(exp.var("TABLE")) 2372 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2373 null = True 2374 value = None 2375 else: 2376 value = self._parse_types() 2377 2378 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2379 2380 def _parse_describe(self) -> exp.Describe: 2381 kind = self._match_set(self.CREATABLES) and self._prev.text 2382 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2383 if self._match(TokenType.DOT): 2384 style = None 2385 self._retreat(self._index - 2) 2386 this = self._parse_table(schema=True) 2387 properties = self._parse_properties() 2388 expressions = properties.expressions if properties else None 2389 return self.expression( 2390 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2391 ) 2392 2393 def _parse_insert(self) -> exp.Insert: 2394 comments = ensure_list(self._prev_comments) 2395 hint = self._parse_hint() 2396 overwrite = self._match(TokenType.OVERWRITE) 2397 ignore = self._match(TokenType.IGNORE) 2398 local = self._match_text_seq("LOCAL") 2399 alternative = None 2400 is_function = None 2401 2402 if self._match_text_seq("DIRECTORY"): 2403 this: t.Optional[exp.Expression] = self.expression( 2404 exp.Directory, 2405 this=self._parse_var_or_string(), 2406 
local=local, 2407 row_format=self._parse_row_format(match_row=True), 2408 ) 2409 else: 2410 if self._match(TokenType.OR): 2411 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2412 2413 self._match(TokenType.INTO) 2414 comments += ensure_list(self._prev_comments) 2415 self._match(TokenType.TABLE) 2416 is_function = self._match(TokenType.FUNCTION) 2417 2418 this = ( 2419 self._parse_table(schema=True, parse_partition=True) 2420 if not is_function 2421 else self._parse_function() 2422 ) 2423 2424 returning = self._parse_returning() 2425 2426 return self.expression( 2427 exp.Insert, 2428 comments=comments, 2429 hint=hint, 2430 is_function=is_function, 2431 this=this, 2432 stored=self._match_text_seq("STORED") and self._parse_stored(), 2433 by_name=self._match_text_seq("BY", "NAME"), 2434 exists=self._parse_exists(), 2435 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2436 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2437 conflict=self._parse_on_conflict(), 2438 returning=returning or self._parse_returning(), 2439 overwrite=overwrite, 2440 alternative=alternative, 2441 ignore=ignore, 2442 ) 2443 2444 def _parse_kill(self) -> exp.Kill: 2445 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2446 2447 return self.expression( 2448 exp.Kill, 2449 this=self._parse_primary(), 2450 kind=kind, 2451 ) 2452 2453 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2454 conflict = self._match_text_seq("ON", "CONFLICT") 2455 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2456 2457 if not conflict and not duplicate: 2458 return None 2459 2460 conflict_keys = None 2461 constraint = None 2462 2463 if conflict: 2464 if self._match_text_seq("ON", "CONSTRAINT"): 2465 constraint = self._parse_id_var() 2466 elif self._match(TokenType.L_PAREN): 2467 conflict_keys = self._parse_csv(self._parse_id_var) 2468 self._match_r_paren() 2469 2470 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2471 if self._prev.token_type == TokenType.UPDATE: 2472 self._match(TokenType.SET) 2473 expressions = self._parse_csv(self._parse_equality) 2474 else: 2475 expressions = None 2476 2477 return self.expression( 2478 exp.OnConflict, 2479 duplicate=duplicate, 2480 expressions=expressions, 2481 action=action, 2482 conflict_keys=conflict_keys, 2483 constraint=constraint, 2484 ) 2485 2486 def _parse_returning(self) -> t.Optional[exp.Returning]: 2487 if not self._match(TokenType.RETURNING): 2488 return None 2489 return self.expression( 2490 exp.Returning, 2491 expressions=self._parse_csv(self._parse_expression), 2492 into=self._match(TokenType.INTO) and self._parse_table_part(), 2493 ) 2494 2495 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2496 if not self._match(TokenType.FORMAT): 2497 return None 2498 return self._parse_row_format() 2499 2500 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2501 index = self._index 2502 with_ = with_ or self._match_text_seq("WITH") 2503 2504 if not self._match(TokenType.SERDE_PROPERTIES): 2505 self._retreat(index) 2506 return None 2507 return self.expression( 2508 exp.SerdeProperties, 2509 **{ # type: ignore 2510 "expressions": self._parse_wrapped_properties(), 2511 "with": with_, 2512 }, 2513 ) 2514 2515 def _parse_row_format( 2516 self, match_row: bool = False 2517 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2518 
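# Illustrative sketch (not part of sqlglot.parser): a Hive-style ROW FORMAT
# DELIMITED clause as handled below, seen through the public API. Assumes the
# "hive" dialect; exact node shapes may vary by sqlglot version:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> ddl = sqlglot.parse_one(
#     ...     "CREATE TABLE t (a INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','",
#     ...     read="hive",
#     ... )
#     >>> ddl.find(exp.RowFormatDelimitedProperty).args["fields"].this
#     ','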
if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2519 return None 2520 2521 if self._match_text_seq("SERDE"): 2522 this = self._parse_string() 2523 2524 serde_properties = self._parse_serde_properties() 2525 2526 return self.expression( 2527 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2528 ) 2529 2530 self._match_text_seq("DELIMITED") 2531 2532 kwargs = {} 2533 2534 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2535 kwargs["fields"] = self._parse_string() 2536 if self._match_text_seq("ESCAPED", "BY"): 2537 kwargs["escaped"] = self._parse_string() 2538 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2539 kwargs["collection_items"] = self._parse_string() 2540 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2541 kwargs["map_keys"] = self._parse_string() 2542 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2543 kwargs["lines"] = self._parse_string() 2544 if self._match_text_seq("NULL", "DEFINED", "AS"): 2545 kwargs["null"] = self._parse_string() 2546 2547 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2548 2549 def _parse_load(self) -> exp.LoadData | exp.Command: 2550 if self._match_text_seq("DATA"): 2551 local = self._match_text_seq("LOCAL") 2552 self._match_text_seq("INPATH") 2553 inpath = self._parse_string() 2554 overwrite = self._match(TokenType.OVERWRITE) 2555 self._match_pair(TokenType.INTO, TokenType.TABLE) 2556 2557 return self.expression( 2558 exp.LoadData, 2559 this=self._parse_table(schema=True), 2560 local=local, 2561 overwrite=overwrite, 2562 inpath=inpath, 2563 partition=self._parse_partition(), 2564 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2565 serde=self._match_text_seq("SERDE") and self._parse_string(), 2566 ) 2567 return self._parse_as_command(self._prev) 2568 2569 def _parse_delete(self) -> exp.Delete: 2570 # This handles MySQL's "Multiple-Table Syntax" 2571 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2572 tables = None 2573 comments = self._prev_comments 2574 if not self._match(TokenType.FROM, advance=False): 2575 tables = self._parse_csv(self._parse_table) or None 2576 2577 returning = self._parse_returning() 2578 2579 return self.expression( 2580 exp.Delete, 2581 comments=comments, 2582 tables=tables, 2583 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2584 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2585 where=self._parse_where(), 2586 returning=returning or self._parse_returning(), 2587 limit=self._parse_limit(), 2588 ) 2589 2590 def _parse_update(self) -> exp.Update: 2591 comments = self._prev_comments 2592 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2593 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2594 returning = self._parse_returning() 2595 return self.expression( 2596 exp.Update, 2597 comments=comments, 2598 **{ # type: ignore 2599 "this": this, 2600 "expressions": expressions, 2601 "from": self._parse_from(joins=True), 2602 "where": self._parse_where(), 2603 "returning": returning or self._parse_returning(), 2604 "order": self._parse_order(), 2605 "limit": self._parse_limit(), 2606 }, 2607 ) 2608 2609 def _parse_uncache(self) -> exp.Uncache: 2610 if not self._match(TokenType.TABLE): 2611 self.raise_error("Expecting TABLE after UNCACHE") 2612 2613 return self.expression( 2614 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2615 ) 2616 2617 def 
_parse_cache(self) -> exp.Cache: 2618 lazy = self._match_text_seq("LAZY") 2619 self._match(TokenType.TABLE) 2620 table = self._parse_table(schema=True) 2621 2622 options = [] 2623 if self._match_text_seq("OPTIONS"): 2624 self._match_l_paren() 2625 k = self._parse_string() 2626 self._match(TokenType.EQ) 2627 v = self._parse_string() 2628 options = [k, v] 2629 self._match_r_paren() 2630 2631 self._match(TokenType.ALIAS) 2632 return self.expression( 2633 exp.Cache, 2634 this=table, 2635 lazy=lazy, 2636 options=options, 2637 expression=self._parse_select(nested=True), 2638 ) 2639 2640 def _parse_partition(self) -> t.Optional[exp.Partition]: 2641 if not self._match(TokenType.PARTITION): 2642 return None 2643 2644 return self.expression( 2645 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2646 ) 2647 2648 def _parse_value(self) -> t.Optional[exp.Tuple]: 2649 if self._match(TokenType.L_PAREN): 2650 expressions = self._parse_csv(self._parse_expression) 2651 self._match_r_paren() 2652 return self.expression(exp.Tuple, expressions=expressions) 2653 2654 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2655 expression = self._parse_expression() 2656 if expression: 2657 return self.expression(exp.Tuple, expressions=[expression]) 2658 return None 2659 2660 def _parse_projections(self) -> t.List[exp.Expression]: 2661 return self._parse_expressions() 2662 2663 def _parse_select( 2664 self, 2665 nested: bool = False, 2666 table: bool = False, 2667 parse_subquery_alias: bool = True, 2668 parse_set_operation: bool = True, 2669 ) -> t.Optional[exp.Expression]: 2670 cte = self._parse_with() 2671 2672 if cte: 2673 this = self._parse_statement() 2674 2675 if not this: 2676 self.raise_error("Failed to parse any statement following CTE") 2677 return cte 2678 2679 if "with" in this.arg_types: 2680 this.set("with", cte) 2681 else: 2682 self.raise_error(f"{this.key} does not support CTE") 2683 this = cte 2684 2685 return this 2686 2687 # duckdb supports leading with FROM x 2688 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2689 2690 if self._match(TokenType.SELECT): 2691 comments = self._prev_comments 2692 2693 hint = self._parse_hint() 2694 all_ = self._match(TokenType.ALL) 2695 distinct = self._match_set(self.DISTINCT_TOKENS) 2696 2697 kind = ( 2698 self._match(TokenType.ALIAS) 2699 and self._match_texts(("STRUCT", "VALUE")) 2700 and self._prev.text.upper() 2701 ) 2702 2703 if distinct: 2704 distinct = self.expression( 2705 exp.Distinct, 2706 on=self._parse_value() if self._match(TokenType.ON) else None, 2707 ) 2708 2709 if all_ and distinct: 2710 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2711 2712 limit = self._parse_limit(top=True) 2713 projections = self._parse_projections() 2714 2715 this = self.expression( 2716 exp.Select, 2717 kind=kind, 2718 hint=hint, 2719 distinct=distinct, 2720 expressions=projections, 2721 limit=limit, 2722 ) 2723 this.comments = comments 2724 2725 into = self._parse_into() 2726 if into: 2727 this.set("into", into) 2728 2729 if not from_: 2730 from_ = self._parse_from() 2731 2732 if from_: 2733 this.set("from", from_) 2734 2735 this = self._parse_query_modifiers(this) 2736 elif (table or nested) and self._match(TokenType.L_PAREN): 2737 if self._match(TokenType.PIVOT): 2738 this = self._parse_simplified_pivot() 2739 elif self._match(TokenType.FROM): 2740 this = exp.select("*").from_( 2741 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2742 ) 2743 else: 2744 
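# Illustrative sketch (not part of sqlglot.parser): a parenthesized query in
# table position ends up wrapped in exp.Subquery by _parse_subquery below.
# Doctest-style example via the public API (shapes may vary by version):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> q = sqlglot.parse_one("SELECT * FROM (SELECT 1 AS x) AS sub")
#     >>> sub = q.args["from"].this
#     >>> isinstance(sub, exp.Subquery), sub.alias
#     (True, 'sub')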
this = ( 2745 self._parse_table() 2746 if table 2747 else self._parse_select(nested=True, parse_set_operation=False) 2748 ) 2749 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2750 2751 self._match_r_paren() 2752 2753 # We return early here so that the UNION isn't attached to the subquery by the 2754 # following call to _parse_set_operations, but instead becomes the parent node 2755 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2756 elif self._match(TokenType.VALUES, advance=False): 2757 this = self._parse_derived_table_values() 2758 elif from_: 2759 this = exp.select("*").from_(from_.this, copy=False) 2760 else: 2761 this = None 2762 2763 if parse_set_operation: 2764 return self._parse_set_operations(this) 2765 return this 2766 2767 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2768 if not skip_with_token and not self._match(TokenType.WITH): 2769 return None 2770 2771 comments = self._prev_comments 2772 recursive = self._match(TokenType.RECURSIVE) 2773 2774 expressions = [] 2775 while True: 2776 expressions.append(self._parse_cte()) 2777 2778 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2779 break 2780 else: 2781 self._match(TokenType.WITH) 2782 2783 return self.expression( 2784 exp.With, comments=comments, expressions=expressions, recursive=recursive 2785 ) 2786 2787 def _parse_cte(self) -> exp.CTE: 2788 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2789 if not alias or not alias.this: 2790 self.raise_error("Expected CTE to have alias") 2791 2792 self._match(TokenType.ALIAS) 2793 2794 if self._match_text_seq("NOT", "MATERIALIZED"): 2795 materialized = False 2796 elif self._match_text_seq("MATERIALIZED"): 2797 materialized = True 2798 else: 2799 materialized = None 2800 2801 return self.expression( 2802 exp.CTE, 2803 this=self._parse_wrapped(self._parse_statement), 2804 alias=alias, 2805 materialized=materialized, 2806 ) 2807 2808 def _parse_table_alias( 2809 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2810 ) -> t.Optional[exp.TableAlias]: 2811 any_token = self._match(TokenType.ALIAS) 2812 alias = ( 2813 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2814 or self._parse_string_as_identifier() 2815 ) 2816 2817 index = self._index 2818 if self._match(TokenType.L_PAREN): 2819 columns = self._parse_csv(self._parse_function_parameter) 2820 self._match_r_paren() if columns else self._retreat(index) 2821 else: 2822 columns = None 2823 2824 if not alias and not columns: 2825 return None 2826 2827 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2828 2829 # We bubble up comments from the Identifier to the TableAlias 2830 if isinstance(alias, exp.Identifier): 2831 table_alias.add_comments(alias.pop_comments()) 2832 2833 return table_alias 2834 2835 def _parse_subquery( 2836 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2837 ) -> t.Optional[exp.Subquery]: 2838 if not this: 2839 return None 2840 2841 return self.expression( 2842 exp.Subquery, 2843 this=this, 2844 pivots=self._parse_pivots(), 2845 alias=self._parse_table_alias() if parse_alias else None, 2846 ) 2847 2848 def _implicit_unnests_to_explicit(self, this: E) -> E: 2849 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2850 2851 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2852 for i, join in enumerate(this.args.get("joins") or []): 2853 table = join.this 2854 
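# Illustrative sketch (not part of sqlglot.parser): the rewrite this method
# performs, observed via the public API. Assumes a dialect that enables
# SUPPORTS_IMPLICIT_UNNEST (e.g. BigQuery); "t.events" is a hypothetical
# repeated column, and results may vary by sqlglot version:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> ast = sqlglot.parse_one("SELECT e FROM t, t.events AS e", read="bigquery")
#     >>> ast.find(exp.Unnest) is not None
#     True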
normalized_table = table.copy() 2855 normalized_table.meta["maybe_column"] = True 2856 normalized_table = _norm(normalized_table, dialect=self.dialect) 2857 2858 if isinstance(table, exp.Table) and not join.args.get("on"): 2859 if normalized_table.parts[0].name in refs: 2860 table_as_column = table.to_column() 2861 unnest = exp.Unnest(expressions=[table_as_column]) 2862 2863 # Table.to_column creates a parent Alias node that we want to convert to 2864 # a TableAlias and attach to the Unnest, so it matches the parser's output 2865 if isinstance(table.args.get("alias"), exp.TableAlias): 2866 table_as_column.replace(table_as_column.this) 2867 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2868 2869 table.replace(unnest) 2870 2871 refs.add(normalized_table.alias_or_name) 2872 2873 return this 2874 2875 def _parse_query_modifiers( 2876 self, this: t.Optional[exp.Expression] 2877 ) -> t.Optional[exp.Expression]: 2878 if isinstance(this, (exp.Query, exp.Table)): 2879 for join in self._parse_joins(): 2880 this.append("joins", join) 2881 for lateral in iter(self._parse_lateral, None): 2882 this.append("laterals", lateral) 2883 2884 while True: 2885 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2886 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2887 key, expression = parser(self) 2888 2889 if expression: 2890 this.set(key, expression) 2891 if key == "limit": 2892 offset = expression.args.pop("offset", None) 2893 2894 if offset: 2895 offset = exp.Offset(expression=offset) 2896 this.set("offset", offset) 2897 2898 limit_by_expressions = expression.expressions 2899 expression.set("expressions", None) 2900 offset.set("expressions", limit_by_expressions) 2901 continue 2902 break 2903 2904 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2905 this = self._implicit_unnests_to_explicit(this) 2906 2907 return this 2908 2909 def _parse_hint(self) -> t.Optional[exp.Hint]: 2910 if self._match(TokenType.HINT): 2911 hints = [] 2912 for hint in iter( 2913 lambda: self._parse_csv( 2914 lambda: self._parse_function() or self._parse_var(upper=True) 2915 ), 2916 [], 2917 ): 2918 hints.extend(hint) 2919 2920 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2921 self.raise_error("Expected */ after HINT") 2922 2923 return self.expression(exp.Hint, expressions=hints) 2924 2925 return None 2926 2927 def _parse_into(self) -> t.Optional[exp.Into]: 2928 if not self._match(TokenType.INTO): 2929 return None 2930 2931 temp = self._match(TokenType.TEMPORARY) 2932 unlogged = self._match_text_seq("UNLOGGED") 2933 self._match(TokenType.TABLE) 2934 2935 return self.expression( 2936 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2937 ) 2938 2939 def _parse_from( 2940 self, joins: bool = False, skip_from_token: bool = False 2941 ) -> t.Optional[exp.From]: 2942 if not skip_from_token and not self._match(TokenType.FROM): 2943 return None 2944 2945 return self.expression( 2946 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2947 ) 2948 2949 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2950 return self.expression( 2951 exp.MatchRecognizeMeasure, 2952 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2953 this=self._parse_expression(), 2954 ) 2955 2956 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2957 if not self._match(TokenType.MATCH_RECOGNIZE): 2958 return None 2959 2960 self._match_l_paren() 2961 2962 partition = 
self._parse_partition_by() 2963 order = self._parse_order() 2964 2965 measures = ( 2966 self._parse_csv(self._parse_match_recognize_measure) 2967 if self._match_text_seq("MEASURES") 2968 else None 2969 ) 2970 2971 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2972 rows = exp.var("ONE ROW PER MATCH") 2973 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2974 text = "ALL ROWS PER MATCH" 2975 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2976 text += " SHOW EMPTY MATCHES" 2977 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2978 text += " OMIT EMPTY MATCHES" 2979 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2980 text += " WITH UNMATCHED ROWS" 2981 rows = exp.var(text) 2982 else: 2983 rows = None 2984 2985 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2986 text = "AFTER MATCH SKIP" 2987 if self._match_text_seq("PAST", "LAST", "ROW"): 2988 text += " PAST LAST ROW" 2989 elif self._match_text_seq("TO", "NEXT", "ROW"): 2990 text += " TO NEXT ROW" 2991 elif self._match_text_seq("TO", "FIRST"): 2992 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2993 elif self._match_text_seq("TO", "LAST"): 2994 text += f" TO LAST {self._advance_any().text}" # type: ignore 2995 after = exp.var(text) 2996 else: 2997 after = None 2998 2999 if self._match_text_seq("PATTERN"): 3000 self._match_l_paren() 3001 3002 if not self._curr: 3003 self.raise_error("Expecting )", self._curr) 3004 3005 paren = 1 3006 start = self._curr 3007 3008 while self._curr and paren > 0: 3009 if self._curr.token_type == TokenType.L_PAREN: 3010 paren += 1 3011 if self._curr.token_type == TokenType.R_PAREN: 3012 paren -= 1 3013 3014 end = self._prev 3015 self._advance() 3016 3017 if paren > 0: 3018 self.raise_error("Expecting )", self._curr) 3019 3020 pattern = exp.var(self._find_sql(start, end)) 3021 else: 3022 pattern = None 3023 3024 define = ( 3025 self._parse_csv(self._parse_name_as_expression) 3026 if self._match_text_seq("DEFINE") 3027 else None 3028 ) 3029 3030 self._match_r_paren() 3031 3032 return self.expression( 3033 exp.MatchRecognize, 3034 partition_by=partition, 3035 order=order, 3036 measures=measures, 3037 rows=rows, 3038 after=after, 3039 pattern=pattern, 3040 define=define, 3041 alias=self._parse_table_alias(), 3042 ) 3043 3044 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3045 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3046 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3047 cross_apply = False 3048 3049 if cross_apply is not None: 3050 this = self._parse_select(table=True) 3051 view = None 3052 outer = None 3053 elif self._match(TokenType.LATERAL): 3054 this = self._parse_select(table=True) 3055 view = self._match(TokenType.VIEW) 3056 outer = self._match(TokenType.OUTER) 3057 else: 3058 return None 3059 3060 if not this: 3061 this = ( 3062 self._parse_unnest() 3063 or self._parse_function() 3064 or self._parse_id_var(any_token=False) 3065 ) 3066 3067 while self._match(TokenType.DOT): 3068 this = exp.Dot( 3069 this=this, 3070 expression=self._parse_function() or self._parse_id_var(any_token=False), 3071 ) 3072 3073 if view: 3074 table = self._parse_id_var(any_token=False) 3075 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3076 table_alias: t.Optional[exp.TableAlias] = self.expression( 3077 exp.TableAlias, this=table, columns=columns 3078 ) 3079 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3080 # We move the alias from the lateral's child node to 
the lateral itself 3081 table_alias = this.args["alias"].pop() 3082 else: 3083 table_alias = self._parse_table_alias() 3084 3085 return self.expression( 3086 exp.Lateral, 3087 this=this, 3088 view=view, 3089 outer=outer, 3090 alias=table_alias, 3091 cross_apply=cross_apply, 3092 ) 3093 3094 def _parse_join_parts( 3095 self, 3096 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3097 return ( 3098 self._match_set(self.JOIN_METHODS) and self._prev, 3099 self._match_set(self.JOIN_SIDES) and self._prev, 3100 self._match_set(self.JOIN_KINDS) and self._prev, 3101 ) 3102 3103 def _parse_join( 3104 self, skip_join_token: bool = False, parse_bracket: bool = False 3105 ) -> t.Optional[exp.Join]: 3106 if self._match(TokenType.COMMA): 3107 return self.expression(exp.Join, this=self._parse_table()) 3108 3109 index = self._index 3110 method, side, kind = self._parse_join_parts() 3111 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3112 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3113 3114 if not skip_join_token and not join: 3115 self._retreat(index) 3116 kind = None 3117 method = None 3118 side = None 3119 3120 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3121 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3122 3123 if not skip_join_token and not join and not outer_apply and not cross_apply: 3124 return None 3125 3126 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3127 3128 if method: 3129 kwargs["method"] = method.text 3130 if side: 3131 kwargs["side"] = side.text 3132 if kind: 3133 kwargs["kind"] = kind.text 3134 if hint: 3135 kwargs["hint"] = hint 3136 3137 if self._match(TokenType.MATCH_CONDITION): 3138 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3139 3140 if self._match(TokenType.ON): 3141 kwargs["on"] = self._parse_assignment() 3142 elif self._match(TokenType.USING): 3143 kwargs["using"] = self._parse_wrapped_id_vars() 3144 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3145 kind and kind.token_type == TokenType.CROSS 3146 ): 3147 index = self._index 3148 joins: t.Optional[list] = list(self._parse_joins()) 3149 3150 if joins and self._match(TokenType.ON): 3151 kwargs["on"] = self._parse_assignment() 3152 elif joins and self._match(TokenType.USING): 3153 kwargs["using"] = self._parse_wrapped_id_vars() 3154 else: 3155 joins = None 3156 self._retreat(index) 3157 3158 kwargs["this"].set("joins", joins if joins else None) 3159 3160 comments = [c for token in (method, side, kind) if token for c in token.comments] 3161 return self.expression(exp.Join, comments=comments, **kwargs) 3162 3163 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3164 this = self._parse_assignment() 3165 3166 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3167 return this 3168 3169 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3170 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3171 3172 return this 3173 3174 def _parse_index_params(self) -> exp.IndexParameters: 3175 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3176 3177 if self._match(TokenType.L_PAREN, advance=False): 3178 columns = self._parse_wrapped_csv(self._parse_with_operator) 3179 else: 3180 columns = None 3181 3182 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3183 partition_by = 
self._parse_partition_by() 3184 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3185 tablespace = ( 3186 self._parse_var(any_token=True) 3187 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3188 else None 3189 ) 3190 where = self._parse_where() 3191 3192 on = self._parse_field() if self._match(TokenType.ON) else None 3193 3194 return self.expression( 3195 exp.IndexParameters, 3196 using=using, 3197 columns=columns, 3198 include=include, 3199 partition_by=partition_by, 3200 where=where, 3201 with_storage=with_storage, 3202 tablespace=tablespace, 3203 on=on, 3204 ) 3205 3206 def _parse_index( 3207 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3208 ) -> t.Optional[exp.Index]: 3209 if index or anonymous: 3210 unique = None 3211 primary = None 3212 amp = None 3213 3214 self._match(TokenType.ON) 3215 self._match(TokenType.TABLE) # hive 3216 table = self._parse_table_parts(schema=True) 3217 else: 3218 unique = self._match(TokenType.UNIQUE) 3219 primary = self._match_text_seq("PRIMARY") 3220 amp = self._match_text_seq("AMP") 3221 3222 if not self._match(TokenType.INDEX): 3223 return None 3224 3225 index = self._parse_id_var() 3226 table = None 3227 3228 params = self._parse_index_params() 3229 3230 return self.expression( 3231 exp.Index, 3232 this=index, 3233 table=table, 3234 unique=unique, 3235 primary=primary, 3236 amp=amp, 3237 params=params, 3238 ) 3239 3240 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3241 hints: t.List[exp.Expression] = [] 3242 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3243 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3244 hints.append( 3245 self.expression( 3246 exp.WithTableHint, 3247 expressions=self._parse_csv( 3248 lambda: self._parse_function() or self._parse_var(any_token=True) 3249 ), 3250 ) 3251 ) 3252 self._match_r_paren() 3253 else: 3254 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3255 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3256 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3257 3258 self._match_set((TokenType.INDEX, TokenType.KEY)) 3259 if self._match(TokenType.FOR): 3260 hint.set("target", self._advance_any() and self._prev.text.upper()) 3261 3262 hint.set("expressions", self._parse_wrapped_id_vars()) 3263 hints.append(hint) 3264 3265 return hints or None 3266 3267 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3268 return ( 3269 (not schema and self._parse_function(optional_parens=False)) 3270 or self._parse_id_var(any_token=False) 3271 or self._parse_string_as_identifier() 3272 or self._parse_placeholder() 3273 ) 3274 3275 def _parse_table_parts( 3276 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3277 ) -> exp.Table: 3278 catalog = None 3279 db = None 3280 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3281 3282 while self._match(TokenType.DOT): 3283 if catalog: 3284 # This allows nesting the table in arbitrarily many dot expressions if needed 3285 table = self.expression( 3286 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3287 ) 3288 else: 3289 catalog = db 3290 db = table 3291 # "" used for tsql FROM a..b case 3292 table = self._parse_table_part(schema=schema) or "" 3293 3294 if ( 3295 wildcard 3296 and self._is_connected() 3297 and (isinstance(table, exp.Identifier) or not table) 3298 and self._match(TokenType.STAR) 3299 ): 3300 if 
isinstance(table, exp.Identifier): 3301 table.args["this"] += "*" 3302 else: 3303 table = exp.Identifier(this="*") 3304 3305 # We bubble up comments from the Identifier to the Table 3306 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3307 3308 if is_db_reference: 3309 catalog = db 3310 db = table 3311 table = None 3312 3313 if not table and not is_db_reference: 3314 self.raise_error(f"Expected table name but got {self._curr}") 3315 if not db and is_db_reference: 3316 self.raise_error(f"Expected database name but got {self._curr}") 3317 3318 return self.expression( 3319 exp.Table, 3320 comments=comments, 3321 this=table, 3322 db=db, 3323 catalog=catalog, 3324 pivots=self._parse_pivots(), 3325 ) 3326 3327 def _parse_table( 3328 self, 3329 schema: bool = False, 3330 joins: bool = False, 3331 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3332 parse_bracket: bool = False, 3333 is_db_reference: bool = False, 3334 parse_partition: bool = False, 3335 ) -> t.Optional[exp.Expression]: 3336 lateral = self._parse_lateral() 3337 if lateral: 3338 return lateral 3339 3340 unnest = self._parse_unnest() 3341 if unnest: 3342 return unnest 3343 3344 values = self._parse_derived_table_values() 3345 if values: 3346 return values 3347 3348 subquery = self._parse_select(table=True) 3349 if subquery: 3350 if not subquery.args.get("pivots"): 3351 subquery.set("pivots", self._parse_pivots()) 3352 return subquery 3353 3354 bracket = parse_bracket and self._parse_bracket(None) 3355 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3356 3357 only = self._match(TokenType.ONLY) 3358 3359 this = t.cast( 3360 exp.Expression, 3361 bracket 3362 or self._parse_bracket( 3363 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3364 ), 3365 ) 3366 3367 if only: 3368 this.set("only", only) 3369 3370 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3371 self._match_text_seq("*") 3372 3373 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3374 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3375 this.set("partition", self._parse_partition()) 3376 3377 if schema: 3378 return self._parse_schema(this=this) 3379 3380 version = self._parse_version() 3381 3382 if version: 3383 this.set("version", version) 3384 3385 if self.dialect.ALIAS_POST_TABLESAMPLE: 3386 table_sample = self._parse_table_sample() 3387 3388 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3389 if alias: 3390 this.set("alias", alias) 3391 3392 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3393 return self.expression( 3394 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3395 ) 3396 3397 this.set("hints", self._parse_table_hints()) 3398 3399 if not this.args.get("pivots"): 3400 this.set("pivots", self._parse_pivots()) 3401 3402 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3403 table_sample = self._parse_table_sample() 3404 3405 if table_sample: 3406 table_sample.set("this", this) 3407 this = table_sample 3408 3409 if joins: 3410 for join in self._parse_joins(): 3411 this.append("joins", join) 3412 3413 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3414 this.set("ordinality", True) 3415 this.set("alias", self._parse_table_alias()) 3416 3417 return this 3418 3419 def _parse_version(self) -> t.Optional[exp.Version]: 3420 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3421 this = "TIMESTAMP" 3422 elif 
self._match(TokenType.VERSION_SNAPSHOT): 3423 this = "VERSION" 3424 else: 3425 return None 3426 3427 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3428 kind = self._prev.text.upper() 3429 start = self._parse_bitwise() 3430 self._match_texts(("TO", "AND")) 3431 end = self._parse_bitwise() 3432 expression: t.Optional[exp.Expression] = self.expression( 3433 exp.Tuple, expressions=[start, end] 3434 ) 3435 elif self._match_text_seq("CONTAINED", "IN"): 3436 kind = "CONTAINED IN" 3437 expression = self.expression( 3438 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3439 ) 3440 elif self._match(TokenType.ALL): 3441 kind = "ALL" 3442 expression = None 3443 else: 3444 self._match_text_seq("AS", "OF") 3445 kind = "AS OF" 3446 expression = self._parse_type() 3447 3448 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3449 3450 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3451 if not self._match(TokenType.UNNEST): 3452 return None 3453 3454 expressions = self._parse_wrapped_csv(self._parse_equality) 3455 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3456 3457 alias = self._parse_table_alias() if with_alias else None 3458 3459 if alias: 3460 if self.dialect.UNNEST_COLUMN_ONLY: 3461 if alias.args.get("columns"): 3462 self.raise_error("Unexpected extra column alias in unnest.") 3463 3464 alias.set("columns", [alias.this]) 3465 alias.set("this", None) 3466 3467 columns = alias.args.get("columns") or [] 3468 if offset and len(expressions) < len(columns): 3469 offset = columns.pop() 3470 3471 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3472 self._match(TokenType.ALIAS) 3473 offset = self._parse_id_var( 3474 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3475 ) or exp.to_identifier("offset") 3476 3477 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3478 3479 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3480 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3481 if not is_derived and not self._match_text_seq("VALUES"): 3482 return None 3483 3484 expressions = self._parse_csv(self._parse_value) 3485 alias = self._parse_table_alias() 3486 3487 if is_derived: 3488 self._match_r_paren() 3489 3490 return self.expression( 3491 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3492 ) 3493 3494 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3495 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3496 as_modifier and self._match_text_seq("USING", "SAMPLE") 3497 ): 3498 return None 3499 3500 bucket_numerator = None 3501 bucket_denominator = None 3502 bucket_field = None 3503 percent = None 3504 size = None 3505 seed = None 3506 3507 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3508 matched_l_paren = self._match(TokenType.L_PAREN) 3509 3510 if self.TABLESAMPLE_CSV: 3511 num = None 3512 expressions = self._parse_csv(self._parse_primary) 3513 else: 3514 expressions = None 3515 num = ( 3516 self._parse_factor() 3517 if self._match(TokenType.NUMBER, advance=False) 3518 else self._parse_primary() or self._parse_placeholder() 3519 ) 3520 3521 if self._match_text_seq("BUCKET"): 3522 bucket_numerator = self._parse_number() 3523 self._match_text_seq("OUT", "OF") 3524 bucket_denominator = self._parse_number() 3525 self._match(TokenType.ON) 3526 bucket_field = self._parse_field() 3527 elif
self._match_set((TokenType.PERCENT, TokenType.MOD)): 3528 percent = num 3529 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3530 size = num 3531 else: 3532 percent = num 3533 3534 if matched_l_paren: 3535 self._match_r_paren() 3536 3537 if self._match(TokenType.L_PAREN): 3538 method = self._parse_var(upper=True) 3539 seed = self._match(TokenType.COMMA) and self._parse_number() 3540 self._match_r_paren() 3541 elif self._match_texts(("SEED", "REPEATABLE")): 3542 seed = self._parse_wrapped(self._parse_number) 3543 3544 if not method and self.DEFAULT_SAMPLING_METHOD: 3545 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3546 3547 return self.expression( 3548 exp.TableSample, 3549 expressions=expressions, 3550 method=method, 3551 bucket_numerator=bucket_numerator, 3552 bucket_denominator=bucket_denominator, 3553 bucket_field=bucket_field, 3554 percent=percent, 3555 size=size, 3556 seed=seed, 3557 ) 3558 3559 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3560 return list(iter(self._parse_pivot, None)) or None 3561 3562 def _parse_joins(self) -> t.Iterator[exp.Join]: 3563 return iter(self._parse_join, None) 3564 3565 # https://duckdb.org/docs/sql/statements/pivot 3566 def _parse_simplified_pivot(self) -> exp.Pivot: 3567 def _parse_on() -> t.Optional[exp.Expression]: 3568 this = self._parse_bitwise() 3569 return self._parse_in(this) if self._match(TokenType.IN) else this 3570 3571 this = self._parse_table() 3572 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3573 using = self._match(TokenType.USING) and self._parse_csv( 3574 lambda: self._parse_alias(self._parse_function()) 3575 ) 3576 group = self._parse_group() 3577 return self.expression( 3578 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3579 ) 3580 3581 def _parse_pivot_in(self) -> exp.In: 3582 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3583 this = self._parse_assignment() 3584 3585 self._match(TokenType.ALIAS) 3586 alias = self._parse_field() 3587 if alias: 3588 return self.expression(exp.PivotAlias, this=this, alias=alias) 3589 3590 return this 3591 3592 value = self._parse_column() 3593 3594 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3595 self.raise_error("Expecting IN (") 3596 3597 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3598 3599 self._match_r_paren() 3600 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3601 3602 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3603 index = self._index 3604 include_nulls = None 3605 3606 if self._match(TokenType.PIVOT): 3607 unpivot = False 3608 elif self._match(TokenType.UNPIVOT): 3609 unpivot = True 3610 3611 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3612 if self._match_text_seq("INCLUDE", "NULLS"): 3613 include_nulls = True 3614 elif self._match_text_seq("EXCLUDE", "NULLS"): 3615 include_nulls = False 3616 else: 3617 return None 3618 3619 expressions = [] 3620 3621 if not self._match(TokenType.L_PAREN): 3622 self._retreat(index) 3623 return None 3624 3625 if unpivot: 3626 expressions = self._parse_csv(self._parse_column) 3627 else: 3628 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3629 3630 if not expressions: 3631 self.raise_error("Failed to parse PIVOT's aggregation list") 3632 3633 if not self._match(TokenType.FOR): 3634 self.raise_error("Expecting FOR") 3635 3636 field = self._parse_pivot_in() 3637 3638 self._match_r_paren() 
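# Illustrative sketch (not part of sqlglot.parser): a PIVOT clause as parsed
# by this method, via the public API. Assumes the "snowflake" dialect and a
# hypothetical "sales" table; node shapes may vary by sqlglot version:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sql = "SELECT * FROM sales PIVOT(SUM(amount) FOR region IN ('east', 'west'))"
#     >>> pivot = sqlglot.parse_one(sql, read="snowflake").find(exp.Pivot)
#     >>> pivot is not None, pivot.args.get("unpivot")
#     (True, False)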
3639 3640 pivot = self.expression( 3641 exp.Pivot, 3642 expressions=expressions, 3643 field=field, 3644 unpivot=unpivot, 3645 include_nulls=include_nulls, 3646 ) 3647 3648 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3649 pivot.set("alias", self._parse_table_alias()) 3650 3651 if not unpivot: 3652 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3653 3654 columns: t.List[exp.Expression] = [] 3655 for fld in pivot.args["field"].expressions: 3656 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3657 for name in names: 3658 if self.PREFIXED_PIVOT_COLUMNS: 3659 name = f"{name}_{field_name}" if name else field_name 3660 else: 3661 name = f"{field_name}_{name}" if name else field_name 3662 3663 columns.append(exp.to_identifier(name)) 3664 3665 pivot.set("columns", columns) 3666 3667 return pivot 3668 3669 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3670 return [agg.alias for agg in aggregations] 3671 3672 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3673 if not skip_where_token and not self._match(TokenType.PREWHERE): 3674 return None 3675 3676 return self.expression( 3677 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3678 ) 3679 3680 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3681 if not skip_where_token and not self._match(TokenType.WHERE): 3682 return None 3683 3684 return self.expression( 3685 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3686 ) 3687 3688 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3689 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3690 return None 3691 3692 elements: t.Dict[str, t.Any] = defaultdict(list) 3693 3694 if self._match(TokenType.ALL): 3695 elements["all"] = True 3696 elif self._match(TokenType.DISTINCT): 3697 elements["all"] = False 3698 3699 while True: 3700 expressions = self._parse_csv( 3701 lambda: None 3702 if self._match(TokenType.ROLLUP, advance=False) 3703 else self._parse_assignment() 3704 ) 3705 if expressions: 3706 elements["expressions"].extend(expressions) 3707 3708 grouping_sets = self._parse_grouping_sets() 3709 if grouping_sets: 3710 elements["grouping_sets"].extend(grouping_sets) 3711 3712 rollup = None 3713 cube = None 3714 totals = None 3715 3716 index = self._index 3717 with_ = self._match(TokenType.WITH) 3718 if self._match(TokenType.ROLLUP): 3719 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3720 elements["rollup"].extend(ensure_list(rollup)) 3721 3722 if self._match(TokenType.CUBE): 3723 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3724 elements["cube"].extend(ensure_list(cube)) 3725 3726 if self._match_text_seq("TOTALS"): 3727 totals = True 3728 elements["totals"] = True # type: ignore 3729 3730 if not (grouping_sets or rollup or cube or totals): 3731 if with_: 3732 self._retreat(index) 3733 break 3734 3735 return self.expression(exp.Group, **elements) # type: ignore 3736 3737 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3738 if not self._match(TokenType.GROUPING_SETS): 3739 return None 3740 3741 return self._parse_wrapped_csv(self._parse_grouping_set) 3742 3743 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3744 if self._match(TokenType.L_PAREN): 3745 grouping_set = self._parse_csv(self._parse_column) 3746 self._match_r_paren() 3747 return 
self.expression(exp.Tuple, expressions=grouping_set) 3748 3749 return self._parse_column() 3750 3751 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3752 if not skip_having_token and not self._match(TokenType.HAVING): 3753 return None 3754 return self.expression(exp.Having, this=self._parse_assignment()) 3755 3756 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3757 if not self._match(TokenType.QUALIFY): 3758 return None 3759 return self.expression(exp.Qualify, this=self._parse_assignment()) 3760 3761 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3762 if skip_start_token: 3763 start = None 3764 elif self._match(TokenType.START_WITH): 3765 start = self._parse_assignment() 3766 else: 3767 return None 3768 3769 self._match(TokenType.CONNECT_BY) 3770 nocycle = self._match_text_seq("NOCYCLE") 3771 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3772 exp.Prior, this=self._parse_bitwise() 3773 ) 3774 connect = self._parse_assignment() 3775 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3776 3777 if not start and self._match(TokenType.START_WITH): 3778 start = self._parse_assignment() 3779 3780 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3781 3782 def _parse_name_as_expression(self) -> exp.Alias: 3783 return self.expression( 3784 exp.Alias, 3785 alias=self._parse_id_var(any_token=True), 3786 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3787 ) 3788 3789 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3790 if self._match_text_seq("INTERPOLATE"): 3791 return self._parse_wrapped_csv(self._parse_name_as_expression) 3792 return None 3793 3794 def _parse_order( 3795 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3796 ) -> t.Optional[exp.Expression]: 3797 siblings = None 3798 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3799 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3800 return this 3801 3802 siblings = True 3803 3804 return self.expression( 3805 exp.Order, 3806 this=this, 3807 expressions=self._parse_csv(self._parse_ordered), 3808 interpolate=self._parse_interpolate(), 3809 siblings=siblings, 3810 ) 3811 3812 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3813 if not self._match(token): 3814 return None 3815 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3816 3817 def _parse_ordered( 3818 self, parse_method: t.Optional[t.Callable] = None 3819 ) -> t.Optional[exp.Ordered]: 3820 this = parse_method() if parse_method else self._parse_assignment() 3821 if not this: 3822 return None 3823 3824 asc = self._match(TokenType.ASC) 3825 desc = self._match(TokenType.DESC) or (asc and False) 3826 3827 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3828 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3829 3830 nulls_first = is_nulls_first or False 3831 explicitly_null_ordered = is_nulls_first or is_nulls_last 3832 3833 if ( 3834 not explicitly_null_ordered 3835 and ( 3836 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3837 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3838 ) 3839 and self.dialect.NULL_ORDERING != "nulls_are_last" 3840 ): 3841 nulls_first = True 3842 3843 if self._match_text_seq("WITH", "FILL"): 3844 with_fill = self.expression( 3845 exp.WithFill, 3846 **{ # type: ignore 3847 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3848 "to": 
self._match_text_seq("TO") and self._parse_bitwise(), 3849 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3850 }, 3851 ) 3852 else: 3853 with_fill = None 3854 3855 return self.expression( 3856 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3857 ) 3858 3859 def _parse_limit( 3860 self, 3861 this: t.Optional[exp.Expression] = None, 3862 top: bool = False, 3863 skip_limit_token: bool = False, 3864 ) -> t.Optional[exp.Expression]: 3865 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3866 comments = self._prev_comments 3867 if top: 3868 limit_paren = self._match(TokenType.L_PAREN) 3869 expression = self._parse_term() if limit_paren else self._parse_number() 3870 3871 if limit_paren: 3872 self._match_r_paren() 3873 else: 3874 expression = self._parse_term() 3875 3876 if self._match(TokenType.COMMA): 3877 offset = expression 3878 expression = self._parse_term() 3879 else: 3880 offset = None 3881 3882 limit_exp = self.expression( 3883 exp.Limit, 3884 this=this, 3885 expression=expression, 3886 offset=offset, 3887 comments=comments, 3888 expressions=self._parse_limit_by(), 3889 ) 3890 3891 return limit_exp 3892 3893 if self._match(TokenType.FETCH): 3894 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3895 direction = self._prev.text.upper() if direction else "FIRST" 3896 3897 count = self._parse_field(tokens=self.FETCH_TOKENS) 3898 percent = self._match(TokenType.PERCENT) 3899 3900 self._match_set((TokenType.ROW, TokenType.ROWS)) 3901 3902 only = self._match_text_seq("ONLY") 3903 with_ties = self._match_text_seq("WITH", "TIES") 3904 3905 if only and with_ties: 3906 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3907 3908 return self.expression( 3909 exp.Fetch, 3910 direction=direction, 3911 count=count, 3912 percent=percent, 3913 with_ties=with_ties, 3914 ) 3915 3916 return this 3917 3918 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3919 if not self._match(TokenType.OFFSET): 3920 return this 3921 3922 count = self._parse_term() 3923 self._match_set((TokenType.ROW, TokenType.ROWS)) 3924 3925 return self.expression( 3926 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3927 ) 3928 3929 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3930 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3931 3932 def _parse_locks(self) -> t.List[exp.Lock]: 3933 locks = [] 3934 while True: 3935 if self._match_text_seq("FOR", "UPDATE"): 3936 update = True 3937 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3938 "LOCK", "IN", "SHARE", "MODE" 3939 ): 3940 update = False 3941 else: 3942 break 3943 3944 expressions = None 3945 if self._match_text_seq("OF"): 3946 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3947 3948 wait: t.Optional[bool | exp.Expression] = None 3949 if self._match_text_seq("NOWAIT"): 3950 wait = True 3951 elif self._match_text_seq("WAIT"): 3952 wait = self._parse_primary() 3953 elif self._match_text_seq("SKIP", "LOCKED"): 3954 wait = False 3955 3956 locks.append( 3957 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3958 ) 3959 3960 return locks 3961 3962 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3963 while this and self._match_set(self.SET_OPERATIONS): 3964 token_type = self._prev.token_type 3965 3966 if token_type == TokenType.UNION: 3967 
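            # Map the matched set-operation token onto its expression class:
            # UNION and EXCEPT are handled explicitly, and any remaining member
            # of SET_OPERATIONS falls through to INTERSECT below.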
operation: t.Type[exp.SetOperation] = exp.Union 3968 elif token_type == TokenType.EXCEPT: 3969 operation = exp.Except 3970 else: 3971 operation = exp.Intersect 3972 3973 comments = self._prev.comments 3974 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3975 by_name = self._match_text_seq("BY", "NAME") 3976 expression = self._parse_select(nested=True, parse_set_operation=False) 3977 3978 this = self.expression( 3979 operation, 3980 comments=comments, 3981 this=this, 3982 distinct=distinct, 3983 by_name=by_name, 3984 expression=expression, 3985 ) 3986 3987 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 3988 expression = this.expression 3989 3990 if expression: 3991 for arg in self.SET_OP_MODIFIERS: 3992 expr = expression.args.get(arg) 3993 if expr: 3994 this.set(arg, expr.pop()) 3995 3996 return this 3997 3998 def _parse_expression(self) -> t.Optional[exp.Expression]: 3999 return self._parse_alias(self._parse_assignment()) 4000 4001 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4002 this = self._parse_disjunction() 4003 4004 while self._match_set(self.ASSIGNMENT): 4005 this = self.expression( 4006 self.ASSIGNMENT[self._prev.token_type], 4007 this=this, 4008 comments=self._prev_comments, 4009 expression=self._parse_assignment(), 4010 ) 4011 4012 return this 4013 4014 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4015 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4016 4017 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4018 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4019 4020 def _parse_equality(self) -> t.Optional[exp.Expression]: 4021 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4022 4023 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4024 return self._parse_tokens(self._parse_range, self.COMPARISON) 4025 4026 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4027 this = this or self._parse_bitwise() 4028 negate = self._match(TokenType.NOT) 4029 4030 if self._match_set(self.RANGE_PARSERS): 4031 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4032 if not expression: 4033 return this 4034 4035 this = expression 4036 elif self._match(TokenType.ISNULL): 4037 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4038 4039 # Postgres supports ISNULL and NOTNULL for conditions. 
4040 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4041 if self._match(TokenType.NOTNULL): 4042 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4043 this = self.expression(exp.Not, this=this) 4044 4045 if negate: 4046 this = self.expression(exp.Not, this=this) 4047 4048 if self._match(TokenType.IS): 4049 this = self._parse_is(this) 4050 4051 return this 4052 4053 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4054 index = self._index - 1 4055 negate = self._match(TokenType.NOT) 4056 4057 if self._match_text_seq("DISTINCT", "FROM"): 4058 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4059 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4060 4061 expression = self._parse_null() or self._parse_boolean() 4062 if not expression: 4063 self._retreat(index) 4064 return None 4065 4066 this = self.expression(exp.Is, this=this, expression=expression) 4067 return self.expression(exp.Not, this=this) if negate else this 4068 4069 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4070 unnest = self._parse_unnest(with_alias=False) 4071 if unnest: 4072 this = self.expression(exp.In, this=this, unnest=unnest) 4073 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4074 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4075 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4076 4077 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4078 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4079 else: 4080 this = self.expression(exp.In, this=this, expressions=expressions) 4081 4082 if matched_l_paren: 4083 self._match_r_paren(this) 4084 elif not self._match(TokenType.R_BRACKET, expression=this): 4085 self.raise_error("Expecting ]") 4086 else: 4087 this = self.expression(exp.In, this=this, field=self._parse_field()) 4088 4089 return this 4090 4091 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4092 low = self._parse_bitwise() 4093 self._match(TokenType.AND) 4094 high = self._parse_bitwise() 4095 return self.expression(exp.Between, this=this, low=low, high=high) 4096 4097 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4098 if not self._match(TokenType.ESCAPE): 4099 return this 4100 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4101 4102 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4103 index = self._index 4104 4105 if not self._match(TokenType.INTERVAL) and match_interval: 4106 return None 4107 4108 if self._match(TokenType.STRING, advance=False): 4109 this = self._parse_primary() 4110 else: 4111 this = self._parse_term() 4112 4113 if not this or ( 4114 isinstance(this, exp.Column) 4115 and not this.table 4116 and not this.this.quoted 4117 and this.name.upper() == "IS" 4118 ): 4119 self._retreat(index) 4120 return None 4121 4122 unit = self._parse_function() or ( 4123 not self._match(TokenType.ALIAS, advance=False) 4124 and self._parse_var(any_token=True, upper=True) 4125 ) 4126 4127 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4128 # each INTERVAL expression into this canonical form so it's easy to transpile 4129 if this and this.is_number: 4130 this = exp.Literal.string(this.name) 4131 elif this and this.is_string: 4132 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4133 if 
len(parts) == 1: 4134 if unit: 4135 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4136 self._retreat(self._index - 1) 4137 4138 this = exp.Literal.string(parts[0][0]) 4139 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4140 4141 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4142 unit = self.expression( 4143 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4144 ) 4145 4146 interval = self.expression(exp.Interval, this=this, unit=unit) 4147 4148 index = self._index 4149 self._match(TokenType.PLUS) 4150 4151 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4152 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4153 return self.expression( 4154 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4155 ) 4156 4157 self._retreat(index) 4158 return interval 4159 4160 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4161 this = self._parse_term() 4162 4163 while True: 4164 if self._match_set(self.BITWISE): 4165 this = self.expression( 4166 self.BITWISE[self._prev.token_type], 4167 this=this, 4168 expression=self._parse_term(), 4169 ) 4170 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4171 this = self.expression( 4172 exp.DPipe, 4173 this=this, 4174 expression=self._parse_term(), 4175 safe=not self.dialect.STRICT_STRING_CONCAT, 4176 ) 4177 elif self._match(TokenType.DQMARK): 4178 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4179 elif self._match_pair(TokenType.LT, TokenType.LT): 4180 this = self.expression( 4181 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4182 ) 4183 elif self._match_pair(TokenType.GT, TokenType.GT): 4184 this = self.expression( 4185 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4186 ) 4187 else: 4188 break 4189 4190 return this 4191 4192 def _parse_term(self) -> t.Optional[exp.Expression]: 4193 return self._parse_tokens(self._parse_factor, self.TERM) 4194 4195 def _parse_factor(self) -> t.Optional[exp.Expression]: 4196 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4197 this = parse_method() 4198 4199 while self._match_set(self.FACTOR): 4200 klass = self.FACTOR[self._prev.token_type] 4201 comments = self._prev_comments 4202 expression = parse_method() 4203 4204 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4205 self._retreat(self._index - 1) 4206 return this 4207 4208 this = self.expression(klass, this=this, comments=comments, expression=expression) 4209 4210 if isinstance(this, exp.Div): 4211 this.args["typed"] = self.dialect.TYPED_DIVISION 4212 this.args["safe"] = self.dialect.SAFE_DIVISION 4213 4214 return this 4215 4216 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4217 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4218 4219 def _parse_unary(self) -> t.Optional[exp.Expression]: 4220 if self._match_set(self.UNARY_PARSERS): 4221 return self.UNARY_PARSERS[self._prev.token_type](self) 4222 return self._parse_at_time_zone(self._parse_type()) 4223 4224 def _parse_type( 4225 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4226 ) -> t.Optional[exp.Expression]: 4227 interval = parse_interval and self._parse_interval() 4228 if interval: 4229 return interval 4230 4231 index = self._index 4232 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4233 4234 if data_type: 4235 index2 = 
self._index 4236 this = self._parse_primary() 4237 4238 if isinstance(this, exp.Literal): 4239 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4240 if parser: 4241 return parser(self, this, data_type) 4242 4243 return self.expression(exp.Cast, this=this, to=data_type) 4244 4245 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4246 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4247 # 4248 # If the index difference here is greater than 1, that means the parser itself must have 4249 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4250 # 4251 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4252 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4253 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4254 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4255 # 4256 # In these cases, we don't really want to return the converted type, but instead retreat 4257 # and try to parse a Column or Identifier in the section below. 4258 if data_type.expressions and index2 - index > 1: 4259 self._retreat(index2) 4260 return self._parse_column_ops(data_type) 4261 4262 self._retreat(index) 4263 4264 if fallback_to_identifier: 4265 return self._parse_id_var() 4266 4267 this = self._parse_column() 4268 return this and self._parse_column_ops(this) 4269 4270 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4271 this = self._parse_type() 4272 if not this: 4273 return None 4274 4275 if isinstance(this, exp.Column) and not this.table: 4276 this = exp.var(this.name.upper()) 4277 4278 return self.expression( 4279 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4280 ) 4281 4282 def _parse_types( 4283 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4284 ) -> t.Optional[exp.Expression]: 4285 index = self._index 4286 4287 this: t.Optional[exp.Expression] = None 4288 prefix = self._match_text_seq("SYSUDTLIB", ".") 4289 4290 if not self._match_set(self.TYPE_TOKENS): 4291 identifier = allow_identifiers and self._parse_id_var( 4292 any_token=False, tokens=(TokenType.VAR,) 4293 ) 4294 if identifier: 4295 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4296 4297 if len(tokens) != 1: 4298 self.raise_error("Unexpected identifier", self._prev) 4299 4300 if tokens[0].token_type in self.TYPE_TOKENS: 4301 self._prev = tokens[0] 4302 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4303 type_name = identifier.name 4304 4305 while self._match(TokenType.DOT): 4306 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4307 4308 this = exp.DataType.build(type_name, udt=True) 4309 else: 4310 self._retreat(self._index - 1) 4311 return None 4312 else: 4313 return None 4314 4315 type_token = self._prev.token_type 4316 4317 if type_token == TokenType.PSEUDO_TYPE: 4318 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4319 4320 if type_token == TokenType.OBJECT_IDENTIFIER: 4321 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4322 4323 # https://materialize.com/docs/sql/types/map/ 4324 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4325 key_type = self._parse_types( 4326 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4327 ) 4328 if not self._match(TokenType.FARROW): 4329 self._retreat(index)
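            # No FARROW (`=>`) after the key type means this was not actually a
            # Materialize-style MAP literal, so rewind to the saved index and
            # give up; this retreat-on-mismatch pattern is used throughout the
            # parser to keep speculative token consumption side-effect free.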
4330 return None 4331 4332 value_type = self._parse_types( 4333 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4334 ) 4335 if not self._match(TokenType.R_BRACKET): 4336 self._retreat(index) 4337 return None 4338 4339 return exp.DataType( 4340 this=exp.DataType.Type.MAP, 4341 expressions=[key_type, value_type], 4342 nested=True, 4343 prefix=prefix, 4344 ) 4345 4346 nested = type_token in self.NESTED_TYPE_TOKENS 4347 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4348 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4349 expressions = None 4350 maybe_func = False 4351 4352 if self._match(TokenType.L_PAREN): 4353 if is_struct: 4354 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4355 elif nested: 4356 expressions = self._parse_csv( 4357 lambda: self._parse_types( 4358 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4359 ) 4360 ) 4361 elif type_token in self.ENUM_TYPE_TOKENS: 4362 expressions = self._parse_csv(self._parse_equality) 4363 elif is_aggregate: 4364 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4365 any_token=False, tokens=(TokenType.VAR,) 4366 ) 4367 if not func_or_ident or not self._match(TokenType.COMMA): 4368 return None 4369 expressions = self._parse_csv( 4370 lambda: self._parse_types( 4371 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4372 ) 4373 ) 4374 expressions.insert(0, func_or_ident) 4375 else: 4376 expressions = self._parse_csv(self._parse_type_size) 4377 4378 if not expressions or not self._match(TokenType.R_PAREN): 4379 self._retreat(index) 4380 return None 4381 4382 maybe_func = True 4383 4384 values: t.Optional[t.List[exp.Expression]] = None 4385 4386 if nested and self._match(TokenType.LT): 4387 if is_struct: 4388 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4389 else: 4390 expressions = self._parse_csv( 4391 lambda: self._parse_types( 4392 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4393 ) 4394 ) 4395 4396 if not self._match(TokenType.GT): 4397 self.raise_error("Expecting >") 4398 4399 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4400 values = self._parse_csv(self._parse_assignment) 4401 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4402 4403 if type_token in self.TIMESTAMPS: 4404 if self._match_text_seq("WITH", "TIME", "ZONE"): 4405 maybe_func = False 4406 tz_type = ( 4407 exp.DataType.Type.TIMETZ 4408 if type_token in self.TIMES 4409 else exp.DataType.Type.TIMESTAMPTZ 4410 ) 4411 this = exp.DataType(this=tz_type, expressions=expressions) 4412 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4413 maybe_func = False 4414 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4415 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4416 maybe_func = False 4417 elif type_token == TokenType.INTERVAL: 4418 unit = self._parse_var(upper=True) 4419 if unit: 4420 if self._match_text_seq("TO"): 4421 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4422 4423 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4424 else: 4425 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4426 4427 if maybe_func and check_func: 4428 index2 = self._index 4429 peek = self._parse_string() 4430 4431 if not peek: 4432 self._retreat(index) 4433 return None 4434 4435 self._retreat(index2) 4436 4437 if not this: 4438 if 
self._match_text_seq("UNSIGNED"): 4439 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4440 if not unsigned_type_token: 4441 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4442 4443 type_token = unsigned_type_token or type_token 4444 4445 this = exp.DataType( 4446 this=exp.DataType.Type[type_token.value], 4447 expressions=expressions, 4448 nested=nested, 4449 values=values, 4450 prefix=prefix, 4451 ) 4452 elif expressions: 4453 this.set("expressions", expressions) 4454 4455 # https://materialize.com/docs/sql/types/list/#type-name 4456 while self._match(TokenType.LIST): 4457 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4458 4459 index = self._index 4460 4461 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4462 matched_array = self._match(TokenType.ARRAY) 4463 4464 while self._curr: 4465 matched_l_bracket = self._match(TokenType.L_BRACKET) 4466 if not matched_l_bracket and not matched_array: 4467 break 4468 4469 matched_array = False 4470 values = self._parse_csv(self._parse_assignment) or None 4471 if values and not schema: 4472 self._retreat(index) 4473 break 4474 4475 this = exp.DataType( 4476 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4477 ) 4478 self._match(TokenType.R_BRACKET) 4479 4480 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4481 converter = self.TYPE_CONVERTERS.get(this.this) 4482 if converter: 4483 this = converter(t.cast(exp.DataType, this)) 4484 4485 return this 4486 4487 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4488 index = self._index 4489 this = ( 4490 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4491 or self._parse_id_var() 4492 ) 4493 self._match(TokenType.COLON) 4494 4495 if ( 4496 type_required 4497 and not isinstance(this, exp.DataType) 4498 and not self._match_set(self.TYPE_TOKENS, advance=False) 4499 ): 4500 self._retreat(index) 4501 return self._parse_types() 4502 4503 return self._parse_column_def(this) 4504 4505 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4506 if not self._match_text_seq("AT", "TIME", "ZONE"): 4507 return this 4508 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4509 4510 def _parse_column(self) -> t.Optional[exp.Expression]: 4511 this = self._parse_column_reference() 4512 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4513 4514 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4515 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4516 4517 return column 4518 4519 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4520 this = self._parse_field() 4521 if ( 4522 not this 4523 and self._match(TokenType.VALUES, advance=False) 4524 and self.VALUES_FOLLOWED_BY_PAREN 4525 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4526 ): 4527 this = self._parse_id_var() 4528 4529 if isinstance(this, exp.Identifier): 4530 # We bubble up comments from the Identifier to the Column 4531 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4532 4533 return this 4534 4535 def _parse_colon_as_json_extract( 4536 self, this: t.Optional[exp.Expression] 4537 ) -> t.Optional[exp.Expression]: 4538 casts = [] 4539 json_path = [] 4540 4541 while self._match(TokenType.COLON): 4542 start_index = self._index 4543 4544 # Snowflake allows reserved keywords as json keys but 
advance_any() excludes TokenType.SELECT from any_tokens=True 4545 path = self._parse_column_ops( 4546 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4547 ) 4548 4549 # The cast :: operator has a lower precedence than the extraction operator :, so 4550 # we rearrange the AST appropriately to avoid casting the JSON path 4551 while isinstance(path, exp.Cast): 4552 casts.append(path.to) 4553 path = path.this 4554 4555 if casts: 4556 dcolon_offset = next( 4557 i 4558 for i, t in enumerate(self._tokens[start_index:]) 4559 if t.token_type == TokenType.DCOLON 4560 ) 4561 end_token = self._tokens[start_index + dcolon_offset - 1] 4562 else: 4563 end_token = self._prev 4564 4565 if path: 4566 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4567 4568 if json_path: 4569 this = self.expression( 4570 exp.JSONExtract, 4571 this=this, 4572 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4573 ) 4574 4575 while casts: 4576 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4577 4578 return this 4579 4580 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4581 this = self._parse_bracket(this) 4582 4583 while self._match_set(self.COLUMN_OPERATORS): 4584 op_token = self._prev.token_type 4585 op = self.COLUMN_OPERATORS.get(op_token) 4586 4587 if op_token == TokenType.DCOLON: 4588 field = self._parse_types() 4589 if not field: 4590 self.raise_error("Expected type") 4591 elif op and self._curr: 4592 field = self._parse_column_reference() 4593 else: 4594 field = self._parse_field(any_token=True, anonymous_func=True) 4595 4596 if isinstance(field, exp.Func) and this: 4597 # bigquery allows function calls like x.y.count(...) 4598 # SAFE.SUBSTR(...) 4599 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4600 this = exp.replace_tree( 4601 this, 4602 lambda n: ( 4603 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4604 if n.table 4605 else n.this 4606 ) 4607 if isinstance(n, exp.Column) 4608 else n, 4609 ) 4610 4611 if op: 4612 this = op(self, this, field) 4613 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4614 this = self.expression( 4615 exp.Column, 4616 this=field, 4617 table=this.this, 4618 db=this.args.get("table"), 4619 catalog=this.args.get("db"), 4620 ) 4621 else: 4622 this = self.expression(exp.Dot, this=this, expression=field) 4623 4624 this = self._parse_bracket(this) 4625 4626 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4627 4628 def _parse_primary(self) -> t.Optional[exp.Expression]: 4629 if self._match_set(self.PRIMARY_PARSERS): 4630 token_type = self._prev.token_type 4631 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4632 4633 if token_type == TokenType.STRING: 4634 expressions = [primary] 4635 while self._match(TokenType.STRING): 4636 expressions.append(exp.Literal.string(self._prev.text)) 4637 4638 if len(expressions) > 1: 4639 return self.expression(exp.Concat, expressions=expressions) 4640 4641 return primary 4642 4643 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4644 return exp.Literal.number(f"0.{self._prev.text}") 4645 4646 if self._match(TokenType.L_PAREN): 4647 comments = self._prev_comments 4648 query = self._parse_select() 4649 4650 if query: 4651 expressions = [query] 4652 else: 4653 expressions = self._parse_expressions() 4654 4655 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4656 4657 if not this 
and self._match(TokenType.R_PAREN, advance=False): 4658 this = self.expression(exp.Tuple) 4659 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4660 this = self._parse_subquery(this=this, parse_alias=False) 4661 elif isinstance(this, exp.Subquery): 4662 this = self._parse_subquery( 4663 this=self._parse_set_operations(this), parse_alias=False 4664 ) 4665 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4666 this = self.expression(exp.Tuple, expressions=expressions) 4667 else: 4668 this = self.expression(exp.Paren, this=this) 4669 4670 if this: 4671 this.add_comments(comments) 4672 4673 self._match_r_paren(expression=this) 4674 return this 4675 4676 return None 4677 4678 def _parse_field( 4679 self, 4680 any_token: bool = False, 4681 tokens: t.Optional[t.Collection[TokenType]] = None, 4682 anonymous_func: bool = False, 4683 ) -> t.Optional[exp.Expression]: 4684 if anonymous_func: 4685 field = ( 4686 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4687 or self._parse_primary() 4688 ) 4689 else: 4690 field = self._parse_primary() or self._parse_function( 4691 anonymous=anonymous_func, any_token=any_token 4692 ) 4693 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4694 4695 def _parse_function( 4696 self, 4697 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4698 anonymous: bool = False, 4699 optional_parens: bool = True, 4700 any_token: bool = False, 4701 ) -> t.Optional[exp.Expression]: 4702 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4703 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4704 fn_syntax = False 4705 if ( 4706 self._match(TokenType.L_BRACE, advance=False) 4707 and self._next 4708 and self._next.text.upper() == "FN" 4709 ): 4710 self._advance(2) 4711 fn_syntax = True 4712 4713 func = self._parse_function_call( 4714 functions=functions, 4715 anonymous=anonymous, 4716 optional_parens=optional_parens, 4717 any_token=any_token, 4718 ) 4719 4720 if fn_syntax: 4721 self._match(TokenType.R_BRACE) 4722 4723 return func 4724 4725 def _parse_function_call( 4726 self, 4727 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4728 anonymous: bool = False, 4729 optional_parens: bool = True, 4730 any_token: bool = False, 4731 ) -> t.Optional[exp.Expression]: 4732 if not self._curr: 4733 return None 4734 4735 comments = self._curr.comments 4736 token_type = self._curr.token_type 4737 this = self._curr.text 4738 upper = this.upper() 4739 4740 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4741 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4742 self._advance() 4743 return self._parse_window(parser(self)) 4744 4745 if not self._next or self._next.token_type != TokenType.L_PAREN: 4746 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4747 self._advance() 4748 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4749 4750 return None 4751 4752 if any_token: 4753 if token_type in self.RESERVED_TOKENS: 4754 return None 4755 elif token_type not in self.FUNC_TOKENS: 4756 return None 4757 4758 self._advance(2) 4759 4760 parser = self.FUNCTION_PARSERS.get(upper) 4761 if parser and not anonymous: 4762 this = parser(self) 4763 else: 4764 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4765 4766 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4767 this = self.expression(subquery_predicate, this=self._parse_select()) 4768 self._match_r_paren() 4769 return this 4770 
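        # Not a subquery predicate, so fall through to ordinary function
        # resolution: look the upper-cased name up in FUNCTIONS, parse the
        # argument list, and produce an exp.Anonymous node for unknown names.
        # Illustrative sketch (MY_UDF is a made-up name, for demonstration
        # only):
        #
        #     >>> import sqlglot
        #     >>> from sqlglot import exp
        #     >>> sqlglot.parse_one("SELECT MY_UDF(1)").find(exp.Anonymous).name
        #     'MY_UDF'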
4771 if functions is None: 4772 functions = self.FUNCTIONS 4773 4774 function = functions.get(upper) 4775 4776 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4777 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4778 4779 if alias: 4780 args = self._kv_to_prop_eq(args) 4781 4782 if function and not anonymous: 4783 if "dialect" in function.__code__.co_varnames: 4784 func = function(args, dialect=self.dialect) 4785 else: 4786 func = function(args) 4787 4788 func = self.validate_expression(func, args) 4789 if not self.dialect.NORMALIZE_FUNCTIONS: 4790 func.meta["name"] = this 4791 4792 this = func 4793 else: 4794 if token_type == TokenType.IDENTIFIER: 4795 this = exp.Identifier(this=this, quoted=True) 4796 this = self.expression(exp.Anonymous, this=this, expressions=args) 4797 4798 if isinstance(this, exp.Expression): 4799 this.add_comments(comments) 4800 4801 self._match_r_paren(this) 4802 return self._parse_window(this) 4803 4804 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4805 transformed = [] 4806 4807 for e in expressions: 4808 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4809 if isinstance(e, exp.Alias): 4810 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4811 4812 if not isinstance(e, exp.PropertyEQ): 4813 e = self.expression( 4814 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4815 ) 4816 4817 if isinstance(e.this, exp.Column): 4818 e.this.replace(e.this.this) 4819 4820 transformed.append(e) 4821 4822 return transformed 4823 4824 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4825 return self._parse_column_def(self._parse_id_var()) 4826 4827 def _parse_user_defined_function( 4828 self, kind: t.Optional[TokenType] = None 4829 ) -> t.Optional[exp.Expression]: 4830 this = self._parse_id_var() 4831 4832 while self._match(TokenType.DOT): 4833 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4834 4835 if not self._match(TokenType.L_PAREN): 4836 return this 4837 4838 expressions = self._parse_csv(self._parse_function_parameter) 4839 self._match_r_paren() 4840 return self.expression( 4841 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4842 ) 4843 4844 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4845 literal = self._parse_primary() 4846 if literal: 4847 return self.expression(exp.Introducer, this=token.text, expression=literal) 4848 4849 return self.expression(exp.Identifier, this=token.text) 4850 4851 def _parse_session_parameter(self) -> exp.SessionParameter: 4852 kind = None 4853 this = self._parse_id_var() or self._parse_primary() 4854 4855 if this and self._match(TokenType.DOT): 4856 kind = this.name 4857 this = self._parse_var() or self._parse_primary() 4858 4859 return self.expression(exp.SessionParameter, this=this, kind=kind) 4860 4861 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4862 return self._parse_id_var() 4863 4864 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4865 index = self._index 4866 4867 if self._match(TokenType.L_PAREN): 4868 expressions = t.cast( 4869 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4870 ) 4871 4872 if not self._match(TokenType.R_PAREN): 4873 self._retreat(index) 4874 else: 4875 expressions = [self._parse_lambda_arg()] 4876 4877 if self._match_set(self.LAMBDAS): 4878 return self.LAMBDAS[self._prev.token_type](self, expressions) 4879 4880 
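        # No lambda arrow followed, so this was not a lambda after all: rewind
        # and parse a regular (possibly DISTINCT) expression instead. For
        # reference, a lambda that does take the branch above looks like
        # `x -> x + 1`, e.g. in Spark's TRANSFORM(xs, x -> x + 1) (an assumed
        # example for illustration).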
self._retreat(index) 4881 4882 this: t.Optional[exp.Expression] 4883 4884 if self._match(TokenType.DISTINCT): 4885 this = self.expression( 4886 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 4887 ) 4888 else: 4889 this = self._parse_select_or_expression(alias=alias) 4890 4891 return self._parse_limit( 4892 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4893 ) 4894 4895 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4896 index = self._index 4897 if not self._match(TokenType.L_PAREN): 4898 return this 4899 4900 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 4901 # expr can be of both types 4902 if self._match_set(self.SELECT_START_TOKENS): 4903 self._retreat(index) 4904 return this 4905 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4906 self._match_r_paren() 4907 return self.expression(exp.Schema, this=this, expressions=args) 4908 4909 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4910 return self._parse_column_def(self._parse_field(any_token=True)) 4911 4912 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4913 # column defs are not really columns, they're identifiers 4914 if isinstance(this, exp.Column): 4915 this = this.this 4916 4917 kind = self._parse_types(schema=True) 4918 4919 if self._match_text_seq("FOR", "ORDINALITY"): 4920 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4921 4922 constraints: t.List[exp.Expression] = [] 4923 4924 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4925 ("ALIAS", "MATERIALIZED") 4926 ): 4927 persisted = self._prev.text.upper() == "MATERIALIZED" 4928 constraints.append( 4929 self.expression( 4930 exp.ComputedColumnConstraint, 4931 this=self._parse_assignment(), 4932 persisted=persisted or self._match_text_seq("PERSISTED"), 4933 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4934 ) 4935 ) 4936 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4937 self._match(TokenType.ALIAS) 4938 constraints.append( 4939 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4940 ) 4941 4942 while True: 4943 constraint = self._parse_column_constraint() 4944 if not constraint: 4945 break 4946 constraints.append(constraint) 4947 4948 if not kind and not constraints: 4949 return this 4950 4951 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4952 4953 def _parse_auto_increment( 4954 self, 4955 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4956 start = None 4957 increment = None 4958 4959 if self._match(TokenType.L_PAREN, advance=False): 4960 args = self._parse_wrapped_csv(self._parse_bitwise) 4961 start = seq_get(args, 0) 4962 increment = seq_get(args, 1) 4963 elif self._match_text_seq("START"): 4964 start = self._parse_bitwise() 4965 self._match_text_seq("INCREMENT") 4966 increment = self._parse_bitwise() 4967 4968 if start and increment: 4969 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4970 4971 return exp.AutoIncrementColumnConstraint() 4972 4973 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4974 if not self._match_text_seq("REFRESH"): 4975 self._retreat(self._index - 1) 4976 return None 4977 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4978 4979 def 
_parse_compress(self) -> exp.CompressColumnConstraint: 4980 if self._match(TokenType.L_PAREN, advance=False): 4981 return self.expression( 4982 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4983 ) 4984 4985 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4986 4987 def _parse_generated_as_identity( 4988 self, 4989 ) -> ( 4990 exp.GeneratedAsIdentityColumnConstraint 4991 | exp.ComputedColumnConstraint 4992 | exp.GeneratedAsRowColumnConstraint 4993 ): 4994 if self._match_text_seq("BY", "DEFAULT"): 4995 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4996 this = self.expression( 4997 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4998 ) 4999 else: 5000 self._match_text_seq("ALWAYS") 5001 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5002 5003 self._match(TokenType.ALIAS) 5004 5005 if self._match_text_seq("ROW"): 5006 start = self._match_text_seq("START") 5007 if not start: 5008 self._match(TokenType.END) 5009 hidden = self._match_text_seq("HIDDEN") 5010 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5011 5012 identity = self._match_text_seq("IDENTITY") 5013 5014 if self._match(TokenType.L_PAREN): 5015 if self._match(TokenType.START_WITH): 5016 this.set("start", self._parse_bitwise()) 5017 if self._match_text_seq("INCREMENT", "BY"): 5018 this.set("increment", self._parse_bitwise()) 5019 if self._match_text_seq("MINVALUE"): 5020 this.set("minvalue", self._parse_bitwise()) 5021 if self._match_text_seq("MAXVALUE"): 5022 this.set("maxvalue", self._parse_bitwise()) 5023 5024 if self._match_text_seq("CYCLE"): 5025 this.set("cycle", True) 5026 elif self._match_text_seq("NO", "CYCLE"): 5027 this.set("cycle", False) 5028 5029 if not identity: 5030 this.set("expression", self._parse_range()) 5031 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5032 args = self._parse_csv(self._parse_bitwise) 5033 this.set("start", seq_get(args, 0)) 5034 this.set("increment", seq_get(args, 1)) 5035 5036 self._match_r_paren() 5037 5038 return this 5039 5040 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5041 self._match_text_seq("LENGTH") 5042 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5043 5044 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5045 if self._match_text_seq("NULL"): 5046 return self.expression(exp.NotNullColumnConstraint) 5047 if self._match_text_seq("CASESPECIFIC"): 5048 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5049 if self._match_text_seq("FOR", "REPLICATION"): 5050 return self.expression(exp.NotForReplicationColumnConstraint) 5051 return None 5052 5053 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5054 if self._match(TokenType.CONSTRAINT): 5055 this = self._parse_id_var() 5056 else: 5057 this = None 5058 5059 if self._match_texts(self.CONSTRAINT_PARSERS): 5060 return self.expression( 5061 exp.ColumnConstraint, 5062 this=this, 5063 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5064 ) 5065 5066 return this 5067 5068 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5069 if not self._match(TokenType.CONSTRAINT): 5070 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5071 5072 return self.expression( 5073 exp.Constraint, 5074 this=self._parse_id_var(), 5075 expressions=self._parse_unnamed_constraints(), 5076 ) 5077 5078 def 
_parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5079 constraints = [] 5080 while True: 5081 constraint = self._parse_unnamed_constraint() or self._parse_function() 5082 if not constraint: 5083 break 5084 constraints.append(constraint) 5085 5086 return constraints 5087 5088 def _parse_unnamed_constraint( 5089 self, constraints: t.Optional[t.Collection[str]] = None 5090 ) -> t.Optional[exp.Expression]: 5091 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5092 constraints or self.CONSTRAINT_PARSERS 5093 ): 5094 return None 5095 5096 constraint = self._prev.text.upper() 5097 if constraint not in self.CONSTRAINT_PARSERS: 5098 self.raise_error(f"No parser found for schema constraint {constraint}.") 5099 5100 return self.CONSTRAINT_PARSERS[constraint](self) 5101 5102 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5103 self._match_text_seq("KEY") 5104 return self.expression( 5105 exp.UniqueColumnConstraint, 5106 this=self._parse_schema(self._parse_id_var(any_token=False)), 5107 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5108 on_conflict=self._parse_on_conflict(), 5109 ) 5110 5111 def _parse_key_constraint_options(self) -> t.List[str]: 5112 options = [] 5113 while True: 5114 if not self._curr: 5115 break 5116 5117 if self._match(TokenType.ON): 5118 action = None 5119 on = self._advance_any() and self._prev.text 5120 5121 if self._match_text_seq("NO", "ACTION"): 5122 action = "NO ACTION" 5123 elif self._match_text_seq("CASCADE"): 5124 action = "CASCADE" 5125 elif self._match_text_seq("RESTRICT"): 5126 action = "RESTRICT" 5127 elif self._match_pair(TokenType.SET, TokenType.NULL): 5128 action = "SET NULL" 5129 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5130 action = "SET DEFAULT" 5131 else: 5132 self.raise_error("Invalid key constraint") 5133 5134 options.append(f"ON {on} {action}") 5135 elif self._match_text_seq("NOT", "ENFORCED"): 5136 options.append("NOT ENFORCED") 5137 elif self._match_text_seq("DEFERRABLE"): 5138 options.append("DEFERRABLE") 5139 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5140 options.append("INITIALLY DEFERRED") 5141 elif self._match_text_seq("NORELY"): 5142 options.append("NORELY") 5143 elif self._match_text_seq("MATCH", "FULL"): 5144 options.append("MATCH FULL") 5145 else: 5146 break 5147 5148 return options 5149 5150 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5151 if match and not self._match(TokenType.REFERENCES): 5152 return None 5153 5154 expressions = None 5155 this = self._parse_table(schema=True) 5156 options = self._parse_key_constraint_options() 5157 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5158 5159 def _parse_foreign_key(self) -> exp.ForeignKey: 5160 expressions = self._parse_wrapped_id_vars() 5161 reference = self._parse_references() 5162 options = {} 5163 5164 while self._match(TokenType.ON): 5165 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5166 self.raise_error("Expected DELETE or UPDATE") 5167 5168 kind = self._prev.text.lower() 5169 5170 if self._match_text_seq("NO", "ACTION"): 5171 action = "NO ACTION" 5172 elif self._match(TokenType.SET): 5173 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5174 action = "SET " + self._prev.text.upper() 5175 else: 5176 self._advance() 5177 action = self._prev.text.upper() 5178 5179 options[kind] = action 5180 5181 return self.expression( 5182 exp.ForeignKey, 5183 expressions=expressions, 5184 
reference=reference, 5185 **options, # type: ignore 5186 ) 5187 5188 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5189 return self._parse_field() 5190 5191 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5192 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5193 self._retreat(self._index - 1) 5194 return None 5195 5196 id_vars = self._parse_wrapped_id_vars() 5197 return self.expression( 5198 exp.PeriodForSystemTimeConstraint, 5199 this=seq_get(id_vars, 0), 5200 expression=seq_get(id_vars, 1), 5201 ) 5202 5203 def _parse_primary_key( 5204 self, wrapped_optional: bool = False, in_props: bool = False 5205 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5206 desc = ( 5207 self._match_set((TokenType.ASC, TokenType.DESC)) 5208 and self._prev.token_type == TokenType.DESC 5209 ) 5210 5211 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5212 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5213 5214 expressions = self._parse_wrapped_csv( 5215 self._parse_primary_key_part, optional=wrapped_optional 5216 ) 5217 options = self._parse_key_constraint_options() 5218 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5219 5220 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5221 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5222 5223 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5224 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5225 return this 5226 5227 bracket_kind = self._prev.token_type 5228 expressions = self._parse_csv( 5229 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5230 ) 5231 5232 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5233 self.raise_error("Expected ]") 5234 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5235 self.raise_error("Expected }") 5236 5237 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5238 if bracket_kind == TokenType.L_BRACE: 5239 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5240 elif not this: 5241 this = self.expression(exp.Array, expressions=expressions) 5242 else: 5243 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5244 if constructor_type: 5245 return self.expression(constructor_type, expressions=expressions) 5246 5247 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5248 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5249 5250 self._add_comments(this) 5251 return self._parse_bracket(this) 5252 5253 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5254 if self._match(TokenType.COLON): 5255 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5256 return this 5257 5258 def _parse_case(self) -> t.Optional[exp.Expression]: 5259 ifs = [] 5260 default = None 5261 5262 comments = self._prev_comments 5263 expression = self._parse_assignment() 5264 5265 while self._match(TokenType.WHEN): 5266 this = self._parse_assignment() 5267 self._match(TokenType.THEN) 5268 then = self._parse_assignment() 5269 ifs.append(self.expression(exp.If, this=this, true=then)) 5270 5271 if self._match(TokenType.ELSE): 5272 default = self._parse_assignment() 5273 5274 if not self._match(TokenType.END): 5275 if 
isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5276 default = exp.column("interval") 5277 else: 5278 self.raise_error("Expected END after CASE", self._prev) 5279 5280 return self.expression( 5281 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5282 ) 5283 5284 def _parse_if(self) -> t.Optional[exp.Expression]: 5285 if self._match(TokenType.L_PAREN): 5286 args = self._parse_csv(self._parse_assignment) 5287 this = self.validate_expression(exp.If.from_arg_list(args), args) 5288 self._match_r_paren() 5289 else: 5290 index = self._index - 1 5291 5292 if self.NO_PAREN_IF_COMMANDS and index == 0: 5293 return self._parse_as_command(self._prev) 5294 5295 condition = self._parse_assignment() 5296 5297 if not condition: 5298 self._retreat(index) 5299 return None 5300 5301 self._match(TokenType.THEN) 5302 true = self._parse_assignment() 5303 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5304 self._match(TokenType.END) 5305 this = self.expression(exp.If, this=condition, true=true, false=false) 5306 5307 return this 5308 5309 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5310 if not self._match_text_seq("VALUE", "FOR"): 5311 self._retreat(self._index - 1) 5312 return None 5313 5314 return self.expression( 5315 exp.NextValueFor, 5316 this=self._parse_column(), 5317 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5318 ) 5319 5320 def _parse_extract(self) -> exp.Extract: 5321 this = self._parse_function() or self._parse_var() or self._parse_type() 5322 5323 if self._match(TokenType.FROM): 5324 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5325 5326 if not self._match(TokenType.COMMA): 5327 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5328 5329 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5330 5331 def _parse_gap_fill(self) -> exp.GapFill: 5332 self._match(TokenType.TABLE) 5333 this = self._parse_table() 5334 5335 self._match(TokenType.COMMA) 5336 args = [this, *self._parse_csv(self._parse_lambda)] 5337 5338 gap_fill = exp.GapFill.from_arg_list(args) 5339 return self.validate_expression(gap_fill, args) 5340 5341 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5342 this = self._parse_assignment() 5343 5344 if not self._match(TokenType.ALIAS): 5345 if self._match(TokenType.COMMA): 5346 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5347 5348 self.raise_error("Expected AS after CAST") 5349 5350 fmt = None 5351 to = self._parse_types() 5352 5353 if self._match(TokenType.FORMAT): 5354 fmt_string = self._parse_string() 5355 fmt = self._parse_at_time_zone(fmt_string) 5356 5357 if not to: 5358 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5359 if to.this in exp.DataType.TEMPORAL_TYPES: 5360 this = self.expression( 5361 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5362 this=this, 5363 format=exp.Literal.string( 5364 format_time( 5365 fmt_string.this if fmt_string else "", 5366 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5367 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5368 ) 5369 ), 5370 ) 5371 5372 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5373 this.set("zone", fmt.args["zone"]) 5374 return this 5375 elif not to: 5376 self.raise_error("Expected TYPE after CAST") 5377 elif isinstance(to, exp.Identifier): 5378 to = exp.DataType.build(to.name, 
udt=True) 5379 elif to.this == exp.DataType.Type.CHAR: 5380 if self._match(TokenType.CHARACTER_SET): 5381 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5382 5383 return self.expression( 5384 exp.Cast if strict else exp.TryCast, 5385 this=this, 5386 to=to, 5387 format=fmt, 5388 safe=safe, 5389 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5390 ) 5391 5392 def _parse_string_agg(self) -> exp.Expression: 5393 if self._match(TokenType.DISTINCT): 5394 args: t.List[t.Optional[exp.Expression]] = [ 5395 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5396 ] 5397 if self._match(TokenType.COMMA): 5398 args.extend(self._parse_csv(self._parse_assignment)) 5399 else: 5400 args = self._parse_csv(self._parse_assignment) # type: ignore 5401 5402 index = self._index 5403 if not self._match(TokenType.R_PAREN) and args: 5404 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5405 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5406 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5407 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5408 5409 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5410 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5411 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5412 if not self._match_text_seq("WITHIN", "GROUP"): 5413 self._retreat(index) 5414 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5415 5416 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5417 order = self._parse_order(this=seq_get(args, 0)) 5418 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5419 5420 def _parse_convert( 5421 self, strict: bool, safe: t.Optional[bool] = None 5422 ) -> t.Optional[exp.Expression]: 5423 this = self._parse_bitwise() 5424 5425 if self._match(TokenType.USING): 5426 to: t.Optional[exp.Expression] = self.expression( 5427 exp.CharacterSet, this=self._parse_var() 5428 ) 5429 elif self._match(TokenType.COMMA): 5430 to = self._parse_types() 5431 else: 5432 to = None 5433 5434 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5435 5436 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5437 """ 5438 There are generally two variants of the DECODE function: 5439 5440 - DECODE(bin, charset) 5441 - DECODE(expression, search, result [, search, result] ... [, default]) 5442 5443 The second variant will always be parsed into a CASE expression. Note that NULL 5444 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5445 instead of relying on pattern matching. 
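        For example (an illustrative sketch, assuming the default dialect),
        DECODE(x, 1, 'one', 'other') is parsed roughly as
        CASE WHEN x = 1 THEN 'one' ELSE 'other' END, and a NULL search value
        becomes a WHEN x IS NULL branch.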
5446 """ 5447 args = self._parse_csv(self._parse_assignment) 5448 5449 if len(args) < 3: 5450 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5451 5452 expression, *expressions = args 5453 if not expression: 5454 return None 5455 5456 ifs = [] 5457 for search, result in zip(expressions[::2], expressions[1::2]): 5458 if not search or not result: 5459 return None 5460 5461 if isinstance(search, exp.Literal): 5462 ifs.append( 5463 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5464 ) 5465 elif isinstance(search, exp.Null): 5466 ifs.append( 5467 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5468 ) 5469 else: 5470 cond = exp.or_( 5471 exp.EQ(this=expression.copy(), expression=search), 5472 exp.and_( 5473 exp.Is(this=expression.copy(), expression=exp.Null()), 5474 exp.Is(this=search.copy(), expression=exp.Null()), 5475 copy=False, 5476 ), 5477 copy=False, 5478 ) 5479 ifs.append(exp.If(this=cond, true=result)) 5480 5481 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5482 5483 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5484 self._match_text_seq("KEY") 5485 key = self._parse_column() 5486 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5487 self._match_text_seq("VALUE") 5488 value = self._parse_bitwise() 5489 5490 if not key and not value: 5491 return None 5492 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5493 5494 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5495 if not this or not self._match_text_seq("FORMAT", "JSON"): 5496 return this 5497 5498 return self.expression(exp.FormatJson, this=this) 5499 5500 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5501 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5502 for value in values: 5503 if self._match_text_seq(value, "ON", on): 5504 return f"{value} ON {on}" 5505 5506 return None 5507 5508 @t.overload 5509 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5510 5511 @t.overload 5512 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5513 5514 def _parse_json_object(self, agg=False): 5515 star = self._parse_star() 5516 expressions = ( 5517 [star] 5518 if star 5519 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5520 ) 5521 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5522 5523 unique_keys = None 5524 if self._match_text_seq("WITH", "UNIQUE"): 5525 unique_keys = True 5526 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5527 unique_keys = False 5528 5529 self._match_text_seq("KEYS") 5530 5531 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5532 self._parse_type() 5533 ) 5534 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5535 5536 return self.expression( 5537 exp.JSONObjectAgg if agg else exp.JSONObject, 5538 expressions=expressions, 5539 null_handling=null_handling, 5540 unique_keys=unique_keys, 5541 return_type=return_type, 5542 encoding=encoding, 5543 ) 5544 5545 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5546 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5547 if not self._match_text_seq("NESTED"): 5548 this = self._parse_id_var() 5549 kind = self._parse_types(allow_identifiers=False) 5550 nested = None 5551 else: 5552 this = None 5553 kind = None 5554 nested = True 5555 5556 path = self._match_text_seq("PATH") and self._parse_string() 5557 nested_schema = nested and self._parse_json_schema() 5558 5559 return self.expression( 5560 exp.JSONColumnDef, 5561 this=this, 5562 kind=kind, 5563 path=path, 5564 nested_schema=nested_schema, 5565 ) 5566 5567 def _parse_json_schema(self) -> exp.JSONSchema: 5568 self._match_text_seq("COLUMNS") 5569 return self.expression( 5570 exp.JSONSchema, 5571 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5572 ) 5573 5574 def _parse_json_table(self) -> exp.JSONTable: 5575 this = self._parse_format_json(self._parse_bitwise()) 5576 path = self._match(TokenType.COMMA) and self._parse_string() 5577 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5578 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5579 schema = self._parse_json_schema() 5580 5581 return exp.JSONTable( 5582 this=this, 5583 schema=schema, 5584 path=path, 5585 error_handling=error_handling, 5586 empty_handling=empty_handling, 5587 ) 5588 5589 def _parse_match_against(self) -> exp.MatchAgainst: 5590 expressions = self._parse_csv(self._parse_column) 5591 5592 self._match_text_seq(")", "AGAINST", "(") 5593 5594 this = self._parse_string() 5595 5596 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5597 modifier = "IN NATURAL LANGUAGE MODE" 5598 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5599 modifier = f"{modifier} WITH QUERY EXPANSION" 5600 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5601 modifier = "IN BOOLEAN MODE" 5602 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5603 modifier = "WITH QUERY EXPANSION" 5604 else: 5605 modifier = None 5606 5607 return self.expression( 5608 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5609 ) 5610 5611 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5612 def _parse_open_json(self) -> exp.OpenJSON: 5613 this = self._parse_bitwise() 5614 path = self._match(TokenType.COMMA) and self._parse_string() 5615 5616 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5617 this = self._parse_field(any_token=True) 5618 kind = self._parse_types() 5619 path = 
self._parse_string() 5620 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5621 5622 return self.expression( 5623 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5624 ) 5625 5626 expressions = None 5627 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5628 self._match_l_paren() 5629 expressions = self._parse_csv(_parse_open_json_column_def) 5630 5631 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5632 5633 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5634 args = self._parse_csv(self._parse_bitwise) 5635 5636 if self._match(TokenType.IN): 5637 return self.expression( 5638 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5639 ) 5640 5641 if haystack_first: 5642 haystack = seq_get(args, 0) 5643 needle = seq_get(args, 1) 5644 else: 5645 needle = seq_get(args, 0) 5646 haystack = seq_get(args, 1) 5647 5648 return self.expression( 5649 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5650 ) 5651 5652 def _parse_predict(self) -> exp.Predict: 5653 self._match_text_seq("MODEL") 5654 this = self._parse_table() 5655 5656 self._match(TokenType.COMMA) 5657 self._match_text_seq("TABLE") 5658 5659 return self.expression( 5660 exp.Predict, 5661 this=this, 5662 expression=self._parse_table(), 5663 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5664 ) 5665 5666 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5667 args = self._parse_csv(self._parse_table) 5668 return exp.JoinHint(this=func_name.upper(), expressions=args) 5669 5670 def _parse_substring(self) -> exp.Substring: 5671 # Postgres supports the form: substring(string [from int] [for int]) 5672 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5673 5674 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5675 5676 if self._match(TokenType.FROM): 5677 args.append(self._parse_bitwise()) 5678 if self._match(TokenType.FOR): 5679 if len(args) == 1: 5680 args.append(exp.Literal.number(1)) 5681 args.append(self._parse_bitwise()) 5682 5683 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5684 5685 def _parse_trim(self) -> exp.Trim: 5686 # https://www.w3resource.com/sql/character-functions/trim.php 5687 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5688 5689 position = None 5690 collation = None 5691 expression = None 5692 5693 if self._match_texts(self.TRIM_TYPES): 5694 position = self._prev.text.upper() 5695 5696 this = self._parse_bitwise() 5697 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5698 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5699 expression = self._parse_bitwise() 5700 5701 if invert_order: 5702 this, expression = expression, this 5703 5704 if self._match(TokenType.COLLATE): 5705 collation = self._parse_bitwise() 5706 5707 return self.expression( 5708 exp.Trim, this=this, position=position, expression=expression, collation=collation 5709 ) 5710 5711 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5712 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5713 5714 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5715 return self._parse_window(self._parse_id_var(), alias=True) 5716 5717 def _parse_respect_or_ignore_nulls( 5718 self, this: t.Optional[exp.Expression] 5719 ) -> t.Optional[exp.Expression]: 5720 if self._match_text_seq("IGNORE", "NULLS"): 
5721 return self.expression(exp.IgnoreNulls, this=this) 5722 if self._match_text_seq("RESPECT", "NULLS"): 5723 return self.expression(exp.RespectNulls, this=this) 5724 return this 5725 5726 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5727 if self._match(TokenType.HAVING): 5728 self._match_texts(("MAX", "MIN")) 5729 max = self._prev.text.upper() != "MIN" 5730 return self.expression( 5731 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5732 ) 5733 5734 return this 5735 5736 def _parse_window( 5737 self, this: t.Optional[exp.Expression], alias: bool = False 5738 ) -> t.Optional[exp.Expression]: 5739 func = this 5740 comments = func.comments if isinstance(func, exp.Expression) else None 5741 5742 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5743 self._match(TokenType.WHERE) 5744 this = self.expression( 5745 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5746 ) 5747 self._match_r_paren() 5748 5749 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5750 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5751 if self._match_text_seq("WITHIN", "GROUP"): 5752 order = self._parse_wrapped(self._parse_order) 5753 this = self.expression(exp.WithinGroup, this=this, expression=order) 5754 5755 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5756 # Some dialects choose to implement and some do not. 5757 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5758 5759 # There is some code above in _parse_lambda that handles 5760 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5761 5762 # The below changes handle 5763 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5764 5765 # Oracle allows both formats 5766 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5767 # and Snowflake chose to do the same for familiarity 5768 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5769 if isinstance(this, exp.AggFunc): 5770 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5771 5772 if ignore_respect and ignore_respect is not this: 5773 ignore_respect.replace(ignore_respect.this) 5774 this = self.expression(ignore_respect.__class__, this=this) 5775 5776 this = self._parse_respect_or_ignore_nulls(this) 5777 5778 # bigquery select from window x AS (partition by ...) 
5779 if alias: 5780 over = None 5781 self._match(TokenType.ALIAS) 5782 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5783 return this 5784 else: 5785 over = self._prev.text.upper() 5786 5787 if comments and isinstance(func, exp.Expression): 5788 func.pop_comments() 5789 5790 if not self._match(TokenType.L_PAREN): 5791 return self.expression( 5792 exp.Window, 5793 comments=comments, 5794 this=this, 5795 alias=self._parse_id_var(False), 5796 over=over, 5797 ) 5798 5799 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5800 5801 first = self._match(TokenType.FIRST) 5802 if self._match_text_seq("LAST"): 5803 first = False 5804 5805 partition, order = self._parse_partition_and_order() 5806 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5807 5808 if kind: 5809 self._match(TokenType.BETWEEN) 5810 start = self._parse_window_spec() 5811 self._match(TokenType.AND) 5812 end = self._parse_window_spec() 5813 5814 spec = self.expression( 5815 exp.WindowSpec, 5816 kind=kind, 5817 start=start["value"], 5818 start_side=start["side"], 5819 end=end["value"], 5820 end_side=end["side"], 5821 ) 5822 else: 5823 spec = None 5824 5825 self._match_r_paren() 5826 5827 window = self.expression( 5828 exp.Window, 5829 comments=comments, 5830 this=this, 5831 partition_by=partition, 5832 order=order, 5833 spec=spec, 5834 alias=window_alias, 5835 over=over, 5836 first=first, 5837 ) 5838 5839 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5840 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5841 return self._parse_window(window, alias=alias) 5842 5843 return window 5844 5845 def _parse_partition_and_order( 5846 self, 5847 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5848 return self._parse_partition_by(), self._parse_order() 5849 5850 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5851 self._match(TokenType.BETWEEN) 5852 5853 return { 5854 "value": ( 5855 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5856 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5857 or self._parse_bitwise() 5858 ), 5859 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5860 } 5861 5862 def _parse_alias( 5863 self, this: t.Optional[exp.Expression], explicit: bool = False 5864 ) -> t.Optional[exp.Expression]: 5865 any_token = self._match(TokenType.ALIAS) 5866 comments = self._prev_comments or [] 5867 5868 if explicit and not any_token: 5869 return this 5870 5871 if self._match(TokenType.L_PAREN): 5872 aliases = self.expression( 5873 exp.Aliases, 5874 comments=comments, 5875 this=this, 5876 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5877 ) 5878 self._match_r_paren(aliases) 5879 return aliases 5880 5881 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5882 self.STRING_ALIASES and self._parse_string_as_identifier() 5883 ) 5884 5885 if alias: 5886 comments.extend(alias.pop_comments()) 5887 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5888 column = this.this 5889 5890 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5891 if not this.comments and column and column.comments: 5892 this.comments = column.pop_comments() 5893 5894 return this 5895 5896 def _parse_id_var( 5897 self, 5898 any_token: bool = True, 5899 tokens: t.Optional[t.Collection[TokenType]] = None, 5900 ) -> t.Optional[exp.Expression]: 5901 expression = self._parse_identifier() 5902 if 
not expression and ( 5903 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5904 ): 5905 quoted = self._prev.token_type == TokenType.STRING 5906 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5907 5908 return expression 5909 5910 def _parse_string(self) -> t.Optional[exp.Expression]: 5911 if self._match_set(self.STRING_PARSERS): 5912 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5913 return self._parse_placeholder() 5914 5915 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5916 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5917 5918 def _parse_number(self) -> t.Optional[exp.Expression]: 5919 if self._match_set(self.NUMERIC_PARSERS): 5920 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5921 return self._parse_placeholder() 5922 5923 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5924 if self._match(TokenType.IDENTIFIER): 5925 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5926 return self._parse_placeholder() 5927 5928 def _parse_var( 5929 self, 5930 any_token: bool = False, 5931 tokens: t.Optional[t.Collection[TokenType]] = None, 5932 upper: bool = False, 5933 ) -> t.Optional[exp.Expression]: 5934 if ( 5935 (any_token and self._advance_any()) 5936 or self._match(TokenType.VAR) 5937 or (self._match_set(tokens) if tokens else False) 5938 ): 5939 return self.expression( 5940 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5941 ) 5942 return self._parse_placeholder() 5943 5944 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5945 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5946 self._advance() 5947 return self._prev 5948 return None 5949 5950 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5951 return self._parse_var() or self._parse_string() 5952 5953 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5954 return self._parse_primary() or self._parse_var(any_token=True) 5955 5956 def _parse_null(self) -> t.Optional[exp.Expression]: 5957 if self._match_set(self.NULL_TOKENS): 5958 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5959 return self._parse_placeholder() 5960 5961 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5962 if self._match(TokenType.TRUE): 5963 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5964 if self._match(TokenType.FALSE): 5965 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5966 return self._parse_placeholder() 5967 5968 def _parse_star(self) -> t.Optional[exp.Expression]: 5969 if self._match(TokenType.STAR): 5970 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5971 return self._parse_placeholder() 5972 5973 def _parse_parameter(self) -> exp.Parameter: 5974 this = self._parse_identifier() or self._parse_primary_or_var() 5975 return self.expression(exp.Parameter, this=this) 5976 5977 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5978 if self._match_set(self.PLACEHOLDER_PARSERS): 5979 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5980 if placeholder: 5981 return placeholder 5982 self._advance(-1) 5983 return None 5984 5985 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5986 if not self._match_texts(keywords): 5987 return None 5988 if self._match(TokenType.L_PAREN, advance=False): 5989 return 
self._parse_wrapped_csv(self._parse_expression) 5990 5991 expression = self._parse_expression() 5992 return [expression] if expression else None 5993 5994 def _parse_csv( 5995 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5996 ) -> t.List[exp.Expression]: 5997 parse_result = parse_method() 5998 items = [parse_result] if parse_result is not None else [] 5999 6000 while self._match(sep): 6001 self._add_comments(parse_result) 6002 parse_result = parse_method() 6003 if parse_result is not None: 6004 items.append(parse_result) 6005 6006 return items 6007 6008 def _parse_tokens( 6009 self, parse_method: t.Callable, expressions: t.Dict 6010 ) -> t.Optional[exp.Expression]: 6011 this = parse_method() 6012 6013 while self._match_set(expressions): 6014 this = self.expression( 6015 expressions[self._prev.token_type], 6016 this=this, 6017 comments=self._prev_comments, 6018 expression=parse_method(), 6019 ) 6020 6021 return this 6022 6023 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6024 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6025 6026 def _parse_wrapped_csv( 6027 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6028 ) -> t.List[exp.Expression]: 6029 return self._parse_wrapped( 6030 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6031 ) 6032 6033 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6034 wrapped = self._match(TokenType.L_PAREN) 6035 if not wrapped and not optional: 6036 self.raise_error("Expecting (") 6037 parse_result = parse_method() 6038 if wrapped: 6039 self._match_r_paren() 6040 return parse_result 6041 6042 def _parse_expressions(self) -> t.List[exp.Expression]: 6043 return self._parse_csv(self._parse_expression) 6044 6045 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6046 return self._parse_select() or self._parse_set_operations( 6047 self._parse_expression() if alias else self._parse_assignment() 6048 ) 6049 6050 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6051 return self._parse_query_modifiers( 6052 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6053 ) 6054 6055 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6056 this = None 6057 if self._match_texts(self.TRANSACTION_KIND): 6058 this = self._prev.text 6059 6060 self._match_texts(("TRANSACTION", "WORK")) 6061 6062 modes = [] 6063 while True: 6064 mode = [] 6065 while self._match(TokenType.VAR): 6066 mode.append(self._prev.text) 6067 6068 if mode: 6069 modes.append(" ".join(mode)) 6070 if not self._match(TokenType.COMMA): 6071 break 6072 6073 return self.expression(exp.Transaction, this=this, modes=modes) 6074 6075 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6076 chain = None 6077 savepoint = None 6078 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6079 6080 self._match_texts(("TRANSACTION", "WORK")) 6081 6082 if self._match_text_seq("TO"): 6083 self._match_text_seq("SAVEPOINT") 6084 savepoint = self._parse_id_var() 6085 6086 if self._match(TokenType.AND): 6087 chain = not self._match_text_seq("NO") 6088 self._match_text_seq("CHAIN") 6089 6090 if is_rollback: 6091 return self.expression(exp.Rollback, savepoint=savepoint) 6092 6093 return self.expression(exp.Commit, chain=chain) 6094 6095 def _parse_refresh(self) -> exp.Refresh: 6096 self._match(TokenType.TABLE) 6097 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 6098 6099 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6100 if not self._match_text_seq("ADD"): 6101 return None 6102 6103 self._match(TokenType.COLUMN) 6104 exists_column = self._parse_exists(not_=True) 6105 expression = self._parse_field_def() 6106 6107 if expression: 6108 expression.set("exists", exists_column) 6109 6110 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6111 if self._match_texts(("FIRST", "AFTER")): 6112 position = self._prev.text 6113 column_position = self.expression( 6114 exp.ColumnPosition, this=self._parse_column(), position=position 6115 ) 6116 expression.set("position", column_position) 6117 6118 return expression 6119 6120 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6121 drop = self._match(TokenType.DROP) and self._parse_drop() 6122 if drop and not isinstance(drop, exp.Command): 6123 drop.set("kind", drop.args.get("kind", "COLUMN")) 6124 return drop 6125 6126 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6127 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6128 return self.expression( 6129 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6130 ) 6131 6132 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6133 index = self._index - 1 6134 6135 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6136 return self._parse_csv( 6137 lambda: self.expression( 6138 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6139 ) 6140 ) 6141 6142 self._retreat(index) 6143 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6144 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6145 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6146 6147 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6148 if self._match_texts(self.ALTER_ALTER_PARSERS): 6149 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6150 6151 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6152 # keyword after ALTER we default to parsing this statement 6153 self._match(TokenType.COLUMN) 6154 column = self._parse_field(any_token=True) 6155 6156 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6157 return self.expression(exp.AlterColumn, this=column, drop=True) 6158 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6159 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6160 if self._match(TokenType.COMMENT): 6161 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6162 if self._match_text_seq("DROP", "NOT", "NULL"): 6163 return self.expression( 6164 exp.AlterColumn, 6165 this=column, 6166 drop=True, 6167 allow_null=True, 6168 ) 6169 if self._match_text_seq("SET", "NOT", "NULL"): 6170 return self.expression( 6171 exp.AlterColumn, 6172 this=column, 6173 allow_null=False, 6174 ) 6175 self._match_text_seq("SET", "DATA") 6176 self._match_text_seq("TYPE") 6177 return self.expression( 6178 exp.AlterColumn, 6179 this=column, 6180 dtype=self._parse_types(), 6181 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6182 using=self._match(TokenType.USING) and self._parse_assignment(), 6183 ) 6184 6185 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6186 if self._match_texts(("ALL", "EVEN", "AUTO")): 6187 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6188 6189 self._match_text_seq("KEY", "DISTKEY") 6190 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6191 6192 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6193 if compound: 6194 self._match_text_seq("SORTKEY") 6195 6196 if self._match(TokenType.L_PAREN, advance=False): 6197 return self.expression( 6198 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6199 ) 6200 6201 self._match_texts(("AUTO", "NONE")) 6202 return self.expression( 6203 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6204 ) 6205 6206 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6207 index = self._index - 1 6208 6209 partition_exists = self._parse_exists() 6210 if self._match(TokenType.PARTITION, advance=False): 6211 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6212 6213 self._retreat(index) 6214 return self._parse_csv(self._parse_drop_column) 6215 6216 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6217 if self._match(TokenType.COLUMN): 6218 exists = self._parse_exists() 6219 old_column = self._parse_column() 6220 to = self._match_text_seq("TO") 6221 new_column = self._parse_column() 6222 6223 if old_column is None or to is None or new_column is None: 6224 return None 6225 6226 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6227 6228 self._match_text_seq("TO") 6229 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6230 6231 def _parse_alter_table_set(self) -> exp.AlterSet: 6232 alter_set = self.expression(exp.AlterSet) 6233 6234 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6235 "TABLE", "PROPERTIES" 6236 ): 6237 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6238 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6239 alter_set.set("expressions", [self._parse_assignment()]) 6240 elif self._match_texts(("LOGGED", "UNLOGGED")): 6241 alter_set.set("option", exp.var(self._prev.text.upper())) 6242 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6243 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6244 elif self._match_text_seq("LOCATION"): 6245 alter_set.set("location", self._parse_field()) 6246 elif self._match_text_seq("ACCESS", "METHOD"): 6247 alter_set.set("access_method", self._parse_field()) 6248 elif self._match_text_seq("TABLESPACE"): 6249 alter_set.set("tablespace", self._parse_field()) 6250 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6251 alter_set.set("file_format", [self._parse_field()]) 6252 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6253 alter_set.set("file_format", self._parse_wrapped_options()) 6254 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6255 alter_set.set("copy_options", self._parse_wrapped_options()) 6256 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6257 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6258 else: 6259 if self._match_text_seq("SERDE"): 6260 alter_set.set("serde", self._parse_field()) 6261 6262 alter_set.set("expressions", [self._parse_properties()]) 6263 6264 return alter_set 6265 6266 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6267 start = self._prev 6268 6269 if not self._match(TokenType.TABLE): 6270 return 
self._parse_as_command(start) 6271 6272 exists = self._parse_exists() 6273 only = self._match_text_seq("ONLY") 6274 this = self._parse_table(schema=True) 6275 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6276 6277 if self._next: 6278 self._advance() 6279 6280 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6281 if parser: 6282 actions = ensure_list(parser(self)) 6283 options = self._parse_csv(self._parse_property) 6284 6285 if not self._curr and actions: 6286 return self.expression( 6287 exp.AlterTable, 6288 this=this, 6289 exists=exists, 6290 actions=actions, 6291 only=only, 6292 options=options, 6293 cluster=cluster, 6294 ) 6295 6296 return self._parse_as_command(start) 6297 6298 def _parse_merge(self) -> exp.Merge: 6299 self._match(TokenType.INTO) 6300 target = self._parse_table() 6301 6302 if target and self._match(TokenType.ALIAS, advance=False): 6303 target.set("alias", self._parse_table_alias()) 6304 6305 self._match(TokenType.USING) 6306 using = self._parse_table() 6307 6308 self._match(TokenType.ON) 6309 on = self._parse_assignment() 6310 6311 return self.expression( 6312 exp.Merge, 6313 this=target, 6314 using=using, 6315 on=on, 6316 expressions=self._parse_when_matched(), 6317 ) 6318 6319 def _parse_when_matched(self) -> t.List[exp.When]: 6320 whens = [] 6321 6322 while self._match(TokenType.WHEN): 6323 matched = not self._match(TokenType.NOT) 6324 self._match_text_seq("MATCHED") 6325 source = ( 6326 False 6327 if self._match_text_seq("BY", "TARGET") 6328 else self._match_text_seq("BY", "SOURCE") 6329 ) 6330 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6331 6332 self._match(TokenType.THEN) 6333 6334 if self._match(TokenType.INSERT): 6335 _this = self._parse_star() 6336 if _this: 6337 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6338 else: 6339 then = self.expression( 6340 exp.Insert, 6341 this=self._parse_value(), 6342 expression=self._match_text_seq("VALUES") and self._parse_value(), 6343 ) 6344 elif self._match(TokenType.UPDATE): 6345 expressions = self._parse_star() 6346 if expressions: 6347 then = self.expression(exp.Update, expressions=expressions) 6348 else: 6349 then = self.expression( 6350 exp.Update, 6351 expressions=self._match(TokenType.SET) 6352 and self._parse_csv(self._parse_equality), 6353 ) 6354 elif self._match(TokenType.DELETE): 6355 then = self.expression(exp.Var, this=self._prev.text) 6356 else: 6357 then = None 6358 6359 whens.append( 6360 self.expression( 6361 exp.When, 6362 matched=matched, 6363 source=source, 6364 condition=condition, 6365 then=then, 6366 ) 6367 ) 6368 return whens 6369 6370 def _parse_show(self) -> t.Optional[exp.Expression]: 6371 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6372 if parser: 6373 return parser(self) 6374 return self._parse_as_command(self._prev) 6375 6376 def _parse_set_item_assignment( 6377 self, kind: t.Optional[str] = None 6378 ) -> t.Optional[exp.Expression]: 6379 index = self._index 6380 6381 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6382 return self._parse_set_transaction(global_=kind == "GLOBAL") 6383 6384 left = self._parse_primary() or self._parse_column() 6385 assignment_delimiter = self._match_texts(("=", "TO")) 6386 6387 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6388 self._retreat(index) 6389 return None 6390 6391 right = self._parse_statement() or self._parse_id_var() 6392 if isinstance(right, 
(exp.Column, exp.Identifier)): 6393 right = exp.var(right.name) 6394 6395 this = self.expression(exp.EQ, this=left, expression=right) 6396 return self.expression(exp.SetItem, this=this, kind=kind) 6397 6398 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6399 self._match_text_seq("TRANSACTION") 6400 characteristics = self._parse_csv( 6401 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6402 ) 6403 return self.expression( 6404 exp.SetItem, 6405 expressions=characteristics, 6406 kind="TRANSACTION", 6407 **{"global": global_}, # type: ignore 6408 ) 6409 6410 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6411 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6412 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6413 6414 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6415 index = self._index 6416 set_ = self.expression( 6417 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6418 ) 6419 6420 if self._curr: 6421 self._retreat(index) 6422 return self._parse_as_command(self._prev) 6423 6424 return set_ 6425 6426 def _parse_var_from_options( 6427 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6428 ) -> t.Optional[exp.Var]: 6429 start = self._curr 6430 if not start: 6431 return None 6432 6433 option = start.text.upper() 6434 continuations = options.get(option) 6435 6436 index = self._index 6437 self._advance() 6438 for keywords in continuations or []: 6439 if isinstance(keywords, str): 6440 keywords = (keywords,) 6441 6442 if self._match_text_seq(*keywords): 6443 option = f"{option} {' '.join(keywords)}" 6444 break 6445 else: 6446 if continuations or continuations is None: 6447 if raise_unmatched: 6448 self.raise_error(f"Unknown option {option}") 6449 6450 self._retreat(index) 6451 return None 6452 6453 return exp.var(option) 6454 6455 def _parse_as_command(self, start: Token) -> exp.Command: 6456 while self._curr: 6457 self._advance() 6458 text = self._find_sql(start, self._prev) 6459 size = len(start.text) 6460 self._warn_unsupported() 6461 return exp.Command(this=text[:size], expression=text[size:]) 6462 6463 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6464 settings = [] 6465 6466 self._match_l_paren() 6467 kind = self._parse_id_var() 6468 6469 if self._match(TokenType.L_PAREN): 6470 while True: 6471 key = self._parse_id_var() 6472 value = self._parse_primary() 6473 6474 if not key and value is None: 6475 break 6476 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6477 self._match(TokenType.R_PAREN) 6478 6479 self._match_r_paren() 6480 6481 return self.expression( 6482 exp.DictProperty, 6483 this=this, 6484 kind=kind.this if kind else None, 6485 settings=settings, 6486 ) 6487 6488 def _parse_dict_range(self, this: str) -> exp.DictRange: 6489 self._match_l_paren() 6490 has_min = self._match_text_seq("MIN") 6491 if has_min: 6492 min = self._parse_var() or self._parse_primary() 6493 self._match_text_seq("MAX") 6494 max = self._parse_var() or self._parse_primary() 6495 else: 6496 max = self._parse_var() or self._parse_primary() 6497 min = exp.Literal.number(0) 6498 self._match_r_paren() 6499 return self.expression(exp.DictRange, this=this, min=min, max=max) 6500 6501 def _parse_comprehension( 6502 self, this: t.Optional[exp.Expression] 6503 ) -> t.Optional[exp.Comprehension]: 6504 index = self._index 6505 expression = self._parse_column() 6506 if not 
self._match(TokenType.IN): 6507 self._retreat(index - 1) 6508 return None 6509 iterator = self._parse_column() 6510 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6511 return self.expression( 6512 exp.Comprehension, 6513 this=this, 6514 expression=expression, 6515 iterator=iterator, 6516 condition=condition, 6517 ) 6518 6519 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6520 if self._match(TokenType.HEREDOC_STRING): 6521 return self.expression(exp.Heredoc, this=self._prev.text) 6522 6523 if not self._match_text_seq("$"): 6524 return None 6525 6526 tags = ["$"] 6527 tag_text = None 6528 6529 if self._is_connected(): 6530 self._advance() 6531 tags.append(self._prev.text.upper()) 6532 else: 6533 self.raise_error("No closing $ found") 6534 6535 if tags[-1] != "$": 6536 if self._is_connected() and self._match_text_seq("$"): 6537 tag_text = tags[-1] 6538 tags.append("$") 6539 else: 6540 self.raise_error("No closing $ found") 6541 6542 heredoc_start = self._curr 6543 6544 while self._curr: 6545 if self._match_text_seq(*tags, advance=False): 6546 this = self._find_sql(heredoc_start, self._prev) 6547 self._advance(len(tags)) 6548 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6549 6550 self._advance() 6551 6552 self.raise_error(f"No closing {''.join(tags)} found") 6553 return None 6554 6555 def _find_parser( 6556 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6557 ) -> t.Optional[t.Callable]: 6558 if not self._curr: 6559 return None 6560 6561 index = self._index 6562 this = [] 6563 while True: 6564 # The current token might be multiple words 6565 curr = self._curr.text.upper() 6566 key = curr.split(" ") 6567 this.append(curr) 6568 6569 self._advance() 6570 result, trie = in_trie(trie, key) 6571 if result == TrieResult.FAILED: 6572 break 6573 6574 if result == TrieResult.EXISTS: 6575 subparser = parsers[" ".join(this)] 6576 return subparser 6577 6578 self._retreat(index) 6579 return None 6580 6581 def _match(self, token_type, advance=True, expression=None): 6582 if not self._curr: 6583 return None 6584 6585 if self._curr.token_type == token_type: 6586 if advance: 6587 self._advance() 6588 self._add_comments(expression) 6589 return True 6590 6591 return None 6592 6593 def _match_set(self, types, advance=True): 6594 if not self._curr: 6595 return None 6596 6597 if self._curr.token_type in types: 6598 if advance: 6599 self._advance() 6600 return True 6601 6602 return None 6603 6604 def _match_pair(self, token_type_a, token_type_b, advance=True): 6605 if not self._curr or not self._next: 6606 return None 6607 6608 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6609 if advance: 6610 self._advance(2) 6611 return True 6612 6613 return None 6614 6615 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6616 if not self._match(TokenType.L_PAREN, expression=expression): 6617 self.raise_error("Expecting (") 6618 6619 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6620 if not self._match(TokenType.R_PAREN, expression=expression): 6621 self.raise_error("Expecting )") 6622 6623 def _match_texts(self, texts, advance=True): 6624 if self._curr and self._curr.text.upper() in texts: 6625 if advance: 6626 self._advance() 6627 return True 6628 return None 6629 6630 def _match_text_seq(self, *texts, advance=True): 6631 index = self._index 6632 for text in texts: 6633 if self._curr and self._curr.text.upper() == text: 6634 self._advance() 6635 else: 6636 
self._retreat(index) 6637 return None 6638 6639 if not advance: 6640 self._retreat(index) 6641 6642 return True 6643 6644 def _replace_lambda( 6645 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6646 ) -> t.Optional[exp.Expression]: 6647 if not node: 6648 return node 6649 6650 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6651 6652 for column in node.find_all(exp.Column): 6653 typ = lambda_types.get(column.parts[0].name) 6654 if typ is not None: 6655 dot_or_id = column.to_dot() if column.table else column.this 6656 6657 if typ: 6658 dot_or_id = self.expression( 6659 exp.Cast, 6660 this=dot_or_id, 6661 to=typ, 6662 ) 6663 6664 parent = column.parent 6665 6666 while isinstance(parent, exp.Dot): 6667 if not isinstance(parent.parent, exp.Dot): 6668 parent.replace(dot_or_id) 6669 break 6670 parent = parent.parent 6671 else: 6672 if column is node: 6673 node = dot_or_id 6674 else: 6675 column.replace(dot_or_id) 6676 return node 6677 6678 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6679 start = self._prev 6680 6681 # Not to be confused with TRUNCATE(number, decimals) function call 6682 if self._match(TokenType.L_PAREN): 6683 self._retreat(self._index - 2) 6684 return self._parse_function() 6685 6686 # Clickhouse supports TRUNCATE DATABASE as well 6687 is_database = self._match(TokenType.DATABASE) 6688 6689 self._match(TokenType.TABLE) 6690 6691 exists = self._parse_exists(not_=False) 6692 6693 expressions = self._parse_csv( 6694 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6695 ) 6696 6697 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6698 6699 if self._match_text_seq("RESTART", "IDENTITY"): 6700 identity = "RESTART" 6701 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6702 identity = "CONTINUE" 6703 else: 6704 identity = None 6705 6706 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6707 option = self._prev.text 6708 else: 6709 option = None 6710 6711 partition = self._parse_partition() 6712 6713 # Fallback case 6714 if self._curr: 6715 return self._parse_as_command(start) 6716 6717 return self.expression( 6718 exp.TruncateTable, 6719 expressions=expressions, 6720 is_database=is_database, 6721 exists=exists, 6722 cluster=cluster, 6723 identity=identity, 6724 option=option, 6725 partition=partition, 6726 ) 6727 6728 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6729 this = self._parse_ordered(self._parse_opclass) 6730 6731 if not self._match(TokenType.WITH): 6732 return this 6733 6734 op = self._parse_var(any_token=True) 6735 6736 return self.expression(exp.WithOperator, this=this, op=op) 6737 6738 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6739 self._match(TokenType.EQ) 6740 self._match(TokenType.L_PAREN) 6741 6742 opts: t.List[t.Optional[exp.Expression]] = [] 6743 while self._curr and not self._match(TokenType.R_PAREN): 6744 if self._match_text_seq("FORMAT_NAME", "="): 6745 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6746 # so we parse it separately to use _parse_field() 6747 prop = self.expression( 6748 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6749 ) 6750 opts.append(prop) 6751 else: 6752 opts.append(self._parse_property()) 6753 6754 self._match(TokenType.COMMA) 6755 6756 return opts 6757 6758 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6759 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6760 6761 options = [] 6762 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6763 option = self._parse_var(any_token=True) 6764 prev = self._prev.text.upper() 6765 6766 # Different dialects might separate options and values by white space, "=" and "AS" 6767 self._match(TokenType.EQ) 6768 self._match(TokenType.ALIAS) 6769 6770 param = self.expression(exp.CopyParameter, this=option) 6771 6772 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6773 TokenType.L_PAREN, advance=False 6774 ): 6775 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6776 param.set("expressions", self._parse_wrapped_options()) 6777 elif prev == "FILE_FORMAT": 6778 # T-SQL's external file format case 6779 param.set("expression", self._parse_field()) 6780 else: 6781 param.set("expression", self._parse_unquoted_field()) 6782 6783 options.append(param) 6784 self._match(sep) 6785 6786 return options 6787 6788 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6789 expr = self.expression(exp.Credentials) 6790 6791 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6792 expr.set("storage", self._parse_field()) 6793 if self._match_text_seq("CREDENTIALS"): 6794 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6795 creds = ( 6796 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6797 ) 6798 expr.set("credentials", creds) 6799 if self._match_text_seq("ENCRYPTION"): 6800 expr.set("encryption", self._parse_wrapped_options()) 6801 if self._match_text_seq("IAM_ROLE"): 6802 expr.set("iam_role", self._parse_field()) 6803 if self._match_text_seq("REGION"): 6804 expr.set("region", self._parse_field()) 6805 6806 return expr 6807 6808 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6809 return self._parse_field() 6810 6811 def _parse_copy(self) -> exp.Copy | exp.Command: 6812 start = self._prev 6813 6814 self._match(TokenType.INTO) 6815 6816 this = ( 6817 self._parse_select(nested=True, parse_subquery_alias=False) 6818 if self._match(TokenType.L_PAREN, advance=False) 6819 else self._parse_table(schema=True) 6820 ) 6821 6822 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6823 6824 files = self._parse_csv(self._parse_file_location) 6825 credentials = self._parse_credentials() 6826 6827 self._match_text_seq("WITH") 6828 6829 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6830 6831 # Fallback case 6832 if self._curr: 6833 return self._parse_as_command(start) 6834 6835 return self.expression( 6836 exp.Copy, 6837 this=this, 6838 kind=kind, 6839 credentials=credentials, 6840 files=files, 6841 params=params, 6842 )
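The private _parse_* and _match* helpers above are internal and are reached through the public entry points documented below rather than called directly. As a quick illustration of how two routines from this section, _parse_trim and _parse_window, surface in parse results, here is a minimal sketch using the top-level sqlglot.parse_one helper (the SQL strings are arbitrary):

import sqlglot
from sqlglot import exp

# TRIM(LEADING ... FROM ...) goes through _parse_trim, which records the
# trim type in the "position" arg of the resulting exp.Trim node.
trim = sqlglot.parse_one("SELECT TRIM(LEADING 'x' FROM col) FROM t").find(exp.Trim)
assert trim.args["position"] == "LEADING"

# OVER (...) clauses are assembled by _parse_window; the frame clause is
# parsed by _parse_window_spec into an exp.WindowSpec stored under "spec".
window = sqlglot.parse_one(
    "SELECT SUM(v) OVER (ORDER BY ts ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
).find(exp.Window)
assert isinstance(window.args["spec"], exp.WindowSpec)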
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()
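In normal use a Parser is obtained via a Dialect, but it can also be constructed directly; the dialect argument accepts anything Dialect.get_or_raise understands, such as a dialect name. A minimal sketch (the argument values are arbitrary):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Collect up to five errors and raise them together at the end of parsing,
# instead of raising on the first problem encountered.
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="mysql")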
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
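A minimal end-to-end sketch pairing the base Tokenizer with parse, assuming plain standard SQL with no dialect-specific syntax:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
tokens = Tokenizer().tokenize(sql)

# One syntax tree is produced per semicolon-separated statement.
statements = Parser().parse(tokens, sql=sql)
assert len(statements) == 2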
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
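For example, a token stream can be parsed directly into a specific node type; this sketch assumes exp.Select is among the registered EXPRESSION_PARSERS keys, as it is in current sqlglot versions:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT x FROM y"
select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql=sql)[0]
assert isinstance(select, exp.Select)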
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
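Under ErrorLevel.WARN, problems are recorded and logged rather than raised; parse() invokes check_errors() internally once it finishes. A sketch with an arbitrarily malformed input (the incomplete "SELECT 1 +" is just one way to trigger a validation error):

import logging

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

logging.basicConfig()

parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize("SELECT 1 +"), sql="SELECT 1 +")

# The recorded problems stay inspectable after check_errors() has logged them.
print(len(parser.errors))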
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
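With the default ErrorLevel.IMMEDIATE, the same malformed input raises at once, and the resulting ParseError carries the location and context fields built above. A sketch:

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

try:
    Parser().parse(Tokenizer().tokenize("SELECT 1 +"), sql="SELECT 1 +")
except ParseError as e:
    err = e.errors[0]
    # line/col locate the offending token; highlight is the underlined slice.
    print(err["line"], err["col"], repr(err["highlight"]))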
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
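Parsing methods use this helper when building nodes, since it also attaches any comments buffered during tokenization and validates mandatory arguments. A sketch of a hypothetical subclass method (the method name is illustrative, not part of sqlglot):

from sqlglot import exp
from sqlglot.parser import Parser

class MyParser(Parser):
    def _parse_magic(self):  # hypothetical helper
        # Equivalent to exp.Var(this="MAGIC"), plus comment handling and
        # validation driven by the parser's error level.
        return self.expression(exp.Var, this="MAGIC")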
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
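For example, validating a node that is missing a mandatory argument records an error (or raises immediately, depending on error_level). This sketch assumes exp.If requires its condition under "this", as in current sqlglot versions:

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

parser = Parser(error_level=ErrorLevel.RAISE)
incomplete = exp.If(true=exp.Literal.number(1))  # required "this" is missing

parser.validate_expression(incomplete)
assert parser.errors  # recorded now; check_errors() would raise them together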