From f3fcc1013dfcfdaa388ba3426ed82c4fe0eefab1 Mon Sep 17 00:00:00 2001 From: Jo <46752250+georgesittas@users.noreply.github.com> Date: Fri, 10 Jan 2025 10:23:48 +0200 Subject: [PATCH] Fix(parser)!: allow limit, offset to be used as both modifiers and aliases (#4589) --- sqlglot/parser.py | 28 +++++++++++++++++++++++++++- tests/dialects/test_spark.py | 8 ++++++++ tests/fixtures/identity.sql | 4 +++- 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/sqlglot/parser.py b/sqlglot/parser.py index e45f90fadb..902641e7f4 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -497,6 +497,7 @@ class Parser(metaclass=_Parser): TokenType.KEEP, TokenType.KILL, TokenType.LEFT, + TokenType.LIMIT, TokenType.LOAD, TokenType.MERGE, TokenType.NATURAL, @@ -552,7 +553,6 @@ class Parser(metaclass=_Parser): TokenType.LEFT, TokenType.LOCK, TokenType.NATURAL, - TokenType.OFFSET, TokenType.RIGHT, TokenType.SEMI, TokenType.WINDOW, @@ -1329,6 +1329,8 @@ class Parser(metaclass=_Parser): # The style options for the DESCRIBE statement DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} + AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) + OPERATION_MODIFIERS: t.Set[str] = set() STRICT_CAST = True @@ -3179,6 +3181,12 @@ def _parse_cte(self) -> t.Optional[exp.CTE]: def _parse_table_alias( self, alias_tokens: t.Optional[t.Collection[TokenType]] = None ) -> t.Optional[exp.TableAlias]: + # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) + # so this section tries to parse the clause version and if it fails, it treats the token + # as an identifier (alias) + if self._can_parse_limit_or_offset(): + return None + any_token = self._match(TokenType.ALIAS) alias = ( self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) @@ -4428,6 +4436,18 @@ def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[e exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() ) + def _can_parse_limit_or_offset(self) -> bool: + if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): + return False + + index = self._index + result = bool( + self._try_parse(self._parse_limit, retreat=True) + or self._try_parse(self._parse_offset, retreat=True) + ) + self._retreat(index) + return result + def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) @@ -6637,6 +6657,12 @@ def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: def _parse_alias( self, this: t.Optional[exp.Expression], explicit: bool = False ) -> t.Optional[exp.Expression]: + # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) + # so this section tries to parse the clause version and if it fails, it treats the token + # as an identifier (alias) + if self._can_parse_limit_or_offset(): + return this + any_token = self._match(TokenType.ALIAS) comments = self._prev_comments or [] diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py index 9d422dde5d..4a54482116 100644 --- a/tests/dialects/test_spark.py +++ b/tests/dialects/test_spark.py @@ -263,6 +263,14 @@ def test_spark(self): self.validate_identity("TRIM(LEADING 'SL' FROM 'SSparkSQLS')") self.validate_identity("TRIM(TRAILING 'SL' FROM 'SSparkSQLS')") self.validate_identity("SPLIT(str, pattern, lim)") + self.validate_identity( + "SELECT 1 limit", + "SELECT 1 AS limit", + ) + self.validate_identity( + "SELECT 1 offset", + "SELECT 1 AS offset", + ) self.validate_identity( "SELECT UNIX_TIMESTAMP()", "SELECT UNIX_TIMESTAMP(CURRENT_TIMESTAMP())", diff --git a/tests/fixtures/identity.sql b/tests/fixtures/identity.sql index e2d97b43ba..d9b809b419 100644 --- a/tests/fixtures/identity.sql +++ b/tests/fixtures/identity.sql @@ -882,4 +882,6 @@ GRANT SELECT ON orders TO ROLE PUBLIC GRANT SELECT ON nation TO alice WITH GRANT OPTION GRANT DELETE ON SCHEMA finance TO bob SELECT attach -SELECT detach \ No newline at end of file +SELECT detach +SELECT 1 OFFSET 1 +SELECT 1 LIMIT 1