Skip to content

Commit

Permalink
Fix(parser)!: allow limit, offset to be used as both modifiers and aliases (#4589)
Browse files Browse the repository at this point in the history
  • Loading branch information
georgesittas authored Jan 10, 2025
1 parent 3bfd6de commit f3fcc10
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 2 deletions.
28 changes: 27 additions & 1 deletion sqlglot/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,7 @@ class Parser(metaclass=_Parser):
TokenType.KEEP,
TokenType.KILL,
TokenType.LEFT,
TokenType.LIMIT,
TokenType.LOAD,
TokenType.MERGE,
TokenType.NATURAL,
Expand Down Expand Up @@ -552,7 +553,6 @@ class Parser(metaclass=_Parser):
TokenType.LEFT,
TokenType.LOCK,
TokenType.NATURAL,
TokenType.OFFSET,
TokenType.RIGHT,
TokenType.SEMI,
TokenType.WINDOW,
Expand Down Expand Up @@ -1329,6 +1329,8 @@ class Parser(metaclass=_Parser):
# The style options for the DESCRIBE statement
DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

# Tokens that may either open a LIMIT / OFFSET clause or serve as a plain identifier (alias).
# _can_parse_limit_or_offset peeks for one of these before an alias is parsed.
AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

OPERATION_MODIFIERS: t.Set[str] = set()

STRICT_CAST = True
Expand Down Expand Up @@ -3179,6 +3181,12 @@ def _parse_cte(self) -> t.Optional[exp.CTE]:
def _parse_table_alias(
self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
) -> t.Optional[exp.TableAlias]:
# In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
# so this section tries to parse the clause version and if it fails, it treats the token
# as an identifier (alias)
if self._can_parse_limit_or_offset():
return None

any_token = self._match(TokenType.ALIAS)
alias = (
self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
Expand Down Expand Up @@ -4428,6 +4436,18 @@ def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[e
exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
)

def _can_parse_limit_or_offset(self) -> bool:
if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
return False

index = self._index
result = bool(
self._try_parse(self._parse_limit, retreat=True)
or self._try_parse(self._parse_offset, retreat=True)
)
self._retreat(index)
return result

def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

Expand Down Expand Up @@ -6637,6 +6657,12 @@ def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
def _parse_alias(
self, this: t.Optional[exp.Expression], explicit: bool = False
) -> t.Optional[exp.Expression]:
# In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
# so this section tries to parse the clause version and if it fails, it treats the token
# as an identifier (alias)
if self._can_parse_limit_or_offset():
return this

any_token = self._match(TokenType.ALIAS)
comments = self._prev_comments or []

Expand Down
8 changes: 8 additions & 0 deletions tests/dialects/test_spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,14 @@ def test_spark(self):
self.validate_identity("TRIM(LEADING 'SL' FROM 'SSparkSQLS')")
self.validate_identity("TRIM(TRAILING 'SL' FROM 'SSparkSQLS')")
self.validate_identity("SPLIT(str, pattern, lim)")
self.validate_identity(
"SELECT 1 limit",
"SELECT 1 AS limit",
)
self.validate_identity(
"SELECT 1 offset",
"SELECT 1 AS offset",
)
self.validate_identity(
"SELECT UNIX_TIMESTAMP()",
"SELECT UNIX_TIMESTAMP(CURRENT_TIMESTAMP())",
Expand Down
4 changes: 3 additions & 1 deletion tests/fixtures/identity.sql
Original file line number Diff line number Diff line change
Expand Up @@ -882,4 +882,6 @@ GRANT SELECT ON orders TO ROLE PUBLIC
GRANT SELECT ON nation TO alice WITH GRANT OPTION
GRANT DELETE ON SCHEMA finance TO bob
SELECT attach
SELECT detach
SELECT detach
SELECT 1 OFFSET 1
SELECT 1 LIMIT 1

0 comments on commit f3fcc10

Please sign in to comment.