From 57daac4139dd6c513de24cf380a7deb1f53dad92 Mon Sep 17 00:00:00 2001
From: EmirVildanov
Date: Fri, 1 Mar 2024 12:09:10 +0500
Subject: [PATCH] feat: add sql grammar with pratt parser and tests (#983)

Add an SQL grammar (grammars/src/grammars/sql.pest) and expose it as
`pest_grammars::sql::SqlParser`.

* grammars/src/lib.rs: declare the `sql` module and add a unit test that
  feeds the pairs produced for `Rule::Expr` through a `PrattParser` to
  check operator priorities (lowest to highest: OR, BETWEEN, AND, NOT,
  comparisons/IN, +/-, */ / / ||, IS NULL).
* grammars/tests/sql.rs: `parses_to!` tests for a simple SELECT,
  CREATE USER and INSERT ... SELECT.
* grammars/Cargo.toml: add `lazy_static` as a dev-dependency; the unit
  test uses it to hold the `PrattParser` in a static.

---
Review notes (this section is ignored by `git am`):
 - Line structure was rebuilt from the `+`/context markers; the number of
   added lines per hunk matches the hunk headers (190, 17 + 156, 118).
 - Generic arguments that had been stripped were restored:
   `PrattParser<sql::Rule>`, `Box<Expr>`, `parse::<u64>()`,
   `parse::<bool>()`, and the links in the license header.
 - `CmpInfixOp` listed `Lt` twice and the Pratt parser registered
   `Op::infix(NotEq, Right)` twice; the duplicates were dropped.
 - `OnConflict` is now `^"on" ~ ^"conflict"` and `atomic` / `end` in
   `CreateProc` are case-insensitive, matching the other keywords.
 - The `index` blob ids below were not recomputed.

 grammars/Cargo.toml            |   1 +
 grammars/src/grammars/sql.pest | 190 +++++++++++++++++++++++++++++++++
 grammars/src/lib.rs            | 174 +++++++++++++++++++++++++++++-
 grammars/tests/sql.rs          | 118 ++++++++++++++++++++
 4 files changed, 482 insertions(+), 1 deletion(-)
 create mode 100644 grammars/src/grammars/sql.pest
 create mode 100644 grammars/tests/sql.rs

diff --git a/grammars/Cargo.toml b/grammars/Cargo.toml
index 4f3f2df4..f3478c49 100644
--- a/grammars/Cargo.toml
+++ b/grammars/Cargo.toml
@@ -20,6 +20,7 @@ pest_derive = { path = "../derive", version = "2.7.7" }
 [dev-dependencies]
 criterion = "0.5"
 pretty_assertions = "1.3.0"
+lazy_static = "1.4.0"
 
 [[bench]]
 name = "json"
diff --git a/grammars/src/grammars/sql.pest b/grammars/src/grammars/sql.pest
new file mode 100644
index 00000000..eea96175
--- /dev/null
+++ b/grammars/src/grammars/sql.pest
@@ -0,0 +1,190 @@
+Command = _{ SOI ~ (Query | ExplainQuery | DDL | ACL) ~ EOF }
+
+ACL = _{ DropRole | DropUser | CreateRole | CreateUser | AlterUser | GrantPrivilege | RevokePrivilege }
+    CreateUser = {
+        ^"create" ~ ^"user" ~ Identifier ~ (^"with")? ~ ^"password" ~ SingleQuotedString ~
+        AuthMethod?
+    }
+    AlterUser = {
+        ^"alter" ~ ^"user" ~ Identifier ~ (^"with")? ~ AlterOption
+    }
+        AlterOption = _{ AlterLogin | AlterNoLogin | AlterPassword }
+            AlterLogin = { ^"login" }
+            AlterNoLogin = { ^"nologin" }
+            AlterPassword = { ^"password" ~ SingleQuotedString ~ AuthMethod? }
+    AuthMethod = { ^"using" ~ (ChapSha1 | Md5 | Ldap) }
+        ChapSha1 = { ^"chap-sha1" }
+        Md5 = { ^"md5" }
+        Ldap = { ^"ldap" }
+    DropUser = { ^"drop" ~ ^"user" ~ Identifier }
+    CreateRole = { ^"create" ~ ^"role" ~ Identifier }
+    DropRole = { ^"drop" ~ ^"role" ~ Identifier }
+    GrantPrivilege = { ^"grant" ~ PrivBlock ~ ^"to" ~ Identifier }
+    RevokePrivilege = { ^"revoke" ~ PrivBlock ~ ^"from" ~ Identifier }
+        PrivBlock = _{ PrivBlockPrivilege | PrivBlockRolePass }
+            PrivBlockPrivilege = {Privilege ~ (PrivBlockUser | PrivBlockSpecificUser | PrivBlockRole
+                                                | PrivBlockSpecificRole | PrivBlockTable | PrivBlockSpecificTable)}
+            PrivBlockUser = { ^"user" }
+            PrivBlockSpecificUser = { ^"on" ~ ^"user" ~ Identifier }
+            PrivBlockRole = { ^"role" }
+            PrivBlockSpecificRole = { ^"on" ~ ^"role" ~ Identifier }
+            PrivBlockTable = { ^"table" }
+            PrivBlockSpecificTable = { ^"on" ~ ^"table" ~ Identifier }
+            PrivBlockRolePass = { Identifier }
+        Privilege = _{ PrivilegeRead | PrivilegeWrite | PrivilegeExecute |
+                      PrivilegeCreate | PrivilegeAlter | PrivilegeDrop |
+                      PrivilegeSession | PrivilegeUsage }
+            PrivilegeAlter = { ^"alter" }
+            PrivilegeCreate = { ^"create" }
+            PrivilegeDrop = { ^"drop" }
+            PrivilegeExecute = { ^"execute" }
+            PrivilegeRead = { ^"read" }
+            PrivilegeSession = { ^"session" }
+            PrivilegeUsage = { ^"usage" }
+            PrivilegeWrite = { ^"write" }
+
+DDL = _{ CreateTable | DropTable | CreateProc }
+    CreateTable = {
+        ^"create" ~ ^"table" ~ Identifier ~
+        "(" ~ Columns ~ "," ~ PrimaryKey ~ ")" ~
+        Distribution
+    }
+        Columns = { ColumnDef ~ ("," ~ ColumnDef)* }
+            ColumnDef = { Identifier ~ ColumnDefType ~ ColumnDefIsNull? }
+            ColumnDefIsNull = { NotFlag? ~ ^"null" }
+        PrimaryKey = {
+            ^"primary" ~ ^"key" ~
+            "(" ~ Identifier ~ ("," ~ Identifier)* ~ ")"
+        }
+        Distribution = { ^"distributed" ~ (Global | Sharding) }
+        Global = { ^"globally" }
+        Sharding = { ^"by" ~ "(" ~ Identifier ~ ("," ~ Identifier)* ~ ")"}
+    DropTable = { ^"drop" ~ ^"table" ~ Identifier }
+
+    CreateProc = {
+        ^"create" ~ ^"procedure" ~ Identifier ~
+        "(" ~ ProcParams? ~ ")" ~ (^"language" ~ ProcLanguage)? ~
+        ((^"as" ~ "$$" ~ ProcBody ~ "$$") | (^"begin" ~ ^"atomic" ~ ProcBody ~ ^"end"))
+    }
+        ProcParams = { ProcParamDef ~ ("," ~ ProcParamDef)* }
+            ProcParamDef = { ColumnDefType }
+        ProcLanguage = { SQL }
+            SQL = { ^"sql" }
+        ProcBody = { (Insert | Update | Delete) }
+
+ExplainQuery = _{ Explain }
+    Explain = { ^"explain" ~ Query }
+
+Query = { (SelectWithOptionalContinuation | Values | Insert | Update | Delete) }
+    SelectWithOptionalContinuation = { Select ~ (ExceptContinuation | UnionAllContinuation)? }
+        ExceptContinuation = { ((^"except" ~ ^"distinct") | ^"except") ~ Select }
+        UnionAllContinuation = { ^"union" ~ ^"all" ~ Select }
+    Select = {
+        ^"select" ~ Projection ~ ^"from" ~ Scan ~
+        Join? ~ WhereClause? ~
+        (^"group" ~ ^"by" ~ GroupBy)? ~
+        (^"having" ~ Having)?
+    }
+        Projection = { Distinct? ~ ProjectionElement ~ ("," ~ ProjectionElement)* }
+            ProjectionElement = _{ Asterisk | Column }
+            Column = { Expr ~ ((^"as")? ~ Identifier)? }
+            Asterisk = { "*" }
+        WhereClause = _{ ^"where" ~ Selection }
+        Selection = { Expr }
+        Scan = { (Identifier | SubQuery) ~ ((^"as")? ~ Identifier)? }
+        Join = { JoinKind? ~ ^"join" ~ Scan ~ ^"on" ~ Expr }
+            JoinKind = _{ ( InnerJoinKind | LeftJoinKind ) }
+                InnerJoinKind = { ^"inner" }
+                LeftJoinKind = { ^"left" ~ (^"outer")? }
+        GroupBy = { Expr ~ ("," ~ Expr)* }
+        Having = { Expr }
+    SubQuery = { "(" ~ (SelectWithOptionalContinuation | Values) ~ ")" }
+    Insert = { ^"insert" ~ ^"into" ~ Identifier ~ ("(" ~ TargetColumns ~ ")")? ~ (Values | Select) ~ OnConflict? }
+        TargetColumns = { Identifier ~ ("," ~ Identifier)* }
+        OnConflict = _{ ^"on" ~ ^"conflict" ~ ^"do" ~ (DoNothing | DoReplace | DoFail) }
+            DoReplace = { ^"replace" }
+            DoNothing = { ^"nothing" }
+            DoFail = { ^"fail" }
+    Update = { ^"update" ~ Identifier ~ ^"set" ~ UpdateList ~ (UpdateFrom | WhereClause)? }
+        UpdateList = { UpdateItem ~ ("," ~ UpdateItem)* }
+        UpdateItem = { Identifier ~ "=" ~ Expr }
+        UpdateFrom = _{ ^"from" ~ Scan ~ (^"where" ~ Expr)? }
+    Values = { ^"values" ~ Row ~ ("," ~ Row)* }
+    Delete = { ^"delete" ~ ^"from" ~ Identifier ~ (^"where" ~ DeleteFilter)? }
+        DeleteFilter = { Expr }
+
+Identifier = @{ DoubleQuotedIdentifier | IdentifierInner }
+    DoubleQuotedIdentifier = @{ ("\"" ~ IdentifierInner ~ "\"") }
+    IdentifierInner = @{ !(Keyword ~ ("(" | WHITESPACE | "," | EOF)) ~ (IdentifierNonDigit ~ (IdentifierNonDigit | ASCII_DIGIT)*) }
+    IdentifierNonDigit = _{ ('a'..'z' | 'A' .. 'Z' | 'А' .. 'Я' | 'а' .. 'я' | "-" | "_") }
+    Keyword = { ^"left" | ^"having" | ^"not" | ^"inner" | ^"group"
+                | ^"on" | ^"join" | ^"from" | ^"exists" | ^"except"
+                | ^"union" | ^"where" | ^"distinct" | ^"between" | ^"option"
+                | ^"values"}
+
+Expr = { ExprAtomValue ~ (ExprInfixOp ~ ExprAtomValue)* }
+    ExprInfixOp = _{ Between | ArithInfixOp | CmpInfixOp | ConcatInfixOp | And | Or }
+        Between = { NotFlag? ~ ^"between" }
+        And = { ^"and" }
+        Or = { ^"or" }
+        ConcatInfixOp = { "||" }
+        ArithInfixOp = _{ Add | Subtract | Multiply | Divide }
+            Add = { "+" }
+            Subtract = { "-" }
+            Multiply = { "*" }
+            Divide = { "/" }
+        CmpInfixOp = _{ NotEq | GtEq | Gt | LtEq | Lt | Eq | In }
+            Eq = { "=" }
+            Gt = { ">" }
+            GtEq = { ">=" }
+            Lt = { "<" }
+            LtEq = { "<=" }
+            NotEq = { "<>" | "!=" }
+            In = { NotFlag? ~ ^"in" }
+    ExprAtomValue = _{ UnaryNot* ~ AtomicExpr ~ IsNullPostfix? }
+        UnaryNot = @{ NotFlag }
+        IsNullPostfix = { ^"is" ~ NotFlag? ~ ^"null" }
+        AtomicExpr = _{ Literal | Parameter | Cast | IdentifierWithOptionalContinuation | ExpressionInParentheses | UnaryOperator | SubQuery | Row }
+            Literal = _{ True | False | Null | Double | Decimal | Unsigned | Integer | SingleQuotedString }
+                True = { ^"true" }
+                False = { ^"false" }
+                Null = { ^"null" }
+                Decimal = @{ Integer ~ ("." ~ ASCII_DIGIT*) }
+                Double = @{ Integer ~ ("." ~ ASCII_DIGIT*)? ~ (^"e" ~ Integer) }
+                Integer = @{ ("+" | "-")? ~ ASCII_DIGIT+ }
+                Unsigned = @{ ASCII_DIGIT+ }
+                SingleQuotedString = @{ OnlyQuotesSequence | AnythingButQuotesSequence }
+                    OnlyQuotesSequence = @{ ("'" ~ "'")+ }
+                    AnythingButQuotesSequence = @{ "'" ~ (!("'") ~ ANY)* ~ "'" }
+            Parameter = { PgParameter | QuestionParameter }
+                QuestionParameter = @{ "?" }
+                PgParameter = { "$" ~ Unsigned }
+            IdentifierWithOptionalContinuation = { Identifier ~ (ReferenceContinuation | FunctionInvocationContinuation)? }
+                ReferenceContinuation = { "." ~ Identifier }
+                FunctionInvocationContinuation = { "(" ~ (CountAsterisk | FunctionArgs)? ~ ")" }
+                    FunctionArgs = { Distinct? ~ (Expr ~ ("," ~ Expr)*)? }
+                    CountAsterisk = { "*" }
+            ExpressionInParentheses = { "(" ~ Expr ~ ")" }
+            Cast = { ^"cast" ~ "(" ~ Expr ~ ^"as" ~ TypeCast ~ ")" }
+                TypeCast = _{ TypeAny | ColumnDefType }
+                ColumnDefType = { TypeBool | TypeDecimal | TypeDouble | TypeInt | TypeNumber
+                                  | TypeScalar | TypeString | TypeText | TypeUnsigned | TypeVarchar }
+                    TypeAny = { ^"any" }
+                    TypeBool = { (^"boolean" | ^"bool") }
+                    TypeDecimal = { ^"decimal" }
+                    TypeDouble = { ^"double" }
+                    TypeInt = { (^"integer" | ^"int") }
+                    TypeNumber = { ^"number" }
+                    TypeScalar = { ^"scalar" }
+                    TypeString = { ^"string" }
+                    TypeText = { ^"text" }
+                    TypeUnsigned = { ^"unsigned" }
+                    TypeVarchar = { ^"varchar" ~ "(" ~ Unsigned ~ ")" }
+            UnaryOperator = _{ Exists }
+                Exists = { NotFlag? ~ ^"exists" ~ SubQuery }
+            Row = { "(" ~ Expr ~ ("," ~ Expr)* ~ ")" }
+
+Distinct = { ^"distinct" }
+NotFlag = { ^"not" }
+EOF = { EOI | ";" }
+WHITESPACE = _{ " " | "\t" | "\n" | "\r\n" }
diff --git a/grammars/src/lib.rs b/grammars/src/lib.rs
index cc5d88df..edd551a0 100644
--- a/grammars/src/lib.rs
+++ b/grammars/src/lib.rs
@@ -50,13 +50,29 @@ pub mod toml {
     pub struct TomlParser;
 }
 
+/// Grammar rules of an SQL parser
+#[allow(missing_docs)]
+pub mod sql {
+    /// SQL parser.
+    /// Grammar is a tinkered version of the one used in distributed SQL executor module named
+    /// [sbroad](https://git.picodata.io/picodata/picodata/sbroad/-/blob/main/sbroad-core/src/frontend/sql/query.pest).
+    /// Being a submodule of [Picodata](https://git.picodata.io/picodata/picodata/picodata) (that
+    /// operates with Tarantool database) it tries to simulate SQLite flavour (Tarantool uses
+    /// SQLite to execute SQL queries).
+    #[derive(Parser)]
+    #[grammar = "grammars/sql.pest"]
+    pub struct SqlParser;
+}
+
 #[cfg(test)]
 mod tests {
+    use pest::iterators::Pairs;
     use std::convert::TryInto;
 
+    use pest::pratt_parser::PrattParser;
     use pest::Parser;
 
-    use crate::{json, toml};
+    use crate::{json, sql, toml};
 
     fn test_toml_deep_nesting(input: &str) {
         const ERROR: &str = "call limit reached";
@@ -104,4 +120,160 @@ mod tests {
         assert!(s2.is_err());
         assert_eq!(s2.unwrap_err().variant.message(), ERROR);
     }
+
+    #[test]
+    fn sql_check_expressions_priorities() {
+        lazy_static::lazy_static! {
+            static ref PRATT_PARSER: PrattParser<sql::Rule> = {
+                use pest::pratt_parser::{Assoc::{Left, Right}, Op};
+                use sql::Rule::{Add, And, Between, ConcatInfixOp, Divide, Eq, Gt, GtEq, In,
+                    IsNullPostfix, Lt, LtEq, Multiply, NotEq, Or, Subtract, UnaryNot};
+
+                // Precedence is defined lowest to highest.
+                PrattParser::new()
+                    .op(Op::infix(Or, Left))
+                    .op(Op::infix(Between, Left))
+                    .op(Op::infix(And, Left))
+                    .op(Op::prefix(UnaryNot))
+                    .op(
+                        Op::infix(Eq, Right) | Op::infix(NotEq, Right)
+                        | Op::infix(Gt, Right) | Op::infix(GtEq, Right) | Op::infix(Lt, Right)
+                        | Op::infix(LtEq, Right) | Op::infix(In, Right)
+                    )
+                    .op(Op::infix(Add, Left) | Op::infix(Subtract, Left))
+                    .op(Op::infix(Multiply, Left) | Op::infix(Divide, Left) | Op::infix(ConcatInfixOp, Left))
+                    .op(Op::postfix(IsNullPostfix))
+            };
+        }
+
+        #[derive(Debug, PartialEq, Eq)]
+        enum ArithOp {
+            Add,
+            Mult,
+        }
+
+        #[derive(Debug, PartialEq, Eq)]
+        enum BoolOp {
+            And,
+            Or,
+            Eq,
+            In,
+        }
+
+        #[derive(Debug, PartialEq, Eq)]
+        enum InfixOp {
+            ArithInfix(ArithOp),
+            BoolInfix(BoolOp),
+        }
+
+        #[derive(Debug, PartialEq, Eq)]
+        enum Expr {
+            SubQuery,
+            Infix {
+                left: Box<Expr>,
+                op: InfixOp,
+                right: Box<Expr>,
+            },
+            ArithValue(u64),
+            BoolConst(bool),
+            Not {
+                child: Box<Expr>,
+            },
+            IsNull {
+                child: Box<Expr>,
+            },
+        }
+
+        // Example of SQL expression containing many operators with different priorities.
+        // Should be interpreted as (the subquery is kept opaque as `Expr::SubQuery`):
+        // `(not ((1 + (1 * 2)) = 3)) or ((false is null) and (1 in (<subquery>)))`
+        let input = r#"not 1 + 1 * 2 = 3
+                        or false is null
+                        and 1 in (
+                            select "name", avg("grade") from students
+                            where "age" > 14
+                            group by "class"
+                        )"#;
+
+        let res_pairs = sql::SqlParser::parse(sql::Rule::Expr, input).unwrap();
+        fn parse_expr(expression_pairs: Pairs<'_, sql::Rule>) -> Expr {
+            PRATT_PARSER
+                .map_primary(|primary| match primary.as_rule() {
+                    sql::Rule::Expr => parse_expr(primary.into_inner()),
+                    sql::Rule::SubQuery => Expr::SubQuery,
+                    sql::Rule::Unsigned => {
+                        let u64_value = primary.as_str().parse::<u64>().unwrap();
+                        Expr::ArithValue(u64_value)
+                    }
+                    sql::Rule::True | sql::Rule::False => {
+                        let bool_value = primary.as_str().parse::<bool>().unwrap();
+                        Expr::BoolConst(bool_value)
+                    }
+                    rule => unreachable!("Expr::parse expected atomic rule, found {:?}", rule),
+                })
+                .map_infix(|lhs, op, rhs| {
+                    let op = match op.as_rule() {
+                        sql::Rule::And => InfixOp::BoolInfix(BoolOp::And),
+                        sql::Rule::Or => InfixOp::BoolInfix(BoolOp::Or),
+                        sql::Rule::Eq => InfixOp::BoolInfix(BoolOp::Eq),
+                        sql::Rule::In => InfixOp::BoolInfix(BoolOp::In),
+                        sql::Rule::Multiply => InfixOp::ArithInfix(ArithOp::Mult),
+                        sql::Rule::Add => InfixOp::ArithInfix(ArithOp::Add),
+                        rule => {
+                            unreachable!("Expr::parse expected infix operation, found {:?}", rule)
+                        }
+                    };
+                    Expr::Infix {
+                        left: Box::new(lhs),
+                        op,
+                        right: Box::new(rhs),
+                    }
+                })
+                .map_prefix(|op, child| match op.as_rule() {
+                    sql::Rule::UnaryNot => Expr::Not {
+                        child: Box::new(child),
+                    },
+                    rule => unreachable!("Expr::parse expected prefix operator, found {:?}", rule),
+                })
+                .map_postfix(|child, op| match op.as_rule() {
+                    sql::Rule::IsNullPostfix => Expr::IsNull {
+                        child: Box::new(child),
+                    },
+                    rule => unreachable!("Expr::parse expected postfix operator, found {:?}", rule),
+                })
+                .parse(expression_pairs)
+        }
+
+        let actual_expr = parse_expr(res_pairs);
+        let expected_expr = Expr::Infix {
+            op: InfixOp::BoolInfix(BoolOp::Or),
+            left: Box::new(Expr::Not {
+                child: Box::new(Expr::Infix {
+                    left: Box::new(Expr::Infix {
+                        left: Box::new(Expr::ArithValue(1)),
+                        op: InfixOp::ArithInfix(ArithOp::Add),
+                        right: Box::new(Expr::Infix {
+                            left: Box::new(Expr::ArithValue(1)),
+                            op: InfixOp::ArithInfix(ArithOp::Mult),
+                            right: Box::new(Expr::ArithValue(2)),
+                        }),
+                    }),
+                    op: InfixOp::BoolInfix(BoolOp::Eq),
+                    right: Box::new(Expr::ArithValue(3)),
+                }),
+            }),
+            right: Box::new(Expr::Infix {
+                left: Box::new(Expr::IsNull {
+                    child: Box::new(Expr::BoolConst(false)),
+                }),
+                op: InfixOp::BoolInfix(BoolOp::And),
+                right: Box::new(Expr::Infix {
+                    left: Box::new(Expr::ArithValue(1)),
+                    op: InfixOp::BoolInfix(BoolOp::In),
+                    right: Box::new(Expr::SubQuery),
+                }),
+            }),
+        };
+        assert_eq!(expected_expr, actual_expr);
+    }
 }
diff --git a/grammars/tests/sql.rs b/grammars/tests/sql.rs
new file mode 100644
index 00000000..ec6fa9c2
--- /dev/null
+++ b/grammars/tests/sql.rs
@@ -0,0 +1,118 @@
+// pest. The Elegant Parser
+// Copyright (c) 2018 Dragoș Tiselice
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
+#[macro_use]
+extern crate pest;
+extern crate pest_grammars;
+
+use pest_grammars::sql::*;
+
+#[test]
+fn sql_simple_select() {
+    parses_to! {
+        parser: SqlParser,
+        input: "select * from table",
+        rule: Rule::Command,
+        tokens: [
+            Query (0, 19, [
+                SelectWithOptionalContinuation(0, 19, [
+                    Select(0, 19, [
+                        Projection(7, 9, [
+                            Asterisk(7, 8)
+                        ]),
+                        Scan(14, 19, [
+                            Identifier(14, 19)
+                        ])
+                    ])
+                ])
+            ]),
+            EOF(19, 19, [EOI(19, 19)])
+        ]
+    }
+}
+
+#[test]
+fn sql_create_user() {
+    parses_to! {
+        parser: SqlParser,
+        input: r#"create user "my_user" with password 'strong_password123'"#,
+        rule: Rule::Command,
+        tokens: [
+            CreateUser(0, 56, [
+                Identifier(12, 21),
+                SingleQuotedString(36, 56)
+            ]),
+            EOF(56, 56, [EOI(56, 56)])
+        ]
+    }
+}
+
+#[test]
+fn sql_insert_from_select() {
+    parses_to! {
+        parser: SqlParser,
+        input: r#"insert into "my_table" ("col_1", "col_2")
+                  select "name", "class", avg("age")
+                  from "students"
+                  where "age" > 15
+                  group by "age""#,
+        rule: Rule::Command,
+        tokens: [
+            Query(0, 196, [
+                Insert(0, 196, [
+                    Identifier(12, 22),
+                    TargetColumns(24, 40, [
+                        Identifier(24, 31),
+                        Identifier(33, 40)
+                    ]),
+                    Select(60, 196, [
+                        Projection(67, 113, [
+                            Column(67, 73, [Expr(67, 73, [
+                                IdentifierWithOptionalContinuation(67, 73, [Identifier(67, 73)])
+                                ])
+                            ]),
+                            Column(75, 82, [Expr(75, 82, [
+                                IdentifierWithOptionalContinuation(75, 82, [Identifier(75, 82)])
+                                ])
+                            ]),
+                            Column(84, 113, [
+                                Expr(84, 113, [
+                                    IdentifierWithOptionalContinuation(84, 94, [
+                                        Identifier(84, 87),
+                                        FunctionInvocationContinuation(87, 94, [
+                                            FunctionArgs(88, 93, [
+                                                Expr(88, 93, [
+                                                    IdentifierWithOptionalContinuation(88, 93, [
+                                                        Identifier(88, 93)
+                                                    ])
+                                                ])
+                                            ])
+                                        ])
+                                    ])
+                                ])
+                            ])
+                        ]),
+                        Scan(118, 147, [Identifier(118, 128)]),
+                        Selection(153, 182, [
+                            Expr(153, 182, [
+                                IdentifierWithOptionalContinuation(153, 159, [Identifier(153, 158)]),
+                                Gt(159, 160),
+                                Unsigned(161, 163)]
+                            )]),
+                        GroupBy(191, 196, [
+                            Expr(191, 196, [
+                                IdentifierWithOptionalContinuation(191, 196, [Identifier(191, 196)])
+                            ])
+                        ])
+                    ])
+                ])
+            ]),
+            EOF(196, 196, [EOI(196, 196)])]
+    }
+}