diff --git a/crates/rsonpath-syntax/src/error.rs b/crates/rsonpath-syntax/src/error.rs index c858c3ee..1dc875aa 100644 --- a/crates/rsonpath-syntax/src/error.rs +++ b/crates/rsonpath-syntax/src/error.rs @@ -25,7 +25,22 @@ pub(crate) struct ParseErrorBuilder { #[derive(Debug, Error)] pub struct ParseError { input: String, - syntax_errors: Vec, + inner: InnerParseError, +} + +impl ParseError { + /// Returns whether the error was caused by exceeding the parser's nesting limit. + #[inline] + #[must_use] + pub fn is_nesting_limit_exceeded(&self) -> bool { + matches!(self.inner, InnerParseError::RecursionLimit(_)) + } +} + +#[derive(Debug)] +enum InnerParseError { + Syntax(Vec), + RecursionLimit(usize), } impl ParseErrorBuilder { @@ -48,7 +63,14 @@ impl ParseErrorBuilder { pub(crate) fn build(self, str: String) -> ParseError { ParseError { input: str, - syntax_errors: self.syntax_errors, + inner: InnerParseError::Syntax(self.syntax_errors), + } + } + + pub(crate) fn recursion_limit_exceeded(str: String, recursion_limit: usize) -> ParseError { + ParseError { + input: str, + inner: InnerParseError::RecursionLimit(recursion_limit), } } } @@ -73,22 +95,42 @@ impl ParseError { #[inline(always)] fn fmt_parse_error(error: &ParseError, style: &ErrorStyleImpl, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut suggestion = Suggestion::new(); - for syntax_error in &error.syntax_errors { - writeln!( - f, - "{}", - syntax_error.display(&error.input, &mut suggestion, style.clone()) - )?; - } + match &error.inner { + InnerParseError::Syntax(syntax_errors) => { + let mut suggestion = Suggestion::new(); + for syntax_error in syntax_errors { + writeln!( + f, + "{}", + syntax_error.display(&error.input, &mut suggestion, style.clone()) + )?; + } - if let Some(suggestion) = suggestion.build(&error.input) { - writeln!( - f, - "{} did you mean `{}` ?", - style.note_prefix(&"suggestion:"), - style.suggestion(&suggestion) - )?; + if let Some(suggestion) = suggestion.build(&error.input) { + writeln!( + f, + "{} did you mean `{}` ?", + style.note_prefix(&"suggestion:"), + style.suggestion(&suggestion) + )?; + } + } + InnerParseError::RecursionLimit(limit) => { + writeln!( + f, + "{} {}", + style.error_prefix(&"error:"), + style.error_message(&"nesting level exceeded") + )?; + writeln!(f)?; + writeln!(f, " {}", error.input)?; + writeln!( + f, + "{} the parser limits nesting to {}; this applies to filter logical expressions", + style.note_prefix(&"note:"), + limit + )?; + } } Ok(()) @@ -144,7 +186,6 @@ pub(crate) enum SyntaxErrorKind { InvalidComparable, NonSingularQueryInComparison, InvalidFilter, - QueryNestingLimitExceeded(usize), } impl SyntaxError { @@ -301,8 +342,7 @@ impl SyntaxError { | SyntaxErrorKind::InvalidComparisonOperator | SyntaxErrorKind::InvalidFilter | SyntaxErrorKind::MissingComparisonOperator - | SyntaxErrorKind::InvalidComparable - | SyntaxErrorKind::QueryNestingLimitExceeded(_) => suggestion.invalidate(), + | SyntaxErrorKind::InvalidComparable => suggestion.invalidate(), } // Generic notes. @@ -414,6 +454,7 @@ impl DisplayableSyntaxErrorBuilder { pub(crate) enum InternalParseError<'a> { SyntaxError(SyntaxError, &'a str), SyntaxErrors(Vec, &'a str), + RecursionLimitExceeded, NomError(nom::error::Error<&'a str>), } @@ -705,7 +746,6 @@ impl SyntaxErrorKind { Self::InvalidComparable => "invalid right-hand side of comparison".to_string(), Self::NonSingularQueryInComparison => "non-singular query used in comparison".to_string(), Self::InvalidFilter => "invalid filter expression syntax".to_string(), - Self::QueryNestingLimitExceeded(_) => "query nesting limit exceeded".to_string(), } } @@ -746,9 +786,6 @@ impl SyntaxErrorKind { Self::MissingComparisonOperator => "expected a comparison operator here".to_string(), Self::InvalidComparisonOperator => "not a valid comparison operator".to_string(), Self::InvalidFilter => "not a valid filter expression".to_string(), - Self::QueryNestingLimitExceeded(_) => { - "starting here, the filter expression is excessively deeply nested".to_string() - } } } } diff --git a/crates/rsonpath-syntax/src/lib.rs b/crates/rsonpath-syntax/src/lib.rs index 4ecf75b9..c9d390ed 100644 --- a/crates/rsonpath-syntax/src/lib.rs +++ b/crates/rsonpath-syntax/src/lib.rs @@ -208,9 +208,10 @@ impl ParserBuilder { /// Override the default recursion limit in a query. /// Defaults to [Parser::RECURSION_LIMIT_DEFAULT]. /// - /// JSONPath queries are inherently recursive, since the [`LogicalExpr`] in a filter - /// can test arbitrary nested JSONPath queries. Our parser implementation is recursive, - /// so an excessively nested query could overflow the stack. + /// JSONPath queries are inherently recursive, since + /// - [`LogicalExpr`] can be an arbitrarily deep tree of AND/OR operators; + /// - the [`TestExpr`] in a filter can test arbitrary nested JSONPath queries. + /// Our parser implementation is recursive, so an excessively nested query could overflow the stack. /// /// The limit can be relaxed here, or removed entirely by passing [`None`]. /// @@ -350,7 +351,7 @@ impl Parser { /// The parser defaults to this strict behavior unless configured with /// [`ParserBuilder::allow_surrounding_whitespace`]. /// - /// There is a limit on the complexity of the query measured as the depth of nested filter test queries. + /// There is a limit on the complexity of the query measured as the depth of nested filter expressions. /// This limit defaults to [`RECURSION_LIMIT_DEFAULT`](Self::RECURSION_LIMIT_DEFAULT) and can be overridden /// with [`ParserBuilder::set_recursion_limit`]. #[inline] @@ -864,158 +865,6 @@ impl JsonPathQuery { pub fn segments(&self) -> &[Segment] { &self.segments } - - /// Check the nesting level of the query. - /// - /// JSONPath queries are recursive, as a [`Selector::Filter`] can contain - /// a [`TestExpr`] that is an arbitrary query, which can in turn contain a filter - /// with a test expression, etc. ad nauseam. - /// - /// ## Returns - /// A query with no nested test expressions has nesting level 0. - /// If it contains any test expressions, and the maximum nesting level - /// of queries in those is n, then the nesting level of the entire query is n+1. - /// - /// Checking this takes time proportional to the size of the query. - /// - /// ## Examples - /// - /// Queries with no test filters have nesting level 0. - /// - /// ```rust - /// # use rsonpath_syntax::prelude::*; - /// let query = parse_json_path_query("$[0]['name']").unwrap(); - /// - /// assert_eq!(query.nesting_level(), 0); - /// ``` - /// - /// Single-nested filters have nesting level 1. - /// - /// ```rust - /// # use rsonpath_syntax::prelude::*; - /// let query = parse_json_path_query("$[0][?@.name]").unwrap(); - /// - /// assert_eq!(query.nesting_level(), 1); - /// ``` - /// - /// ... and so on ... - /// - /// ```rust - /// # use rsonpath_syntax::prelude::*; - /// let query = parse_json_path_query("$[0][?@[0][?@[0][?@]]]").unwrap(); - /// - /// assert_eq!(query.nesting_level(), 3); - /// ``` - /// - /// Singular queries can add only one level, since they allow no filter selectors. - /// - /// ```rust - /// # use rsonpath_syntax::prelude::*; - /// let query = parse_json_path_query("$[0][?@[0][?@.x == true]]").unwrap(); - /// - /// assert_eq!(query.nesting_level(), 2); - /// ``` - #[inline] - #[must_use] - pub fn nesting_level(&self) -> usize { - let mut result = 0; - let mut nested_queries = iter_nested(self).collect::>(); - let mut scratch = vec![]; - - while !nested_queries.is_empty() { - result += 1; - scratch.extend(nested_queries.drain(..).filter_map(iter_nested_either).flatten()); - std::mem::swap(&mut scratch, &mut nested_queries); - } - - return result; - - #[derive(Clone, Copy)] - enum EitherQuery<'a> { - Full(&'a JsonPathQuery), - Singular, - } - - fn iter_nested_either(q: EitherQuery) -> Option> { - match q { - EitherQuery::Full(q) => Some(iter_nested(q)), - EitherQuery::Singular => None, - } - } - - fn iter_nested(q: &JsonPathQuery) -> impl Iterator { - q.segments - .iter() - .flat_map(|x| x.selectors().iter()) - .filter_map(|s| match s { - Selector::Filter(f) => Some(f), - _ => None, - }) - .flat_map(iter_queries_in_filter) - } - - fn iter_queries_in_filter(f: &LogicalExpr) -> impl Iterator { - return TreeWalker::new(f).flatten(); - - struct TreeWalker<'a> { - current: Option<&'a LogicalExpr>, - stack: Vec<&'a LogicalExpr>, - leftover: Option>, - } - - impl<'a> TreeWalker<'a> { - fn new(f: &'a LogicalExpr) -> Self { - Self { - current: Some(f), - stack: vec![], - leftover: None, - } - } - } - - impl<'a> Iterator for TreeWalker<'a> { - type Item = Option>; - - fn next(&mut self) -> Option { - if let Some(x) = self.leftover.take() { - return Some(Some(x)); - } - match self.current? { - LogicalExpr::Test(TestExpr::Absolute(q) | TestExpr::Relative(q)) => { - self.current = self.stack.pop(); - Some(Some(EitherQuery::Full(q))) - } - LogicalExpr::Comparison(cmp) => { - let lhs = match cmp.lhs() { - Comparable::RelativeSingularQuery(_) | Comparable::AbsoluteSingularQuery(_) => { - Some(EitherQuery::Singular) - } - Comparable::Literal(_) => None, - }; - let rhs = match cmp.rhs() { - Comparable::RelativeSingularQuery(_) | Comparable::AbsoluteSingularQuery(_) => { - Some(EitherQuery::Singular) - } - Comparable::Literal(_) => None, - }; - self.current = self.stack.pop(); - self.leftover = rhs; - Some(lhs) - } - LogicalExpr::Not(node) => { - self.current = Some(node.as_ref()); - Some(None) - } - LogicalExpr::And(l, r) | LogicalExpr::Or(l, r) => { - self.stack.push(l.as_ref()); - self.current = Some(r); - Some(None) - } - } - } - } - } - } } impl Segment { diff --git a/crates/rsonpath-syntax/src/parser.rs b/crates/rsonpath-syntax/src/parser.rs index d7172d3e..c789b3ae 100644 --- a/crates/rsonpath-syntax/src/parser.rs +++ b/crates/rsonpath-syntax/src/parser.rs @@ -103,6 +103,14 @@ fn parse_json_path_query(q: &str, ctx: ParseCtx) -> Result { parse_error.add_many(errs); rest } + Err(InternalParseError::RecursionLimitExceeded) => { + return Err(ParseErrorBuilder::recursion_limit_exceeded( + original_input.to_owned(), + ctx.options + .recursion_limit + .expect("recursion limit should exists when exceeded"), + )); + } Err(InternalParseError::NomError(err)) => panic!( "unexpected parser error; raw nom errors should never be produced; this is a bug\ncontext:\n{err}" ), @@ -401,6 +409,10 @@ fn logical_expr<'q>(q: &'q str, ctx: ParseCtx) -> IResult<&'q str, LogicalExpr, Or, } + let Some(ctx) = ctx.increase_nesting() else { + return Err(Err::Failure(InternalParseError::RecursionLimitExceeded)); + }; + let (rest, this_expr) = ignore_whitespace(|q| parse_single(q, ctx))(q)?; let mut loop_rest = skip_whitespace(rest); let mut final_expr = this_expr; @@ -557,15 +569,6 @@ impl FilterQuery { } fn filter_query<'q>(q: &'q str, ctx: ParseCtx) -> IResult<&'q str, FilterQuery, InternalParseError<'q>> { - let Some(ctx) = ctx.increase_nesting() else { - return fail( - SyntaxErrorKind::QueryNestingLimitExceeded(ctx.options.recursion_limit.expect("limit exists if exceeded")), - q.len(), - q.len(), - "", - ); - }; - let (rest, root_type) = alt(( value(RootSelectorType::Absolute, char('$')), value(RootSelectorType::Relative, char('@')), @@ -600,6 +603,9 @@ fn filter_query<'q>(q: &'q str, ctx: ParseCtx) -> IResult<&'q str, FilterQuery, syntax_errors.append(&mut errs); rest } + Err(InternalParseError::RecursionLimitExceeded) => { + return Err(Err::Failure(InternalParseError::RecursionLimitExceeded)); + } Err(InternalParseError::NomError(err)) => panic!( "unexpected parser error; raw nom errors should never be produced; this is a bug\ncontext:\n{err}" ), diff --git a/fuzz/fuzz_targets/query_fuzz_round_trip.rs b/fuzz/fuzz_targets/query_fuzz_round_trip.rs index fc33769d..511a4b07 100644 --- a/fuzz/fuzz_targets/query_fuzz_round_trip.rs +++ b/fuzz/fuzz_targets/query_fuzz_round_trip.rs @@ -5,11 +5,11 @@ use libfuzzer_sys::{fuzz_target, Corpus}; use rsonpath_syntax::JsonPathQuery; fuzz_target!(|data: JsonPathQuery| -> Corpus { - if data.nesting_level() > rsonpath_syntax::Parser::RECURSION_LIMIT_DEFAULT { - return Corpus::Reject; - } let str = data.to_string(); - let query = rsonpath_syntax::parse(&str).expect("should parse"); - assert_eq!(data, query, "query string: {str}"); + match rsonpath_syntax::parse(&str) { + Ok(query) => assert_eq!(data, query, "query string: {str}"), + Err(err) if err.is_nesting_limit_exceeded() => return Corpus::Reject, + Err(_) => panic!("expected parse to succeed"), + } Corpus::Keep });