Skip to content

Commit

Permalink
make nesting limit visible in the error
Browse files Browse the repository at this point in the history
  • Loading branch information
V0ldek committed Feb 29, 2024
1 parent 7cf0da9 commit cc16ac5
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 194 deletions.
85 changes: 61 additions & 24 deletions crates/rsonpath-syntax/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,22 @@ pub(crate) struct ParseErrorBuilder {
#[derive(Debug, Error)]
pub struct ParseError {
input: String,
syntax_errors: Vec<SyntaxError>,
inner: InnerParseError,
}

impl ParseError {
/// Returns whether the error was caused by exceeding the parser's nesting limit.
#[inline]
#[must_use]
pub fn is_nesting_limit_exceeded(&self) -> bool {
matches!(self.inner, InnerParseError::RecursionLimit(_))
}
}

#[derive(Debug)]
enum InnerParseError {
Syntax(Vec<SyntaxError>),
RecursionLimit(usize),
}

impl ParseErrorBuilder {
Expand All @@ -48,7 +63,14 @@ impl ParseErrorBuilder {
pub(crate) fn build(self, str: String) -> ParseError {
ParseError {
input: str,
syntax_errors: self.syntax_errors,
inner: InnerParseError::Syntax(self.syntax_errors),
}
}

pub(crate) fn recursion_limit_exceeded(str: String, recursion_limit: usize) -> ParseError {
ParseError {
input: str,
inner: InnerParseError::RecursionLimit(recursion_limit),
}
}
}
Expand All @@ -73,22 +95,42 @@ impl ParseError {

#[inline(always)]
fn fmt_parse_error(error: &ParseError, style: &ErrorStyleImpl, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut suggestion = Suggestion::new();
for syntax_error in &error.syntax_errors {
writeln!(
f,
"{}",
syntax_error.display(&error.input, &mut suggestion, style.clone())
)?;
}
match &error.inner {
InnerParseError::Syntax(syntax_errors) => {
let mut suggestion = Suggestion::new();
for syntax_error in syntax_errors {
writeln!(
f,
"{}",
syntax_error.display(&error.input, &mut suggestion, style.clone())
)?;
}

if let Some(suggestion) = suggestion.build(&error.input) {
writeln!(
f,
"{} did you mean `{}` ?",
style.note_prefix(&"suggestion:"),
style.suggestion(&suggestion)
)?;
if let Some(suggestion) = suggestion.build(&error.input) {
writeln!(
f,
"{} did you mean `{}` ?",
style.note_prefix(&"suggestion:"),
style.suggestion(&suggestion)
)?;
}
}
InnerParseError::RecursionLimit(limit) => {
writeln!(
f,
"{} {}",
style.error_prefix(&"error:"),
style.error_message(&"nesting level exceeded")
)?;
writeln!(f)?;
writeln!(f, " {}", error.input)?;
writeln!(
f,
"{} the parser limits nesting to {}; this applies to filter logical expressions",
style.note_prefix(&"note:"),
limit
)?;
}
}

Ok(())
Expand Down Expand Up @@ -144,7 +186,6 @@ pub(crate) enum SyntaxErrorKind {
InvalidComparable,
NonSingularQueryInComparison,
InvalidFilter,
QueryNestingLimitExceeded(usize),
}

impl SyntaxError {
Expand Down Expand Up @@ -301,8 +342,7 @@ impl SyntaxError {
| SyntaxErrorKind::InvalidComparisonOperator
| SyntaxErrorKind::InvalidFilter
| SyntaxErrorKind::MissingComparisonOperator
| SyntaxErrorKind::InvalidComparable
| SyntaxErrorKind::QueryNestingLimitExceeded(_) => suggestion.invalidate(),
| SyntaxErrorKind::InvalidComparable => suggestion.invalidate(),
}

// Generic notes.
Expand Down Expand Up @@ -414,6 +454,7 @@ impl DisplayableSyntaxErrorBuilder {
pub(crate) enum InternalParseError<'a> {
SyntaxError(SyntaxError, &'a str),
SyntaxErrors(Vec<SyntaxError>, &'a str),
RecursionLimitExceeded,
NomError(nom::error::Error<&'a str>),
}

Expand Down Expand Up @@ -705,7 +746,6 @@ impl SyntaxErrorKind {
Self::InvalidComparable => "invalid right-hand side of comparison".to_string(),
Self::NonSingularQueryInComparison => "non-singular query used in comparison".to_string(),
Self::InvalidFilter => "invalid filter expression syntax".to_string(),
Self::QueryNestingLimitExceeded(_) => "query nesting limit exceeded".to_string(),
}
}

Expand Down Expand Up @@ -746,9 +786,6 @@ impl SyntaxErrorKind {
Self::MissingComparisonOperator => "expected a comparison operator here".to_string(),
Self::InvalidComparisonOperator => "not a valid comparison operator".to_string(),
Self::InvalidFilter => "not a valid filter expression".to_string(),
Self::QueryNestingLimitExceeded(_) => {
"starting here, the filter expression is excessively deeply nested".to_string()
}
}
}
}
Expand Down
161 changes: 5 additions & 156 deletions crates/rsonpath-syntax/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,9 +208,10 @@ impl ParserBuilder {
/// Override the default recursion limit in a query.
/// Defaults to [Parser::RECURSION_LIMIT_DEFAULT].
///
/// JSONPath queries are inherently recursive, since the [`LogicalExpr`] in a filter
/// can test arbitrary nested JSONPath queries. Our parser implementation is recursive,
/// so an excessively nested query could overflow the stack.
/// JSONPath queries are inherently recursive, since
/// - [`LogicalExpr`] can be an arbitrarily deep tree of AND/OR operators;
/// - the [`TestExpr`] in a filter can test arbitrary nested JSONPath queries.
/// Our parser implementation is recursive, so an excessively nested query could overflow the stack.
///
/// The limit can be relaxed here, or removed entirely by passing [`None`].
///
Expand Down Expand Up @@ -350,7 +351,7 @@ impl Parser {
/// The parser defaults to this strict behavior unless configured with
/// [`ParserBuilder::allow_surrounding_whitespace`].
///
/// There is a limit on the complexity of the query measured as the depth of nested filter test queries.
/// There is a limit on the complexity of the query measured as the depth of nested filter expressions.
/// This limit defaults to [`RECURSION_LIMIT_DEFAULT`](Self::RECURSION_LIMIT_DEFAULT) and can be overridden
/// with [`ParserBuilder::set_recursion_limit`].
#[inline]
Expand Down Expand Up @@ -864,158 +865,6 @@ impl JsonPathQuery {
pub fn segments(&self) -> &[Segment] {
&self.segments
}

/// Check the nesting level of the query.
///
/// JSONPath queries are recursive, as a [`Selector::Filter`] can contain
/// a [`TestExpr`] that is an arbitrary query, which can in turn contain a filter
/// with a test expression, etc. ad nauseam.
///
/// ## Returns
/// A query with no nested test expressions has nesting level 0.
/// If it contains any test expressions, and the maximum nesting level
/// of queries in those is n, then the nesting level of the entire query is n+1.
///
/// Checking this takes time proportional to the size of the query.
///
/// ## Examples
///
/// Queries with no test filters have nesting level 0.
///
/// ```rust
/// # use rsonpath_syntax::prelude::*;
/// let query = parse_json_path_query("$[0]['name']").unwrap();
///
/// assert_eq!(query.nesting_level(), 0);
/// ```
///
/// Single-nested filters have nesting level 1.
///
/// ```rust
/// # use rsonpath_syntax::prelude::*;
/// let query = parse_json_path_query("$[0][[email protected]]").unwrap();
///
/// assert_eq!(query.nesting_level(), 1);
/// ```
///
/// ... and so on ...
///
/// ```rust
/// # use rsonpath_syntax::prelude::*;
/// let query = parse_json_path_query("$[0][?@[0][?@[0][?@]]]").unwrap();
///
/// assert_eq!(query.nesting_level(), 3);
/// ```
///
/// Singular queries can add only one level, since they allow no filter selectors.
///
/// ```rust
/// # use rsonpath_syntax::prelude::*;
/// let query = parse_json_path_query("$[0][?@[0][[email protected] == true]]").unwrap();
///
/// assert_eq!(query.nesting_level(), 2);
/// ```
#[inline]
#[must_use]
pub fn nesting_level(&self) -> usize {
let mut result = 0;
let mut nested_queries = iter_nested(self).collect::<Vec<_>>();
let mut scratch = vec![];

while !nested_queries.is_empty() {
result += 1;
scratch.extend(nested_queries.drain(..).filter_map(iter_nested_either).flatten());
std::mem::swap(&mut scratch, &mut nested_queries);
}

return result;

#[derive(Clone, Copy)]
enum EitherQuery<'a> {
Full(&'a JsonPathQuery),
Singular,
}

fn iter_nested_either(q: EitherQuery) -> Option<impl Iterator<Item = EitherQuery>> {
match q {
EitherQuery::Full(q) => Some(iter_nested(q)),
EitherQuery::Singular => None,
}
}

fn iter_nested(q: &JsonPathQuery) -> impl Iterator<Item = EitherQuery> {
q.segments
.iter()
.flat_map(|x| x.selectors().iter())
.filter_map(|s| match s {
Selector::Filter(f) => Some(f),
_ => None,
})
.flat_map(iter_queries_in_filter)
}

fn iter_queries_in_filter(f: &LogicalExpr) -> impl Iterator<Item = EitherQuery> {
return TreeWalker::new(f).flatten();

struct TreeWalker<'a> {
current: Option<&'a LogicalExpr>,
stack: Vec<&'a LogicalExpr>,
leftover: Option<EitherQuery<'a>>,
}

impl<'a> TreeWalker<'a> {
fn new(f: &'a LogicalExpr) -> Self {
Self {
current: Some(f),
stack: vec![],
leftover: None,
}
}
}

impl<'a> Iterator for TreeWalker<'a> {
type Item = Option<EitherQuery<'a>>;

fn next(&mut self) -> Option<Self::Item> {
if let Some(x) = self.leftover.take() {
return Some(Some(x));
}
match self.current? {
LogicalExpr::Test(TestExpr::Absolute(q) | TestExpr::Relative(q)) => {
self.current = self.stack.pop();
Some(Some(EitherQuery::Full(q)))
}
LogicalExpr::Comparison(cmp) => {
let lhs = match cmp.lhs() {
Comparable::RelativeSingularQuery(_) | Comparable::AbsoluteSingularQuery(_) => {
Some(EitherQuery::Singular)
}
Comparable::Literal(_) => None,
};
let rhs = match cmp.rhs() {
Comparable::RelativeSingularQuery(_) | Comparable::AbsoluteSingularQuery(_) => {
Some(EitherQuery::Singular)
}
Comparable::Literal(_) => None,
};
self.current = self.stack.pop();
self.leftover = rhs;
Some(lhs)
}
LogicalExpr::Not(node) => {
self.current = Some(node.as_ref());
Some(None)
}
LogicalExpr::And(l, r) | LogicalExpr::Or(l, r) => {
self.stack.push(l.as_ref());
self.current = Some(r);
Some(None)
}
}
}
}
}
}
}

impl Segment {
Expand Down
24 changes: 15 additions & 9 deletions crates/rsonpath-syntax/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ fn parse_json_path_query(q: &str, ctx: ParseCtx) -> Result<JsonPathQuery> {
parse_error.add_many(errs);
rest
}
Err(InternalParseError::RecursionLimitExceeded) => {
return Err(ParseErrorBuilder::recursion_limit_exceeded(
original_input.to_owned(),
ctx.options
.recursion_limit
.expect("recursion limit should exists when exceeded"),
));
}
Err(InternalParseError::NomError(err)) => panic!(
"unexpected parser error; raw nom errors should never be produced; this is a bug\ncontext:\n{err}"
),
Expand Down Expand Up @@ -401,6 +409,10 @@ fn logical_expr<'q>(q: &'q str, ctx: ParseCtx) -> IResult<&'q str, LogicalExpr,
Or,
}

let Some(ctx) = ctx.increase_nesting() else {
return Err(Err::Failure(InternalParseError::RecursionLimitExceeded));
};

let (rest, this_expr) = ignore_whitespace(|q| parse_single(q, ctx))(q)?;
let mut loop_rest = skip_whitespace(rest);
let mut final_expr = this_expr;
Expand Down Expand Up @@ -557,15 +569,6 @@ impl FilterQuery {
}

fn filter_query<'q>(q: &'q str, ctx: ParseCtx) -> IResult<&'q str, FilterQuery, InternalParseError<'q>> {
let Some(ctx) = ctx.increase_nesting() else {
return fail(
SyntaxErrorKind::QueryNestingLimitExceeded(ctx.options.recursion_limit.expect("limit exists if exceeded")),
q.len(),
q.len(),
"",
);
};

let (rest, root_type) = alt((
value(RootSelectorType::Absolute, char('$')),
value(RootSelectorType::Relative, char('@')),
Expand Down Expand Up @@ -600,6 +603,9 @@ fn filter_query<'q>(q: &'q str, ctx: ParseCtx) -> IResult<&'q str, FilterQuery,
syntax_errors.append(&mut errs);
rest
}
Err(InternalParseError::RecursionLimitExceeded) => {
return Err(Err::Failure(InternalParseError::RecursionLimitExceeded));
}
Err(InternalParseError::NomError(err)) => panic!(
"unexpected parser error; raw nom errors should never be produced; this is a bug\ncontext:\n{err}"
),
Expand Down
10 changes: 5 additions & 5 deletions fuzz/fuzz_targets/query_fuzz_round_trip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ use libfuzzer_sys::{fuzz_target, Corpus};
use rsonpath_syntax::JsonPathQuery;

fuzz_target!(|data: JsonPathQuery| -> Corpus {
if data.nesting_level() > rsonpath_syntax::Parser::RECURSION_LIMIT_DEFAULT {
return Corpus::Reject;
}
let str = data.to_string();
let query = rsonpath_syntax::parse(&str).expect("should parse");
assert_eq!(data, query, "query string: {str}");
match rsonpath_syntax::parse(&str) {
Ok(query) => assert_eq!(data, query, "query string: {str}"),
Err(err) if err.is_nesting_limit_exceeded() => return Corpus::Reject,
Err(_) => panic!("expected parse to succeed"),
}
Corpus::Keep
});

0 comments on commit cc16ac5

Please sign in to comment.