From 0b9aa8007bd299eced19477cd01b782adc3a00ba Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Tue, 7 Jan 2025 13:37:08 -0500 Subject: [PATCH] cleanup --- src/lazy/encoding.rs | 4 ++ src/lazy/text/buffer.rs | 88 +++-------------------------------------- 2 files changed, 9 insertions(+), 83 deletions(-) diff --git a/src/lazy/encoding.rs b/src/lazy/encoding.rs index ae6e6232..3d9c66ba 100644 --- a/src/lazy/encoding.rs +++ b/src/lazy/encoding.rs @@ -257,8 +257,10 @@ pub trait TextEncoding<'top>: encoded_text_value: EncodedTextValue<'top, Self>, ) -> Self::Value<'top>; + /// Matches an expression that appears in value position. fn value_expr_matcher() -> impl IonParser<'top, LazyRawValueExpr<'top, Self>>; + /// Matches an expression that appears in struct field position. Does NOT match trailing commas. fn field_expr_matcher() -> impl IonParser<'top, LazyRawFieldExpr<'top, Self>>; fn list_matcher() -> impl IonParser<'top, EncodedTextValue<'top, Self>> { @@ -282,6 +284,8 @@ pub trait TextEncoding<'top>: .map(|nested_expr_cache| EncodedTextValue::new(MatchedValue::Struct(nested_expr_cache))) } + /// Logic common to parsing all container types. + /// Caches all subexpressions in the bump allocator for future reference. fn container_matcher( label: &'static str, mut opening_token: &str, diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index 91351f44..403f5525 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -1,4 +1,3 @@ -#![deny(dead_code)] use std::fmt::{Debug, Formatter}; use std::ops::Range; use std::str::FromStr; @@ -385,31 +384,6 @@ impl<'top> TextBuffer<'top> { .parse_next(self) } - /// Matches an optional annotations sequence and a value, including operators. - pub fn match_sexp_item(&mut self) -> IonParseResult<'top, Option>> { - let (maybe_sexp_value, matched_input) = whitespace_and_then(alt(( - ")".value(None), - ( - opt(Self::match_annotations), - // We need the s-expression parser to recognize the input `--3` as the operator `--` and the - // int `3` while recognizing the input `-3` as the int `-3`. If `match_operator` runs before - // `match_value`, it will consume the sign (`-`) of negative number values, treating - // `-3` as an operator (`-`) and an int (`3`). Thus, we run `match_value` first. - whitespace_and_then(alt((Self::match_value::, Self::match_operator))), - ) - .map(Some), - ))) - .with_taken() - .parse_next(self)?; - - let Some((maybe_annotations, value)) = maybe_sexp_value else { - return Ok(None); - }; - Ok(Some( - matched_input.apply_annotations::(maybe_annotations, value), - )) - } - /// Matches either: /// * A macro invocation /// * An optional annotations sequence and a value @@ -598,63 +572,6 @@ impl<'top> TextBuffer<'top> { .parse_next(self) } - /// Matches a single value in a list OR the end of the list, allowing for leading whitespace - /// and comments in either case. - /// - /// If a value is found, returns `Ok(Some(value))`. If the end of the list is found, returns - /// `Ok(None)`. - pub fn match_list_item(&mut self) -> IonParseResult<'top, Option>> { - preceded( - // Some amount of whitespace/comments... - Self::match_optional_comments_and_whitespace, - // ...followed by either the end of the list... - alt(( - "]".value(None), - // ...or a value... - terminated( - Self::match_annotated_value::.map(Some), - // ...followed by a comma or end-of-list - Self::match_delimiter_after_list_value, - ), - )), - ) - .parse_next(self) - } - - /// Matches either: - /// * An e-expression (i.e. macro invocation) - /// * An optional annotations sequence and a value - pub fn match_list_item_1_1( - &mut self, - ) -> IonParseResult<'top, Option>> { - whitespace_and_then(alt(( - terminated( - Self::match_e_expression, - Self::match_delimiter_after_list_value, - ) - .map(|matched| Some(RawValueExpr::EExp(matched))), - "]".value(None), - // .map(|maybe_matched| maybe_matched.map(RawValueExpr::ValueLiteral)), - terminated( - Self::match_annotated_value::.map(Some), - // ...followed by a comma or end-of-list - Self::match_delimiter_after_list_value, - ) - .map(|maybe_matched| maybe_matched.map(RawValueExpr::ValueLiteral)), - ))) - .parse_next(self) - } - - /// Matches syntax that is expected to follow a value in a list: any amount of whitespace and/or - /// comments followed by either a comma (consumed) or an end-of-list `]` (not consumed). - fn match_delimiter_after_list_value(&mut self) -> IonMatchResult<'top> { - preceded( - Self::match_optional_comments_and_whitespace, - alt((",", peek("]"))), - ) - .parse_next(self) - } - pub fn match_e_expression_arg_group( &mut self, parameter: &'top Parameter, @@ -902,6 +819,8 @@ impl<'top> TextBuffer<'top> { } } + + pub fn match_empty_arg_group( &mut self, parameter: &'top Parameter, @@ -1733,6 +1652,9 @@ impl<'top> TextBuffer<'top> { pub fn full_match_timestamp<'t>( input: &mut TextBuffer<'t>, ) -> IonParseResult<'t, MatchedTimestamp> { + // TODO: As-is, matching common timestamps (those with greater than second precision) + // is slow because the parser tries each shorter arrangement in turn. We should + // rewrite this to use a single path that can accept any precision. alt(( TextBuffer::match_timestamp_y, TextBuffer::match_timestamp_ym,