From 66a36834f664e85bb07e4fdc05970653eae6ffe0 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl <49341624+mhatzl@users.noreply.github.com> Date: Sun, 19 Nov 2023 18:08:34 +0100 Subject: [PATCH 1/4] fix: fix spans for ambiguous tokens --- commons/src/lexer/position.rs | 22 +++++++++++++++++++--- inline/src/element/formatting/ambiguous.rs | 6 +++--- inline/src/lib.rs | 4 +++- inline/src/parser.rs | 12 ------------ inline/src/tokenize/iterator.rs | 4 ---- parser/src/parser.rs | 12 ------------ 6 files changed, 25 insertions(+), 35 deletions(-) diff --git a/commons/src/lexer/position.rs b/commons/src/lexer/position.rs index 995d8077..5c66e3d6 100644 --- a/commons/src/lexer/position.rs +++ b/commons/src/lexer/position.rs @@ -90,6 +90,14 @@ impl AddAssign> for Position { } } +impl AddAssign for Position { + fn add_assign(&mut self, rhs: usize) { + self.col_utf8 += rhs; + self.col_utf16 += rhs; + self.col_grapheme += rhs; + } +} + impl Add for Position where Position: AddAssign, @@ -122,13 +130,21 @@ impl SubAssign for Position { impl SubAssign> for Position { fn sub_assign(&mut self, rhs: Option) { if let Some(rhs) = rhs { - self.col_utf8 += rhs.len_utf8; - self.col_utf16 += rhs.len_utf16; - self.col_grapheme += rhs.len_grapheme; + self.col_utf8 -= rhs.len_utf8; + self.col_utf16 -= rhs.len_utf16; + self.col_grapheme -= rhs.len_grapheme; } } } +impl SubAssign for Position { + fn sub_assign(&mut self, rhs: usize) { + self.col_utf8 -= rhs; + self.col_utf16 -= rhs; + self.col_grapheme -= rhs; + } +} + impl Sub for Position where Position: SubAssign, diff --git a/inline/src/element/formatting/ambiguous.rs b/inline/src/element/formatting/ambiguous.rs index f7fb0bc3..91257a33 100644 --- a/inline/src/element/formatting/ambiguous.rs +++ b/inline/src/element/formatting/ambiguous.rs @@ -144,7 +144,7 @@ fn resolve_closing<'slice, 'input>( close_token.kind, inner, None, // check for optional attributes here - open_token.start, + open_token.start + counterpart(close_token.kind).len(), // Because inner token gets closed close_token.end, false, )); @@ -271,12 +271,12 @@ fn to_inline<'input>( inner, None, inner_token.start, - inner_token.end, + end - outer_token.kind.len(), // Because the outer token is at "end" implicit_end, )], attributes, outer_token.start, - outer_token.end, + end, implicit_end, ) } else { diff --git a/inline/src/lib.rs b/inline/src/lib.rs index 5daa4e73..01149547 100644 --- a/inline/src/lib.rs +++ b/inline/src/lib.rs @@ -1,6 +1,8 @@ //! Crate for parsing Unimarkup inline elements. pub mod element; +pub mod parser; + mod tokenize; -pub mod parser; +pub use tokenize::kind::InlineTokenKind; diff --git a/inline/src/parser.rs b/inline/src/parser.rs index ea01b7a1..23c665d1 100644 --- a/inline/src/parser.rs +++ b/inline/src/parser.rs @@ -99,9 +99,6 @@ impl<'slice, 'input> InlineParser<'slice, 'input> { let mut inlines = Vec::default(); let mut format_closes = false; - #[cfg(debug_assertions)] - let mut curr_len = parser.iter.max_len(); - parser.iter.reset_peek(); 'outer: while let Some(kind) = parser.iter.peek_kind() { @@ -154,15 +151,6 @@ impl<'slice, 'input> InlineParser<'slice, 'input> { parser = updated_parser; inlines = updated_inlines; - - #[cfg(debug_assertions)] - { - assert!( - parser.iter.max_len() < curr_len, - "Parser consumed no token in iteration." 
- ); - curr_len = parser.iter.max_len(); - } } if !format_closes { diff --git a/inline/src/tokenize/iterator.rs b/inline/src/tokenize/iterator.rs index 500cac91..ab7a2bb7 100644 --- a/inline/src/tokenize/iterator.rs +++ b/inline/src/tokenize/iterator.rs @@ -51,10 +51,6 @@ impl<'slice, 'input> From> for TokenIterator } impl<'slice, 'input> InlineTokenIterator<'slice, 'input> { - pub fn max_len(&self) -> usize { - self.token_iter.max_len() - } - /// Resets peek to get `peek() == next()`. /// /// **Note:** Needed to reset peek index after using `peeking_next()`. diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 0bbb14ba..54c7d6cd 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -81,9 +81,6 @@ impl<'slice, 'input> BlockParser<'slice, 'input> { pub fn parse(mut parser: Self) -> (Self, Blocks) { let mut blocks = Vec::default(); - #[cfg(debug_assertions)] - let mut curr_len = parser.iter.max_len(); - parser.iter.reset_peek(); 'outer: while let Some(kind) = parser.iter.peek_kind() { @@ -158,15 +155,6 @@ impl<'slice, 'input> BlockParser<'slice, 'input> { } } } - - #[cfg(debug_assertions)] - { - assert!( - parser.iter.max_len() < curr_len, - "Parser consumed no token in iteration." - ); - curr_len = parser.iter.max_len(); - } } // To consume tokens in end matching of peek_kind(), or consume EOI From e7ffc75cff0e44b79a86b2da70d24dea5fd31f46 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl <49341624+mhatzl@users.noreply.github.com> Date: Sun, 19 Nov 2023 19:22:44 +0100 Subject: [PATCH 2/4] fix: reset peeked_cache on cache update --- inline/Cargo.toml | 1 + inline/src/element/formatting/ambiguous.rs | 4 +- inline/src/parser.rs | 6 +-- inline/src/tokenize/iterator.rs | 35 +++++++++++++--- inline/tests/mod.rs | 6 --- inline/tests/parser/mod.rs | 41 ++++++++++++++++--- inline/tests/spec/markup/bold.yml | 12 +++++- .../parser/bold/ambiguous-close.snap | 27 ++++++++++++ .../parser/bold/ambiguous-start.snap | 2 +- 9 files changed, 110 insertions(+), 24 deletions(-) create mode 100644 inline/tests/spec/snapshots/parser/bold/ambiguous-close.snap diff --git a/inline/Cargo.toml b/inline/Cargo.toml index c9f5aba9..3ef83b21 100644 --- a/inline/Cargo.toml +++ b/inline/Cargo.toml @@ -22,6 +22,7 @@ unimarkup-commons = { path = "../commons/", version = "0" } [dev-dependencies] unimarkup-commons = { path ="../commons/", version = "0", features = ["test_runner"] } +unimarkup-core = { path = "../core/", version = "0" } serde.workspace = true serde_yaml.workspace = true libtest-mimic = "0.6.1" diff --git a/inline/src/element/formatting/ambiguous.rs b/inline/src/element/formatting/ambiguous.rs index 91257a33..e67620d1 100644 --- a/inline/src/element/formatting/ambiguous.rs +++ b/inline/src/element/formatting/ambiguous.rs @@ -320,7 +320,7 @@ fn main_part(kind: InlineTokenKind) -> InlineTokenKind { InlineTokenKind::Italic | InlineTokenKind::BoldItalic => InlineTokenKind::Bold, InlineTokenKind::Underline => kind, InlineTokenKind::Subscript | InlineTokenKind::UnderlineSubscript => { - InlineTokenKind::Underline + InlineTokenKind::Subscript } _ => kind, } @@ -334,7 +334,7 @@ fn sub_part(kind: InlineTokenKind) -> InlineTokenKind { InlineTokenKind::Bold | InlineTokenKind::BoldItalic => InlineTokenKind::Italic, InlineTokenKind::Subscript => kind, InlineTokenKind::Underline | InlineTokenKind::UnderlineSubscript => { - InlineTokenKind::Subscript + InlineTokenKind::Underline } _ => kind, } diff --git a/inline/src/parser.rs b/inline/src/parser.rs index 23c665d1..742468fe 100644 --- a/inline/src/parser.rs +++ 
b/inline/src/parser.rs @@ -139,8 +139,8 @@ impl<'slice, 'input> InlineParser<'slice, 'input> { let success = parser.iter.rollback(checkpoint); debug_assert!( success, - "Rollback was not successful for checkpoint '{:?}'", - checkpoint + "Inline rollback was not successful at '{:?}'", + parser.iter.peek() ) } } @@ -218,7 +218,7 @@ mod test { #[test] fn dummy_for_debugging() { - let tokens = unimarkup_commons::lexer::token::lex_str("`a`"); + let tokens = unimarkup_commons::lexer::token::lex_str("**bold *+italic*** plain"); let mut inline_parser = InlineParser { iter: InlineTokenIterator::from(TokenIterator::from(&*tokens)), context: InlineContext::default(), diff --git a/inline/src/tokenize/iterator.rs b/inline/src/tokenize/iterator.rs index ab7a2bb7..3d2d57a5 100644 --- a/inline/src/tokenize/iterator.rs +++ b/inline/src/tokenize/iterator.rs @@ -95,7 +95,8 @@ impl<'slice, 'input> InlineTokenIterator<'slice, 'input> { } pub(crate) fn cache_token(&mut self, token: InlineToken<'input>) { - self.cached_token = Some(token) + self.cached_token = Some(token); + self.peeked_cache = false; } /// Marks the given format as being open. @@ -180,13 +181,21 @@ impl<'slice, 'input> InlineTokenIterator<'slice, 'input> { /// Creates a checkpoint of the current position in the uderlying [`TokenIterator`]. /// This may be used to `rollback()` to this checkoint at a later point. - pub fn checkpoint(&self) -> Checkpoint<'slice, 'input> { - self.token_iter.checkpoint() + pub fn checkpoint(&self) -> InlineCheckpoint<'slice, 'input> { + InlineCheckpoint { + iter_checkpoint: self.token_iter.checkpoint(), + cached_token: self.cached_token, + updated_prev: self.updated_prev, + peeked_cache: self.peeked_cache, + } } /// Rolls back the iterator to the given checkpoint. - pub fn rollback(&mut self, checkpoint: Checkpoint<'slice, 'input>) -> bool { - self.token_iter.rollback(checkpoint) + pub fn rollback(&mut self, checkpoint: InlineCheckpoint<'slice, 'input>) -> bool { + self.cached_token = checkpoint.cached_token; + self.updated_prev = checkpoint.updated_prev; + self.peeked_cache = checkpoint.peeked_cache; + self.token_iter.rollback(checkpoint.iter_checkpoint) } /// Skip all tokens until the main index is aligned with the current peek index. @@ -273,3 +282,19 @@ impl<'slice, 'input> PeekingNext for InlineTokenIterator<'slice, 'input> { } } } + +/// Inline checkpoint to rollback the iterator. +/// +/// **Note:** The checkpoint does not include the open formats map. +/// Element parsers must ensure that the open format map remains unchanged if an element could not be parsed. +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct InlineCheckpoint<'slice, 'input> { + /// Checkpoint of the underlying [`TokenIterator`]. + iter_checkpoint: Checkpoint<'slice, 'input>, + /// Optional cached token used for splitting ambiguous tokens. + cached_token: Option>, + /// Optional token in case the previously returned token was changed after being returned by the iterator. + updated_prev: Option>, + /// Flag to mark if the cached token was viewed when peeking the next token. 
+ peeked_cache: bool, +} diff --git a/inline/tests/mod.rs b/inline/tests/mod.rs index 935c495d..9364e615 100644 --- a/inline/tests/mod.rs +++ b/inline/tests/mod.rs @@ -1,19 +1,13 @@ -// mod lexer; mod parser; mod snapshot; -// use lexer::test_lexer_snapshots; use libtest_mimic::Arguments; use parser::test_parser_snapshots; pub(crate) use snapshot::*; fn main() { let args = Arguments::from_args(); - // let lexer_tests = test_lexer_snapshots(); let parser_tests = test_parser_snapshots(); - // let mut tests = lexer_tests; - // tests.append(&mut parser_tests); - libtest_mimic::run(&args, parser_tests).exit(); } diff --git a/inline/tests/parser/mod.rs b/inline/tests/parser/mod.rs index 1e3ef068..18238270 100644 --- a/inline/tests/parser/mod.rs +++ b/inline/tests/parser/mod.rs @@ -2,7 +2,7 @@ use std::panic; use crate::snapshot::Snapshot; use libtest_mimic::Trial; -use unimarkup_commons::test_runner::{self, snap_test_runner::SnapTestRunner}; +use unimarkup_commons::test_runner::{self, snap_test_runner::SnapTestRunner, test_file}; use unimarkup_inline::parser::InlineContext; mod snapshot; @@ -17,10 +17,12 @@ pub fn test_parser_snapshots() -> Vec { let mut test_runs = Vec::with_capacity(test_cases.len()); for case in test_cases { - let test_name = format!("{}::{}", module_path!(), case.test.name.as_str()); + let spec_test_name = format!("{}::spec::{}", module_path!(), case.test.name.as_str()); + let snap_test_name = format!("{}::snap::{}", module_path!(), case.test.name.as_str()); - let test_run = move || { - panic::catch_unwind(|| run_test_case(case)).map_err(|err| { + let cloned_case = case.clone(); + let spec_test_run = move || { + std::panic::catch_unwind(|| run_spec_test(cloned_case)).map_err(|err| { let panic_msg = err .downcast_ref::<&str>() .unwrap_or(&"Panic message not available"); @@ -29,13 +31,40 @@ pub fn test_parser_snapshots() -> Vec { }) }; - test_runs.push(Trial::test(test_name, test_run)); + let snap_test_run = move || { + panic::catch_unwind(|| run_snap_test(case)).map_err(|err| { + let panic_msg = err + .downcast_ref::<&str>() + .unwrap_or(&"Panic message not available"); + + format!("Test case panicked: {}", panic_msg).into() + }) + }; + + test_runs.push(Trial::test(spec_test_name, spec_test_run)); + test_runs.push(Trial::test(snap_test_name, snap_test_run)); } test_runs } -fn run_test_case(case: test_runner::test_file::TestCase) { +fn run_spec_test(case: test_runner::test_file::TestCase) { + test_runner::spec_test::assert_um_spec( + &case.file_name, + &case.test, + unimarkup_commons::config::Config::default(), + |test: &test_file::Test, cfg| { + let input = test.input.trim_end(); + let um = unimarkup_core::Unimarkup::parse(input, cfg); + test_file::TestOutputs { + html: Some(um.render_html().unwrap().to_string()), + um: Some(test.input.clone()), + } + }, + ); +} + +fn run_snap_test(case: test_runner::test_file::TestCase) { let tokens = unimarkup_commons::lexer::token::lex_str(&case.test.input); let runner = SnapTestRunner::with_fn(&case.test.name, &tokens, |slice| { diff --git a/inline/tests/spec/markup/bold.yml b/inline/tests/spec/markup/bold.yml index fbaea1ce..a8b8e90c 100644 --- a/inline/tests/spec/markup/bold.yml +++ b/inline/tests/spec/markup/bold.yml @@ -83,7 +83,7 @@ tests: The next ***word** is bolditalic. html: | - The next word is bold. + The next word is bolditalic. - name: ambiguous-end description: | @@ -94,3 +94,13 @@ tests: html: | The next word* is bold. + + - name: ambiguous-close + description: | + BoldItalic that's closed with an ambiguous token. 
+ + input: | + **bold *+italic*** plain + + html: | + bold +italic plain diff --git a/inline/tests/spec/snapshots/parser/bold/ambiguous-close.snap b/inline/tests/spec/snapshots/parser/bold/ambiguous-close.snap new file mode 100644 index 00000000..7d98eeb8 --- /dev/null +++ b/inline/tests/spec/snapshots/parser/bold/ambiguous-close.snap @@ -0,0 +1,27 @@ +--- +source: inline/tests/parser/mod.rs +info: "Test 'ambiguous-close' from 'markup\\bold.yml'" +--- +Bold @ (1:1)->(1:19) ( + Plain @ (1:3)->(1:8) ( + bold + ^^^^^ + ) + Italic @ (1:8)->(1:17) ( + Plain @ (1:9)->(1:16) ( + +italic + ^^^^^^^ + ) + ) +) +Plain @ (1:19)->(1:25) ( + plain + ^^^^^^ +) + +--- +With input: + +**bold *+italic*** plain + + diff --git a/inline/tests/spec/snapshots/parser/bold/ambiguous-start.snap b/inline/tests/spec/snapshots/parser/bold/ambiguous-start.snap index b6de5705..3c813e71 100644 --- a/inline/tests/spec/snapshots/parser/bold/ambiguous-start.snap +++ b/inline/tests/spec/snapshots/parser/bold/ambiguous-start.snap @@ -7,7 +7,7 @@ Plain @ (1:1)->(1:10) ( ^^^^^^^^^ ) Italic @ (1:10)->(1:34) ( - Bold @ (1:10)->(1:19) ( + Bold @ (1:11)->(1:19) ( Plain @ (1:13)->(1:17) ( word ^^^^ From b5f16ae6ef202c2c6e76bcf6da06c6657f826cdf Mon Sep 17 00:00:00 2001 From: Manuel Hatzl <49341624+mhatzl@users.noreply.github.com> Date: Sun, 19 Nov 2023 20:26:49 +0100 Subject: [PATCH 3/4] feat: add textbox & hyperlink rendering --- inline/src/element/textbox/mod.rs | 1 + inline/src/parser.rs | 2 +- inline/tests/spec/markup/textbox.yml | 16 ++++++ .../parser/textbox/simple-textbox.snap | 17 ++++++ render/src/html/render.rs | 55 ++++++++++++++++++- render/src/html/tag.rs | 2 + render/src/render.rs | 31 +++++++++-- 7 files changed, 116 insertions(+), 8 deletions(-) create mode 100644 inline/tests/spec/markup/textbox.yml create mode 100644 inline/tests/spec/snapshots/parser/textbox/simple-textbox.snap diff --git a/inline/src/element/textbox/mod.rs b/inline/src/element/textbox/mod.rs index 3710ea7d..a5ccc6d8 100644 --- a/inline/src/element/textbox/mod.rs +++ b/inline/src/element/textbox/mod.rs @@ -87,6 +87,7 @@ pub(crate) fn parse<'slice, 'input>( } None => { scoped_parser.iter.rollback(checkpoint); + scoped_parser.iter.next(); // Consume open bracket } } diff --git a/inline/src/parser.rs b/inline/src/parser.rs index 742468fe..5f188f61 100644 --- a/inline/src/parser.rs +++ b/inline/src/parser.rs @@ -218,7 +218,7 @@ mod test { #[test] fn dummy_for_debugging() { - let tokens = unimarkup_commons::lexer::token::lex_str("**bold *+italic*** plain"); + let tokens = unimarkup_commons::lexer::token::lex_str("[Simple textbox]"); let mut inline_parser = InlineParser { iter: InlineTokenIterator::from(TokenIterator::from(&*tokens)), context: InlineContext::default(), diff --git a/inline/tests/spec/markup/textbox.yml b/inline/tests/spec/markup/textbox.yml new file mode 100644 index 00000000..af01a400 --- /dev/null +++ b/inline/tests/spec/markup/textbox.yml @@ -0,0 +1,16 @@ +# Unimarkup specification version +spec: "0.0.1" + +name: textbox +description: Contains tests for the textbox element. + +tests: + - name: simple-textbox + description: | + Parse a simple textbox. 
+ + input: | + [Simple textbox] + + html: | + Simple textbox diff --git a/inline/tests/spec/snapshots/parser/textbox/simple-textbox.snap b/inline/tests/spec/snapshots/parser/textbox/simple-textbox.snap new file mode 100644 index 00000000..4a308774 --- /dev/null +++ b/inline/tests/spec/snapshots/parser/textbox/simple-textbox.snap @@ -0,0 +1,17 @@ +--- +source: inline/tests/parser/mod.rs +info: "Test 'simple-textbox' from 'markup\\textbox.yml'" +--- +TextBox @ (1:1)->(1:17) ( + Plain @ (1:2)->(1:16) ( + Simple textbox + ^^^^^^^^^^^^^^ + ) +) + +--- +With input: + +[Simple textbox] + + diff --git a/render/src/html/render.rs b/render/src/html/render.rs index bbdb4626..2bad88d7 100644 --- a/render/src/html/render.rs +++ b/render/src/html/render.rs @@ -2,9 +2,10 @@ use unimarkup_commons::lexer::{span::Span, symbol::SymbolKind, token::TokenKind} use unimarkup_inline::element::{ base::{EscapedNewline, EscapedPlain, EscapedWhitespace, Newline, Plain}, formatting::{ - Bold, Highlight, Italic, Overline, Quote, Strikethrough, Subscript, Superscript, Underline, - Verbatim, + Bold, Highlight, Italic, Math, Overline, Quote, Strikethrough, Subscript, Superscript, + Underline, Verbatim, }, + textbox::{hyperlink::Hyperlink, TextBox}, InlineElement, }; use unimarkup_parser::elements::indents::{BulletList, BulletListEntry}; @@ -128,6 +129,41 @@ impl Renderer for HtmlRenderer { Ok(html) } + fn render_textbox( + &mut self, + textbox: &TextBox, + context: &Context, + ) -> Result { + let inner = self.render_nested_inline(textbox.inner(), context)?; + + Ok(Html::nested( + HtmlTag::Span, + HtmlAttributes::default(), + inner, + )) + } + + fn render_hyperlink( + &mut self, + hyperlink: &Hyperlink, + context: &Context, + ) -> Result { + let inner = self.render_nested_inline(hyperlink.inner(), context)?; + let mut attributes = vec![HtmlAttribute { + name: "href".to_string(), + value: Some(hyperlink.link().to_string()), + }]; + + if let Some(link_text) = hyperlink.link_text() { + attributes.push(HtmlAttribute { + name: "title".to_string(), + value: Some(link_text.to_string()), + }) + } + + Ok(Html::nested(HtmlTag::A, HtmlAttributes(attributes), inner)) + } + fn render_bold( &mut self, bold: &Bold, @@ -255,6 +291,21 @@ impl Renderer for HtmlRenderer { Ok(html) } + fn render_inline_math( + &mut self, + math: &Math, + context: &Context, + ) -> Result { + // TODO: use proper math rendering once supported + let inner = self.render_nested_inline(math.inner(), context)?; + + Ok(Html::nested( + HtmlTag::Span, + HtmlAttributes::default(), + inner, + )) + } + fn render_plain( &mut self, plain: &Plain, diff --git a/render/src/html/tag.rs b/render/src/html/tag.rs index 69b1ad8a..308a4a25 100644 --- a/render/src/html/tag.rs +++ b/render/src/html/tag.rs @@ -29,6 +29,7 @@ pub enum HtmlTag { Br, Ul, Li, + A, } impl HtmlTag { @@ -57,6 +58,7 @@ impl HtmlTag { HtmlTag::Br => "br", HtmlTag::Ul => "ul", HtmlTag::Li => "li", + HtmlTag::A => "a", } } } diff --git a/render/src/render.rs b/render/src/render.rs index 14184bce..7368f662 100644 --- a/render/src/render.rs +++ b/render/src/render.rs @@ -4,9 +4,10 @@ use unimarkup_commons::{config::icu_locid::Locale, lexer::span::Span}; use unimarkup_inline::element::{ base::{EscapedNewline, EscapedPlain, EscapedWhitespace, Newline, Plain}, formatting::{ - Bold, Highlight, Italic, Overline, Quote, Strikethrough, Subscript, Superscript, Underline, - Verbatim, + Bold, Highlight, Italic, Math, Overline, Quote, Strikethrough, Subscript, Superscript, + Underline, Verbatim, }, + 
textbox::{hyperlink::Hyperlink, TextBox}, Inline, }; use unimarkup_parser::{ @@ -107,6 +108,20 @@ pub trait Renderer { //--------------------------------- INLINES --------------------------------- + /// Render a [`TextBox`] to the output format `T`. + fn render_textbox(&mut self, _textbox: &TextBox, _context: &Context) -> Result { + Err(RenderError::Unimplemented) + } + + /// Render a [`Hyperlink`] to the output format `T`. + fn render_hyperlink( + &mut self, + _hyperlink: &Hyperlink, + _context: &Context, + ) -> Result { + Err(RenderError::Unimplemented) + } + /// Render a [`Bold` formatting](unimarkup_inline::inlines::Inline) to the output format `T`. fn render_bold(&mut self, _bold: &Bold, _context: &Context) -> Result { Err(RenderError::Unimplemented) @@ -185,6 +200,11 @@ pub trait Renderer { Err(RenderError::Unimplemented) } + /// Render a [`Math`] to the output format `T`. + fn render_inline_math(&mut self, _math: &Math, _context: &Context) -> Result { + Err(RenderError::Unimplemented) + } + /// Render [`Plain` content](unimarkup_inline::inlines::Inline) to the output format `T`. fn render_plain(&mut self, _plain: &Plain, _context: &Context) -> Result { Err(RenderError::Unimplemented) @@ -330,9 +350,10 @@ pub trait Renderer { Inline::ImplicitNewline(implicit_newline) => { self.render_implicit_newline(implicit_newline, context) } - Inline::Math(_) => todo!(), - Inline::TextBox(_) => todo!(), - Inline::Hyperlink(_) => todo!(), + Inline::Math(math) => self.render_inline_math(math, context), + Inline::TextBox(textbox) => self.render_textbox(textbox, context), + Inline::Hyperlink(hyperlink) => self.render_hyperlink(hyperlink, context), + Inline::NamedSubstitution(_) => todo!(), Inline::ImplicitSubstitution(_) => todo!(), Inline::DirectUri(_) => todo!(), From 5d7b4b8a6ba9bfb869cfb7bad1d99a88d937ccb0 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl <49341624+mhatzl@users.noreply.github.com> Date: Sun, 19 Nov 2023 23:24:07 +0100 Subject: [PATCH 4/4] feat: handle open formats without Vec --- inline/src/element/formatting/mod.rs | 19 +++++++- inline/src/element/formatting/scoped.rs | 4 +- inline/src/element/textbox/mod.rs | 20 ++++---- inline/src/parser.rs | 20 ++++---- inline/src/tokenize/iterator.rs | 64 +++++++++---------------- 5 files changed, 65 insertions(+), 62 deletions(-) diff --git a/inline/src/element/formatting/mod.rs b/inline/src/element/formatting/mod.rs index fa6bc79c..b0d63521 100644 --- a/inline/src/element/formatting/mod.rs +++ b/inline/src/element/formatting/mod.rs @@ -215,10 +215,25 @@ const STRIKETHROUGH_INDEX: usize = 5; const HIGHLIGHT_INDEX: usize = 6; const OVERLINE_INDEX: usize = 7; const QUOTE_INDEX: usize = 8; -pub(crate) const NR_OF_UNSCOPED_FORMATS: usize = 9; +const NR_OF_UNSCOPED_FORMATS: usize = 9; /// Type used to keep track of open formats that do not open their own scope. -pub(crate) type OpenFormatMap = [bool; NR_OF_UNSCOPED_FORMATS]; +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +pub(crate) struct OpenFormatMap([bool; NR_OF_UNSCOPED_FORMATS]); + +impl OpenFormatMap { + pub(crate) fn is_open(&self, index: usize) -> bool { + self.0[index] + } + + pub(crate) fn open(&mut self, index: usize) { + self.0[index] = true; + } + + pub(crate) fn close(&mut self, index: usize) { + self.0[index] = false; + } +} /// Returns the index in the open format map for the given unscoped format. 
pub(crate) fn map_index(kind: &InlineTokenKind) -> usize { diff --git a/inline/src/element/formatting/scoped.rs b/inline/src/element/formatting/scoped.rs index e4f78fde..6103b3ed 100644 --- a/inline/src/element/formatting/scoped.rs +++ b/inline/src/element/formatting/scoped.rs @@ -28,7 +28,7 @@ macro_rules! scoped_parser { // ignore implicits, because only escapes and logic elements are allowed in following inline verbatim let prev_context_flags = parser.context.flags; - let mut scoped_parser = + let (mut scoped_parser, outer_open_formats) = parser.nest_scoped(Some(Rc::new(|matcher: &mut dyn EndMatcher| { !matcher.prev_is_space() && matcher.consumed_matches(&[InlineTokenKind::$kind.into()]) @@ -41,7 +41,7 @@ macro_rules! scoped_parser { scoped_parser = updated_parser; let end_reached = scoped_parser.iter.end_reached(); - parser = scoped_parser.unfold(); + parser = scoped_parser.unfold(outer_open_formats); parser.context.flags = prev_context_flags; let prev_token = parser.iter.prev_token().expect( diff --git a/inline/src/element/textbox/mod.rs b/inline/src/element/textbox/mod.rs index a5ccc6d8..f9fa9373 100644 --- a/inline/src/element/textbox/mod.rs +++ b/inline/src/element/textbox/mod.rs @@ -73,9 +73,10 @@ pub(crate) fn parse<'slice, 'input>( open_token.kind ); - let mut scoped_parser = parser.nest_scoped(Some(Rc::new(|matcher: &mut dyn EndMatcher| { - matcher.consumed_matches(&[TokenKind::CloseBracket]) - }))); + let (mut scoped_parser, outer_open_formats) = + parser.nest_scoped(Some(Rc::new(|matcher: &mut dyn EndMatcher| { + matcher.consumed_matches(&[TokenKind::CloseBracket]) + }))); let checkpoint = scoped_parser.iter.checkpoint(); let (updated_parser, box_variant_opt) = parse_box_variant(scoped_parser); @@ -83,7 +84,7 @@ pub(crate) fn parse<'slice, 'input>( match box_variant_opt { Some(box_variant) => { - return (scoped_parser.unfold(), Some(box_variant)); + return (scoped_parser.unfold(outer_open_formats), Some(box_variant)); } None => { scoped_parser.iter.rollback(checkpoint); @@ -105,7 +106,7 @@ pub(crate) fn parse<'slice, 'input>( .expect("Inlines in textbox => previous token must exist.") }; let end_reached = scoped_parser.iter.end_reached(); - parser = scoped_parser.unfold(); + parser = scoped_parser.unfold(outer_open_formats); // check for `()` if end_reached && parser.iter.peek_kind() == Some(InlineTokenKind::OpenParenthesis) { @@ -113,9 +114,10 @@ pub(crate) fn parse<'slice, 'input>( .iter .next() .expect("Peeked before, so `next` must return Some."); // Consume open parenthesis - let mut link_parser = parser.nest_scoped(Some(Rc::new(|matcher: &mut dyn EndMatcher| { - matcher.consumed_matches(&[TokenKind::CloseParenthesis]) - }))); + let (mut link_parser, outer_open_formats) = + parser.nest_scoped(Some(Rc::new(|matcher: &mut dyn EndMatcher| { + matcher.consumed_matches(&[TokenKind::CloseParenthesis]) + }))); let link = link_parser .iter @@ -143,7 +145,7 @@ pub(crate) fn parse<'slice, 'input>( ) }; - parser = link_parser.unfold(); + parser = link_parser.unfold(outer_open_formats); return ( parser, diff --git a/inline/src/parser.rs b/inline/src/parser.rs index 5f188f61..da8b3a4a 100644 --- a/inline/src/parser.rs +++ b/inline/src/parser.rs @@ -3,7 +3,7 @@ use unimarkup_commons::lexer::token::iterator::{IteratorEndFn, IteratorPrefixFn, TokenIterator}; use crate::{ - element::Inline, + element::{formatting::OpenFormatMap, Inline}, tokenize::{iterator::InlineTokenIterator, kind::InlineTokenKind}, }; @@ -32,7 +32,7 @@ pub fn parse_inlines<'slice, 'input>( end_reached: 
updated_parser.iter.end_reached(), prefix_mismatch: updated_parser.iter.prefix_mismatch(), }; - inline_parser = updated_parser.unfold(); + inline_parser = updated_parser.unfold(OpenFormatMap::default()); ( inline_parser.iter.into(), @@ -162,15 +162,19 @@ impl<'slice, 'input> InlineParser<'slice, 'input> { } /// Create an inline parser that has this parser as parent. - pub fn nest_scoped(mut self, end_match: Option) -> Self { - self.iter = self.iter.nest_scoped(end_match); - self + /// Returns the nested parser, and the [`OpenFormatMap`] of the outer scope. + /// This [`OpenFormatMap`] must be used when calling `unfold()` to get correct inline formatting. + pub fn nest_scoped(mut self, end_match: Option) -> (Self, OpenFormatMap) { + let (scoped_iter, outer_open_formats) = self.iter.nest_scoped(end_match); + self.iter = scoped_iter; + + (self, outer_open_formats) } /// Returns the parent parser if this parser is nested. - /// Otherwise, self is returned unchanged. - pub fn unfold(mut self) -> Self { - self.iter = self.iter.unfold(); + /// Overrides the internal [`OpenFormatMap`] with the given one. + pub fn unfold(mut self, outer_open_formats: OpenFormatMap) -> Self { + self.iter = self.iter.unfold(outer_open_formats); self } } diff --git a/inline/src/tokenize/iterator.rs b/inline/src/tokenize/iterator.rs index 3d2d57a5..a70d007e 100644 --- a/inline/src/tokenize/iterator.rs +++ b/inline/src/tokenize/iterator.rs @@ -4,9 +4,7 @@ use unimarkup_commons::lexer::token::iterator::{ Checkpoint, IteratorEndFn, PeekingNext, TokenIterator, }; -use crate::element::formatting::{ - ambiguous::is_ambiguous, map_index, OpenFormatMap, NR_OF_UNSCOPED_FORMATS, -}; +use crate::element::formatting::{ambiguous::is_ambiguous, map_index, OpenFormatMap}; use super::{kind::InlineTokenKind, InlineToken}; @@ -28,8 +26,8 @@ pub(crate) struct InlineTokenIterator<'slice, 'input> { updated_prev: Option>, /// Flag to mark if the cached token was viewed when peeking the next token. peeked_cache: bool, - /// Flags for open formats per scope - open_formats: Vec, + /// Flags for open formats. + open_formats: OpenFormatMap, } impl<'slice, 'input> From> for InlineTokenIterator<'slice, 'input> { @@ -39,7 +37,7 @@ impl<'slice, 'input> From> for InlineTokenIterator cached_token: None, updated_prev: None, peeked_cache: false, - open_formats: vec![[false; NR_OF_UNSCOPED_FORMATS]], + open_formats: OpenFormatMap::default(), } } } @@ -101,16 +99,12 @@ impl<'slice, 'input> InlineTokenIterator<'slice, 'input> { /// Marks the given format as being open. pub(crate) fn open_format(&mut self, format: &InlineTokenKind) { - self.open_formats - .last_mut() - .expect("At least one open format map always exists.")[map_index(format)] = true; + self.open_formats.open(map_index(format)); } /// Removes the given format from the open format map. pub(crate) fn close_format(&mut self, format: &InlineTokenKind) { - self.open_formats - .last_mut() - .expect("At least one open format map always exists.")[map_index(format)] = false; + self.open_formats.close(map_index(format)); } /// Returns `true` if the given format would close given the current iterator state. 
@@ -129,31 +123,16 @@ impl<'slice, 'input> InlineTokenIterator<'slice, 'input> { let ambiguous_open = (format == InlineTokenKind::BoldItalic && (self .open_formats - .last() - .expect("At least one open format map always exists.") - [map_index(&InlineTokenKind::Italic)] - || self - .open_formats - .last() - .expect("At least one open format map always exists.") - [map_index(&InlineTokenKind::Bold)])) + .is_open(map_index(&InlineTokenKind::Italic)) + || self.open_formats.is_open(map_index(&InlineTokenKind::Bold)))) || (format == InlineTokenKind::UnderlineSubscript && (self .open_formats - .last() - .expect("At least one open format map always exists.") - [map_index(&InlineTokenKind::Underline)] + .is_open(map_index(&InlineTokenKind::Underline)) || self .open_formats - .last() - .expect("At least one open format map always exists.") - [map_index(&InlineTokenKind::Subscript)])); - ambiguous_open - || (!is_ambiguous(format) - && self - .open_formats - .last() - .expect("At least one open format map always exists.")[map_index(&format)]) + .is_open(map_index(&InlineTokenKind::Subscript)))); + ambiguous_open || (!is_ambiguous(format) && self.open_formats.is_open(map_index(&format))) } /// Nests this iterator, by creating a new iterator that has this iterator set as parent. @@ -162,19 +141,18 @@ impl<'slice, 'input> InlineTokenIterator<'slice, 'input> { /// # Arguments /// /// * `end_match` ... Optional matching function used to indicate the end of the created iterator - pub fn nest_scoped(mut self, end_match: Option) -> Self { + pub fn nest_scoped(mut self, end_match: Option) -> (Self, OpenFormatMap) { + let outer_open_formats = self.open_formats; + self.token_iter = self.token_iter.nest_scoped(None, end_match); - self.open_formats.push([false; NR_OF_UNSCOPED_FORMATS]); - self + self.open_formats = OpenFormatMap::default(); + + (self, outer_open_formats) } /// Returns the parent of this iterator if a parent exists, or leaves this iterator unchanged. - pub fn unfold(mut self) -> Self { - // Inline root is not scoped, so at least one open format map always remains - if self.token_iter.is_scoped() { - self.open_formats.pop(); - } - + pub fn unfold(mut self, outer_open_formats: OpenFormatMap) -> Self { + self.open_formats = outer_open_formats; self.token_iter = self.token_iter.into_inner(); self } @@ -187,6 +165,7 @@ impl<'slice, 'input> InlineTokenIterator<'slice, 'input> { cached_token: self.cached_token, updated_prev: self.updated_prev, peeked_cache: self.peeked_cache, + open_formats: self.open_formats, } } @@ -195,6 +174,7 @@ impl<'slice, 'input> InlineTokenIterator<'slice, 'input> { self.cached_token = checkpoint.cached_token; self.updated_prev = checkpoint.updated_prev; self.peeked_cache = checkpoint.peeked_cache; + self.open_formats = checkpoint.open_formats; self.token_iter.rollback(checkpoint.iter_checkpoint) } @@ -297,4 +277,6 @@ pub(crate) struct InlineCheckpoint<'slice, 'input> { updated_prev: Option>, /// Flag to mark if the cached token was viewed when peeking the next token. peeked_cache: bool, + /// Flags for open formats. + open_formats: OpenFormatMap, }
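
A minimal standalone sketch of the open-format bookkeeping that PATCH 4/4 introduces: `OpenFormatMap` replaces the per-scope `Vec` by copying the whole map into each `InlineCheckpoint` and handing the outer map back on `unfold()`. The struct below mirrors the type added in inline/src/element/formatting/mod.rs; the `BOLD_INDEX` value and the `main` walkthrough are illustrative assumptions, not part of the patches.

// Standalone sketch: one flag per unscoped format, mirroring the patched OpenFormatMap.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
struct OpenFormatMap([bool; 9]); // 9 == NR_OF_UNSCOPED_FORMATS in the patch

impl OpenFormatMap {
    fn is_open(&self, index: usize) -> bool {
        self.0[index]
    }
    fn open(&mut self, index: usize) {
        self.0[index] = true;
    }
    fn close(&mut self, index: usize) {
        self.0[index] = false;
    }
}

// Illustrative index only; the real crate defines its own *_INDEX constants and map_index().
const BOLD_INDEX: usize = 1;

fn main() {
    let mut open_formats = OpenFormatMap::default();

    // Opening a format sets its flag; a checkpoint now simply copies the map.
    open_formats.open(BOLD_INDEX);
    let checkpoint = open_formats;

    // Closing the format and rolling back to the checkpoint restores the flag,
    // analogous to what the reworked InlineCheckpoint does for the iterator state.
    open_formats.close(BOLD_INDEX);
    assert!(!open_formats.is_open(BOLD_INDEX));
    open_formats = checkpoint;
    assert!(open_formats.is_open(BOLD_INDEX));

    // nest_scoped() starts from a fresh map and returns the outer one, which
    // unfold() later restores, replacing the old per-scope Vec push/pop.
    let outer_open_formats = open_formats;
    open_formats = OpenFormatMap::default();
    assert!(!open_formats.is_open(BOLD_INDEX));
    open_formats = outer_open_formats;
    assert!(open_formats.is_open(BOLD_INDEX));
}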