Skip to content

Commit

Permalink
feat!: improve rendering with fallbacks during parsing (#14)
Browse files Browse the repository at this point in the history
In this PR we introduce fallbacks during parsing. This makes the parser more
consistent with the https://asciimath.org/ parser.

Whenever a symbol is encountered that can't be part of an expression
(like unary, binary or grouping expressions), we parse this symbol as
either `Var::UnknownOperator` for symbols that weren't recognized by the
lexer, or as `Var::Other` for symbols that were recognized but aren't
part of any expression. These are rendered as operators (`<mo>`).

Also, the parser now implicitly closes unterminated grouping expressions with
an ignored closing grouping token.

Unary and binary expressions now also get an empty operator as a default
expression when an operand is missing, instead of failing to parse until the
operand has been typed.
This makes it possible to get visual feedback when live rendering
during typing of expressions. For example, previously `sqrt(` would not
render anything, but `sqrt(x)` would render $\sqrt{x}$. By introducing
the changes above, the `sqrt(` now renders to $\sqrt{}$.

Changelog:

fix: simplify ungroup_map by direct conversion
fix: use more operator constructors
fix: correctly handle `norm` groupings
chore: update alemat to latest version
fix: handle dots as operators instead of idents
fix: use more pre-defined operator constructors
fix: correctly lex set minus operators
fix: improve handling of cdots etc
fix: simplify handling of groupings
fix: improve handling of matrix groupings
chore: reformat for better readability
fix: correctly parse `cdots`, `ldots` etc
feat: implicitly close grouping expressions
feat!: remove `Token::is_var` method
feat: fallback to operator when no expr recognized
test: update snapshots
fix: fallback to operator when lexing unknown symbol
fix: add fallback to Element conversion for Other keyword
fix: add fallbacks when parsing color binary expr
fix: fallback to default expression when parsing unary
fix: correctly render `and` and `or` logicals
fix: render groupings in a `mrow` element
fix: correctly lex the divide symbol `-:`
  • Loading branch information
nfejzic authored Dec 27, 2023
1 parent 427d81c commit 7d8c86a
Show file tree
Hide file tree
Showing 22 changed files with 327 additions and 156 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ license = "Apache-2.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
alemat = "0.7.0"
alemat = "0.8.0"

[dev-dependencies]
insta = "1.34.0"
Expand Down
5 changes: 3 additions & 2 deletions src/lexer/keywords/groupings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ generate_impl!(
"abs" => Absolute,
"floor" => Floor,
"ceil" => Ceiling,
"norm" => Norm,
"norm" => NormFn,
"||" => Norm,
prefixes:
OpenParen => "(:",
OpenBrace => "{:"
Expand Down Expand Up @@ -67,7 +68,7 @@ impl From<GrpCtxt> for Element {
Grouping::Floor => Operator::rfloor().into(),
Grouping::Ceiling if is_opening => Operator::lceiling().into(),
Grouping::Ceiling => Operator::rceiling().into(),
Grouping::Norm => Operator::from("||").into(),
Grouping::NormFn | Grouping::Norm => Operator::norm().into(),
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/lexer/keywords/logicals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ impl From<Logical> for alemat::elements::Operator {
use alemat::elements::Operator;

match value {
Logical::And => Operator::vee(),
Logical::Or => Operator::wedge(),
Logical::And => Operator::wedge(),
Logical::Or => Operator::vee(),
Logical::Not => Operator::not(),
Logical::Implies => Operator::implies(),
Logical::If => Operator::from("if"),
Expand Down
7 changes: 4 additions & 3 deletions src/lexer/keywords/operators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ generate_impl!(
"**" | "ast" => Asterisk,
"***" | "star" => Star,
"//" => ForwardSlashLiteral,
"\\" | "backslash" | "setminus" => Backslash,
"\\\\" | "backslash" | "setminus" => Backslash,
"xx" | "times" => Times,
"-:" | "div" => Divide,
"|><" | "ltimes" => LTimes,
Expand All @@ -31,6 +31,7 @@ generate_impl!(
"uu" | "cup" => Cup,
"uuu" | "bigcup" => BigCup,
prefixes:
Minus => "-:",
Dot => "**",
Asterisk => "***",
LTimes => "|><|",
Expand All @@ -55,8 +56,8 @@ impl From<Operator> for alemat::elements::Operator {
self::Operator::Dot => Operator::dot(),
self::Operator::Asterisk => Operator::asterisk(),
self::Operator::Star => Operator::star(),
self::Operator::ForwardSlashLiteral => Operator::from("/"),
self::Operator::Backslash => Operator::from("\\"),
self::Operator::ForwardSlashLiteral => Operator::solidus(),
self::Operator::Backslash => Operator::set_minus(),
self::Operator::Times => Operator::mult(),
self::Operator::Divide => Operator::div(),
self::Operator::LTimes => Operator::lfactor(),
Expand Down
18 changes: 10 additions & 8 deletions src/lexer/keywords/others.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ generate_impl!(
"aleph" => Aleph,
":." | "therefore" => Therefore,
":'" | "because" => Because,
"|...|" | "|ldots|" => LowDots,
"|cdots|" => CenterDots,
"..." | "ldots" => LowDots,
"cdots" => CenterDots,
"vdots" => VerticalDots,
"ddots" => DiagonalDots,
"|" => VerticalBar,
Expand Down Expand Up @@ -70,7 +70,7 @@ impl From<Other> for Element {
fn from(value: Other) -> Self {
match value {
Other::Comma => Operator::from(",").into(),
Other::ForwardSlash => Operator::from("/").into(),
Other::ForwardSlash => Operator::solidus().into(),
Other::Integral => Operator::integral().into(),
Other::OIntegral => Operator::circle_integral().into(),
Other::Partial => Operator::partial_diff().into(),
Expand All @@ -81,10 +81,10 @@ impl From<Other> for Element {
Other::Aleph => Ident::aleph().into(),
Other::Therefore => Operator::therefore().into(),
Other::Because => Operator::because().into(),
Other::LowDots => Ident::from("...").into(),
Other::CenterDots => Ident::from("⋯").into(),
Other::VerticalDots => Ident::from("⋮").into(),
Other::DiagonalDots => Ident::from("⋱").into(),
Other::LowDots => Operator::from("...").into(),
Other::CenterDots => Operator::from("⋯").into(),
Other::VerticalDots => Operator::from("⋮").into(),
Other::DiagonalDots => Operator::from("⋱").into(),
Other::VerticalBar => Operator::vert_bar().into(),
Other::VerticalBars => alemat::row![
Operator::vert_bar(),
Expand Down Expand Up @@ -112,7 +112,9 @@ impl From<Other> for Element {
Other::Rational => Ident::set_rational().into(),
Other::Irrational => Ident::set_irrational().into(),
Other::Integer => Ident::set_integer().into(),
_ => unreachable!("Element cannot be constructed from {:?}", value),

// Fallback to string representation
_ => Operator::from(value.as_ref().to_string()).into(),
}
}
}
2 changes: 1 addition & 1 deletion src/lexer/keywords/relations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ impl From<Relation> for Operator {
match value {
Relation::Eq => Operator::eq(),
Relation::NotEq => Operator::not_eq(),
Relation::Define => Operator::from(":="),
Relation::Define => Operator::assign(),
Relation::LessThan => Operator::lt(),
Relation::GreaterThan => Operator::gt(),
Relation::LessEqualThan => Operator::le(),
Expand Down
12 changes: 6 additions & 6 deletions src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,16 +250,16 @@ impl<'src> TokenIterator<'src> {

fn lex_variable(&self, _: usize) -> Option<(Token<'src>, usize)> {
let mut cursor = self.curr;
let mut kind = TokenKind::Variable;

let sym = self.src.get(cursor)?;

if !sym.is_letter() {
return None;
}

cursor += 1;

if sym.content == "d" {
if !sym.is_letter() {
// AsciiMath interprets unrecognized symbols that are not letters as operators
kind = TokenKind::UnknownOperator;
} else if sym.content == "d" {
// might be derivative
if let Some(sym) = self.src.get(cursor) {
if matches!(sym.content, "x" | "y" | "z" | "t") {
Expand All @@ -274,7 +274,7 @@ impl<'src> TokenIterator<'src> {
};

let content = Symbol::as_str(self.src.get(self.curr..cursor)?)?;
let token = Token::with_span(content, TokenKind::Variable, span);
let token = Token::with_span(content, kind, span);

Some((token, cursor))
}
Expand Down
5 changes: 5 additions & 0 deletions src/lexer/tests/fallback.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// this module tests fallback mechanism of the lexer

use super::Snapshot;

super::test_snap!(semicolon, "a + b;");
7 changes: 4 additions & 3 deletions src/lexer/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use super::Span;

mod accents;
mod arrows;
mod fallback;
mod font_commands;
mod functions;
mod greeks;
Expand All @@ -27,6 +28,9 @@ macro_rules! test_snap {

use test_snap;

test_snap!(skip_whitespace, " alpha 24.42");
test_snap!(division, "a/b");

struct Snapshot<T>(T);

impl std::fmt::Display for Snapshot<Token<'_>> {
Expand Down Expand Up @@ -70,9 +74,6 @@ impl std::fmt::Display for Snapshot<Span> {
}
}

test_snap!(skip_whitespace, " alpha 24.42");
test_snap!(division, "a/b");

#[test]
fn perf() {
let src = "gammag gammag gammag gammag gammag ".repeat(1_000);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
source: src/lexer/tests/fallback.rs
expression: "Snapshot((*input, tokens))"
---
a + b;

a
^ -> Variable at: 0 -> 1

+
^ -> Operator(Plus) at: 2 -> 3

b
^ -> Variable at: 4 -> 5

;
^ -> UnknownOperator at: 5 -> 6


Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
---
source: src/lexer/tests/numbers.rs
expression: "Snapshot((input, tokens))"
expression: "Snapshot((*input, tokens))"
---
24.42.

24.42
^^^^^ -> Number at: 0 -> 5

.
^ -> UnknownOperator at: 5 -> 6


Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
source: src/lexer/tests/others.rs
expression: "Snapshot((input, tokens))"
expression: "Snapshot((*input, tokens))"
---
int oint del grad oo |...|

Expand All @@ -19,7 +19,13 @@ int
oo
^^ -> Other(Infinity) at: 18 -> 20

|...|
^^^^^ -> Other(LowDots) at: 21 -> 26
|
^ -> Other(VerticalBar) at: 21 -> 22

...
^^^ -> Other(LowDots) at: 22 -> 25

|
^ -> Other(VerticalBar) at: 25 -> 26


40 changes: 21 additions & 19 deletions src/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,25 +38,6 @@ impl Token<'_> {
/// Returns the string slice stored in this token's `content` field.
pub fn as_str(&self) -> &str {
self.content
}

/// Reports whether this token's kind is one of the kinds accepted below.
///
/// Returns `true` for `Function`, `Number`, `Greek`, `Variable`, `Arrow`, `Relation`,
/// `Logical` and `Operator` tokens. `Other` tokens also return `true`, unless the keyword is
/// `Fraction`, `Power`, `SquareRoot` or `Root`. Every remaining token kind returns `false`.
pub fn is_var(&self) -> bool {
match self.kind {
TokenKind::Function(_)
| TokenKind::Number
| TokenKind::Greek(_)
| TokenKind::Variable
| TokenKind::Arrow(_)
| TokenKind::Relation(_)
| TokenKind::Logical(_)
| TokenKind::Operator(_) => true,
// NOTE(review): these four keywords are presumably excluded because they introduce
// fraction/power/root expressions rather than standalone symbols -- confirm.
TokenKind::Other(other) => !matches!(
other,
Other::Fraction | Other::Power | Other::SquareRoot | Other::Root
),

// Any kind not listed above is rejected.
_ => false,
}
}
}

/// Kind of token identified in ascii math input.
Expand All @@ -77,6 +58,10 @@ pub(crate) enum TokenKind {
/// Standard operators, e.g. +, -, *, |>< etc.
Operator(Operator),

/// Symbols that are not letters and aren't explicitly defined in the AsciiMath grammar fall
/// back to unknown operators.
UnknownOperator,

/// Relations in maths, e.g. =, !=, <, <= etc.
Relation(Relation),

Expand All @@ -101,3 +86,20 @@ pub(crate) enum TokenKind {
#[default]
Unimplemented,
}

impl TokenKind {
    /// Reports whether this kind is a grouping token that is not one of the closing variants.
    ///
    /// Non-grouping kinds always yield `false`. A grouping kind yields `false` only for
    /// `CloseParen`, `CloseBracket`, `CloseBrace`, `RightAngled` and `CloseIgnored`; every
    /// other grouping variant yields `true`.
    pub fn is_grouping_open(&self) -> bool {
        match self {
            TokenKind::Grouping(grouping) => {
                let is_closing = matches!(
                    grouping,
                    Grouping::CloseParen
                        | Grouping::CloseBracket
                        | Grouping::CloseBrace
                        | Grouping::RightAngled
                        | Grouping::CloseIgnored
                );

                !is_closing
            }
            _ => false,
        }
    }
}
25 changes: 21 additions & 4 deletions src/parser/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,16 +91,32 @@ impl Binary {
let token = parser.iter.peek()?;
let binary_kind = BinaryKind::try_from(token.kind()).ok()?;

let start = token.span().start;
let span = token.span();
let start = span.start;

parser.iter.next(); // skip binary token

let default_expr = || {
SimpleExpr::Var(Var {
kind: VarKind::UnknownOperator(String::default()),
span,
})
};

let expr_1 = match binary_kind {
BinaryKind::Color => Box::new(parser.parse_grouping_as_str()?),
_ => Box::new(parser.parse_simple_expr()?),
BinaryKind::Color => Box::new(parser.parse_grouping_as_str().unwrap_or_else(|| {
SimpleExpr::Var(Var {
kind: VarKind::Text(String::from("black")),
span: Span {
start: span.end,
end: span.end,
},
})
})),
_ => Box::new(parser.parse_simple_expr().unwrap_or_else(default_expr)),
};

let expr_2 = Box::new(parser.parse_simple_expr()?);
let expr_2 = Box::new(parser.parse_simple_expr().unwrap_or_else(default_expr));

let end = expr_2.span().end;

Expand All @@ -118,6 +134,7 @@ impl Binary {
impl IntoElements for Binary {
fn into_elements(self) -> Elements {
let to_elements = |expr: Box<SimpleExpr>| match *expr {
SimpleExpr::Grouping(grp) if grp.is_simple_grp() => grp.ungroup_into_elements(),
SimpleExpr::Grouping(grp) => grp.into_elements(),
_ => expr.into_elements(),
};
Expand Down
Loading

0 comments on commit 7d8c86a

Please sign in to comment.