From 3c97df7199c0db2c5f9336daf31a7ea0030fafd2 Mon Sep 17 00:00:00 2001 From: Vincenzo Mantova Date: Sun, 22 Oct 2023 17:00:35 +0100 Subject: [PATCH 1/2] add U+FE00 variation selector to \emptyset --- lib/LaTeXML/Engine/TeX_Fonts.pool.ltxml | 2 +- lib/LaTeXML/Engine/plain.pool.ltxml | 2 +- t/fonts/abxtest.xml | 2 +- t/fonts/omencodings.xml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/LaTeXML/Engine/TeX_Fonts.pool.ltxml b/lib/LaTeXML/Engine/TeX_Fonts.pool.ltxml index 75aad44ad9..30d74d350e 100644 --- a/lib/LaTeXML/Engine/TeX_Fonts.pool.ltxml +++ b/lib/LaTeXML/Engine/TeX_Fonts.pool.ltxml @@ -241,7 +241,7 @@ DeclareFontMap('OMS', # prime infty in ni bigtri.up bigtri.dn slash mapsto "\x{2032}", "\x{221E}", "\x{2208}", "\x{220B}", "\x{25B3}", "\x{25BD}", "/", "\x{21A6}", # forall exists not emptyset Re Im top bot - "\x{2200}", "\x{2203}", UTF(0xAC), "\x{2205}", "\x{211C}", "\x{2111}", "\x{22A4}", "\x{22A5}", + "\x{2200}", "\x{2203}", UTF(0xAC), "\x{2205}\x{FE00}", "\x{211C}", "\x{2111}", "\x{22A4}", "\x{22A5}", # aleph cal A cal B cal C cal D cal E cal F cal G "\x{2135}", "\x{1D49C}", "\x{212C}", "\x{1D49E}", "\x{1D49F}", "\x{2130}", "\x{2131}", "\x{1D4A2}", # cal H cal I cal J cal K cal L cal M cal N cal O diff --git a/lib/LaTeXML/Engine/plain.pool.ltxml b/lib/LaTeXML/Engine/plain.pool.ltxml index 36d4130889..291f2ad1a0 100644 --- a/lib/LaTeXML/Engine/plain.pool.ltxml +++ b/lib/LaTeXML/Engine/plain.pool.ltxml @@ -812,7 +812,7 @@ DefMathI('\Im', undef, "\x{2111}", role => 'OPFUNCTION', meaning => 'imagi DefMathI('\mho', undef, "\x{2127}"); DefMathI('\prime', undef, "\x{2032}", role => 'SUPOP', locked => 1); -DefMathI('\emptyset', undef, "\x{2205}", role => 'ID', meaning => 'empty-set'); +DefMathI('\emptyset', undef, "\x{2205}\x{FE00}", role => 'ID', meaning => 'empty-set'); DefMathI('\nabla', undef, "\x{2207}", role => 'OPERATOR'); DefMathI('\surd', undef, "\x{221A}", role => 'OPERATOR', meaning => 'square-root'); DefMathI('\top', undef, 
"\x{22A4}", role => 'ADDOP', meaning => 'top'); diff --git a/t/fonts/abxtest.xml b/t/fonts/abxtest.xml index 0b685145ef..a5d10dfb27 100644 --- a/t/fonts/abxtest.xml +++ b/t/fonts/abxtest.xml @@ -1483,7 +1483,7 @@ - + ∅︀ \emptyset diff --git a/t/fonts/omencodings.xml b/t/fonts/omencodings.xml index bcbea9ec0e..d217a8dbb4 100644 --- a/t/fonts/omencodings.xml +++ b/t/fonts/omencodings.xml @@ -295,7 +295,7 @@ ∀ ∃ ¬ - ∅ + ∅︀ ℜ ℑ ⊤ From 548685230c380879c5e00376ac7f0a919dd267a6 Mon Sep 17 00:00:00 2001 From: Vincenzo Mantova Date: Sun, 22 Oct 2023 17:00:35 +0100 Subject: [PATCH 2/2] add U+FE00, U+FE01 variant selectors for respectively caligraphic and script styles --- lib/LaTeXML/Post/MathML.pm | 3 ++ lib/LaTeXML/Util/Unicode.pm | 98 +++++++++++++++++++++++++++------- t/daemon/formats/mixedmath.xml | 2 +- 3 files changed, 83 insertions(+), 20 deletions(-) diff --git a/lib/LaTeXML/Post/MathML.pm b/lib/LaTeXML/Post/MathML.pm index 3c4403e9c7..78481f1683 100644 --- a/lib/LaTeXML/Post/MathML.pm +++ b/lib/LaTeXML/Post/MathML.pm @@ -651,6 +651,7 @@ my %safe_stretchy = map { $_ => 1; } "(", ")", "[", "]", "{", "}"; # Remaps some mathvariants to a simpler subset of Unicode my %plane1hackable = ( # CONSTANT script => 'script', + caligraphic => 'caligraphic', 'bold-script' => 'script', fraktur => 'fraktur', 'bold-fraktur' => 'fraktur', @@ -737,6 +738,8 @@ sub stylizeContent { $font = $variant = $color = $bgcolor = $opacity = undef; } # Needs no viz. styling attributes elsif (!$font) { } elsif ($font =~ /caligraphic/) { + # caligraphic is not a true mathvariant (see Unicode.pm), reset to script + $variant =~ s/caligraphic/script/; # Note that this is unlikely to have effect when plane1 chars are used! $class = ($class ? $class . ' ' : '') . 
'ltx_font_mathcaligraphic'; } elsif ($font =~ /script/) { diff --git a/lib/LaTeXML/Util/Unicode.pm b/lib/LaTeXML/Util/Unicode.pm index be7b305407..4e747d01aa 100644 --- a/lib/LaTeXML/Util/Unicode.pm +++ b/lib/LaTeXML/Util/Unicode.pm @@ -58,10 +58,69 @@ my %unicode_map = ( # CONSTANT 'sans-serif-italic' => { makePlane1Map(0x1D608, undef, undef, undef) }, 'sans-serif-bold-italic' => { makePlane1Map(0x1D63C, 0x1D790, 0x1D7AA, undef) }, 'monospace' => { makePlane1Map(0x1D670, undef, undef, 0x1D7F6) }, - 'script' => { makePlane1Map(0x1D49C, undef, undef, undef), - B => "\x{212C}", E => "\x{2130}", F => "\x{2131}", H => "\x{210B}", I => "\x{2110}", - L => "\x{2112}", M => "\x{2133}", R => "\x{211B}", - e => "\x{212F}", g => "\x{210A}", o => "\x{2134}" }, + # since Unicode 14, capital script characters admit two variation selectors + # - U+FE00 for chancery style (caligraphic) + # - U+FE01 for roundhand style (script) + 'script' => { makePlane1Map(0x1D49C, undef, undef, undef), + A => "\x{1D49C}\x{FE01}", + B => "\x{212C}\x{FE01}", + C => "\x{1D49E}\x{FE01}", + D => "\x{1D49F}\x{FE01}", + E => "\x{2130}\x{FE01}", + F => "\x{2131}\x{FE01}", + G => "\x{1D4A2}\x{FE01}", + H => "\x{210B}\x{FE01}", + I => "\x{2110}\x{FE01}", + J => "\x{1D4A5}\x{FE01}", + K => "\x{1D4A6}\x{FE01}", + L => "\x{2112}\x{FE01}", + M => "\x{2133}\x{FE01}", + N => "\x{1D4A9}\x{FE01}", + O => "\x{1D4AA}\x{FE01}", + P => "\x{1D4AB}\x{FE01}", + Q => "\x{1D4AC}\x{FE01}", + R => "\x{211B}\x{FE01}", + S => "\x{1D4AE}\x{FE01}", + T => "\x{1D4AF}\x{FE01}", + U => "\x{1D4B0}\x{FE01}", + V => "\x{1D4B1}\x{FE01}", + W => "\x{1D4B2}\x{FE01}", + X => "\x{1D4B3}\x{FE01}", + Y => "\x{1D4B4}\x{FE01}", + Z => "\x{1D4B5}\x{FE01}", + e => "\x{212F}", + g => "\x{210A}", + o => "\x{2134}" }, + 'caligraphic' => { makePlane1Map(0x1D49C, undef, undef, undef), + A => "\x{1D49C}\x{FE00}", + B => "\x{212C}\x{FE00}", + C => "\x{1D49E}\x{FE00}", + D => "\x{1D49F}\x{FE00}", + E => "\x{2130}\x{FE00}", + F => "\x{2131}\x{FE00}", + G 
=> "\x{1D4A2}\x{FE00}", + H => "\x{210B}\x{FE00}", + I => "\x{2110}\x{FE00}", + J => "\x{1D4A5}\x{FE00}", + K => "\x{1D4A6}\x{FE00}", + L => "\x{2112}\x{FE00}", + M => "\x{2133}\x{FE00}", + N => "\x{1D4A9}\x{FE00}", + O => "\x{1D4AA}\x{FE00}", + P => "\x{1D4AB}\x{FE00}", + Q => "\x{1D4AC}\x{FE00}", + R => "\x{211B}\x{FE00}", + S => "\x{1D4AE}\x{FE00}", + T => "\x{1D4AF}\x{FE00}", + U => "\x{1D4B0}\x{FE00}", + V => "\x{1D4B1}\x{FE00}", + W => "\x{1D4B2}\x{FE00}", + X => "\x{1D4B3}\x{FE00}", + Y => "\x{1D4B4}\x{FE00}", + Z => "\x{1D4B5}\x{FE00}", + e => "\x{212F}", + g => "\x{210A}", + o => "\x{2134}" }, 'bold-script' => { makePlane1Map(0x1D4D0, undef, undef, undef) }, 'fraktur' => { makePlane1Map(0x1D504, undef, undef, undef), C => "\x{212D}", H => "\x{210C}", I => "\x{2111}", R => "\x{211C}", Z => "\x{2128}" }, @@ -217,25 +276,25 @@ my %mathvariants = ( # CONSTANT 'bold italic' => 'bold-italic', 'doublestruck' => 'double-struck', 'blackboard' => 'double-struck', - 'blackboard bold' => 'double-struck', # all collapse - 'blackboard upright' => 'double-struck', # all collapse - 'blackboard bold upright' => 'double-struck', # all collapse + 'blackboard bold' => 'double-struck', # all collapse + 'blackboard upright' => 'double-struck', # all collapse + 'blackboard bold upright' => 'double-struck', # all collapse 'fraktur' => 'fraktur', - 'fraktur italic' => 'fraktur', # all collapse + 'fraktur italic' => 'fraktur', # all collapse 'fraktur bold' => 'bold-fraktur', 'script' => 'script', - 'script italic' => 'script', # all collapse + 'script italic' => 'script', # all collapse 'script bold' => 'bold-script', - 'caligraphic' => 'script', # NOTE: TeX caligraphic is NOT script! 
- 'caligraphic bold' => 'bold-script', # collapse - 'sansserif' => 'sans-serif', - 'sansserif bold' => 'bold-sans-serif', - 'sansserif italic' => 'sans-serif-italic', - 'sansserif bold italic' => 'sans-serif-bold-italic', - 'typewriter' => 'monospace', - 'typewriter bold' => 'monospace', - 'typewriter italic' => 'monospace', - 'typewriter bold italic' => 'monospace', + 'caligraphic' => 'caligraphic', # not a true mathvariant, supported via Unicode variation sequences + 'caligraphic bold' => 'bold-script', # collapse (NOTE: TeX caligraphic is NOT script!) + 'sansserif' => 'sans-serif', + 'sansserif bold' => 'bold-sans-serif', + 'sansserif italic' => 'sans-serif-italic', + 'sansserif bold italic' => 'sans-serif-bold-italic', + 'typewriter' => 'monospace', + 'typewriter bold' => 'monospace', + 'typewriter italic' => 'monospace', + 'typewriter bold italic' => 'monospace', ); # The font differences (from the containing context) have been deciphered @@ -310,6 +369,7 @@ C, C, C, C