diff --git a/lib/LaTeXML/Core/Definition.pm b/lib/LaTeXML/Core/Definition.pm index 2e2008b74..b426502b4 100644 --- a/lib/LaTeXML/Core/Definition.pm +++ b/lib/LaTeXML/Core/Definition.pm @@ -37,15 +37,15 @@ sub getCS { my ($self) = @_; return $$self{cs}; } +# Note: This returns the alias's CSName, if any. sub getCSName { my ($self) = @_; - return (defined $$self{alias} ? $$self{alias} : $$self{cs}->getCSName); } + return ($$self{alias} || $$self{cs})->getCSName; } -# NOTE: Need to clean up alias; really should already be Token (or Tokens?) -# and is not always a CS! +# NOTE: alias should be Token (or Tokens?) sub getCSorAlias { my ($self) = @_; - return (defined $$self{alias} ? T_CS($$self{alias}) : $$self{cs}); } + return $$self{alias} || $$self{cs}; } sub isExpandable { return 0; } @@ -109,7 +109,7 @@ sub stringify { my ($self) = @_; my $type = ref $self; $type =~ s/^LaTeXML:://; - my $name = ($$self{alias} || $$self{cs}->getCSName); + my $name = $self->getCSName; return $type . '[' . ($$self{parameters} ? $name . ' ' . Stringify($$self{parameters}) : $name) . 
']'; } diff --git a/lib/LaTeXML/Core/Definition/Constructor.pm b/lib/LaTeXML/Core/Definition/Constructor.pm index 85ac60ec7..e2453bba3 100644 --- a/lib/LaTeXML/Core/Definition/Constructor.pm +++ b/lib/LaTeXML/Core/Definition/Constructor.pm @@ -60,10 +60,6 @@ sub getSizer { my ($self) = @_; return $$self{sizer}; } -sub getAlias { - my ($self) = @_; - return $$self{alias}; } - sub getNumArgs { my ($self) = @_; return $$self{nargs} if defined $$self{nargs}; diff --git a/lib/LaTeXML/Core/Definition/FontDef.pm b/lib/LaTeXML/Core/Definition/FontDef.pm index 23a7dc062..5ee7e3d7e 100644 --- a/lib/LaTeXML/Core/Definition/FontDef.pm +++ b/lib/LaTeXML/Core/Definition/FontDef.pm @@ -27,8 +27,7 @@ use base qw(LaTeXML::Core::Definition::Primitive); sub new { my ($class, $cs, $fontid, %traits) = @_; return bless { cs => $cs, parameters => undef, - fontID => $fontid, - locator => $STATE->getStomach->getGullet->getMouth->getLocator, + fontID => $fontid, %traits }, $class; } # Return the "font info" associated with the (TeX) font that this command selects (See \font) diff --git a/lib/LaTeXML/Core/Definition/Primitive.pm b/lib/LaTeXML/Core/Definition/Primitive.pm index 06f386388..a265fc895 100644 --- a/lib/LaTeXML/Core/Definition/Primitive.pm +++ b/lib/LaTeXML/Core/Definition/Primitive.pm @@ -64,10 +64,8 @@ sub invoke { my $replacement = $$self{replacement}; if (!ref $replacement) { - my $alias = $$self{alias}; - $alias = T_CS($alias) if $alias && !ref $alias; push(@result, Box($replacement, undef, undef, - Tokens($alias || $$self{cs}, ($parms ? $parms->revertArguments(@args) : ())), + Tokens($self->getCSorAlias, ($parms ? $parms->revertArguments(@args) : ())), (defined $replacement ? 
() : (isEmpty => 1)))); } else { push(@result, &{ $$self{replacement} }($stomach, @args)); } diff --git a/lib/LaTeXML/Core/Document.pm b/lib/LaTeXML/Core/Document.pm index 637945909..97aa4b54a 100644 --- a/lib/LaTeXML/Core/Document.pm +++ b/lib/LaTeXML/Core/Document.pm @@ -20,6 +20,7 @@ use LaTeXML::Common::Error; use LaTeXML::Common::XML; use LaTeXML::Util::Radix; use Unicode::Normalize; +use Data::Dumper; use Scalar::Util qw(blessed); use base qw(LaTeXML::Common::Object); @@ -1376,41 +1377,46 @@ sub makeError { # [xml:id and namespaced attributes are always allowed] sub setAttribute { my ($self, $node, $key, $value) = @_; - if (ref $value) { - if ($key eq '_box') { - return $self->setNodeBox($node, $value); } - elsif ($key eq '_font') { - return $self->setNodeFont($node, $value); } - elsif ((!blessed($value)) || !$value->can('toAttribute')) { - Warn('unexpected', (ref $value), $self, - "Don't know how to encode $value as an attribute value"); + my $model = $$self{model}; + ## First, a couple of special case internal attributes + if ($key eq '_box') { + return $self->setNodeBox($node, $value); } + elsif ($key eq '_font') { + return $self->setNodeFont($node, $value); } + ## Next, verify the attribute allowed by Model, else internal or namespaced + elsif (($key =~ /:/) || ($key =~ /^_/) + || $model->canHaveAttribute($model->getNodeQName($node), $key)) { + ## OK, we're going to use the value, so make sure it's a string. + if (ref $value) { + if ((!blessed($value)) || !$value->can('toAttribute')) { + Warn('unexpected', (ref $value), $self, + "While setting attribute $key, Don't know how to encode $value", + Dumper($value)); + return; } + else { + $value = $value->toAttribute; } } + if ((!defined $value) || ($value eq '')) { # Useless value, after all return; } - else { - $value = $value->toAttribute; } } - if ((defined $value) && ($value ne '')) { # Skip if `empty'; but 0 is OK! 
- if ($key eq 'xml:id') { # If it's an ID attribute - $value = recordID($self, $value, $node); # Do id book keeping - $node->setAttributeNS($LaTeXML::Common::XML::XML_NS, 'id', $value); } # and bypass all ns stuff - elsif ($key !~ /:/) { # No colon; no namespace (the common case!) - # Ignore attributes not allowed by the model, - # but accept "internal" attributes. - my $model = $$self{model}; - my $qname = $model->getNodeQName($node); - if ($model->canHaveAttribute($qname, $key) || $key =~ /^_/) { - $node->setAttribute($key => $value); } } - else { # Accept any namespaced attributes - my ($ns, $name) = $$self{model}->decodeQName($key); - if ($ns) { # If namespaced attribute (must have prefix! - my $prefix = $node->lookupNamespacePrefix($ns); # namespace already declared? - if (!$prefix) { # if namespace not already declared - $prefix = $$self{model}->getDocumentNamespacePrefix($ns, 1); # get the prefix to use - getDocument($self)->documentElement->setNamespace($ns, $prefix, 0); } # and declare it - if ($prefix eq '#default') { # Probably shouldn't happen...? + if ($key eq 'xml:id') { # If it's an ID attribute + $value = recordID($self, $value, $node); # Do id book keeping + ## and bypass all ns stuff + $node->setAttributeNS($LaTeXML::Common::XML::XML_NS, 'id', $value); } + elsif ($key =~ /:/) { # ANY namespaced attribute + my ($ns, $name) = $model->decodeQName($key); + if ($ns) { # If namespaced attribute (must have prefix! + my $prefix = $node->lookupNamespacePrefix($ns); # already declared? + if (!$prefix) { # Nope, not yet! + ## Create prefix to use, and declare it + $prefix = $model->getDocumentNamespacePrefix($ns, 1); + getDocument($self)->documentElement->setNamespace($ns, $prefix, 0); } + if ($prefix eq '#default') { # Probably shouldn't happen...? $node->setAttribute($name => $value); } else { $node->setAttributeNS($ns, "$prefix:$name" => $value); } } else { - $node->setAttribute($name => $value); } } } # redundant case... 
+ $node->setAttribute($name => $value); } } + else { # Allowed (but NON-namespaced) or internal attribute + $node->setAttribute($key => $value); } } return; } sub addSSValues { @@ -1661,11 +1667,17 @@ sub collapseXMDual { #********************************************************************** # Record the Box that created this node. +# $box should be a Box/List/Whatsit object; else a previously recorded string sub setNodeBox { my ($self, $node, $box) = @_; return unless $box; - my $boxid = "$box"; - $$self{node_boxes}{$boxid} = $box; + my $boxid = "$box"; # Effectively the address + if (ref $box) { + $$self{node_boxes}{$boxid} = $box; } + elsif (!$$self{node_boxes}{$box}) { + # Could get string for $box when copying nodes; should already be internned + Warn('internal', 'nonbox', $self, + "setNodeBox recording unknown source box: $box"); } return $node->setAttribute(_box => $boxid); } sub getNodeBox { @@ -1676,14 +1688,20 @@ sub getNodeBox { if (my $boxid = $node->getAttribute('_box')) { return $$self{node_boxes}{$boxid}; } } +# Record the font used on this node. +# $font should be a Font object; else a previously recorded string sub setNodeFont { my ($self, $node, $font) = @_; - return unless ref $font; # ? - my $fontid = $font->toString; - $$self{node_fonts}{$fontid} = $font; + my $fontid = (ref $font ? $font->toString : $font); + return unless $font; # ? if ($node->nodeType == XML_ELEMENT_NODE) { + if (ref $font) { + $$self{node_fonts}{$fontid} = $font; } + elsif (!$$self{node_fonts}{$font}) { + # Could get string for $font when copying nodes; should already be internned + Warn('internal', 'nonfont', $self, + "setNodeFont recording unknown font: $font"); } $node->setAttribute(_font => $fontid); } - # otherwise, probably just ignorable? return; } # Possibly a sign of a design flaw; Set the node's font & all children that HAD the same font. 
@@ -1704,7 +1722,9 @@ sub getNodeFont { my $t; while ($node && (($t = $node->nodeType) != XML_ELEMENT_NODE)) { $node = $node->parentNode; } - return ($node && ($t == XML_ELEMENT_NODE) && $$self{node_fonts}{ $node->getAttribute('_font') }) + my $f; + return ($node && ($t == XML_ELEMENT_NODE) + && ($f = $node->getAttribute('_font')) && $$self{node_fonts}{$f}) || LaTeXML::Common::Font->textDefault(); } sub getNodeLanguage { diff --git a/lib/LaTeXML/Core/Whatsit.pm b/lib/LaTeXML/Core/Whatsit.pm index c6af364c7..9eee69be7 100644 --- a/lib/LaTeXML/Core/Whatsit.pm +++ b/lib/LaTeXML/Core/Whatsit.pm @@ -124,11 +124,7 @@ sub revert { @tokens = $self->substituteParameters($spec) if $spec ne ''; } else { - my $alias = $defn->getAlias; - if (defined $alias) { - push(@tokens, (ref $alias ? $alias : T_CS($alias))) if $alias ne ''; } - else { - push(@tokens, $defn->getCS); } + push(@tokens, $defn->getCSorAlias); if (my $parameters = $defn->getParameters) { push(@tokens, $parameters->revertArguments($self->getArgs)); } } if (defined(my $body = $self->getBody)) { diff --git a/lib/LaTeXML/Engine/LaTeX.pool.ltxml b/lib/LaTeXML/Engine/LaTeX.pool.ltxml index a1cb2e6ba..27e7a2c48 100644 --- a/lib/LaTeXML/Engine/LaTeX.pool.ltxml +++ b/lib/LaTeXML/Engine/LaTeX.pool.ltxml @@ -2049,7 +2049,7 @@ DefConstructorI('\lx@begin@display@math', undef, . "" . "" . "", - alias => '$$', + reversion => Tokens(T_MATH, T_MATH), beforeDigest => sub { $_[0]->beginMode('display_math'); }, properties => sub { RefStepID('equation') }, captureBody => 1); @@ -2241,12 +2241,11 @@ DefMacro('\(', '\lx@begin@inline@math'); DefMacro('\)', '\\lx@end@inline@math'); # Keep from expanding too early, if in alignments, or such. 
-DefMacroI('\ensuremath', undef, - Tokens(T_CS('\protect'), T_CS('\@ensuremath'))); -DefMacro('\@ensuremath{}', sub { +DefMacro('\ensuremath{}', sub { my ($gullet, $stuff) = @_; if (LookupValue('IN_MATH')) { $stuff->unlist; } - else { (T_MATH, $stuff->unlist, T_MATH); } }); + else { (T_MATH, $stuff->unlist, T_MATH); } }, + robust => 1, protected => 1); # Magic check that math-mode trigger follows our $MATHENVS = 'displaymath|equation*?|eqnarray*?' @@ -2916,47 +2915,26 @@ DefPrimitiveI('\tenlnw', undef, undef, font => { family => 'linew10' }); DefPrimitiveI('\tencirc', undef, undef, font => { family => 'lcircle10' }); DefPrimitiveI('\tencircw', undef, undef, font => { family => 'lcirclew10' }); -# At least all things on uclclist need to be macros -DefPrimitiveI('\lx@utf@OE', undef, "\x{0152}", alias => '\OE'); # LATIN CAPITAL LIGATURE OE -DefPrimitiveI('\lx@utf@oe', undef, "\x{0153}", alias => '\oe'); # LATIN SMALL LIGATURE OE -DefPrimitiveI('\lx@utf@AE', undef, UTF(0xC6), alias => '\AE'); # LATIN CAPITAL LETTER AE -DefPrimitiveI('\lx@utf@ae', undef, UTF(0xE6), alias => '\ae'); # LATIN SMALL LETTER AE -DefPrimitiveI('\lx@utf@AA', undef, UTF(0xC5), alias => '\AA'); # LATIN CAPITAL LETTER A WITH RING ABOVE -DefPrimitiveI('\lx@utf@aa', undef, UTF(0xE5), alias => '\aa'); # LATIN SMALL LETTER A WITH RING ABOVE -DefPrimitiveI('\lx@utf@O', undef, UTF(0xD8), alias => '\O'); # LATIN CAPITAL LETTER O WITH STROKE -DefPrimitiveI('\lx@utf@o', undef, UTF(0xF8), alias => '\o'); # LATIN SMALL LETTER O WITH STROKE -DefPrimitiveI('\lx@utf@L', undef, "\x{0141}", alias => '\L'); # LATIN CAPITAL LETTER L WITH STROKE -DefPrimitiveI('\lx@utf@l', undef, "\x{0142}", alias => '\l'); # LATIN SMALL LETTER L WITH STROKE -DefPrimitiveI('\lx@utf@ss', undef, UTF(0xDF), alias => '\ss'); # LATIN SMALL LETTER SHARP S -DefPrimitiveI('\lx@utf@dh', undef, UTF(0xf0), alias => '\dh'); # eth -DefPrimitiveI('\lx@utf@DH', undef, UTF(0xd0), alias => '\DH'); # Eth (looks same as \DJ!) 
-DefPrimitiveI('\lx@utf@dj', undef, "\x{0111}", alias => '\dj'); # d with stroke -DefPrimitiveI('\lx@utf@DJ', undef, "\x{0110}", alias => '\DJ'); # D with stroke (looks sames as \DH!) -DefPrimitiveI('\lx@utf@ng', undef, "\x{014B}", alias => '\ng'); -DefPrimitiveI('\lx@utf@NG', undef, "\x{014A}", alias => '\NG'); -DefPrimitiveI('\lx@utf@th', undef, UTF(0xFE), alias => '\th'); -DefPrimitiveI('\lx@utf@TH', undef, UTF(0xDE), alias => '\TH'); - -DefMacroI('\OE', undef, '\lx@utf@OE'); -DefMacroI('\oe', undef, '\lx@utf@oe'); -DefMacroI('\AE', undef, '\lx@utf@AE'); -DefMacroI('\ae', undef, '\lx@utf@ae'); -DefMacroI('\ae', undef, '\lx@utf@ae'); -DefMacroI('\AA', undef, '\lx@utf@AA'); -DefMacroI('\aa', undef, '\lx@utf@aa'); -DefMacroI('\O', undef, '\lx@utf@O'); -DefMacroI('\o', undef, '\lx@utf@o'); -DefMacroI('\L', undef, '\lx@utf@L'); -DefMacroI('\l', undef, '\lx@utf@l'); -DefMacroI('\ss', undef, '\lx@utf@ss'); -DefMacroI('\dh', undef, '\lx@utf@dh'); # in latex? -DefMacroI('\DH', undef, '\lx@utf@DH'); -DefMacroI('\dj', undef, '\lx@utf@dj'); -DefMacroI('\DJ', undef, '\lx@utf@DJ'); -DefMacroI('\ng', undef, '\lx@utf@ng'); -DefMacroI('\NG', undef, '\lx@utf@NG'); -DefMacroI('\th', undef, '\lx@utf@th'); -DefMacroI('\TH', undef, '\lx@utf@TH'); +# At least all things on uclclist need to be macros; robust achieves that +DefPrimitiveI('\OE', undef, "\x{0152}", robust => 1); # LATIN CAPITAL LIGATURE OE +DefPrimitiveI('\oe', undef, "\x{0153}", robust => 1); # LATIN SMALL LIGATURE OE +DefPrimitiveI('\AE', undef, UTF(0xC6), robust => 1); # LATIN CAPITAL LETTER AE +DefPrimitiveI('\ae', undef, UTF(0xE6), robust => 1); # LATIN SMALL LETTER AE +DefPrimitiveI('\AA', undef, UTF(0xC5), robust => 1); # LATIN CAPITAL LETTER A WITH RING ABOVE +DefPrimitiveI('\aa', undef, UTF(0xE5), robust => 1); # LATIN SMALL LETTER A WITH RING ABOVE +DefPrimitiveI('\O', undef, UTF(0xD8), robust => 1); # LATIN CAPITAL LETTER O WITH STROKE +DefPrimitiveI('\o', undef, UTF(0xF8), robust => 1); # LATIN SMALL LETTER O 
WITH STROKE +DefPrimitiveI('\L', undef, "\x{0141}", robust => 1); # LATIN CAPITAL LETTER L WITH STROKE +DefPrimitiveI('\l', undef, "\x{0142}", robust => 1); # LATIN SMALL LETTER L WITH STROKE +DefPrimitiveI('\ss', undef, UTF(0xDF), robust => 1); # LATIN SMALL LETTER SHARP S +DefPrimitiveI('\dh', undef, UTF(0xf0), robust => 1); # eth +DefPrimitiveI('\DH', undef, UTF(0xd0), robust => 1); # Eth (looks same as \DJ!) +DefPrimitiveI('\dj', undef, "\x{0111}", robust => 1); # d with stroke +DefPrimitiveI('\DJ', undef, "\x{0110}", robust => 1); # D with stroke (looks sames as \DH!) +DefPrimitiveI('\ng', undef, "\x{014B}", robust => 1); +DefPrimitiveI('\NG', undef, "\x{014A}", robust => 1); +DefPrimitiveI('\th', undef, UTF(0xFE), robust => 1); +DefPrimitiveI('\TH', undef, UTF(0xDE), robust => 1); #====================================================================== # C.8.2 Defining Environments @@ -4010,9 +3988,11 @@ Tag('ltx:*', 'afterClose:late' => sub { # These will get filled in during postprocessing. # * comes from hyperref -DefConstructor('\ref OptionalMatch:* Semiverbatim', "", - sizer => '()', # Don't actually know how big this will be! - properties => sub { (label => CleanLabel($_[2])); }); +DefConstructor('\ref OptionalMatch:* Semiverbatim', + "", + sizer => '()', # Don't actually know how big this will be! + properties => sub { (label => CleanLabel($_[2])); }, + robust => 1); # "page" does not make sense in xml. If the user really wants, they will need: # \usepackage{latexml} ... \iflatexml alternate\else page \pageref{label}\fi Let('\pageref', '\ref'); @@ -4393,7 +4373,8 @@ DefMacro('\cite[] Semiverbatim', sub { Tokens(Explode('cite')), Tokens($open, Invocation(T_CS('\@@bibref'), undef, $keys, undef, undef), - ($post ? ($ns, T_SPACE, $post) : ()), $close)); }); + ($post ? ($ns, T_SPACE, $post) : ()), $close)); }, + robust => 1); # NOTE: Eventually needs to be recognized by MakeBibliography # For now, defer until document end. 
@@ -4778,6 +4759,7 @@ DefMacro('\stretch{}', '0pt plus #1fill\relax'); DefPrimitive('\newlength DefToken', sub { my ($stomach, $cs) = @_; DefRegisterI($cs, undef, Glue(0), allocate => '\skip'); }); + DefPrimitive('\setlength {Variable}{Dimension}', sub { my ($stomach, $variable, $length) = @_; my ($defn, @params) = @$variable; @@ -5671,30 +5653,149 @@ DefMacroI('\@qrelax', undef, Tokens(Explode('relax'))); DefMacroI('\@spaces', undef, '\space\space\space\space'); Let('\@sptoken', T_SPACE); -DefMacroI('\@uclclist', undef, '\oe\OE\o\O\ae\AE\dh\DH\dj\DJ\l\L\ng\NG\ss\SS\th\TH'); +# NOTE: These ONLY work on robust commands, +# thus, preceded by \protect, and the CS has a trailing space added!!! +DefPrimitive('\AddToNoCaseChangeList{DefToken}', sub { + my ($stomach, $cs) = @_; + # This is analog of \l_text_case_exclude_arg_tl + my $protcs = $cs->getString . ' '; # As if robust! + $STATE->assignMapping(text_case_exclude => $protcs => 1); }); +DefMacro('\NoCaseChange{}', '#1', robust => 1); +RawTeX(<<'EoTeX'); +\AddToNoCaseChangeList{\NoCaseChange}% +\AddToNoCaseChangeList{\label}% +\AddToNoCaseChangeList{\ref}% +\AddToNoCaseChangeList{\cite}% +\AddToNoCaseChangeList{\ensuremath}% +\AddToNoCaseChangeList{\thanks}% +EoTeX +# Eventually latex3 has \Declare(Lower|Upper|Title)caseMapping +# Short of that... and noting that \@uclclist might be changed at any time... +sub prepareCaseMapping { + $STATE->assignMapping(text_uppercase => '\i ' => T_LETTER('I')); + $STATE->assignMapping(text_uppercase => '\j ' => T_LETTER('J')); + my @pairs = $STATE->lookupDefinition(T_CS('\@uclclist'))->getExpansion->unlist; + while (@pairs) { + my ($l, $u) = (shift(@pairs), shift(@pairs)); + $STATE->assignMapping(text_uppercase => $l->getString . ' ' => $u); + $STATE->assignMapping(text_lowercase => $u->getString . 
' ' => $l); } + return; } +DefPrimitive('\lx@prepare@case@mapping', \&prepareCaseMapping); + +# Cases: lowercase,uppercase are obvious; +# There may be some controversy, but TO ME: +# capitalize means to uppercase the first character +# titlecase means capitalize all words, except "trivial" ones (and,or...) +# sentence case means capitalize first word +# LaTeX's \MakeTitlecase does sentence case, but this may change in future! +# There is also some confusion about whether things like sentence case should +# ONLY uppercase the first and lowercase the rest, or leave it alone. +# Apparently there are language issues? +# +# To handle more such cases, we'd probably want to refactor the following +# to be more reentrant, (eg. split arg into words) +# and deal locally with \@uclclist, \i,\j, etc. +# Can't even properly do sentence case, as it is! +# But let's try this for now (maybe reading latex.ltx is best long-run) +# latex3 uses \text_expand to manage the string; +# seemingly a loop readXToken is similar enough? +sub latexChangeCase { + my ($gullet, $reqcase, $tokens) = @_; + my $case = $reqcase; + $case = 'upper' if $reqcase eq 'sentence' or $reqcase eq 'title'; + return $gullet->readingFromMouth(LaTeXML::Core::Mouth->new(), sub { + $gullet->unread($tokens); + my @toks = (); + my $inmath = 0; + ## Read while expanding, but careful not to expand \dont_expand'd tokens! + while (my $tok = $gullet->readXToken(0, 0, 0)) { + my $cc = $$tok[1]; + if ($cc == CC_MATH) { + $inmath = !$inmath; + push(@toks, $tok); } + elsif ($inmath) { + push(@toks, $tok); } + elsif (($cc == CC_LETTER) || ($cc == CC_OTHER)) { + # LaTeX 3 uses Unicode data tables; we'll punt to Perl + push(@toks, Token(($case eq 'upper' ? uc($$tok[0]) : lc($$tok[0])), $cc)); + $case = 'lower' if $reqcase eq 'sentence' or $reqcase eq 'title'; } + elsif ($cc == CC_SPACE) { + push(@toks, T_SPACE); # HACK (not $tok) to match latex3! 
+ $case = 'upper' if $reqcase eq 'title'; } + elsif (($cc != CC_CS) && ($cc != CC_ACTIVE)) { + push(@toks, $tok); } + elsif ($tok->equals(T_CS('\protect'))) { + my $next = $gullet->readToken(0); # DONT expand this (yet) + ## but check if we're supposed to exclude arg from case change + my $string = $next->getString; + if ($STATE->lookupMapping('text_case_exclude', $string)) { + my $opt = $gullet->readOptional; + my $arg = $gullet->readArg; + # Wow! Optional arg gets case change! + $opt = latexChangeCase($gullet, $case, $opt) + if $opt && !$inmath; + push(@toks, $tok, $next, + ($opt ? (T_OTHER('['), $opt, T_OTHER(']')) : ()), + T_BEGIN, $arg, T_END); } + elsif (my $changed = $STATE->lookupMapping('text_' . $case . 'case', $string)) { + push(@toks, $changed); + $case = 'lower' if $reqcase eq 'sentence' or $reqcase eq 'title'; } + else { + push(@toks, $tok, $next); } } + else { + push(@toks, $tok); } } + return Tokens(@toks); }); } + +DefMacro('\lx@latex@changecase {} GeneralText', sub { + my ($gullet, $case, $tokens) = @_; + return latexChangeCase($gullet, lc(ToString($case)), $tokens); }); + +DefMacroI('\@uclclist', undef, '\oe\OE\o\O\ae\AE\dh\DH\dj\DJ\l\L\ng\NG\ss\SS\th\TH'); RawTeX(<<'EOL'); \DeclareRobustCommand{\MakeUppercase}[1]{{% - \def\i{I}\def\j{J}% - \def\reserved@a##1##2{\let##1##2\reserved@a}% - \expandafter\reserved@a\@uclclist\reserved@b{\reserved@b\@gobble}% - \let\UTF@two@octets@noexpand\@empty - \let\UTF@three@octets@noexpand\@empty - \let\UTF@four@octets@noexpand\@empty - \protected@edef\reserved@a{\uppercase{#1}}% - \reserved@a + \lx@prepare@case@mapping% + \def\({$}\let\)\(% + \let\UTF@two@octets@noexpand\@empty + \let\UTF@three@octets@noexpand\@empty + \let\UTF@four@octets@noexpand\@empty + \edef\reserved@a{\lx@latex@changecase{upper}{#1}}% + \reserved@a }} \DeclareRobustCommand{\MakeLowercase}[1]{{% - \def\reserved@a##1##2{\let##2##1\reserved@a}% - \expandafter\reserved@a\@uclclist\reserved@b{\reserved@b\@gobble}% - 
\let\UTF@two@octets@noexpand\@empty - \let\UTF@three@octets@noexpand\@empty - \let\UTF@four@octets@noexpand\@empty - \protected@edef\reserved@a{\lowercase{#1}}% - \reserved@a + \lx@prepare@case@mapping% + \def\({$}\let\)\(% + \let\UTF@two@octets@noexpand\@empty + \let\UTF@three@octets@noexpand\@empty + \let\UTF@four@octets@noexpand\@empty + \edef\reserved@a{\lx@latex@changecase{lower}{#1}}% + \reserved@a + }} +\DeclareRobustCommand{\MakeTitlecase}[1]{{% + \lx@prepare@case@mapping% + \def\({$}\let\)\(% + \let\UTF@two@octets@noexpand\@empty + \let\UTF@three@octets@noexpand\@empty + \let\UTF@four@octets@noexpand\@empty + \edef\reserved@a{\lx@latex@changecase{sentence}{#1}}% + \reserved@a }} + +%\DeclareRobustCommand{\MakeXTitlecase}[1]{{% +% \lx@prepare@case@mapping% +% \def\({$}\let\)\(% +% \let\UTF@two@octets@noexpand\@empty +% \let\UTF@three@octets@noexpand\@empty +% \let\UTF@four@octets@noexpand\@empty +% \edef\reserved@a{\lx@latex@changecase{title}{#1}}% +% \reserved@a +% }} + +% Trickery to avoid losing {} around arg \protected@edef\MakeUppercase#1{\MakeUppercase{#1}} \protected@edef\MakeLowercase#1{\MakeLowercase{#1}} +\protected@edef\MakeTitlecase#1{\MakeTitlecase{#1}} +%\protected@edef\MakeXTitlecase#1{\MakeXTitlecase{#1}} EOL #====================================================================== diff --git a/lib/LaTeXML/Engine/TeX_Debugging.pool.ltxml b/lib/LaTeXML/Engine/TeX_Debugging.pool.ltxml index 67ec45740..57b2ea7f3 100644 --- a/lib/LaTeXML/Engine/TeX_Debugging.pool.ltxml +++ b/lib/LaTeXML/Engine/TeX_Debugging.pool.ltxml @@ -108,15 +108,15 @@ DefMacro('\meaning Token', sub { $meaning .= ' at ' . $$fontinfo{at} if $$fontinfo{at}; $type = 'font'; } } elsif ($type =~ /(primitive|conditional|constructor)$/i) { - $definition = $definition->getCSorAlias; - $type = ref $definition; + ($definition) = $definition->getCSorAlias->unlist; # Get 1st Token! 
+ $type = ref $definition; $type =~ s/^LaTeXML:://; } # The actual tests start here if ($type =~ /token$/i) { my $cc = $definition->getCatcode; my $char = $definition->toString; my $meaning_cc = $CATCODE_MEANING[$cc] || ''; - $meaning_cc .= ' ' if $meaning_cc; # append space separator if defined + $meaning_cc .= ' ' if $meaning_cc; # append space separator if defined $meaning = $meaning_cc . $char; } elsif ($type =~ /register$/i) { $meaning = $definition->getAddress; } diff --git a/lib/LaTeXML/Engine/TeX_Math.pool.ltxml b/lib/LaTeXML/Engine/TeX_Math.pool.ltxml index 92be7bd50..55cdccf44 100644 --- a/lib/LaTeXML/Engine/TeX_Math.pool.ltxml +++ b/lib/LaTeXML/Engine/TeX_Math.pool.ltxml @@ -119,8 +119,10 @@ DefConstructorI('\lx@begin@inmath@text', undef, "" . "#body" . "", - alias => T_MATH, beforeDigest => sub { $_[0]->beginMode('text'); }, captureBody => 1); -DefConstructorI('\lx@end@inmath@text', undef, "", alias => T_MATH, + alias => T_MATH, + beforeDigest => sub { $_[0]->beginMode('text'); }, captureBody => 1); +DefConstructorI('\lx@end@inmath@text', undef, "", + alias => T_MATH, beforeDigest => sub { $_[0]->endMode('text'); }); #====================================================================== diff --git a/lib/LaTeXML/Engine/TeX_Registers.pool.ltxml b/lib/LaTeXML/Engine/TeX_Registers.pool.ltxml index 583ffad02..7dfd6680c 100644 --- a/lib/LaTeXML/Engine/TeX_Registers.pool.ltxml +++ b/lib/LaTeXML/Engine/TeX_Registers.pool.ltxml @@ -79,7 +79,7 @@ DefMacro('\lx@alloc@ DefToken {} {} DefToken', sub { $tracker = ToString($tracker); my $next; if ($stored_registers{ ToString($allocator) }) { - $next = LaTeXML::Package::allocateRegister($type); + $next = LaTeXML::Package::allocateRegister($type, $cs); $next =~ s/^\Q$type\E//; } else { my $xnext = $STATE->lookupValue($tracker) || Number(0); diff --git a/lib/LaTeXML/Engine/plain.pool.ltxml b/lib/LaTeXML/Engine/plain.pool.ltxml index afa62213c..493361cff 100644 --- a/lib/LaTeXML/Engine/plain.pool.ltxml +++ 
b/lib/LaTeXML/Engine/plain.pool.ltxml @@ -95,8 +95,8 @@ DefConstructor('\TeX', . "X" . "", sizer => sub { (Dimension('1.9em'), Dimension('1.6ex'), Dimension('0.5ex')); }); -DefPrimitiveI('\i', undef, "\x{0131}"); # LATIN SMALL LETTER DOTLESS I -DefPrimitiveI('\j', undef, "\x{0237}"); +DefPrimitiveI('\i', undef, "\x{0131}", robust => 1); # LATIN SMALL LETTER DOTLESS I +DefPrimitiveI('\j', undef, "\x{0237}", robust => 1); #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Alignment code @@ -159,8 +159,6 @@ RawTeX(<<'EoTeX'); \mathchardef\@m=1000 \mathchardef\@M=10000 \mathchardef\@MM=20000 - \countdef\m@ne=21\relax - \m@ne=-1 EoTeX #====================================================================== @@ -506,7 +504,8 @@ DefPrimitiveI('\break', undef, undef); DefPrimitiveI('\nobreak', undef, undef); DefPrimitiveI('\allowbreak', undef, undef); -Let(T_ACTIVE('~'), T_CS('\lx@NBSP')); +# Curiously, this must be expandable, else things like \MakeUppercase loop. +DefMacro(T_ACTIVE("~"), T_CS('\lx@NBSP')); DefMacroI('\slash', undef, '/'); DefPrimitiveI('\filbreak', undef, undef); diff --git a/lib/LaTeXML/Package.pm b/lib/LaTeXML/Package.pm index 60f04b7e3..0d583b58f 100644 --- a/lib/LaTeXML/Package.pm +++ b/lib/LaTeXML/Package.pm @@ -164,6 +164,8 @@ sub coerceCS { if (ref $cs) { } elsif ($cs =~ s/^\\csname\s+(.*)\\endcsname//) { $cs = T_CS('\\' . 
$1); } + elsif (length($cs) == 1) { # Match an active char + ($cs) = TokenizeInternal($cs)->unlist; } else { $cs = T_CS($cs); } return $cs; } @@ -593,7 +595,7 @@ our %allocations = ( '\box' => '\count14', '\toks' => '\count15'); sub allocateRegister { - my ($type) = @_; + my ($type, $cs) = @_; if (my $addr = $allocations{$type}) { # $addr is a Register but MUST be stored as \count<#> if (my $n = $STATE->lookupValue($addr)) { my $next = $n->valueOf + 1; @@ -605,7 +607,8 @@ sub allocateRegister { else { # If allocations not set up, punt to unallocated register return; } } else { - Error('misdefined', $type, undef, "Type $type is not an allocated register type"); + Error('misdefined', $type, undef, + "Type $type is not an allocated register type, for " . ToString($cs)); return; } } #====================================================================== @@ -639,9 +642,13 @@ sub NewCounter { NewCounter($within); } my $cs = T_CS("\\c\@$ctr"); my $prevdefn = $STATE->lookupMeaning($cs); - if ($prevdefn && ((ref $prevdefn) eq 'LaTeXML::Core::Definition::Register') - && (($$prevdefn{address} || '') =~ /^\\count/)) { - Info('unexpected', $cs, undef, "Counter $ctr was already allocated, skipping"); } + if ($prevdefn && ((ref $prevdefn) eq 'LaTeXML::Core::Definition::Register')) { + ## Note: it is quite reasonable to redefine counters, + ## in order to change resetting & nesting. So, don't be noisy! + # my $a = ($$prevdefn{address} || '') =~ /^\\count/; + # Info('unexpected', $cs, undef, + # "Counter $ctr was already ".($a ? 'allocated':'defined').", skipping"); + } else { Warn('unexpected', $cs, undef, "Counter " . ToString($cs) . 
" was already defined as $prevdefn; redefining") if $prevdefn; @@ -1260,7 +1267,7 @@ sub DefPrimitiveI { $paramlist = parseParameters($paramlist, $cs) if defined $paramlist && !ref $paramlist; my $mode = $options{mode}; my $bounded = $options{bounded}; - # Not sure robust entirely makes sense for Primitives, other than LaTeXML vs LaTeX mismatch + # robust makes $cs a protected Macro, expanding to primitive with munged cs my $defcs = ($options{robust} ? defRobustCS($cs, %options) : $cs); $STATE->installDefinition(LaTeXML::Core::Definition::Primitive ->new($defcs, $paramlist, $replacement, @@ -1276,7 +1283,7 @@ sub DefPrimitiveI { outer => $options{outer}, long => $options{long}, isPrefix => $options{isPrefix}, - alias => $options{alias}, + alias => (defined $options{alias} ? coerceCS($options{alias}) : undef), ), $options{scope}); AssignValue(ToString($cs) . ":locked" => 1) if $options{locked}; @@ -1305,7 +1312,7 @@ sub DefRegisterI { $paramlist = parseParameters($paramlist, $cs) if defined $paramlist && !ref $paramlist; my $type = $register_types{ ref $value }; my $address = ($options{address} ? ToString($options{address}) - : ($options{allocate} ? allocateRegister($options{allocate}) : undef)); + : ($options{allocate} ? allocateRegister($options{allocate}, $cs) : undef)); $address = ToString($cs) unless $address; if ((defined $value) && ((!defined $options{address}) || !defined LookupValue($address))) { AssignValue($address => $value, 'global'); } # Assign, but do not RE-assign @@ -1406,7 +1413,7 @@ sub DefConstructorI { $paramlist = parseParameters($paramlist, $cs) if defined $paramlist && !ref $paramlist; my $mode = $options{mode}; my $bounded = $options{bounded}; - # Not sure robust entirely makes sense for Constructors, other than LaTeXML vs LaTeX mismatch + # robust makes $cs a protected Macro, expanding to primitive with munged cs + my $defcs = ($options{robust} ? 
defRobustCS($cs, %options) : $cs); $STATE->installDefinition(LaTeXML::Core::Definition::Constructor ->new($defcs, $paramlist, $replacement, @@ -1422,7 +1429,7 @@ sub DefConstructorI { beforeConstruct => flatten($options{beforeConstruct}), afterConstruct => flatten($options{afterConstruct}), nargs => $options{nargs}, - alias => (defined $options{alias} ? $options{alias} + alias => (defined $options{alias} ? coerceCS($options{alias}) : ($options{robust} ? $cs : undef)), reversion => $options{reversion}, attributeForm => $options{attributeForm}, @@ -1566,7 +1573,7 @@ sub DefMathI { my $nargs = ($paramlist ? scalar($paramlist->getParameters) : 0); my $csname = $cs->getString; my $meaning = $options{meaning}; - my $name = $options{alias} || $csname; + my $name = (defined $options{alias} ? ToString($options{alias}) : $csname); # Avoid undefs specifically, we'll be doing string comparisons $presentation = '' unless defined $presentation; $meaning = '' unless defined $meaning; @@ -1645,9 +1652,9 @@ sub defmath_common_constructor_options { my $sizer = inferSizer($options{sizer}, $options{reversion}); my $presentation_s = $presentation && ToString($presentation); return ( - alias => $options{alias} || $cs->getString, + alias => (defined $options{alias} ? coerceCS($options{alias}) : $cs), (defined $options{reversion} ? (reversion => $options{reversion}) : ()), - (defined $sizer ? (sizer => $sizer) : ()), + (defined $sizer ? (sizer => $sizer) : ()), beforeDigest => flatten(sub { requireMath($cs->getString); }, ($options{nogroup} ? () : (sub { $_[0]->bgroup; })), ($options{font} ? (sub { MergeFont(%{ $options{font} }); }) : ()), @@ -1768,9 +1775,8 @@ sub defmath_prim { my $locator = $stomach->getGullet->getLocator; my %properties = %options; my $font = LookupValue('font')->merge(%$reqfont)->specialize($string); - my $mode = (LookupValue('IN_MATH') ? 'math' : 'text'); - my $alias = (ref $options{alias} ? $options{alias} - : (defined $options{alias} ? 
T_CS($options{alias}) : undef)); + my $mode = (LookupValue('IN_MATH') ? 'math' : 'text'); + my $alias = (defined $options{alias} ? coerceCS($options{alias}) : undef); my $reversion = ((!defined $options{reversion}) && (($options{revert_as} || '') eq 'presentation') ? $presentation : $alias // $cs); diff --git a/lib/LaTeXML/Package/textcase.sty.ltxml b/lib/LaTeXML/Package/textcase.sty.ltxml index d82236342..f8b8e72e6 100644 --- a/lib/LaTeXML/Package/textcase.sty.ltxml +++ b/lib/LaTeXML/Package/textcase.sty.ltxml @@ -16,6 +16,11 @@ use warnings; use LaTeXML::Package; #====================================================================== -InputDefinitions('textcase', type => 'sty', noltxml => 1); +# This functionality is now included in base LaTeX +#InputDefinitions('textcase', type => 'sty', noltxml => 1); +# We only need to bind the old textcase names. +Let('\MakeTextUppercase', '\MakeUppercase'); +Let('\MakeTextLowercase', '\MakeLowercase'); +Let('\MakeTextTitlecase', '\MakeTitlecase'); #====================================================================== 1; diff --git a/t/expansion/lettercase.pdf b/t/expansion/lettercase.pdf index a7e4b0ef6..c00c8b45a 100644 Binary files a/t/expansion/lettercase.pdf and b/t/expansion/lettercase.pdf differ diff --git a/t/expansion/lettercase.xml b/t/expansion/lettercase.xml index c2e5fa6b0..ab14690e2 100644 --- a/t/expansion/lettercase.xml +++ b/t/expansion/lettercase.xml @@ -65,7 +65,7 @@ - (foo) + [foo] @@ -77,7 +77,7 @@ - [FOO] + (FOO) diff --git a/t/expansion/textcase.pdf b/t/expansion/textcase.pdf index 439c46288..863f490cc 100644 Binary files a/t/expansion/textcase.pdf and b/t/expansion/textcase.pdf differ diff --git a/t/expansion/textcase.xml b/t/expansion/textcase.xml index 5af9948e8..f397001e3 100644 --- a/t/expansion/textcase.xml +++ b/t/expansion/textcase.xml @@ -37,8 +37,7 @@ a - -OR EVEN + OR EVEN = @@ -104,8 +103,7 @@ OR EVEN HERE - -MORE TEXT

+ MORE TEXT

@@ -116,9 +114,9 @@ MORE TEXT

1.5Nested brace groups -

A B C +

A B C - D + d