Skip to content

Commit

Permalink
Subfigure model enhancements (#2302)
Browse files Browse the repository at this point in the history
* WIP on figure grid tests

* add trickier figure cases

* WIP auto-grouping panels

* update mixed figure test with some algorithms

* introduce Document::removeClass helper

* new approach: ltx_figure_panel class with auto-wrapping into panels, when needed

* more care for lead/mid/trail-ing captions; edge case checks

* more advanced handling of subfigures, via ltx:caption partitions

* also try ltx:figure for _CaptureBlock_; some more careful edge case testing

* update 3 new tests for complex figure grids

* remove leftover asset

* drop diagnostic xsl:messages

* use the local none.png asset, easier CI

* update CI-only SI test to use ltx_figure_panel class

* ensure a hard linebreak in flex figures for most Block elements

* re-add partition by breaks to compute ltx_flex_size_ value

* drop diagnostic message

* only use graphics candidates if they already point to physical file

* fix subtle delimiter bug in loadCompiledSchema; add getSchemaClassNames; ensure schema is loaded when accessed

* rely on schema class information for classifying figure pieces

* remove leftover debug print

* fix edge case for insertBlock over empty Document; see arXiv:1706.03762

* add Document::removeSSValues helper

* recognize edge case when a ltx:figure begins with a ltx:caption, without the lead-in tags

* initialize figure panel tag names on first call
  • Loading branch information
dginev authored Jan 28, 2024
1 parent 4411c51 commit 58389a8
Show file tree
Hide file tree
Showing 23 changed files with 3,416 additions and 162 deletions.
9 changes: 9 additions & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,12 @@ t/complex/cleveref_minimal.xml
t/complex/equationnest.pdf
t/complex/equationnest.tex
t/complex/equationnest.xml
t/complex/figure_dual_caption.pdf
t/complex/figure_dual_caption.tex
t/complex/figure_dual_caption.xml
t/complex/figure_mixed_content.pdf
t/complex/figure_mixed_content.tex
t/complex/figure_mixed_content.xml
t/complex/hyperchars.pdf
t/complex/hyperchars.tex
t/complex/hyperchars.xml
Expand Down Expand Up @@ -1665,6 +1671,9 @@ t/structure/fancyhdr.xml
t/structure/figures.pdf
t/structure/figures.tex
t/structure/figures.xml
t/structure/figure_grids.pdf
t/structure/figure_grids.tex
t/structure/figure_grids.xml
t/structure/filelist.pdf
t/structure/filelist.tex
t/structure/filelist.xml
Expand Down
9 changes: 8 additions & 1 deletion lib/LaTeXML/Common/Model.pm
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ sub loadCompiledSchema {
$self->addTagAttribute($tag, split(/,/, $attr));
$self->addTagContent($tag, split(/,/, $children)); }

elsif ($line =~ /^([^:=]+):=(.*?)$/) {
elsif ($line =~ /^([^:=]+):=\(?([^)]*?)\)?$/) {
my ($classname, $elements) = ($1, $2);
$self->setSchemaClass($classname, { map { ($_ => 1) } split(/,/, $elements) }); }
elsif ($line =~ /^([^=]+)=(.*?)$/) {
Expand Down Expand Up @@ -475,8 +475,15 @@ sub canHaveAttribute {
: ($$attr{'*'} ? 1
: 0)))); } }

# Return the element names recorded as members of the schema class $classname.
# The schema is loaded first if it has not been loaded yet.
# Yields an empty list when no such class is known; the order of the
# returned names is unspecified (they are hash keys).
sub getSchemaClassNames {
  my ($self, $classname) = @_;
  if (!$$self{schema_loaded}) {
    $self->loadSchema; }
  my $members = $$self{schemaclass}{$classname};
  return unless $members;
  return keys %$members; }

# Test whether $tag belongs to the schema class $classname.
# $tag may be a qualified element name, or a node (any reference), in which
# case its qualified name is looked up via getNodeQName.
# The schema is loaded first if it has not been loaded yet.
# Returns the stored membership flag, or a false value if the class
# (or the tag within it) is unknown.
sub isInSchemaClass {
  my ($self, $classname, $tag) = @_;
  if (!$$self{schema_loaded}) {
    $self->loadSchema; }
  if (ref $tag) {    # In case tag is a node.
    $tag = $self->getNodeQName($tag); }
  my $members = $$self{schemaclass}{$classname};
  return $members unless $members;
  return $$members{$tag}; }
Expand Down
31 changes: 26 additions & 5 deletions lib/LaTeXML/Core/Document.pm
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use LaTeXML::Common::XML;
use LaTeXML::Util::Radix;
use Unicode::Normalize;
use Scalar::Util qw(blessed);
use base qw(LaTeXML::Common::Object);
use base qw(LaTeXML::Common::Object);

#**********************************************************************
# These two element names are `leaks' of the document structure into
Expand Down Expand Up @@ -1408,9 +1408,9 @@ sub addSSValues {
my ($self, $node, $key, $values) = @_;
$values = $values->toAttribute if ref $values;
if ((defined $values) && ($values ne '')) { # Skip if `empty'; but 0 is OK!
my @values = split(/\s/, $values);
my @values = split(/\s+/, $values);
if (my $oldvalues = $node->getAttribute($key)) { # previous values?
my @old = split(/\s/, $oldvalues);
my @old = split(/\s+/, $oldvalues);
foreach my $new (@values) {
push(@old, $new) unless grep { $_ eq $new } @old; }
setAttribute($self, $node, $key => join(' ', sort @old)); }
Expand All @@ -1419,8 +1419,29 @@ sub addSSValues {
return; }

sub addClass {
my ($self, $node, $class) = @_;
return addSSValues($self, $node, class => $class); }
my ($self, $node, $value) = @_;
return addSSValues($self, $node, class => $value); }

# Remove one or more values from a space-separated attribute of a node.
#   $node   : the element whose attribute $key is being edited
#   $key    : the attribute name (eg. 'class')
#   $values : a string of whitespace-separated values to remove, or an
#             object providing a toAttribute method which yields such a string.
# The remaining values are written back sorted and joined by single spaces;
# if no values remain, the attribute is removed entirely.
# Always returns nothing.
sub removeSSValues {
  my ($self, $node, $key, $values) = @_;
  $values = $values->toAttribute if ref $values;
  # Skip if `empty'; but 0 is OK! (mirrors the check made by addSSValues)
  return unless (defined $values) && ($values ne '');
  # split(' ',...) splits on runs of whitespace AND ignores leading whitespace,
  # so that no empty-string pseudo-value sneaks in.
  my %to_remove = map { ($_ => 1) } split(' ', $values);
  return unless %to_remove;
  my $current_values = $node->getAttribute($key);
  # Test for definedness rather than truth, so that a lone "0" is still processed.
  if ((defined $current_values) && ($current_values ne '')) {
    my @updated = grep { !$to_remove{$_} } split(' ', $current_values);
    if (@updated) {
      setAttribute($self, $node, $key => join(' ', sort @updated)); }
    else {    # if no remaining values, delete the attribute
      $node->removeAttribute($key); } }
  return; }

# Convenience wrapper: remove the (whitespace-separated) class name(s) in
# $value from the class attribute of $node, by delegating to removeSSValues.
# Always returns nothing.
sub removeClass {
  my ($self, $node, $value) = @_;
  removeSSValues($self, $node, 'class', $value);
  return; }

#**********************************************************************
# Association of nodes and ids (xml:id)
Expand Down
181 changes: 118 additions & 63 deletions lib/LaTeXML/Package/LaTeX.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use strict;
use warnings;
use LaTeXML::Package;
use LaTeXML::Util::Pathname;
use List::Util qw(min max);

#**********************************************************************
# Organized following
Expand Down Expand Up @@ -524,6 +525,7 @@ DefConstructor('\lx@note[]{}[]{}',
. "#4"
. "</ltx:note>",
mode => 'text', bounded => 1,
sizer => '#mark',
beforeDigest => sub { reenterTextMode(1); neutralizeFont(); },
properties => sub {
my $type = ToString($_[2]);
Expand Down Expand Up @@ -3389,78 +3391,131 @@ DefConstructor('\@@generic@caption[]{}', "<ltx:text class='ltx_caption'>#2</ltx:
Error('unexpected', '\caption', $_[0],
"Use of \\caption outside any known float"); });

sub add_subfigure_row_breaks {
my ($document, $node, $whatsit, @contents) = @_;
my $textwidth = ($whatsit->getProperty('floatwidth') || Dimension('345pt'))->valueOf();
my $current_width = 0;
my @new_contents = ();
for my $block (@contents) {
my $block_name = $document->getNodeQName($block);
if ($block_name eq 'ltx:break') {
our $FIGURE_PANEL_CLASS = 'ltx_figure_panel';

# most block-level elements are expected horizontally standalone in a {figure}.
# (especially ones that directly match LaTeX {envs})
our %standalone_panel_names = map { $_ => 1 }
qw(ltx:p ltx:listing ltx:math ltx:itemize ltx:enumerate ltx:quote ltx:theorem ltx:proof
ltx:description ltx:equation ltx:equationgroup ltx:verbatim);

sub arrange_panels_and_breaks {
my ($document, $node, $whatsit) = @_;
my $float_width = ($whatsit->getProperty('floatwidth') || Dimension('345pt'))->valueOf();
# Note that 0.0625x minimal panel width will naturally enforce a limit of max 16 panels per row.
# except that we are currently under-sizing(?) some individual subfigures, so enforce max 32.
my $min_panel_width = 0.03125 * $float_width;
my $current_width = 0;
my @all_panels = ();
my @all_contents = ();
# row contents will bookkeep triples of [node, name(optional), width(optional)]
my @row_contents = ();
my $panels_found = 0;
my $panel_break_names = LookupValue('figure_panel_break_names');
if (!$panel_break_names) {
# initialize schema-based tag names on first call for a given document;
my $model = $STATE->getModel;
$panel_break_names = { map { $_ => 1 } ('ltx:break',
$model->getSchemaClassNames('Meta'),
$model->getSchemaClassNames('SectionalFrontMatter'),
$model->getSchemaClassNames('Caption')) };
# what to do with footnotes? Move from Meta to Misc class?
$$panel_break_names{'ltx:note'} = undef;
AssignValue('figure_panel_break_names', $panel_break_names, 'global'); }
for my $child (element_nodes($node)) {
my $child_name = $document->getNodeQName($child);
if ($$panel_break_names{$child_name}) {
# reset bookkeeping on prior breaks, be they author-deposited or computed
$current_width = 0; }
# (caption/metadata bits also flush the current line, but we avoid the explicit break element)
$current_width = 0;
push(@all_contents, map { $$_[0] } @row_contents);
push(@all_contents, $child);
@row_contents = (); }
else {
my $block_width = $document->getNodeBox($block)->getWidth->valueOf();
if (@new_contents && ($current_width + $block_width > $textwidth)) {
my $is_standalone = $standalone_panel_names{$child_name};
if ($is_standalone && @row_contents) {
# if standalone, finalize current row content, and add a break
my $break = $document->insertElementBefore($child, 'break', 'class' => 'ltx_break');
push(@all_contents, map { $$_[0] } @row_contents);
push(@all_contents, $break);
$current_width = 0;
@row_contents = (); }
my $child_width = $document->getNodeBox($child)->getWidth->valueOf();
if (@row_contents && ($current_width + $child_width > $float_width)) {
# break when we exceed the line width, to keep close with the PDF arrangement
my $break = $document->insertElementBefore($block, 'break', 'class' => 'ltx_break');
push(@new_contents, $break);
$current_width = $block_width; }
my $break = $document->insertElementBefore($child, 'break', 'class' => 'ltx_break');
push(@all_contents, map { $$_[0] } @row_contents);
push(@all_contents, $break);
$document->addClass($child, $FIGURE_PANEL_CLASS);
@row_contents = ([$child, $child_name, $child_width]);
push(@all_panels, $child);
$current_width = $child_width; }
else {
$current_width += $block_width; } }
push(@new_contents, $block); }
return @new_contents; }

sub add_subfigure_row_sizes {
my ($document, $node, $whatsit, @contents) = @_;
my @row = ();
for my $child (@contents) {
if ($document->getNodeQName($child) eq 'ltx:break') {
next unless @row;
my $row_size = scalar(@row);
my $row_size_attr = "ltx_flex_size_$row_size";
if ($row_size > 4) { # add a generic class for larger rows, to style together easily
$row_size_attr .= " ltx_flex_size_many"; }
for my $cell (@row) {
my $class = $cell->getAttribute('class') || '';
$class .= ' ' if $class;
$cell->setAttribute('class', $class . $row_size_attr); }
@row = (); }
else {
push(@row, $child); } }
# Don't forget the last row, if any
if (@row) {
my $row_size = scalar(@row);
my $row_size_attr = "ltx_flex_size_$row_size";
if ($row_size > 4) { # add a generic class for larger rows, to style together easily
$row_size_attr .= " ltx_flex_size_many"; }
for my $cell (@row) {
my $class = $cell->getAttribute('class') || '';
$class .= ' ' if $class;
$cell->setAttribute('class', $class . $row_size_attr); } }
return; }

# Feature 1: Add flex classes to each subfigure cell in a figure.
# Feature 2: Add identifiers on each figure
sub SubfigureAndID {
# check if we can merge into the previous element
if (my $prev_panel = pop(@row_contents)) {
my $prev_node = $$prev_panel[0];
my $prev_width = $$prev_panel[2] || $document->getNodeBox($prev_node)->getWidth->valueOf();
# Heuristic:
# Do the widths indicate a large(8x) size discrepancy? Or a very small joint size?
# If so, contain the two pieces in a single panel.
my $prev_name = $$prev_panel[1] || $document->getNodeQName($prev_node);
if (($prev_width > 0 && $child_width > 0 &&
(max($prev_width, $child_width) / min($prev_width, $child_width) > 8)) ||
($prev_width + $child_width < $min_panel_width)) {
# avoid markup complexity - any time we do a merge, require a ltx:block marked as a panel.
# if one exists, reuse, otherwise create a wrapping one.
#my $prev_name = $$prev_panel[1] || $document->getNodeQName($prev_node);
if ($prev_name eq 'ltx:block') {
$child->unbindNode;
$prev_node->appendChild($child);
push(@row_contents, [$prev_node, $prev_name, $prev_width + $child_width]);
} elsif ($child_name eq 'ltx:block') {
$prev_node->unbindNode;
$child->appendChild($prev_node);
push(@row_contents, [$child, $child_name, $prev_width + $child_width]);
push(@all_panels, $child);
} else { # create a new block to hold the two siblings.
my $block = $document->wrapNodes('ltx:block', $prev_node, $child);
push(@row_contents, [$block, 'ltx:block', $prev_width + $child_width]);
pop(@all_panels); push(@all_panels, $block);
} }
else {
# otherwise keep the last panel as-is, and append a sibling
push(@row_contents, $prev_panel);
push(@row_contents, [$child, $child_name, $child_width]);
push(@all_panels, $child); } }
else { # no last panel? just add the child
push(@row_contents, [$child, $child_name, $child_width]);
push(@all_panels, $child);
# yet if standalone, finalize current row content, and add a break
if ($is_standalone && @row_contents) {
push(@all_contents, map { $$_[0] } @row_contents);
if (my $trailer = $child->nextSibling) {
if (!$$panel_break_names{ $document->getNodeQName($trailer) }) {
my $break = $document->insertElementBefore($trailer, 'break', 'class' => 'ltx_break');
push(@all_contents, $break); } }
@row_contents = ();
$current_width = 0; } }
$current_width += $child_width; } } }
push(@all_contents, map { $$_[0] } @row_contents);
# simplify: if we only ever added 1 panel, remove its class (so that we only mark complex figures)
if (scalar(@all_panels) > 1) {
for my $panel (@all_panels) {
$document->addClass($panel, $FIGURE_PANEL_CLASS); } }
return @all_contents; }

sub BuildPanelsAndID {
my ($document, $node, $whatsit, $id_kind) = @_;
# 1. Flex
# A little verbose, but much easier to do than XSLT 1.0 would allow.
my $contents_count = 0; # count with author breaks excluded
my @contents = grep { my $name = $document->getNodeQName($_);
my $ok = ($name =~ /^ltx:(?:graphics|float|figure|table|tabular|break|inline-logical-block|inline-block|listing)$/);
$contents_count += 1 if $ok and $name ne 'ltx:break';
$ok; } element_nodes($node);
if ($contents_count > 1) {
@contents = add_subfigure_row_breaks($document, $node, $whatsit, @contents);
add_subfigure_row_sizes($document, $node, $whatsit, @contents); }
# 1. Each figure/table/float is comprised of 1 or more "panels", possibly separated by line-breaks,
# where multiple panels tend to correspond to subfigures/subtables/subfloats.
my @contents = arrange_panels_and_breaks($document, $node, $whatsit);
# 2. IDs
# Note that even without \caption, we'd probably like to have xml:id.
return GenerateID($document, $node, $whatsit, $id_kind); }

Tag('ltx:figure', afterClose => sub { SubfigureAndID(@_, 'fig'); });
Tag('ltx:float', afterClose => sub { SubfigureAndID(@_, 'tab'); });
Tag('ltx:table', afterClose => sub { SubfigureAndID(@_, 'tab'); });
Tag('ltx:figure', afterClose => sub { BuildPanelsAndID(@_, 'fig'); });
Tag('ltx:float', afterClose => sub { BuildPanelsAndID(@_, 'tab'); });
Tag('ltx:table', afterClose => sub { BuildPanelsAndID(@_, 'tab'); });

# These may need to float up to where they're allowed,
# or they may need to close <p> or similar.
Expand Down
14 changes: 11 additions & 3 deletions lib/LaTeXML/Package/TeX.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -2252,8 +2252,12 @@ Tag('svg:foreignObject', autoOpen => 1, autoClose => 1,
# from block, logical-block or sectional-block, or the inline- variants.
sub insertBlock {
my ($document, $contents, %blockattr) = @_;
my $model = $document->getModel;
my $context = $document->getElement; # Where we originally start inserting.
my $model = $document->getModel;
my $context = $document->getElement; # Where we originally start inserting.
if (!$context) {
# edge case: if we start the doc with a block, the context is empty
$document->absorb($contents);
return; }
my $context_tag = $document->getNodeQName($context);
my $is_svg = ($context_tag =~ /^svg:/); # svg is slightly tricky
my $ignorable_attr = $is_svg || !scalar(keys %blockattr); # if we do not REQUIRE the attributes
Expand Down Expand Up @@ -2292,7 +2296,7 @@ sub insertBlock {
my @candidates =
($inline
? (qw(ltx:inline-block ltx:inline-logical-block ltx:inline-sectional-block))
: (qw(ltx:block ltx:logical-block ltx:sectional-block)));
: (qw(ltx:block ltx:logical-block ltx:sectional-block ltx:figure)));
my @filtered_candidates = (); # Filtered containers that can contain the content
foreach my $candidate (@candidates) {
push(@filtered_candidates, $candidate)
Expand All @@ -2301,6 +2305,10 @@ sub insertBlock {
= grep { ($document->canContain($context_tag, $_) ? $_ : ()); } @filtered_candidates;
if (my $tag = $allowed_candidates[0] || $filtered_candidates[0]) {
$document->renameNode($container, $tag, 1); } # Rename the capture to the correct container
else { # we didn't know what to do?
Warn('malformed', '_CaptureBlock_', $document, "Did not find a block-like candidate in $context_tag (with attributes (" . join(";", map { "$_=$blockattr{$_}" } keys %blockattr) . ')');
$document->renameNode($container, 'ltx:block', 1);
}
}
return @nodes; }

Expand Down
28 changes: 16 additions & 12 deletions lib/LaTeXML/Post/Graphics.pm
Original file line number Diff line number Diff line change
Expand Up @@ -134,18 +134,22 @@ sub getGraphicsSourceTypes {
sub findGraphicFile {
my ($self, $doc, $node) = @_;
if (my $source = $node->getAttribute('graphic')) {
# Find all acceptable image files, in order of search paths
my ($dir, $name, $reqtype) = pathname_split($source);
# Ignore the requested type? Or should it increase desirability?
#
# If this is *NOT* a known graphics type, it would be best to treat it as
# a name suffix (e.g. name.1 from name.1.png)
if ((length($reqtype) > 0) && !(grep { $_ eq lc($reqtype) } $$self{graphics_types})) {
$name .= ".$reqtype"; }
my $file = pathname_concat($dir, $name);
my @paths = pathname_findall($file, paths => $LaTeXML::Post::Graphics::SEARCHPATHS,
# accept empty type, incase bad type name, but actual file's content is known type.
types => ['', $self->getGraphicsSourceTypes]);
# if we already have a usable candidate, save ourselves the work
my @paths = grep { -e $_ } split(',', $node->getAttribute('candidates') || '');
if (!scalar(@paths)) {
# Find all acceptable image files, in order of search paths
my ($dir, $name, $reqtype) = pathname_split($source);
# Ignore the requested type? Or should it increase desirability?
#
# If this is *NOT* a known graphics type, it would be best to treat it as
# a name suffix (e.g. name.1 from name.1.png)
if ((length($reqtype) > 0) && !(grep { $_ eq lc($reqtype) } $$self{graphics_types})) {
$name .= ".$reqtype"; }
my $file = pathname_concat($dir, $name);
@paths = pathname_findall($file, paths => $LaTeXML::Post::Graphics::SEARCHPATHS,
# accept empty type, incase bad type name, but actual file's content is known type.
types => ['', $self->getGraphicsSourceTypes]);
}
my ($best, $bestpath) = (-1, undef);
# Now, find the first image that is either the correct type,
# or has the most desirable type mapping
Expand Down
5 changes: 5 additions & 0 deletions lib/LaTeXML/Util/Image.pm
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ sub image_candidates {
$path =~ s/^("+)(.+)\g1$/$2/; # unwrap if in quotes
my $searchpaths = $STATE->lookupValue('GRAPHICSPATHS');
my @candidates = pathname_findall($path, types => ['*'], paths => $searchpaths);
if (!@candidates) {
# if we have no candidates, also consult kpsewhich,
# e.g. for "example-image-a"
if (my $kpse_found = pathname_kpsewhich("$path.png", "$path.pdf")) {
@candidates = ($kpse_found); } }
if (my $base = $STATE->lookupValue('SOURCEDIRECTORY')) {
@candidates = map { pathname_relative($_, $base) } @candidates; }
return ($path, @candidates); }
Expand Down
Loading

0 comments on commit 58389a8

Please sign in to comment.