From a881a92d543aa154e9e9a7c9c5a109789962ec32 Mon Sep 17 00:00:00 2001 From: Vincenzo Mantova Date: Sun, 7 Jan 2024 19:39:30 +0000 Subject: [PATCH] initial implementation of -recorder (for latexmlpost) --- bin/latexmlpost | 23 +++++++++++++++++------ lib/LaTeXML/Common/XML.pm | 1 + lib/LaTeXML/Common/XML/XSLT.pm | 11 +++++++++++ lib/LaTeXML/Post/MakeBibliography.pm | 2 ++ lib/LaTeXML/Post/Writer.pm | 2 ++ lib/LaTeXML/Post/XSLT.pm | 11 ++++++++++- 6 files changed, 43 insertions(+), 7 deletions(-) diff --git a/bin/latexmlpost b/bin/latexmlpost index c1e0696839..21413284aa 100755 --- a/bin/latexmlpost +++ b/bin/latexmlpost @@ -30,7 +30,7 @@ use LaTeXML::Util::ObjectDB; #====================================================================== # undef => unspecified; 0 = NO, 1 = YES my ($help, $showversion, $verbosity, $validate, $omit_doctype) = (0, 0, 0, 1, 0); -my ($sourcedir, $destination, $logfile) = (undef, undef, undef); +my ($sourcedir, $destination, $logfile, $recorder, $recordfile) = (undef, undef, undef, undef, undef); my @paths = (); my ($format, $extension, $is_html, $urlstyle) = (undef, undef, undef, 'server'); my ($numbersections) = (1); @@ -58,11 +58,13 @@ my $splitback = [qw(bibliography appendix index)]; # Get the command line arguments. GetOptions("quiet" => sub { $verbosity--; }, - "verbose" => sub { $verbosity++; }, - "VERSION" => \$showversion, - "help|?" => \$help, - "log=s" => \$logfile, - "debug=s" => sub { no strict 'refs'; $LaTeXML::DEBUG{ lc($_[1]) } = 1; }, + "verbose" => sub { $verbosity++; }, + "VERSION" => \$showversion, + "help|?" => \$help, + "log=s" => \$logfile, + "recorder" => \$recorder, + "recordfile=s" => \$recordfile, + "debug=s" => sub { no strict 'refs'; $LaTeXML::DEBUG{ lc($_[1]) } = 1; }, # Source Options "sourcedirectory=s" => \$sourcedir, "validate!" 
=> \$validate, @@ -165,6 +167,7 @@ if (my @fails = validate_args()) { my $starttime = StartTime(); my $latexmlpost; eval { # Catch errors + UseRecordFile($recordfile); UseLog($logfile); Note($LaTeXML::IDENTITY . ($prescan ? " scanning " : " paginating ") . $xmlfile); @@ -214,6 +217,7 @@ Note("Postprocessing " . ($code == 3 ? 'failed' : 'complete') . " " . $status . UseLog(undef); CheckDebuggable(); UseSTDERR(undef); +UseRecordFile(undef); exit($code == 3 ? 1 : 0); #====================================================================== @@ -247,6 +251,13 @@ sub validate_args { $logfile = pathname_make(dir => pathname_cwd(), name => $name, type => 'latexmlpost.log'); } } $logfile = 'latexmlpost.log' unless $logfile; + # Optionally create a .fls file for tracking the opening of files (option --recorder) + # Use $jobname.latexmlpost.fls, latexmlpost.fls, or argument of --recordfile + $recordfile = $recorder ? $recordfile : undef; + if ($pathname && $recorder && !$recordfile) { + my ($dir, $name, $ext) = pathname_split($pathname); + $recordfile = $name ? pathname_make(dir => pathname_cwd(), name => $name, type => 'latexmlpost.fls') : 'latexmlpost.fls'; } + #====================================================================== # Sanity check and Completion of options. 
#====================================================================== diff --git a/lib/LaTeXML/Common/XML.pm b/lib/LaTeXML/Common/XML.pm index 770ed6e24a..114fd35242 100644 --- a/lib/LaTeXML/Common/XML.pm +++ b/lib/LaTeXML/Common/XML.pm @@ -228,6 +228,7 @@ sub initialize_catalogs { return if $catalogs_initialized; $catalogs_initialized = 1; foreach my $catalog (pathname_findall('LaTeXML.catalog', installation_subdir => '.')) { + RecordInput($catalog); XML::LibXML->load_catalog($catalog); } return; } diff --git a/lib/LaTeXML/Common/XML/XSLT.pm b/lib/LaTeXML/Common/XML/XSLT.pm index a0f7cfda95..35b0ae6ab8 100644 --- a/lib/LaTeXML/Common/XML/XSLT.pm +++ b/lib/LaTeXML/Common/XML/XSLT.pm @@ -12,6 +12,9 @@ package LaTeXML::Common::XML::XSLT; use strict; use warnings; +use LaTeXML::Common::Error; +use URI; +use URI::Escape; use XML::LibXSLT; sub new { @@ -20,6 +23,10 @@ sub new { LaTeXML::Common::XML::initialize_catalogs(); # LaTeXML::Common::XML::initialize_input_callbacks($xslt,installation_subdir => 'resources/XSLT'); # Do we still need this logic, if callbacks work? 
+ my $input_callbacks = XML::LibXML::InputCallback->new(); + $input_callbacks->register_callbacks([sub { RecordInput(uri_unescape(URI->new($_[0])->path)); return 0; }, undef, undef, undef]); + $xslt->input_callbacks($input_callbacks); + if (!ref $stylesheet) { $stylesheet = LaTeXML::Common::XML::Parser->new()->parseFile($stylesheet); } # $stylesheet = $xslt->parse_stylesheet_file($stylesheet); } @@ -31,5 +38,9 @@ sub transform { my ($self, $document, %params) = @_; return $$self{stylesheet}->transform($document, %params); } +sub security_callbacks { + my ($self, $security) = @_; + return $$self{stylesheet}->security_callbacks($security); } + #====================================================================== 1; diff --git a/lib/LaTeXML/Post/MakeBibliography.pm b/lib/LaTeXML/Post/MakeBibliography.pm index e01b19bc05..8ece7ef3d0 100644 --- a/lib/LaTeXML/Post/MakeBibliography.pm +++ b/lib/LaTeXML/Post/MakeBibliography.pm @@ -13,6 +13,7 @@ package LaTeXML::Post::MakeBibliography; use strict; use warnings; use LaTeXML::Util::Pathname; +use LaTeXML::Common::Error; use LaTeXML::Common::XML; use LaTeXML::Util::Radix; use charnames qw(:full); @@ -155,6 +156,7 @@ sub getBibliographies { $raw .= $rawbib; } else { # TODO: Is this a memory concern for large bib files? 
+ RecordInput($rawbib); if (open(my $bibfh, '<', $rawbib)) { $raw .= join("", <$bibfh>); close $bibfh; } diff --git a/lib/LaTeXML/Post/Writer.pm b/lib/LaTeXML/Post/Writer.pm index da29b7ee8d..4f6203af24 100644 --- a/lib/LaTeXML/Post/Writer.pm +++ b/lib/LaTeXML/Post/Writer.pm @@ -14,6 +14,7 @@ package LaTeXML::Post::Writer; use strict; use warnings; use LaTeXML::Util::Pathname; +use LaTeXML::Common::Error; use LaTeXML::Common::XML; use LaTeXML::Post; use base qw(LaTeXML::Post::Processor); @@ -54,6 +55,7 @@ sub process { or return Fatal('I/O', $destdir, undef, "Couldn't create directory '$destdir'", "Response was: $!"); my $OUT; + RecordOutput($destination); open($OUT, '>', $destination) or return Fatal('I/O', $destdir, undef, "Couldn't write '$destination'", "Response was: $!"); print $OUT $serialized; diff --git a/lib/LaTeXML/Post/XSLT.pm b/lib/LaTeXML/Post/XSLT.pm index 385269cd88..1bd43b7dcc 100644 --- a/lib/LaTeXML/Post/XSLT.pm +++ b/lib/LaTeXML/Post/XSLT.pm @@ -76,6 +76,12 @@ sub process { if (my $icon = $params{ICON}) { # Hmm.... what type? could be various image types $params{ICON} = '"' . $self->copyResource($doc, $icon, undef) . '"'; } + + my $security = XML::LibXSLT::Security->new; + $security->register_callback(write_file => sub { RecordOutput($_[1]); return 1; }); + $security->register_callback(create_dir => sub { RecordOutput($_[1]); return 1; }); + $$self{stylesheet}->security_callbacks($security); + my $newdoc = $doc->new($$self{stylesheet}->transform($doc->getDocument, %params)); return $newdoc; } @@ -115,7 +121,10 @@ sub copyResource { if (!pathname_is_contained($dest, $doc->getSiteDirectory)) { $dest = pathname_make(dir => $doc->getSiteDirectory, name => $name, type => $ex); } } # Now, copy (unless in same place! happens a lot during testing!!!!) 
- pathname_copy($path, $dest) unless $path eq $dest; + if (!($path eq $dest)) { + RecordInput($path); + pathname_copy($path, $dest); + RecordOutput($dest); } # and return the relative path from the dest doc to the resource return pathname_relative($dest, $doc->getDestinationDirectory); } else {