-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtexexpand.pin
executable file
·1334 lines (1223 loc) · 43.7 KB
/
texexpand.pin
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#- -*- perl -*- header inserted automatically
# $Id: texexpand.pin,v 1.12 2004/01/02 08:08:34 RRM Exp $
#
# texexpand for LaTeX2HTML v2K
# Based on texexpand by Robert Thau, MIT AI lab, including modifications by
# Franz Vojik <[email protected]>
# Nikos Drakos <[email protected]>
# Sebastian Rahtz <[email protected]>
# Maximilian Ott <[email protected]>
# Martin Boyer
# Herbert Swan
# Jens Lippmann
# Recognizes \documentclass, \documentstyle, \usepackage, \RequirePackage,
# \begin{verbatim}...\end{verbatim}, \begin{lstlisting}...\end{lstlisting},
# \begin{minted}...\end{minted},
# %begin{latexonly}...%end{latexonly}, \begin{latexonly}...\end{latexonly},
# \input, \include, \verb, \lstinline, \mintinline, \mint, \latex
# \endinput, \end{document}
# \includecomment, \excludecomment
# \begin{"to exclude"}, \end{"to exclude"}
# %begin{"to exclude"}, %end{"to exclude"}
###############################################################################
# Notes:
#
# General translation mechanism:
#
#
# The main program latex2html calls texexpand with the document name
# in order to expand some of its \input and \include statements, here
# also called 'merging', and to write a list of sensitized style, class,
# input, or include file names.
# When texexpand has finished, all is contained in one file, TMP_foo.
# (assumed foo.tex is the name of the document to translate).
#
# In this version, texexpand cares for following environments
# that may span include files / section boundaries:
# a) \begin{comment}
# b) %begin{comment}
# c) \begin{any} introduced with \excludecomment
# d) %begin{any}
# e) \begin{verbatim}
# f) \begin{lstlisting}
# g) \begin{minted}
# h) \begin{latexonly}
# i) %begin{latexonly}
#
# a)-d) cause texexpand to drop its contents, it will not show up in the
# output file. You can use this to 'comment out' a bunch of files, say.
#
# e)-g) prevent texexpand from expanding input files, but the environment
# content goes fully into the output file.
#
# Together with each merging of \input etc. there are so-called %%%texexpand
# markers accompanying the boundary.
#
# When latex2html reads in the output file, it uses these markers to write
# each part to a separate file, and process them further.
#
#
#
# Detailed technical notes:
#
# 1. %begin{latexonly} and %end{latexonly} have to be on a separate line.
# Anything between these tags (including the tags) is discarded.
# 2. \begin{latexonly} and \end{latexonly} have to be on a separate line.
# Anything between these tags (including the tags) is not expanded.
# 3. [%\]begin{"to exclude"} and [%\]end{"to exclude"} have to be on a
# separate line.
# Anything between these tags (including the tags) is discarded.
# 4. \begin{verbatim/verbatim*/lstlisting/minted} and
# \end{verbatim/verbatim*/lstlisting/minted} have to be on a separate line.
# Anything between these tags (including the tags) is not expanded.
# 5. The scope of any such tags may extend over several files.
# The opening tag for latexonly may occur on a different include level
# than the closing tag.
# The opening tag for verbatim/lstlisting/minted/"to exclude" must occur
# within the same file as the closing tag.
# 6. Warnings are printed when the document has been parsed and open
# tags remain.
# 7. When in a "to exclude"/verbatim/lstlisting/minted environment, texexpand
# won't recognize ANY command except the corresponding closing tag.
# There cannot be any nested constructions.
# This behaviour is identical to that of LaTeX.
# 8. \begin{latexonly},\end{latexonly} may be nested, whereas
# %begin{latexonly},%end{latexonly} may not be nested.
# 9. A "%" tag cannot close a "\" tag, and vice versa.
# 10. Every \document(class|style), \usepackage, \input and \include command
# has to be on a separate line.
# 11. Everything behind a `%' that isn't preceded by a `\' is regarded as
# a comment, i.e. it is printed but not interpreted.
# 12. If any command listed in 10. is preceded by an occurrence of
# `\verb', `\lstinline', `\mintinline', `\mint', `\latex'
# then it is NOT interpreted.
# This crashes on lines like this:
# blah blah \verb+foo foo+ \input{bar} % bar won't be loaded!
# 13. Packages provided via \usepackage are handled the same way as
# `options' in \document(class|style), i.e. they are included when
# -auto_exclude is off, the package isn't in @dont_include *OR* the
# package is in @do_include (new). They are added to the style file
# together with their options if the file itself hasn't been merged.
# \documentclass[options]{class} searches for every option.clo,
# \documentstyle[options]{style} searches for every option.sty.
# \usepackage[options]{packages} searches for every package.sty.
# 14. Each texinputs directory is searched for input files/styles. If it
# ends in `//', the whole subdirectory tree is searched.
# 15. \input / \include merge the given file (if found under the given
# name or with .tex extension) if its basename is in @do_include or if it
# isn't in @dont_include or if the given filename doesn't end in
# .sty/.clo/.cls when -auto_exclude is set.
#
###############################################################################
# History:
# mro = Marek Rouchal <[email protected]>
# jcl = Jens Lippmann <[email protected]>
#
# $Log: texexpand.pin,v $
# Revision 1.12 2004/01/02 08:08:34 RRM
# -- include support for -out <outfile> switch to avoid incompatibility
# when POSIXLY_CORRECT is set.
# Thanks to Juhapekka Tolvanen <[email protected]> for the problem report.
#
# Revision 1.11 2000/08/23 04:09:05 RRM
# -- fixed typo using $latexonlyenv instead of $latexonlytype
# -- keep $mute=0 for fake-env inside $latexonly envs.
# -- use \n instead of ',' as delimiter for STYLES listing,
# with LaTeX-2e documents, starting with \documentclass
#
# Revision 1.10 1999/11/03 11:29:50 RRM
# -- recoded $ignore_cmd_rx , thanks Achim Haertel for reporting problem
#
# Revision 1.9 1999/10/06 22:04:13 MRO
#
# -- texexpand: latex2html calls texexpand with the -out option instead of
# output redirection: this is safer on non-UNIX platforms
# -- pstoimg: now there's no default cropping (useful for standalone
# conversions). latex2html was changed appropriately
# -- minor cleanups in latex2html script and documentation
#
# Revision 1.8 1999/10/03 18:40:42 MRO
#
# -- some cleanups for beta2
# -- "make check" now checks all Perl code
#
# Revision 1.7 1999/09/16 11:27:01 RRM
# -- $keepcomments environments do not need to start at the beginning
# of the line
# -- %begin{latexonly} and $fakeenv environments are now correctly
# handled inside $keepcomments environments.
#
# Revision 1.6 1999/06/24 07:28:59 MRO
#
#
# -- removed L2HMODULE
# -- fixed processing of -info switch
# -- changed option order for dvips on win32 (thanks JCL)
# -- bumped version to 99.2a8
#
# Revision 1.5 1999/06/10 23:00:00 MRO
#
#
# -- fixed an artifact in the *ball icons
# -- cleanups
# -- option documentation added
# -- fixed bug in color perl (determining path to rgb/crayola)
#
# Revision 1.4 1999/06/02 12:11:23 RRM
# -- the option 'style_file' should be 'save_styles' ; fixed.
# -- extended $ignore_cmd_rx to ignore \input commands that are contained
# within conditional TeX code; (e.g. in macro definitions)
# -- ignore \usepackage commands in brackets; e.g. [\usepackage]
#
# Revision 1.3 1999/05/31 07:49:04 MRO
#
#
# - a lot of cleanups wrt. OS/2
# - make test now available (TEST.BAT on Win32, TEST.CMD on OS/2)
# - re-inserted L2HCONFIG environment
# - added some new subs to L2hos (path2os, path2URL, Cwd)
#
# Revision 1.2 1999/05/17 21:31:00 MRO
#
#
# -- make texexpand warning-free and start making it use strict
# compliant
#
# Revision 1.1 1999/05/11 06:10:02 MRO
#
#
# - merged config stuff, did first tries on Linux. Simple document
# passes! More tests required, have to get rid of Warnings in texexpand
#
# Revision 1.30 1999/04/09 18:09:21 JCL
# changed my e-Mail address
#
# Revision 1.29 1998/12/02 07:23:35 RRM
# -- closedir(SUBDIR) instead of close(SUBDIR) ; thanks Marek Bukowy
# else can run out of filehandles
#
# Revision 1.28 1998/08/14 09:35:21 RRM
# -- allow the arguments and options to \documentclass (style)
# and \usepackage commands to extend over several lines
#
# Revision 1.27 1998/07/03 11:44:54 RRM
# -- ignore $keepcomments environments when $latexonly
#
# Revision 1.26 1998/06/26 08:16:46 RRM
# -- quoted $dd for the sake of Win95 and DOS
#
# Revision 1.25 1998/05/14 13:34:11 latex2html
# texexpand for V98.2
#
# -- reordered some of the early code to use the $TEXINPUTS variable
# rather than $ENV{'TEXINPUTS'}
# -- LaTeX2HTML passes its value via the command-line
# -- Web2C should *not* be used
# -- there is no searching along paths for TeX, just for LaTeX2HTML
#
# Revision 1.24 1998/05/09 05:34:13 latex2html
# -- removed local customisation, sorry
# -- removed the old/commented call to use Override.pm
#
# Revision 1.23 1998/05/09 05:29:54 latex2html
# -- cosmetic changes to $debug messages
# -- removed duplicated path-searching
# -- fixed error whereby full path-names got lost
# -- experimented with the Web2C options
# Are these actually useful ?
#
# Revision 1.22 1998/04/28 11:53:08 latex2html
# implemented Fabrice Popineau's changes for Win32 compatibility
#
# -- more functions defined in Override.pm
# -- checks for kpsewhich and Web2C
#
# Revision 1.21 1998/02/19 22:26:49 latex2html
# th-darmstadt -> tu-darmstadt
#
# Revision 1.20 1997/12/04 07:35:25 RRM
# -- include a use lib command, to find the Override.pm module
# -- generalised pattern for matching verbatim-like environments
#
# Revision 1.19 1997/11/05 11:31:27 RRM
# -- changed the way Override.pm is called; this should work better.
#
# Revision 1.18 1997/10/14 16:28:16 JCL
# o added command line option -unsegment and $UNSEGMENT
# Use latex2html -unsegment, or texexpand -unsegment, or set $UNSEGMENT to 1
# in latex2html.config.
#
# Revision 1.17 1997/10/10 10:40:07 RRM
# -- Oops, didn't quite get that right last time.
#
# Revision 1.15 1997/10/09 07:11:14 RRM
# -- temporary fix to the Override problem
#
# Revision 1.14 1997/10/06 16:02:29 UW
# override.pm contains now unlink() too. Adapted the call to override.pm
# accordingly
#
# Revision 1.13 1997/10/06 14:49:37 UW
# Added support for override.pm to texexpand.
# Furthermore, all references to the path-delimiter ':'
# should now be made via $envkey
# Texexpand previously used the variable $DS as directory delimiter. Since
# all other modules use $dd, I changed $DS to $dd.
#
# Revision 1.12 1997/09/27 10:36:14 JCL
# o several enhancements to the inline documentation
# o small fix to &interprete, \input|include now doesn't lose the comment
# if merging fails
# o introduced -no_segments switch (or set shell variable $NO_SEGMENTS to 1):
# This will force a segmented document to expand its segment files, so
# that it may be processed as a whole with LaTeX2HTML.
# Use this feature to test a segmented document or whenever a document
# needs to be fully expanded.
# XtractFAQ will need this feature to determine the FAQ entries.
#
# Revision 1.11 1997/06/15 18:26:00 JCL
# Now texexpand will only merge files that exist *and* are readable.
# (Trying to merge a void link caused it to crash on my site.)
#
# Revision 1.10 1997/06/06 14:13:54 RRM
# This is the texexpand for V97.1.
#
# only difference is that it is quieter under -debug .
# use -verbosity <num> as well, to get all the previous messages,
# when <num> is at least 2.
#
# Revision 1.9 1997/03/24 12:26:15 RRM
# Implemented a new class of environments: $keepcomments .
# This allows environments of TeX-like code to be preserved verbatim,
# and passed to LaTeX for processing: e.g. picture, makeimage, xy etc.
# Also, fixed the bug which loses any code on the same line as, but preceding
# an \input or \include command.
#
# Revision 1.8 1997/03/03 20:35:42 JCL
# added some comments
#
# Revision 1.7 1996/12/21 20:30:00 JCL
# - small changes to get verbatim parsed separately from verbatim*
# - provided expand test for regression suite
# - bound diagnostic status messages to debug level
#
# texexpand is operational
#
# Revision 1.6 1996/12/20 20:27:08 JCL
# fixed severe bug with my $DS variable :-[
#
# Revision 1.5 1996/12/20 18:51:54 JCL
# *** empty log message ***
#
# Revision 1.4 1996/12/20 01:29:39 JCL
# Moved initialisation tokens for @dont_include to latex2html.config,
# to have a more central place to control them.
#
# Revision 1.3 1996/12/18 04:36:58 JCL
# substantial changes to allow for environments grouping several files
# o chunked code into more functions
# o revised documentation
# o designed new parsing logic
# o introduced parsing of \includecomment, \excludecomment to care
# for self-defined comment environments
# o handles default "comment" environment as known from html.sty
# o and much more (see comments)
#
#
# V96.2a6 Fixed bug in recursive directory search for texinputs. Thanks to
# Marcus Harnisch <[email protected]> for reporting the bug.
# Included possibility of adding extensions to $TEXE_DONT_INCLUDE
# e.g. '.psfig', so that all files ending in .psfig won't be
# \input or \include 'ed. Same for $TEXE_DO_INCLUDE. Added `o'
# option to some regexps.
# -------
# V96.2a5 Followed suggestions by Jens Lippmann regarding file inclusion
# logic. Added \RequirePackage. Some minor changes.
# -------
# V96.2a4 Fixed severe bugs in comments regexp and usepackage logic.
# Thanks to Ross Moore <[email protected]> for reporting them.
# Added support for LaTeX2e .clo filename extension (see 7. above)
# Cleaned up some code, added more comments
# Added command line option -do_include
# -------
# V96.2a3 Fixed bugs & typos
# -------
# V96.2a2 Following suggestions made by
# Jens Lippmann <[email protected]>
# Added recursive directory search for include files.
# Added @do_include: Forces inclusion of packages (when found)
# Some bug fixes
# -------
# V96.2a1 released Thu Oct 24 16:51:36 MET 1996
# -------
# 21-NOV-96 mro
# Almost complete rewrite by Marek Rouchal <[email protected]>
#
###############################################################################
use vars qw($LATEX2HTMLDIR $SCRIPT);
#- the (texlive) wrapper sets these values
#- or it is stored in the environment
#unless @wrapper@ || @texlive@
BEGIN {
    # Settle on the LaTeX2HTML directory: a non-empty LATEX2HTMLDIR
    # environment variable takes precedence, otherwise the built-in
    # default is used and also exported through the environment.
    $ENV{LATEX2HTMLDIR} = '@LATEX2HTMLDIR@' unless $ENV{LATEX2HTMLDIR};
    $LATEX2HTMLDIR = $ENV{LATEX2HTMLDIR};

    # Give up early when the directory is missing; otherwise append it
    # to @INC so that modules stored there can be found.
    die qq{Fatal: Directory "$LATEX2HTMLDIR" does not exist.\n}
        unless -d $LATEX2HTMLDIR;
    push(@INC, $LATEX2HTMLDIR);
}
#fi
use L2hos;
# Release identification, printed by &banner.  The @...@ placeholders are
# presumably substituted when the script is generated -- TODO confirm.
my ($RELEASE, $VERSION) = ('@distver@', '@release_date@');

# Platform dependent separators as reported by L2hos:
#   $envkey  separates the entries of a path list,
#   $dd      is the directory delimiter character.
my $envkey = L2hos->pathd();
my $dd     = L2hos->dd();

# Every diagnostic starts on a fresh line with this prefix.
my $prompt = "\ntexexpand:";

# Styles that must not be merged (if any).  The single default entry is a
# sanity setup in case the \d gets garbled during shell variable handling;
# the real initialisation comes from latex2html.config.
my @dont_include = ('\d+pt');

# File name extensions that are auto-excluded.
my $dont_include_ext_rx = 'sty|cls|clo';
&process_dont_include(split(/$envkey/, $ENV{'TEXE_DONT_INCLUDE'}))
    if $ENV{'TEXE_DONT_INCLUDE'};

# Styles that are to be merged (if any); these override @dont_include.
# $do_include_ext_rx collects the extensions that are auto-included.
my $do_include_ext_rx = '';
&process_do_include(split(/$envkey/, $ENV{'TEXE_DO_INCLUDE'}))
    if $ENV{'TEXE_DO_INCLUDE'};
# Parse arguments
use Getopt::Long;
# Command line handling and program start.
# %opt receives the parsed options; list-valued options (-do_include,
# -dont_include, -texinputs) are stored as array references and are
# absent from %opt when the option was not given.
my %opt = ();
unless(GetOptions(\%opt, qw(-help -version -debug -verbose -w
                            -do_include=s@ -dont_include=s@ -auto_exclude -unsegment
                            -save_styles=s -texinputs=s@ -output=s -out=s))) {
    die "$prompt Error: Invalid option(s) specified.\n";
}

if($opt{help}) {
    print STDERR "-- to be implemented --\n";
    exit 0;
}

# The banner is always shown; -version stops right after it.
&banner();
if($opt{version}) {
    exit 0;
}

my $debug = $opt{debug} || 0; # no debug by default
$debug = 2 if($opt{verbose});

if($opt{dont_include} && @{$opt{dont_include}}) {
    &process_dont_include(@{$opt{dont_include}});
}

if($opt{do_include} && @{$opt{do_include}}) {
    &process_do_include(@{$opt{do_include}});
}

# Only dereference -texinputs when it was actually given, in the same way
# as the two list options above; $opt{texinputs} is undefined otherwise.
my $TEXINPUTS = '';
if($opt{texinputs} && @{$opt{texinputs}}) {
    $TEXINPUTS = join($envkey, @{$opt{texinputs}});
}

# Exactly one input file is expected.
unless(@ARGV) {
    die "$prompt Error: No input file specified.\n";
}
my $infile = shift(@ARGV);
if(@ARGV) {
    die "$prompt Error: More than one input file specified.\n";
}

#FP: Web2C does not use @texinputs at all
# moreover, it uses kpsewhich to find files, so no need to
# bother with @texinputs
# $Web2C = &find_executable('kpsewhich',$ENV{'PATH'});
#RRM: I don't think it is a good idea to use kpsewhich this way
my $Web2C = '';

# Initialize texinputs: the current directory always comes first,
# followed by every non-blank entry of $TEXINPUTS other than '.'.
my @texinputs = qw(.);
if($TEXINPUTS) {
    my $dir;
    foreach $dir (split(/$envkey/, $TEXINPUTS)) {
        push (@texinputs, $dir)
            if(($dir =~ /\S+/) && ($dir ne '.')); # save only if non-empty
    }
}

## Ignore the environment
# if((!$TEXINPUTS)&&(defined $ENV{'TEXINPUTS'})) {
# foreach $dir (split(/$envkey/,$ENV{'TEXINPUTS'})) {
# push (@texinputs, $dir)
# if (($dir =~ /\S+/)&&($dir ne '.')); # save only if non-empty
# }
# }

## Expand paths with `~'
# $homeDir = (getpwuid($<))[7];
# grep(s|^~$dd|$homeDir$dd|, @texinputs);
# grep((m|^~([^$dd]+)$dd|) &&
# ($homeDir = (getpwnam($1))[7]) && (s||$homeDir$dd|), @texinputs);

&initialise;
&main;
exit(0);
# Write the program identification line, built from $RELEASE and
# $VERSION, to STDERR.
sub banner {
    my $announcement = 'texexpand V' . $RELEASE . ' (' . $VERSION . ")\n";
    print STDERR $announcement;
}
# Set up the global regular expression fragments used while parsing and,
# when debugging is enabled, report the search configuration on STDERR.
#
# Globals written: $ignore_cmd_rx, $options_rx, $arg_rx, $fakeenv_rx,
#                  $keepcomments_rx
# Globals read:    $debug, $prompt, $Web2C, @texinputs, @dont_include,
#                  @do_include, $dont_include_ext_rx, $do_include_ext_rx,
#                  $infile
sub initialise {
    # A command is ignored when this matches the text in front of it.
    # The pattern lives in a double-quoted string, so every backslash
    # meant for the regexp engine is doubled here.
    $ignore_cmd_rx =
        "(\\\\latex\\W|\\\\verb|\\\\lstinline|\\\\mintinline|\\\\mint|\\\\expandafter|\\\\ifx|\\\\else\\W|[\\|\\[\\@]\$)";

    # A (possibly missing) pair of square brackets, typically an option
    # list, and a single argument in braces.
    ($options_rx, $arg_rx) = ('(\[[^\]]*\]|)', '\{([^\}]*)\}');

    # Names of the fake (comment) environments and of the environments
    # whose comments are kept.
    ($fakeenv_rx, $keepcomments_rx) =
        ('(comment)', '(picture|makeimage|xy|diagram)');

    if ($debug) {
        # Report the search path
        print STDERR "$prompt LaTeX2HTML inputs are in:";
        if ($Web2C) {
            print STDERR "$prompt " . `kpsewhich -expand-var \$TEXINPUTS`;
        }
        else {
            foreach my $path (@texinputs) {
                print STDERR "$prompt $path";
            }
        }

        # The include/exclude lists are shown at higher debug levels only
        if ($debug > 1) {
            print STDERR "\n$prompt Special names (not to be input or included):";
            foreach my $excluded (@dont_include) {
                print STDERR "$prompt $excluded";
            }
            print STDERR "\n$prompt Extensions of files not to be input or included: $dont_include_ext_rx";

            print STDERR "\n$prompt Special names (to *be* input or included):";
            foreach my $included (@do_include) {
                print STDERR "$prompt $included";
            }
            print STDERR "\n$prompt Extensions of files to *be* input or included: $do_include_ext_rx\n";
        }
    }

    print STDERR "\n$prompt %--- Expanding $infile" if ($debug > 1);
}
# Drive one complete expansion run.
#
# Sets up the parser state shared (via dynamic scope) with &process_file
# and &interprete, opens the output streams, processes $infile and
# finally warns about every environment that is still open.
#
# Reads:  %opt (save_styles, output, out), $infile, @dont_include,
#         @do_include, $prompt
# Writes: $dont_include_rx, $do_include_rx, file handles OUT and STYLES
# Dies:   when an output file cannot be opened or closed.
sub main {
    # Note that verbatim/lstlisting/minted/latexonly may split over different files!
    # $verbatim is 1 if inside a verbatim environment,
    # $latexonly is > 0 if inside latexonly environments
    # $includelevel indicates the depth of include/input
    local($includelevel) = 0;
    local($verbatim,$verbatimname) = (0,"");
    local($latexonly,$latexonlytype) = (0,"");
    local($fakeenv,$fakeenvname,$fakeenvtype) = (0,"","");
    local($keepcomments,$keepcommentsname) = (0,"");
    local($active,$mute) = (1,0);

    # Main procedure
    $dont_include_rx = join("|",@dont_include);
    $do_include_rx = join("|",@do_include);

    # The three-argument form of open keeps characters in the file names
    # (leading '>', '&', surrounding blanks, ...) from being taken as
    # part of the open mode.
    if($opt{save_styles}) {
        open(STYLES, '>', $opt{save_styles})
            || die "$prompt Error: Cannot open style file '$opt{save_styles}': $!\n";
        binmode STYLES;
    }

    my $out_file = $opt{output}||$opt{out};
    if($out_file) {
        open(OUT, '>', $out_file)
            || die "$prompt Error: Cannot open output file '$out_file': $!\n";
    }
    else {
        open(OUT,">&STDOUT")
            || die "$prompt Error: Cannot write to standard output: $!\n";
    }
    binmode OUT;

    &process_file($infile); # the workhorse...

    # Buffered write errors only show up when the handle is closed, so
    # a failing close means the result is incomplete.
    if($out_file) {
        close(OUT)
            || die "$prompt Error: Cannot close output file '$out_file': $!\n";
    }
    if($opt{save_styles}) {
        close(STYLES)
            || die "$prompt Error: Cannot close style file '$opt{save_styles}': $!\n";
    }

    # Report environments that were opened but never closed.
    print STDERR "$prompt Warning: No ${latexonlytype}end\{latexonly\} found."
        if ($latexonly);
    print STDERR "$prompt Warning: No ${fakeenvtype}end\{$fakeenvname\} found."
        if ($fakeenv);
    print STDERR "$prompt Warning: No \\end\{$keepcommentsname\} found."
        if ($keepcomments);
    # Name the environment that is really open; it may be lstlisting,
    # minted or a verbatim variant rather than plain verbatim.
    print STDERR "$prompt Warning: No \\end\{$verbatimname\} found."
        if ($verbatim);
}
# Include and parse a file.
# This routine is recursive, see also &process_input_include_file,
# &process_document_header, and &process_package_cmd.
#
# Two global flags control the states of texexpand.
# o $active is true if we should interprete the lines to expand
# files, check for packages, etc.
# o $mute is true if we should prevent the lines from going
# into the out file.
#
# We have three general states of texexpand:
# 1) interprete the lines and pass them to the out file
# This is the normal case.
# Corresponding: $active true, $mute false
# 2) interprete minimal and suppress them
# This is when parsing inside a comment environment, which
# also would retain its body from LaTeX.
# => $active false, $mute true
# 3) interprete minimal and pass the lines to the out file
# This is inside a verbatim, lstlisting, minted or latexonly environment.
# The line of course must be at least interpreted to
# determine the closing tag.
# => $active false, $mute false
#
# Any environment may extend over several include files.
# Any environment except verbatim, lstlisting, minted and latexonly may have its
# opening or closing tag on different input levels.
# The comment and verbatim/lstlisting/minted environments cannot be nested, as
# is with LaTeX.
# We must at least parse verbatim/lstlisting/minted/comment environments in
# latexonly environments, to catch fake latexonly tags.
#
# The work scheme:
# Five functions influence texexpand's behavior.
# o &process_file opens the given file and parses the non-comment part in
# order to set $active and $mute (see above).
# It calls &interprete to interprete the non-comment content and either
# continues with the next line of its file or terminates if &interprete
# detected the \end{document} or an \endinput.
# o &interprete handles some LaTeX tags with respect to the three states
# controlled by $active and $mute.
# Regarding to \input|include, \document(class|style), and
# \(use|Require)package the functions &process_input_include_file,
# &process_document_header, and &process_package_cmd are called respectively.
# o These three functions check if the file name or option files are enabled
# or disabled for merging (via TEXE_DO_INCLUDE or TEXE_DONT_INCLUDE).
# Any file that is to include will be 'merged' into the current file, i.e.
# the function &process_file is called at this place in time (recursively).
# This will stop interpretation at the current line in file, start with the
# new file to process and continues with the next line as soon as the new
# file is interpreted to its end.
#
# The call tree (noweb+xy.sty would be handy here):
#
# main
# |
# v
# +->process_file
# | |
# | v
# | interprete (with respect to the current line, one of that three)
# | | | |
# | v v v
# | process_input_include_file process_document_header process_package_cmd
# | | | |
# | v v v
# +----+---------------------------+------------------------+
#
# Bugs:
# o Since the latexonly environment is not parsed, its contents
# might introduce environments which are not recognized.
# o The closing tag for latexonly is not found if hidden inside
# an input file.
# o One environment tag per line, yet!
# o If I would have to design test cases for this beast I would
# immediately disintegrate into a logic cloud.
#
# Notes:
# o Ok, I designed test cases for it.
# Please refer to test 'expand' of the regression test suite
# in the developers' module of the l2h repository.
# o -unsegment feature:
# In this (rare) case, the user wants to translate a segmented document
# not in segments but in a whole (for testing, say).
# We enable this by recognizing the \segment command in &interprete,
# causing the segment file to be treated like \input but losing the first
# lines prior to \startdocument (incl.), as controlled via $segmentfile.
# On how to segment a document you are best guided by section
# ``Document Segmentation'' of the LaTeX2HTML manual.
#
# Read one file line by line, track the environment state and hand every
# line to &interprete.
#
# Parameters:
#   $infile  name of the file to read
#   $cmd     the command that caused the merge (\input, \include) or undef
#            for the top-level document; only used in the file marker
#
# Shared state (dynamically scoped, set up in &main): $includelevel,
# $active, $mute, $latexonly, $latexonlytype, $fakeenv, $fakeenvname,
# $fakeenvtype, $keepcomments, $keepcommentsname, $verbatim, $verbatimname.
# Also reads $segmentfile and $end_document, which &interprete sets.
#
# Reading stops at the end of the file, when &interprete returns false or
# when $end_document is set.
# Returns the result of the final post-decrement of $includelevel.
# Dies when $infile cannot be opened.
sub process_file {
    my ($infile, $cmd) = @_;
    # $infile is filename, $cmd is \input, \include or undef

    # IN is a localised glob rather than a lexical handle because
    # &interprete pulls continuation lines from the current file via <IN>.
    local(*IN);
    local($comments,$before,$orig);

    # Keep track of input/include level
    $includelevel++;

    # Three-argument open: characters in the file name cannot be mistaken
    # for an open mode; report the system error as well.
    open(IN, '<', $infile) || die "$prompt Cannot open $infile: $!\n";
    binmode IN;
    print STDERR "$prompt %--- Processing $infile" if ($debug > 1);

    # if we don't include this file marker LaTeX2HTML won't split
    # the document at this point
    print OUT "%%% TEXEXPAND: $cmd FILE MARKER $infile\n"
        if ($includelevel > 1 && $active);

    if ($segmentfile) {
        # This variable is set by &interprete to change the behavior of the
        # next file to merge.
        # Skip everything up to and including the \startdocument line.
        while(<IN>) {
            # strip comments; the backslash pairs in front of the % are
            # captured as a whole, exactly as in the main loop below
            s/(^|[^\\])((?:\\\\)*)(%.*)/$comments = $3; $1.$2/e;
            last if /^\s*\\startdocument/;
        }
        $segmentfile = 0;
    }

    while(<IN>) {
        #for debugging
        $orig = $_;

        # lift comments from line
        $comments = "";
        if ($keepcomments) { $comments = '' }
        else {
            s/(^|[^\\])((?:\\\\)*)(%.*)/$comments = $3; $1.$2/e
        }

        # Deal with latexonly environment(s)
        # begin/end tags must be on single line
        if (!$fakeenv && !$verbatim && !$latexonly && (
                ($comments =~ /%\s*begin\s*\{\s*latexonly\s*\}/)||
                ($keepcomments && /%\s*begin\s*\{\s*latexonly\s*\}/))) {
            # A comment latexonly environment. May not be nested.
            $latexonly = 1;
            $latexonlytype = "%";
            $active = 0;
            $mute=1;
        }
        elsif (!$fakeenv && !$verbatim &&
               (!$latexonly || $latexonlytype eq "\\") &&
               /^\s*\\begin\s*\{\s*latexonly\s*\}/) {
            # A latexonly environment. LaTeX types may be nested,
            # but discard them as long as we are in a latexonly
            # comment part.
            # We definitely don't like to push the "\\", "%" types
            # onto a stack to keep track of them in alternating types.
            # On the other hand we won't allow for a comment type
            # part to close a LaTeX environment, eg.
            $latexonly++;
            $latexonlytype = "\\";
            $active = 0;
        }
        elsif (!$fakeenv && !$verbatim && (
                ($comments =~ /%\s*begin\s*\{\s*$fakeenv_rx\s*\}/)||
                ($keepcomments && /%\s*begin\s*\{\s*$fakeenv_rx\s*\}/))) {
            # Begin of a fake comment part. May not be nested.
            $fakeenv=1;
            $fakeenvtype="%";
            # Remember the part name.
            $fakeenvname = $1;
            $active=0;
            $mute=1 unless $latexonly;
        }
        elsif (!$fakeenv && !$verbatim && /^\s*\\begin\s*\{\s*$fakeenv_rx\s*\}/) {
            # Begin of a fake environment. May not be nested.
            $fakeenv=1;
            $fakeenvtype="\\";
            # Remember the environment name.
            $fakeenvname = $1;
            $active=0;
            $mute=1 unless $latexonly;
        }
        elsif (!$fakeenv && !$verbatim && !$latexonly &&
               /^\s*\\begin\s*\{\s*$keepcomments_rx\s*\}/) {
            # Begin of a keepcomments environment. May be nested.
            if (! $keepcomments) {
                $keepcomments = 1;
                # Remember the environment name.
                $keepcommentsname = $1;
            } elsif ($keepcommentsname eq $1) {
                $keepcomments++;
            }
            # The body of a keepcomments environment is interpreted and
            # passed on to the out file, so it must never be muted;
            # otherwise anything that clears $active inside it (a nested
            # verbatim, say) would be dropped from the output.
            $active=1;
            $mute=0 unless $latexonly;
        }
        # elsif (!$fakeenv && !$verbatim && /\\begin\s*\{\s*verbatim(\*)?\s*\}/) {
        elsif (!$fakeenv && !$verbatim && /\\begin\s*\{\s*((\w*[Vv]erbatim\w*|lstlisting|minted)\*?)\s*\}/) {
            ($before,$verbatimname) = ($`,$1);
            ($active,$verbatim) = (0,1)
                unless ($before =~ /$ignore_cmd_rx/o);
        }

        print STDERR "$prompt %--line::${orig}%-- active=$active mute=$mute ".
            "latexonly=$latexonly fakeenv=$fakeenv verbatim=$verbatim ".
            "keepcomments=$keepcomments"
            if ($debug > 1) && $orig =~ /\\begin|%\s*begin/;

        # Interprete the single line, care for file to merge,
        # locate new comment environments, etc.
        # This one does recursive calls.
        # Stop this file if we are told so.
        last
            unless &interprete($_, $comments);
        last if $end_document;

        # Sorry for that ifs...
        if (!$fakeenv && !$verbatim && $latexonly && $latexonlytype eq "%" && (
                ($comments =~ /%\s*end\s*\{\s*latexonly\s*\}/)||
                ($keepcomments && /%\s*end\s*\{\s*latexonly\s*\}/))) {
            # only %end{latexonly} can close the part
            $latexonly=0;
            $active = 1;
            $mute = 0;
        }
        elsif (!$fakeenv && !$verbatim && $latexonly && $latexonlytype eq "\\" &&
               /^\s*\\end\s*\{\s*latexonly\s*\}/) {
            # only \end{latexonly} can close the environment
            $latexonly--;
            $active = ($latexonly ? 0 : 1);
        }
        elsif ($fakeenv && $fakeenvtype eq "%" && (
                ($comments =~ /%\s*end\s*\{\s*$fakeenv_rx\s*\}/)||
                ($keepcomments && /%\s*end\s*\{\s*$fakeenv_rx\s*\}/))) {
            # only a matching %end{name} can close the part
            if ($1 eq $fakeenvname) {
                $fakeenv=0;
                $active = ($latexonly ? 0 : 1);
                $mute=0
                    unless $latexonly && $latexonlytype eq "%";
            }
        }
        elsif ($fakeenv && $fakeenvtype eq "\\" &&
               /^\s*\\end\s*\{\s*$fakeenv_rx\s*\}/) {
            # only a matching \end{name} can close the environment
            if ($1 eq $fakeenvname) {
                $fakeenv=0;
                $active = ($latexonly ? 0 : 1);
                $mute=0 unless $latexonly;
            }
        }
        elsif ($keepcomments &&
               /^[^%]*?\\end\s*\{\s*$keepcomments_rx\s*\}/) {
            # only a matching \end{name} can close the part
            if ($1 eq $keepcommentsname) {
                $keepcomments--;
                $keepcommentsname = '' unless ($keepcomments);
                $active = ($latexonly ? 0 : 1);
                $mute=0
                    unless $latexonly && $latexonlytype eq "%";
            }
        }
        # elsif ( /\\end\s*\{\s*verbatim(\*)?\s*\}/) {
        # Only an open verbatim-like environment can be closed here.
        # $verbatimname keeps the name of the previous environment, so
        # without the $verbatim test a stray closing tag (inside a
        # comment environment, say) would switch interpretation back on.
        elsif ($verbatim &&
               /\\end\s*\{\s*((\w*[Vv]erbatim\w*|lstlisting|minted)\*?)\s*\}/) {
            if ($1 eq $verbatimname) {
                $verbatim=0;
                $active = ($latexonly ? 0 : 1);
            }
        }

        print STDERR "$prompt %--line::${orig}%-- active=$active mute=$mute ".
            "latexonly=$latexonly fakeenv=$fakeenv verbatim=$verbatim"
            if ($debug > 1) && $orig =~ /\\end|%\s*end/;
    }

    print OUT "%%% TEXEXPAND: END FILE $infile\n"
        if ($includelevel > 1 && $active);
    close(IN);
    $includelevel--;
}
# Handle the LaTeX tags \input, \include, \endinput, \documentclass,
# \documentstyle, \usepackage, \RequirePackage, \end{document},
# \includecomment, \excludecomment with respect to the three states
# controlled by $active and $mute.
# The state 'interprete minimal and suppress' ($active false, $mute true)
# does not require further actions, just do nothing.
# When in $active state, call one of &process_input_include_file,
# &process_document_header, or &process_package_cmd to examine the
# appropriate line further.
#
# Returns 0 if the caller is to stop interpreting the current file (\endinput).
# Returns 1 otherwise.
# Set $end_document to 1 if an \end{document} is detected (this stops
# the whole task of texexpand).
#
sub interprete {
local($_,$comments) = @_;
local($line) = $_;
local($before,$after);
# the default to print to OUT
$line =~ s/\n/$comments\n/;
if ($active) {
#loses $comments on successful input/include, document header,
#or usepackage/RequirePackage
if (/\\(input|include)\W/) {
($before,$after) = ($`,$&.$');
if ($before =~ /$ignore_cmd_rx/o) {
print OUT $line;
}
else {
if (length($before)) {
#put prefix to \\input etc. to single line
print OUT $before,"\%\n";
#mask special chars
$before =~ s/(\W)/\\$1/g;
#strip prefix from total line incl. comments
$line =~ s/$before//;
}
# print total line incl. comments if merging failed
print OUT $line
#may re-enter &process_file
unless &process_input_include_file($after);
}
}
# elsif (/\\(usepackage|RequirePackage)\s*$options_rx\s*$arg_rx/s) {
elsif (/\\(usepackage|RequirePackage)[^]]/s) {
$before = $`;
if($before =~ /$ignore_cmd_rx/o) {
print OUT $line;
}
else {
while (!/\\(usepackage|RequirePackage)\s*$options_rx\s*$arg_rx/so) {
chomp; $_ =~ s/%.*$//;
$_ .= <IN>;
}
&process_package_cmd($_);
}
}
# elsif (/\\document(class|style)\s*$options_rx\s*$arg_rx/o) {
elsif (/\\document(class|style)/o) {
$before = $`;
if ($before =~ /$ignore_cmd_rx/o) {
print OUT $line;
}
else {
while (!/\\document(class|style)\s*$options_rx\s*$arg_rx/so) {
chomp; $_ =~ s/%.*$//;
$_ .= <IN>;
}
&process_document_header($_);
}
}
elsif ($opt{unsegment} && /^\s*\\segment(\*?)\s*$options_rx\s*$arg_rx\s*$arg_rx\s*/) {
# We found a segmenting command which must vanish.
# Therefore, mutate the \segment into the section command specified
# by $4 (section, subsection, ...) and $1 (* or empty) followed by
# the section text, and an \input statement with filename $3.
# To obtain the section text, we need to take a preview to the next
# lines, as it might be truncated with %'s.
# Line truncations between the regex above (like \segment%\n) are
# not recognized.
# As many lines are fetched as are required to make the numbers of left
# and right braces equal, since we aren't able to handle nested brace
# pairs.
# If this strategy fails, texexpand is terminated, thereby satisfying
# the 'all or nothing' requirement.
local($file) = $3;
print OUT "\\$4$1";
$after = $_ = $'; #get tail
local($left,$right) = (tr/\{/\{/,tr/\}/\}/);
while (($left != $right) || !$left) {
#braces not balanced or no opening brace at all, get next line
$_ = <IN>;
die "$prompt arguments to \\segment are too complex\n"
unless length($_) && length($after) < 500;
# strip comments
s/(^|[^\\])(\\\\)*(%.*)/$1$2/;
$left += tr/\{/\{/; $right += tr/\}/\}/;
$after .= $_;
}
$after =~ /\}([^\}]*)$/;
$after = $1;
$_ = $`;
# Ok we have it. $_ should carry the whole section title plus
# opening brace, the original lines squeezed into one.
print OUT $_,"}\n";
# set this globally to control behavior of next &process_file
$segmentfile = 1;
die "$prompt segment file <$file> could not be merged"
unless &process_input_include_file("\\input\{$file\}$after");
}
# Print only the first \end{document}. Truncate anything after it.
elsif (/^(.*\\end\{document\})/) {
$before = $1;
if ($before =~ /$ignore_cmd_rx/o) {
print OUT $line;
}
else {
print OUT "$before\n";
$end_document++;
}
}
elsif (/\\endinput/) {
$before=$`;
return(0) #stop this file