-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathword-pack.xsl
330 lines (315 loc) · 14.2 KB
/
word-pack.xsl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:xd="http://www.oxygenxml.com/ns/doc/xsl"
xmlns:wt="https://github.com/dariok/w2tei"
xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
xmlns:pkg="http://schemas.microsoft.com/office/2006/xmlPackage"
xmlns:math="http://www.w3.org/2005/xpath-functions/math"
exclude-result-prefixes="#all"
version="3.0">
<xd:doc scope="stylesheet">
<xd:desc>
<xd:p><xd:b>Created on:</xd:b> Nov 6, 2017</xd:p>
<xd:p><xd:b>Author:</xd:b> dkampkaspar</xd:p>
<xd:p>Provide some very handy functions for parsing Word-XML files</xd:p>
</xd:desc>
</xd:doc>
<!-- Functions to check for a type -->
<xd:doc>
  <xd:desc>
    <xd:p>Two-argument convenience form of <xd:pre>wt:is</xd:pre>: tests whether a <xd:b>paragraph</xd:b> style
      of the given name is applied to the context.</xd:p>
    <xd:p>It only forwards to the three-argument overload; calling it is the same as calling
      <xd:ref name="wt:is" type="function">wt:is($context, $test, 'p')</xd:ref>.</xd:p>
  </xd:desc>
  <xd:param name="context">
    <xd:p>The item(s) to inspect (typically a <xd:pre>w:p</xd:pre>).</xd:p>
  </xd:param>
  <xd:param name="test">
    <xd:p>The style name to look for.</xd:p>
  </xd:param>
  <xd:return>
    <xd:p>The result of the three-argument overload for <xd:pre>$pr = 'p'</xd:pre>.</xd:p>
  </xd:return>
</xd:doc>
<xsl:function name="wt:is" as="xs:boolean">
  <xsl:param name="context" as="item()*" />
  <xsl:param name="test" as="xs:string" />
  <!-- this overload always asks for a paragraph style -->
  <xsl:variable name="styleKind" as="xs:string" select="'p'" />
  <xsl:sequence select="wt:is($context, $test, $styleKind)" />
</xsl:function>
<xd:doc>
  <xd:desc>
    <xd:p>Three-argument form of <xd:pre>wt:is</xd:pre>: tests whether a paragraph or character style of a given
      name is applied to the context.</xd:p>
    <xd:p>It forwards to the four-argument overload with <xd:pre>$strict = false()</xd:pre>, i.e. the
      non-strict (substring) comparison is requested.</xd:p>
  </xd:desc>
  <xd:param name="context">
    <xd:p>The item(s) to inspect.</xd:p>
  </xd:param>
  <xd:param name="test">
    <xd:p>The style name to look for.</xd:p>
  </xd:param>
  <xd:param name="pr">
    <xd:p>'p' for a paragraph style or 'r' for a character ('run') style. The value is handed on unchanged to
      the four-argument overload, which decides how any other value is treated.</xd:p>
  </xd:param>
  <xd:return>
    <xd:p>The result of <xd:pre>wt:is($context, $test, $pr, false())</xd:pre>.</xd:p>
  </xd:return>
</xd:doc>
<xsl:function name="wt:is" as="xs:boolean">
  <xsl:param name="context" as="item()*" />
  <xsl:param name="test" as="xs:string" />
  <xsl:param name="pr" as="xs:string" />
  <!-- the short forms never use strict comparison -->
  <xsl:variable name="useStrictComparison" as="xs:boolean" select="false()" />
  <xsl:sequence select="wt:is($context, $test, $pr, $useStrictComparison)" />
</xsl:function>
<xd:doc>
  <xd:desc>
    <xd:p>Check whether the context has a certain 'type', i.e. whether a paragraph or character style of a given
      name is applied to it.</xd:p>
    <xd:p>All <xd:pre>w:pStyle/@w:val</xd:pre> (for 'p') or <xd:pre>w:rStyle/@w:val</xd:pre> (for 'r') values
      found among the descendants of <xd:pre>$context</xd:pre> are joined with a single space; the comparison
      is made against that joined string.</xd:p>
  </xd:desc>
  <xd:param name="context">
    <xd:p>The item(s) to be evaluated.</xd:p>
  </xd:param>
  <xd:param name="test">
    <xd:p>The string that is to be checked for.</xd:p>
  </xd:param>
  <xd:param name="pr">
    <xd:p>Either 'p' for a paragraph style or 'r' for a character ('run') style.</xd:p>
  </xd:param>
  <xd:param name="strict">
    <xd:p>If <xd:pre>true()</xd:pre>, strict comparison is used (<xd:pre>style($context) = $test</xd:pre>),
      else <xd:pre>contains(style($context), $test)</xd:pre>.</xd:p>
  </xd:param>
  <xd:return>
    <xd:p><xd:pre>true()</xd:pre> if a paragraph or character style of the given name is applied to the context;
      <xd:pre>false()</xd:pre> otherwise.</xd:p>
    <xd:p>Will also return <xd:pre>false()</xd:pre> (and emit a message) if <xd:pre>$pr</xd:pre> is anything
      but 'p' or 'r'.</xd:p>
  </xd:return>
</xd:doc>
<xsl:function name="wt:is" as="xs:boolean">
  <xsl:param name="context" as="item()*" />
  <xsl:param name="test" as="xs:string" />
  <xsl:param name="pr" as="xs:string" />
  <xsl:param name="strict" as="xs:boolean" />
  <xsl:choose>
    <!-- Enforce the documented contract: an unknown style kind never matches
         (previously it silently fell through to the run-style check). -->
    <xsl:when test="not($pr = ('p', 'r'))">
      <xsl:message>Supplied value for parameter `pr` was neither 'p' nor 'r'</xsl:message>
      <xsl:sequence select="false()" />
    </xsl:when>
    <xsl:otherwise>
      <!-- Space-joined like xsl:value-of would produce, but as a plain string instead of a temporary tree. -->
      <xsl:variable name="val" as="xs:string"
        select="string-join(
                  if ($pr = 'p') then $context//w:pStyle/@w:val else $context//w:rStyle/@w:val,
                  ' ')" />
      <xsl:sequence select="if ($strict) then $val = $test else contains($val, $test)" />
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
<xd:doc>
  <xd:desc>
    <xd:p>Decide whether a paragraph is a heading. A paragraph counts as a heading when its own
      <xd:pre>w:pPr</xd:pre> carries a <xd:pre>w:outlineLvl</xd:pre>, or when the style it references (looked up
      by <xd:pre>@w:styleId</xd:pre> inside the enclosing <xd:pre>pkg:package</xd:pre>) contains one.</xd:p>
    <xd:p>Only the directly referenced style is inspected.</xd:p>
  </xd:desc>
  <xd:param name="context">
    <xd:p>The <xd:pre>w:p</xd:pre> to be evaluated.</xd:p>
  </xd:param>
  <xd:return>
    <xd:p><xd:pre>true()</xd:pre> if the paragraph, or the style applied to it, has an outline level;
      <xd:pre>false()</xd:pre> otherwise.</xd:p>
  </xd:return>
</xd:doc>
<xsl:function name="wt:isHeading" as="xs:boolean">
  <xsl:param name="context" as="element(w:p)" />
  <xsl:variable name="appliedStyleId" select="$context/w:pPr/w:pStyle/@w:val" />
  <xsl:variable name="appliedStyle"
    select="$context/ancestor::pkg:package//w:style[@w:styleId = $appliedStyleId]" />
  <!-- direct formatting first, then the style definition -->
  <xsl:sequence select="exists($context/w:pPr/w:outlineLvl) or exists($appliedStyle//w:outlineLvl)" />
</xsl:function>
<!-- END Functions to check for a type -->
<!-- Functions to check properties of a Word p -->
<xd:doc>
  <xd:desc>Check whether a Word p has content.</xd:desc>
  <xd:param name="context">
    <xd:p>The context item to be evaluated.</xd:p>
  </xd:param>
  <xd:return>
    <xd:p><xd:pre>true()</xd:pre> if the context has at least one descendant <xd:pre>w:t</xd:pre>,
      <xd:pre>w:endnoteReference</xd:pre> or <xd:pre>w:footnoteReference</xd:pre>;
      <xd:pre>false()</xd:pre> otherwise.</xd:p>
  </xd:return>
</xd:doc>
<xsl:function name="wt:hasContent" as="xs:boolean">
  <!-- TODO: a paragraph that contains nothing but a comment is currently reported as empty;
       maybe comments should count, possibly controlled by an additional parameter -->
  <xsl:param name="context" as="item()" />
  <xsl:sequence select="exists($context//(w:t | w:endnoteReference | w:footnoteReference))" />
</xsl:function>
<!-- END Functions to check properties of a Word p -->
<!-- Functions to deal with strings independently of Word 'runs' -->
<xd:doc>
  <xd:desc>Return the full text of the context, independent of how Word split it into runs.</xd:desc>
  <xd:param name="context">
    <xd:p>The context item(s), e.g. a <xd:pre>w:p</xd:pre> or a <xd:pre>w:r</xd:pre>.</xd:p>
  </xd:param>
  <xd:return>The values of all <xd:pre>w:t</xd:pre> descendants of the context, concatenated in document order
    without a separator. An empty context, or one without any <xd:pre>w:t</xd:pre>, yields the empty
    string.</xd:return>
</xd:doc>
<xsl:function name="wt:string" as="xs:string">
  <xsl:param name="context" as="item()*" />
  <!-- xsl:sequence hands back the xs:string itself; xsl:value-of would first wrap it in a text node -->
  <xsl:sequence select="string-join($context//w:t, '')" />
</xsl:function>
<xd:doc>
  <xd:desc>
    <xd:p>Check whether the text of the context contains a given string, regardless of how the text is
      distributed over runs.</xd:p>
  </xd:desc>
  <xd:param name="context">
    <xd:p>The context element(s) to be checked.</xd:p>
  </xd:param>
  <xd:param name="test">
    <xd:p>The text to be searched for in the string value of <xd:pre>$context</xd:pre>.</xd:p>
  </xd:param>
  <xd:return>
    <xd:p><xd:pre>true()</xd:pre> if <xd:pre>fn:contains()</xd:pre> finds the test string in
      <xd:ref name="wt:string" type="function">wt:string($context)</xd:ref>; <xd:pre>false()</xd:pre>
      otherwise.</xd:p>
  </xd:return>
</xd:doc>
<xsl:function name="wt:contains" as="xs:boolean">
  <xsl:param name="context" as="item()*"/>
  <xsl:param name="test" as="xs:string"/>
  <xsl:variable name="fullText" as="xs:string" select="wt:string($context)"/>
  <xsl:sequence select="contains($fullText, $test)"/>
</xsl:function>
<xd:doc>
  <xd:desc>
    <xd:p>Check whether the text of the context starts with a given string, regardless of how the text is
      distributed over runs.</xd:p>
  </xd:desc>
  <xd:param name="context">
    <xd:p>The context element(s) to be checked. An empty sequence is accepted and is treated as the empty
      string.</xd:p>
  </xd:param>
  <xd:param name="test">
    <xd:p>The text to be searched for at the start of the string value of <xd:pre>$context</xd:pre>.</xd:p>
  </xd:param>
  <xd:return>
    <xd:p><xd:pre>true()</xd:pre> if <xd:ref name="wt:string" type="function">wt:string($context)</xd:ref>
      <xd:pre>fn:starts-with()</xd:pre> the test string, <xd:pre>false()</xd:pre> otherwise.</xd:p>
  </xd:return>
</xd:doc>
<xsl:function name="wt:starts" as="xs:boolean">
  <!-- item()* (not item()) to match wt:contains and wt:string, so an empty selection is not a type error -->
  <xsl:param name="context" as="item()*"/>
  <xsl:param name="test" as="xs:string"/>
  <xsl:sequence select="starts-with(wt:string($context), $test)"/>
</xsl:function>
<xd:doc>
  <xd:desc>
    <xd:p>Check whether the text of the context ends with a given string, regardless of how the text is
      distributed over runs.</xd:p>
  </xd:desc>
  <xd:param name="context">
    <xd:p>The context element(s) to be checked. An empty sequence is accepted and is treated as the empty
      string.</xd:p>
  </xd:param>
  <xd:param name="test">
    <xd:p>The text to be searched for at the end of the string value of <xd:pre>$context</xd:pre>.</xd:p>
  </xd:param>
  <xd:return>
    <xd:p><xd:pre>true()</xd:pre> if <xd:ref name="wt:string" type="function">wt:string($context)</xd:ref>
      <xd:pre>fn:ends-with()</xd:pre> the test string, <xd:pre>false()</xd:pre> otherwise.</xd:p>
  </xd:return>
</xd:doc>
<xsl:function name="wt:ends" as="xs:boolean">
  <!-- item()* (not item()) to match wt:contains and wt:string, so an empty selection is not a type error -->
  <xsl:param name="context" as="item()*"/>
  <xsl:param name="test" as="xs:string"/>
  <xsl:sequence select="ends-with(wt:string($context), $test)"/>
</xsl:function>
<!-- END Functions to deal with strings independently of Word 'runs' -->
<!-- Functions to select runs -->
<xd:doc>
  <xd:desc>Check whether this is the first run in a possible sequence of runs (e.g. the first 'Überschrift1'
    possibly immediately followed by one or more 'Überschrift1'). May be used for paragraphs, too.</xd:desc>
  <xd:param name="context">The context item.</xd:param>
  <xd:param name="test">The test string for the paragraph or run style.</xd:param>
  <xd:param name="pr">Check in Paragraph ('p') or Run ('r').</xd:param>
  <xd:return><xd:pre>true()</xd:pre> if <xd:pre>wt:is($context, $test, $pr)</xd:pre> evaluates as true and the
    immediately preceding item checks as false. For paragraphs the preceding item is the nearest preceding
    sibling <xd:pre>w:p</xd:pre>. For runs it is the nearest preceding sibling <xd:pre>w:r</xd:pre>; only when
    the run has no preceding sibling run is the last <xd:pre>w:r</xd:pre> child of the preceding paragraph
    consulted instead. Any other value of <xd:pre>$pr</xd:pre> yields <xd:pre>false()</xd:pre>.</xd:return>
</xd:doc>
<xsl:function name="wt:isFirst" as="xs:boolean">
  <xsl:param name="context" as="item()" />
  <xsl:param name="test" as="xs:string" />
  <xsl:param name="pr" as="xs:string" />
  <xsl:choose>
    <xsl:when test="not($pr = ('p', 'r'))">
      <xsl:sequence select="false()" />
    </xsl:when>
    <!-- something that does not carry the style itself can never start a sequence -->
    <xsl:when test="not(wt:is($context, $test, $pr))">
      <xsl:sequence select="false()" />
    </xsl:when>
    <xsl:when test="$pr = 'p'">
      <xsl:sequence select="not(wt:is($context/preceding-sibling::w:p[1], $test, 'p'))" />
    </xsl:when>
    <xsl:otherwise>
      <!-- Look across the paragraph boundary only for a run that opens its paragraph. Previously the last
           run of the preceding paragraph was consulted unconditionally, so a run that starts a new sequence
           in the middle of a paragraph was reported as "not first" whenever the preceding paragraph happened
           to end in the same style. -->
      <xsl:variable name="previousRun"
        select="if (exists($context/preceding-sibling::w:r))
                then $context/preceding-sibling::w:r[1]
                else $context/parent::w:p/preceding-sibling::w:p[1]/w:r[last()]" />
      <xsl:sequence select="not(wt:is($previousRun, $test, 'r'))" />
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
<xd:doc>
  <xd:desc>Matches those runs (or paragraphs) within a sequence that follow its first one, i.e. they are of
    the requested type, are not themselves the first of a sequence, and the nearest preceding sibling that
    starts a sequence of that type is <xd:pre>$me</xd:pre>.</xd:desc>
  <xd:param name="context">The context item</xd:param>
  <xd:param name="me">The item that forms the start of the sequence</xd:param>
  <xd:param name="test">The type to test</xd:param>
  <xd:param name="pr">Whether this is a paragraph ('p') or a run ('r') style; any other value yields
    <xd:pre>false()</xd:pre>.</xd:param>
</xd:doc>
<xsl:function name="wt:followMe" as="xs:boolean">
  <xsl:param name="context" as="item()" />
  <xsl:param name="me" as="item()" />
  <xsl:param name="test" as="xs:string" />
  <xsl:param name="pr" as="xs:string" />
  <!-- nearest preceding sibling (any w:* element) that opens a sequence of the requested type -->
  <xsl:variable name="sequenceStart"
    select="$context/preceding-sibling::w:*[wt:isFirst(., $test, $pr)][1]" />
  <xsl:sequence select="$pr = ('p', 'r')
                        and wt:is($context, $test, $pr)
                        and not(wt:isFirst($context, $test, $pr))
                        and generate-id($sequenceStart) = generate-id($me)" />
</xsl:function>
<!-- END Functions to select runs -->
<!-- Helper: Hex to Dec -->
<xd:doc>
  <xd:desc>
    <xd:p>Convert a string of hexadecimal digits to its decimal value.</xd:p>
    <xd:p>Upper- and lower-case digits are accepted. The calculation is done in integer arithmetic, so large
      values are not rendered in exponential notation.</xd:p>
  </xd:desc>
  <xd:param name="hex">
    <xd:p>The hexadecimal digits, without any prefix such as <xd:pre>0x</xd:pre> or <xd:pre>#</xd:pre>.</xd:p>
  </xd:param>
  <xd:return>
    <xd:p>A text node holding the decimal value; <xd:pre>0</xd:pre> for the empty string.</xd:p>
    <xd:p>If the input contains anything but hexadecimal digits, a message is emitted and the input is returned
      unchanged, so callers that do arithmetic on the result must be prepared for a non-numeric value.</xd:p>
  </xd:return>
</xd:doc>
<xsl:function name="wt:hexToDec">
  <xsl:param name="hex"/>
  <!-- normalise once so that 'ff' and 'FF' are treated alike -->
  <xsl:variable name="digits" as="xs:string" select="upper-case(string($hex))"/>
  <xsl:variable name="length" as="xs:integer" select="string-length($digits)"/>
  <xsl:choose>
    <!-- anchored on both ends: every character has to be a hex digit (the empty string is allowed) -->
    <xsl:when test="matches($digits, '^[0-9A-F]*$')">
      <!-- xsl:value-of is kept on purpose so that the function still returns a text node as before;
           math:pow yields an exact xs:double for powers of 16, which is cast back to xs:integer
           so that the sum stays an integer -->
      <xsl:value-of
        select="sum(
                  for $i in 1 to $length
                  return string-length(substring-before('0123456789ABCDEF', substring($digits, $i, 1)))
                         * xs:integer(math:pow(16, $length - $i))
                )"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:message>Provided value is not hexadecimal...</xsl:message>
      <xsl:value-of select="$hex"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
</xsl:stylesheet>