-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathmarkdown.xml
227 lines (217 loc) · 10.4 KB
/
markdown.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
<?xml version="1.0"?>
<!DOCTYPE MODE SYSTEM "xmode.dtd"><!--
Markdown mode for jEdit by Peter Lynch (http://peterlynch.ca)
Original inspiration drawn from Ali Rantakari's jEdit mode at http://hasseg.org
Markdown home: http://daringfireball.net/projects/markdown/
Suggested Habits and limitations for using this syntax
* first rule - avoid being 'lazy' as defined in the spec. This means not indenting lines 2 - n in unordered lists and
similar laziness. Not being lazy gives you better syntax highlighting.
* prefix all blockquote lines with 'greater than' char, avoid being lazy, since jedit mode regexp
can't cross lines
* try to keep code blocks indented exactly 4 spaces (or a single tab) deep
* add an extra space for inline links or image links that are in a paragraph and happen to wrap to
start on the beginning of a line. A leading space should turn coloring on for those links
* brackets '[' or ']' appearing in paragraphs that are not part of a link definition should have the leading bracket escaped
to prevent this mode from thinking you are starting a link reference
* block level html in a blockquote will still be parsed for markdown syntax, even though the spec says
markdown is not parsed for in block level html
* determining the difference between a code block and list paragraph indented 4 spaces(or tab) is impossible
so to help identify code blocks any paragraph indented four spaces is treated as such
* tabs are assumed to be taken as four spaces.
* link label definitions should start at the very beginning of a line, NOT indented by up to 3 leading spaces (even though the spec allows that)
-->
<MODE>
<PROPS>
  <!-- Editor settings declared by this mode: tab width, indent width, soft tabs and soft wrap. -->
  <PROPERTY NAME="tabSize" VALUE="4" />
  <PROPERTY NAME="indentSize" VALUE="2" />
  <PROPERTY NAME="noTabs" VALUE="true" />
  <PROPERTY NAME="wrap" VALUE="soft" />
  <!-- Comment delimiters are entity-escaped: a literal '<' is not allowed inside an
       attribute value, so the file would not be well-formed without the escapes. -->
  <PROPERTY NAME="commentStart" VALUE="&lt;!--" />
  <PROPERTY NAME="commentEnd" VALUE="--&gt;" />
</PROPS>
<!-- ================ MAIN ================================= -->
<!--
  Default rule set. HTML constructs are recognised first; everything else is handed to the
  MARKDOWN rule set through the IMPORT at the bottom.
  Every '<' and '>' that is rule text (not mode-file markup) is written as an entity:
  unescaped, the XML parser would read it as the start of a tag or of a comment.
-->
<RULES IGNORE_CASE="TRUE">
  <!-- HANDLE BLOCK LEVEL HTML ELEMENTS -->
  <!-- SGML comment -->
  <SPAN TYPE="COMMENT1">
    <BEGIN>&lt;!--</BEGIN>
    <END>--&gt;</END>
  </SPAN>
  <!-- JavaScript: a script element opening at line start is delegated to html::JAVASCRIPT -->
  <SPAN AT_LINE_START="TRUE" TYPE="MARKUP" DELEGATE="html::JAVASCRIPT">
    <BEGIN>&lt;script</BEGIN>
    <END>&lt;/script&gt;</END>
  </SPAN>
  <!-- special hr case: an hr tag at line start, with optional attributes and optional self-closing slash -->
  <SEQ_REGEXP AT_LINE_START="TRUE" TYPE="MARKUP">&lt;hr\b([^&lt;&gt;])*?/?&gt;</SEQ_REGEXP>
  <!-- block level html must be at the start of a line we isolate this because block
       level html should not be parsed for markdown syntax; $1 in END refers back to the
       tag name captured by BEGIN -->
  <SPAN_REGEXP HASH_CHAR="&lt;" AT_LINE_START="TRUE" TYPE="MARKUP" DELEGATE="BLOCK_HTML_TAGS">
    <BEGIN>&lt;(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|noscript|form|fieldset|iframe|math|ins|del)\b</BEGIN>
    <END>&lt;/$1&gt;</END>
  </SPAN_REGEXP>
  <!-- ignore dangling less thans to allow for things like 4 < 5 (the surrounding spaces are part of the match) -->
  <SEQ TYPE="NULL"> &lt; </SEQ>
  <!-- HANDLE OTHER INLINE HTML ELEMENTS -->
  <SPAN TYPE="MARKUP" DELEGATE="INLINE_MARKUP">
    <BEGIN>&lt;</BEGIN>
    <END>&gt;</END>
  </SPAN>
  <!-- THE REST IS MARKDOWN -->
  <IMPORT DELEGATE="MARKDOWN" />
</RULES>
<!-- ================ INLINE HTML ================================= -->
<!-- Content of an inline tag: token type defaults to MARKUP and the rules come from html::TAGS. -->
<RULES DEFAULT="MARKUP" SET="INLINE_MARKUP">
  <IMPORT DELEGATE="html::TAGS" />
</RULES>
<!-- ================ BLOCK LEVEL HTML ================================= -->
<RULES DEFAULT="MARKUP" SET="BLOCK_HTML_TAGS">
  <!-- A line that starts flush left, or is indented by only one to three spaces, is
       flagged INVALID through to the end of the line. -->
  <EOL_SPAN_REGEXP TYPE="INVALID" AT_LINE_START="TRUE">[\S]+</EOL_SPAN_REGEXP>
  <EOL_SPAN_REGEXP TYPE="INVALID" AT_LINE_START="TRUE"> {1,3}[\S]+</EOL_SPAN_REGEXP>
  <!-- Four spaces or a tab at line start: the remainder of the line is delegated to html::MAIN. -->
  <EOL_SPAN_REGEXP DELEGATE="html::MAIN" AT_LINE_START="TRUE">( {4}|\t)</EOL_SPAN_REGEXP>
  <!-- Double-quoted and single-quoted strings -->
  <SPAN TYPE="LITERAL1">
    <BEGIN>"</BEGIN>
    <END>"</END>
  </SPAN>
  <SPAN TYPE="LITERAL1">
    <BEGIN>'</BEGIN>
    <END>'</END>
  </SPAN>
  <!-- The equals sign between an attribute name and its value -->
  <SEQ TYPE="OPERATOR">=</SEQ>
</RULES>
<!-- ================ MARKDOWN ================================= -->
<!--
  Markdown rules for text outside blockquotes.
  A tab inside a HASH_CHARS value is written as the character reference &#9;. A literal tab
  in an attribute value is normalized to a space by the XML parser, which would drop the tab
  from the set of hash characters.
-->
<RULES SET="MARKDOWN" IGNORE_CASE="FALSE">
  <!-- blockquotes, also handles nested blockquote chars; the rest of the line is delegated to MARKDOWN_BLOCKQUOTE -->
  <EOL_SPAN_REGEXP HASH_CHARS=" &gt;" AT_LINE_START="TRUE" MATCH_TYPE="LITERAL3" DELEGATE="MARKDOWN_BLOCKQUOTE">[ \t]*(&gt;[ \t]{1})+</EOL_SPAN_REGEXP>
  <!-- literal characters (i.e. cases where they won't specify formatting) -->
  <SEQ TYPE="NULL"> * </SEQ>
  <SEQ TYPE="NULL"> _ </SEQ>
  <SEQ TYPE="NULL">\][</SEQ>
  <!-- backslash-escaped punctuation -->
  <SEQ_REGEXP TYPE="NULL" HASH_CHAR="\">\\[\Q*_\`[](){}#+.!-\E]</SEQ_REGEXP>
  <!-- GitHub-flavored code blocks -->
  <SPAN TYPE="LITERAL2" AT_LINE_START="TRUE">
    <BEGIN>```</BEGIN>
    <END>```</END>
  </SPAN>
  <!-- inline code: `NSString* str = @"hi!";` using backticks; END must repeat the one or two backticks matched by BEGIN -->
  <SPAN_REGEXP TYPE="LITERAL2" HASH_CHARS="`">
    <BEGIN>(`{1,2})</BEGIN>
    <END>$1</END>
  </SPAN_REGEXP>
  <!-- <EOL_SPAN_REGEXP TYPE="LITERAL2" AT_LINE_START="TRUE" HASH_CHARS=" &#9;">( {4,}|\t+){2,}</EOL_SPAN_REGEXP> -->
  <!-- headers (setext-style:) -->
  <EOL_SPAN_REGEXP TYPE="KEYWORD1" AT_LINE_START="TRUE" HASH_CHARS="=-">[=-]+</EOL_SPAN_REGEXP>
  <!-- headers (atx-style:) -->
  <EOL_SPAN_REGEXP TYPE="KEYWORD1" AT_LINE_START="TRUE" HASH_CHAR="#">#{1,6}[ \t]*(.+?)</EOL_SPAN_REGEXP>
  <!-- horizontal rules -->
  <EOL_SPAN_REGEXP TYPE="KEYWORD1" HASH_CHARS="-*_ &#9;" AT_LINE_START="TRUE">[ ]{0,2}([ ]?[-_*][ ]?){3,}[ \t]*</EOL_SPAN_REGEXP>
  <!-- lists (unordered) -->
  <!-- <SEQ_REGEXP TYPE="KEYWORD2" AT_LINE_START="TRUE" HASH_CHARS="*+- &#9;">[ \t]{0,3}[*+-][ \t]+</SEQ_REGEXP> -->
  <SEQ_REGEXP TYPE="KEYWORD2" AT_LINE_START="TRUE" HASH_CHARS="+-* &#9;">[ \t]{0,}[*+-][ \t]+</SEQ_REGEXP>
  <!-- lists (ordered) -->
  <SEQ_REGEXP TYPE="KEYWORD2" AT_LINE_START="TRUE" HASH_CHARS="0123456789 &#9;">[ \t]{0,}\d+\.[ \t]+</SEQ_REGEXP>
  <!-- Link Label definitions all on one line -->
  <EOL_SPAN_REGEXP TYPE="LABEL" AT_WHITESPACE_END="TRUE" DELEGATE="LINK_LABEL_DEFINITION">\[(.*?)\]\:</EOL_SPAN_REGEXP>
  <!-- Inline images and page links and pointers ![alt text](/path/to/img.jpg "Title for this") -->
  <SPAN_REGEXP TYPE="KEYWORD4" MATCH_TYPE="OPERATOR" NO_LINE_BREAK="TRUE" AT_LINE_START="FALSE" HASH_CHARS=" ![" DELEGATE="LINK_INLINE_URL_TITLE">
    <BEGIN> !?\[[\p{Alnum}\p{Blank}]*</BEGIN>
    <END>\]</END>
  </SPAN_REGEXP>
  <!-- emphasis (strong) -->
  <SPAN_REGEXP TYPE="LITERAL3" HASH_CHARS="*_" AT_WORD_START="TRUE" NO_LINE_BREAK="TRUE">
    <BEGIN>(\*\*|__)</BEGIN>
    <END>$1</END>
  </SPAN_REGEXP>
  <!-- emphasis (em) -->
  <SPAN_REGEXP TYPE="LITERAL4" HASH_CHARS="*_" AT_WORD_START="TRUE" NO_LINE_BREAK="TRUE">
    <BEGIN>(\*|_)</BEGIN>
    <END>$1</END>
  </SPAN_REGEXP>
</RULES>
<!-- ================ LINK PROCESSING ================================= -->
<!-- Remainder of a link label definition line; text not matched by a rule gets KEYWORD3. -->
<RULES SET="LINK_LABEL_DEFINITION" DEFAULT="KEYWORD3">
  <!-- backslash-escaped punctuation is left unstyled -->
  <SEQ_REGEXP HASH_CHAR="\" TYPE="NULL">\\[\Q*_\`[](){}#+.!-\E]</SEQ_REGEXP>
  <!-- double quote and parentheses are shown as operators -->
  <SEQ TYPE="OPERATOR">"</SEQ>
  <SEQ TYPE="OPERATOR">(</SEQ>
  <SEQ TYPE="OPERATOR">)</SEQ>
  <!-- the MARKDOWN rules are imported after the rules above -->
  <IMPORT DELEGATE="MARKDOWN" />
</RULES>
<RULES SET="LINK_INLINE_URL_TITLE">
  <!-- closing bracket that ends the link text -->
  <SEQ TYPE="OPERATOR">]</SEQ>
  <!-- bracketed reference: the label that points at a link definition -->
  <SPAN_REGEXP DELEGATE="LINK_INLINE_LABEL_CLOSE" HASH_CHAR="[" TYPE="KEYWORD4" MATCH_TYPE="OPERATOR" AT_LINE_START="FALSE" NO_LINE_BREAK="TRUE">
    <BEGIN>\[</BEGIN>
    <END>\]</END>
  </SPAN_REGEXP>
  <!-- parenthesised target: the url plus an optional title -->
  <SPAN_REGEXP DELEGATE="LINK_INLINE_URL_TITLE_CLOSE" HASH_CHAR="(" TYPE="KEYWORD4" MATCH_TYPE="OPERATOR" AT_LINE_START="FALSE" NO_LINE_BREAK="TRUE">
    <BEGIN>\(</BEGIN>
    <END>\)</END>
  </SPAN_REGEXP>
</RULES>
<RULES SET="LINK_INLINE_URL_TITLE_CLOSE" DEFAULT="KEYWORD3">
  <!-- closing parenthesis: shown as an operator, then the rest of the line goes back to MAIN -->
  <EOL_SPAN DELEGATE="MAIN" MATCH_TYPE="OPERATOR" TYPE="NULL">)</EOL_SPAN>
</RULES>
<RULES SET="LINK_INLINE_LABEL_CLOSE" DEFAULT="LABEL">
  <!-- closing bracket: shown as an operator, then the rest of the line goes back to MAIN -->
  <EOL_SPAN DELEGATE="MAIN" MATCH_TYPE="OPERATOR" TYPE="NULL">]</EOL_SPAN>
</RULES>
<!-- ================ MARKDOWN EMBEDDED IN A BLOCKQUOTE ================================= -->
<!--
  Repetitive: these are the same markdown rules, but without AT_LINE_START=TRUE, because they
  run inside a blockquote match that has already matched at line start. The author did not
  see a better way at the time.
  Every '<' and '>' that is rule text is written as an entity so it is not read as markup,
  and a tab inside a HASH_CHARS value is written as &#9; because a literal tab in an
  attribute value is normalized to a space by the XML parser.
-->
<RULES SET="MARKDOWN_BLOCKQUOTE" IGNORE_CASE="FALSE">
  <!-- ignore dangling less thans to allow for things like 4 < 5 -->
  <SEQ TYPE="NULL"> &lt; </SEQ>
  <!-- HANDLE OTHER INLINE HTML ELEMENTS -->
  <SPAN TYPE="MARKUP" DELEGATE="INLINE_MARKUP">
    <BEGIN>&lt;</BEGIN>
    <END>&gt;</END>
  </SPAN>
  <!-- literal characters (i.e. cases where they won't specify formatting) -->
  <SEQ TYPE="NULL"> * </SEQ>
  <SEQ TYPE="NULL"> _ </SEQ>
  <SEQ TYPE="NULL">\][</SEQ>
  <!-- backslash-escaped punctuation -->
  <SEQ_REGEXP TYPE="NULL" HASH_CHAR="\">\\[\Q*_\`[](){}#+.!-\E]</SEQ_REGEXP>
  <!-- inline code: `NSString* str = @"hi!";` using backticks -->
  <SPAN_REGEXP TYPE="LITERAL2" HASH_CHARS="`">
    <BEGIN>(`{1,2})</BEGIN>
    <END>$1</END>
  </SPAN_REGEXP>
  <!-- telling difference between code blocks and list paragraphs is impossible until regexp cross line boundaries -->
  <EOL_SPAN_REGEXP TYPE="LITERAL2" HASH_CHARS=" &#9;">( {4,}|\t+)\S</EOL_SPAN_REGEXP>
  <!-- <EOL_SPAN_REGEXP TYPE="LITERAL2" AT_LINE_START="TRUE" HASH_CHARS=" &#9;">( {4,}|\t+){2,}</EOL_SPAN_REGEXP> -->
  <!-- headers (setext-style:) -->
  <EOL_SPAN_REGEXP TYPE="KEYWORD1" HASH_CHARS="=-">[=-]+</EOL_SPAN_REGEXP>
  <!-- headers (atx-style:) -->
  <EOL_SPAN_REGEXP TYPE="KEYWORD1" HASH_CHAR="#">#{1,6}[ \t]*(.+?)</EOL_SPAN_REGEXP>
  <!-- horizontal rules -->
  <EOL_SPAN_REGEXP TYPE="KEYWORD1" HASH_CHARS="-*_ &#9;">[ ]{0,2}([ ]?[-_*][ ]?){3,}[ \t]*</EOL_SPAN_REGEXP>
  <!-- lists (unordered) -->
  <SEQ_REGEXP TYPE="KEYWORD2" HASH_CHARS="*+- &#9;">[ \t]{0,}[*+-][ \t]+</SEQ_REGEXP>
  <!-- lists (ordered) -->
  <SEQ_REGEXP TYPE="KEYWORD2" HASH_CHARS="0123456789 &#9;">[ \t]{0,}\d+\.[ \t]+</SEQ_REGEXP>
  <!-- Link Label definitions all on one line -->
  <EOL_SPAN_REGEXP TYPE="LABEL" DELEGATE="LINK_LABEL_DEFINITION">\[(.*?)\]\:</EOL_SPAN_REGEXP>
  <!-- Inline images and page links and pointers ![alt text](/path/to/img.jpg "Title for this") -->
  <SPAN_REGEXP TYPE="KEYWORD4" MATCH_TYPE="OPERATOR" NO_LINE_BREAK="TRUE" AT_LINE_START="FALSE" HASH_CHARS=" ![" DELEGATE="LINK_INLINE_URL_TITLE">
    <BEGIN> !?\[[\p{Alnum}\p{Blank}]*</BEGIN>
    <END>\]</END>
  </SPAN_REGEXP>
  <!-- emphasis (strong) -->
  <SPAN_REGEXP TYPE="LITERAL3" HASH_CHARS="*_">
    <BEGIN>(\*\*|__)</BEGIN>
    <END>$1</END>
  </SPAN_REGEXP>
  <!-- emphasis (em) -->
  <SPAN_REGEXP TYPE="LITERAL4" HASH_CHARS="*_">
    <BEGIN>(\*|_)</BEGIN>
    <END>$1</END>
  </SPAN_REGEXP>
</RULES>
</MODE>