diff --git a/.gitignore b/.gitignore
index 37fb369..165a17d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,7 @@ sample-sources/
 !sample-sources/statement-expressions.rs
 !sample-sources/statements.rs
 !sample-sources/types.rs
+
+# Unicode-related autogenerated files
+DerivedCoreProperties.txt
+UnicodeLexer.x
diff --git a/scripts/unicode.py b/scripts/unicode.py
new file mode 100755
index 0000000..39d954f
--- /dev/null
+++ b/scripts/unicode.py
@@ -0,0 +1,190 @@
+#!/usr/bin/env python
+#
+# Copyright 2011-2015 The Rust Project Developers
+#           2024 Galois Inc.
+#
+# This script was originally created by the Rust Project Developers as part of
+# the `unicode-xid` crate:
+#
+#   https://github.com/unicode-rs/unicode-xid/blob/b3a2718b062da229c0a50d12281de0e5d8e8cff6/scripts/unicode.py
+#
+# See the COPYRIGHT file in the `unicode-xid` crate:
+#
+#   https://github.com/unicode-rs/unicode-xid/blob/b3a2718b062da229c0a50d12281de0e5d8e8cff6/COPYRIGHT
+#
+# Galois Inc. has modified the script to generate an `alex`-based lexer instead
+# of a Rust-based lexer.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+import fileinput
+import os
+import re
+import subprocess
+import sys
+
+unicode_version = (15, 1, 0)
+
+preamble = '''-- NOTE: The following code was generated by "scripts/unicode.py", do not edit directly
+--
+-- If you need to update this code, perform the following steps:
+--
+-- 1. (Optional) Update the value of `unicode_version` in "scripts/unicode.py".
+-- 2. Run the "scripts/unicode.py" script.
+-- 3. Copy the code (including the comments) in the autogenerated `UnicodeLexer.x` file.
+-- 4. Replace the existing autogenerated code here.
+'''
+
+postamble = '''-- End of code generated by "scripts/unicode.py".
+'''
+
+def unicode_url(f):
+    """Return the download URL of UCD file `f` for `unicode_version`."""
+    return "http://www.unicode.org/Public/%s.%s.%s/ucd/%s" % (unicode_version + (f,))
+
+def fetch(f):
+    """Download UCD file `f` into the current directory unless already present.
+
+    Exits with status 1 if the file still does not exist afterwards.
+    """
+    local = os.path.basename(f)
+    if not os.path.exists(local):
+        try:
+            # Argument list, so no shell is involved. `-f` makes curl fail on an
+            # HTTP error instead of saving the server's error page as the table.
+            subprocess.call(["curl", "-f", "-O", unicode_url(f)])
+        except OSError:
+            # curl could not be started; reported by the existence check below.
+            pass
+
+    if not os.path.exists(local):
+        sys.stderr.write("cannot load %s\n" % f)
+        sys.exit(1)
+
+def group_cat(cat):
+    """Collapse code points into a sorted list of inclusive (lo, hi) ranges.
+
+    Duplicates are ignored; an empty input yields an empty list.
+    """
+    letters = sorted(set(cat))
+    if not letters:
+        return []
+    cat_out = []
+    cur_start = cur_end = letters[0]
+    for letter in letters[1:]:
+        if letter == cur_end + 1:
+            cur_end = letter
+        else:
+            cat_out.append((cur_start, cur_end))
+            cur_start = cur_end = letter
+    cat_out.append((cur_start, cur_end))
+    return cat_out
+
+def ungroup_cat(cat):
+    """Expand inclusive (lo, hi) ranges into a flat list of code points."""
+    cat_out = []
+    for (lo, hi) in cat:
+        cat_out.extend(range(lo, hi + 1))
+    return cat_out
+
+def format_table_content(f, content, indent):
+    """Write the "|"-separated alternatives in `content` to `f`, one per line.
+
+    Each line is indented by `indent` spaces; the first alternative is
+    introduced by "= " and the others by "| ". A blank line ends the table.
+    """
+    pad = " " * indent
+    lines = []
+    for i, chunk in enumerate(content.split("|")):
+        lines.append(pad + ("= " if i == 0 else "| ") + chunk + "\n")
+    f.write("".join(lines) + "\n")
+
+def load_properties(f, interestingprops):
+    """Parse UCD property file `f` into a dict of property name -> ranges.
+
+    The file is downloaded first if needed. Only properties listed in
+    `interestingprops` are kept (a falsy value keeps all of them). Each value
+    is a sorted list of merged, inclusive (lo, hi) code point ranges.
+    """
+    fetch(f)
+    props = {}
+    # "XXXX ; Prop" is a single code point, "XXXX..YYYY ; Prop" a range.
+    re1 = re.compile(r"^ *([0-9A-F]+) *; *(\w+)")
+    re2 = re.compile(r"^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)")
+
+    lines = fileinput.input(os.path.basename(f))
+    try:
+        for line in lines:
+            m = re1.match(line)
+            if m:
+                d_lo = d_hi = m.group(1)
+                prop = m.group(2)
+            else:
+                m = re2.match(line)
+                if not m:
+                    continue
+                d_lo, d_hi, prop = m.groups()
+            if interestingprops and prop not in interestingprops:
+                continue
+            props.setdefault(prop, []).append((int(d_lo, 16), int(d_hi, 16)))
+    finally:
+        # Close the input even if parsing fails part-way through.
+        lines.close()
+
+    # optimize if possible
+    for prop in props:
+        props[prop] = group_cat(ungroup_cat(props[prop]))
+
+    return props
+
+def escape_char(c):
+    """Format code point `c` as an alex hex escape with at least 4 digits."""
+    return "\\x%04x" % c
+
+def emit_table(f, name, t_data):
+    """Write the alex macro `@name` matching the (lo, hi) ranges in `t_data`."""
+    f.write("@%s\n" % name)
+    alternatives = []
+    for (lo, hi) in t_data:
+        if lo == hi:
+            alternatives.append(escape_char(lo))
+        else:
+            alternatives.append("[%s-%s]" % (escape_char(lo), escape_char(hi)))
+    format_table_content(f, "|".join(alternatives), 2)
+
+def emit_property_module(f, mod, tbl, emit):
+    """Write one alex macro to `f` for each property of `tbl` named in `emit`.
+
+    `mod` is unused; it is kept so existing callers keep working.
+    """
+    for cat in emit:
+        emit_table(f, cat, tbl[cat])
+
+if __name__ == "__main__":
+    r = "UnicodeLexer.x"
+    table = "DerivedCoreProperties.txt"
+
+    # Download and parse the data before touching the output file, so a failed
+    # download does not leave a truncated UnicodeLexer.x behind.
+    want_derived = ["XID_Start", "XID_Continue"]
+    derived = load_properties(table, want_derived)
+
+    with open(r, "w") as rf:
+        # write the file's preamble
+        rf.write(preamble)
+
+        # record which Unicode version and table the code was generated from
+        rf.write('''
+-- Based on Unicode %s.%s.%s, using the following Unicode table:
+-- %s
+
+''' % (unicode_version + (unicode_url(table),)))
+
+        emit_property_module(rf, "derived_property", derived, want_derived)
+
+        # write the file's postamble
+        rf.write(postamble)
diff --git a/src/Language/Rust/Parser/Lexer.x b/src/Language/Rust/Parser/Lexer.x
index e08332b..3acbad4 100644
--- a/src/Language/Rust/Parser/Lexer.x
+++ b/src/Language/Rust/Parser/Lexer.x
@@ -18,7 +18,7 @@ bitwise and, and unary reference), @&&&x&&&y@ lexes into 'AmpersandAmpersand', ' @'IdentTok' "x"@, 'AmpersandAmpersand', 'Ampersand', @'IdentTok' "y"@. Although the parser sometimes needs to "break apart" tokens, it never has to think about putting them together. That means it can easily figure out that @&&&x&&&y@ parses as @&(&(&x)) && (&y)@ and not @&(&(&x)) & (&(&y))@ even if -bitwise conjunctions bind more tightly that logical conjunctions. +bitwise conjunctions bind more tightly that logical conjunctions. 
This sort of amguity where one token need to be broken up by the parser occurs for @@ -27,7 +27,7 @@ This sort of amguity where one token need to be broken up by the parser occurs f * @<<@ in qualified type paths like @FromIterator\<\::Item\>@ * @>>@ in qualified paths like @\\>::Bar@ * @>=@ in equality predicates like @F\=i32@ - * @>>=@ in equality predicates like @F\\>=i32@ + * @>>=@ in equality predicates like @F\\>=i32@ -} module Language.Rust.Parser.Lexer ( @@ -56,47 +56,50 @@ import Data.Word ( Word8 ) -- Things to review: -- * improved error messages --- Based heavily on: --- * --- * --- * - } --- XID_START unicode character class -@xid_start +-- NOTE: The following code was generated by "scripts/unicode.py", do not edit directly +-- +-- If you need to update this code, perform the following steps: +-- +-- 1. (Optional) Update the value of `unicode_version` in "scripts/unicode.py". +-- 2. Run the "scripts/unicode.py" script. +-- 3. Copy the code (including the comments) in the autogenerated `UnicodeLexer.x` file. +-- 4. Replace the existing autogenerated code here. 
+ +-- Based on Unicode 15.1.0, using the following Unicode table: +-- http://www.unicode.org/Public/15.1.0/ucd/DerivedCoreProperties.txt + +@XID_Start = [\x0041-\x005a] - | "_" | [\x0061-\x007a] | \x00aa | \x00b5 | \x00ba | [\x00c0-\x00d6] | [\x00d8-\x00f6] - | [\x00f8-\x0236] - | [\x0250-\x02c1] + | [\x00f8-\x02c1] | [\x02c6-\x02d1] | [\x02e0-\x02e4] + | \x02ec | \x02ee + | [\x0370-\x0374] + | [\x0376-\x0377] + | [\x037b-\x037d] + | \x037f | \x0386 | [\x0388-\x038a] | \x038c | [\x038e-\x03a1] - | [\x03a3-\x03ce] - | [\x03d0-\x03f5] - | [\x03f7-\x03fb] - | [\x0400-\x0481] - | [\x048a-\x04ce] - | [\x04d0-\x04f5] - | [\x04f8-\x04f9] - | [\x0500-\x050f] + | [\x03a3-\x03f5] + | [\x03f7-\x0481] + | [\x048a-\x052f] | [\x0531-\x0556] | \x0559 - | [\x0561-\x0587] + | [\x0560-\x0588] | [\x05d0-\x05ea] - | [\x05f0-\x05f2] - | [\x0621-\x063a] - | [\x0640-\x064a] + | [\x05ef-\x05f2] + | [\x0620-\x064a] | [\x066e-\x066f] | [\x0671-\x06d3] | \x06d5 @@ -106,13 +109,25 @@ import Data.Word ( Word8 ) | \x06ff | \x0710 | [\x0712-\x072f] - | [\x074d-\x074f] - | [\x0780-\x07a5] + | [\x074d-\x07a5] | \x07b1 + | [\x07ca-\x07ea] + | [\x07f4-\x07f5] + | \x07fa + | [\x0800-\x0815] + | \x081a + | \x0824 + | \x0828 + | [\x0840-\x0858] + | [\x0860-\x086a] + | [\x0870-\x0887] + | [\x0889-\x088e] + | [\x08a0-\x08c9] | [\x0904-\x0939] | \x093d | \x0950 | [\x0958-\x0961] + | [\x0971-\x0980] | [\x0985-\x098c] | [\x098f-\x0990] | [\x0993-\x09a8] @@ -120,9 +135,11 @@ import Data.Word ( Word8 ) | \x09b2 | [\x09b6-\x09b9] | \x09bd + | \x09ce | [\x09dc-\x09dd] | [\x09df-\x09e1] | [\x09f0-\x09f1] + | \x09fc | [\x0a05-\x0a0a] | [\x0a0f-\x0a10] | [\x0a13-\x0a28] @@ -142,6 +159,7 @@ import Data.Word ( Word8 ) | \x0abd | \x0ad0 | [\x0ae0-\x0ae1] + | \x0af9 | [\x0b05-\x0b0c] | [\x0b0f-\x0b10] | [\x0b13-\x0b28] @@ -161,27 +179,34 @@ import Data.Word ( Word8 ) | [\x0b9e-\x0b9f] | [\x0ba3-\x0ba4] | [\x0ba8-\x0baa] - | [\x0bae-\x0bb5] - | [\x0bb7-\x0bb9] + | [\x0bae-\x0bb9] + | \x0bd0 | [\x0c05-\x0c0c] | 
[\x0c0e-\x0c10] | [\x0c12-\x0c28] - | [\x0c2a-\x0c33] - | [\x0c35-\x0c39] + | [\x0c2a-\x0c39] + | \x0c3d + | [\x0c58-\x0c5a] + | \x0c5d | [\x0c60-\x0c61] + | \x0c80 | [\x0c85-\x0c8c] | [\x0c8e-\x0c90] | [\x0c92-\x0ca8] | [\x0caa-\x0cb3] | [\x0cb5-\x0cb9] | \x0cbd - | \x0cde + | [\x0cdd-\x0cde] | [\x0ce0-\x0ce1] - | [\x0d05-\x0d0c] + | [\x0cf1-\x0cf2] + | [\x0d04-\x0d0c] | [\x0d0e-\x0d10] - | [\x0d12-\x0d28] - | [\x0d2a-\x0d39] - | [\x0d60-\x0d61] + | [\x0d12-\x0d3a] + | \x0d3d + | \x0d4e + | [\x0d54-\x0d56] + | [\x0d5f-\x0d61] + | [\x0d7a-\x0d7f] | [\x0d85-\x0d96] | [\x0d9a-\x0db1] | [\x0db3-\x0dbb] @@ -192,83 +217,93 @@ import Data.Word ( Word8 ) | [\x0e40-\x0e46] | [\x0e81-\x0e82] | \x0e84 - | [\x0e87-\x0e88] - | \x0e8a - | \x0e8d - | [\x0e94-\x0e97] - | [\x0e99-\x0e9f] - | [\x0ea1-\x0ea3] + | [\x0e86-\x0e8a] + | [\x0e8c-\x0ea3] | \x0ea5 - | \x0ea7 - | [\x0eaa-\x0eab] - | [\x0ead-\x0eb0] + | [\x0ea7-\x0eb0] | \x0eb2 | \x0ebd | [\x0ec0-\x0ec4] | \x0ec6 - | [\x0edc-\x0edd] + | [\x0edc-\x0edf] | \x0f00 | [\x0f40-\x0f47] - | [\x0f49-\x0f6a] - | [\x0f88-\x0f8b] - | [\x1000-\x1021] - | [\x1023-\x1027] - | [\x1029-\x102a] + | [\x0f49-\x0f6c] + | [\x0f88-\x0f8c] + | [\x1000-\x102a] + | \x103f | [\x1050-\x1055] + | [\x105a-\x105d] + | \x1061 + | [\x1065-\x1066] + | [\x106e-\x1070] + | [\x1075-\x1081] + | \x108e | [\x10a0-\x10c5] - | [\x10d0-\x10f8] - | [\x1100-\x1159] - | [\x115f-\x11a2] - | [\x11a8-\x11f9] - | [\x1200-\x1206] - | [\x1208-\x1246] - | \x1248 + | \x10c7 + | \x10cd + | [\x10d0-\x10fa] + | [\x10fc-\x1248] | [\x124a-\x124d] | [\x1250-\x1256] | \x1258 | [\x125a-\x125d] - | [\x1260-\x1286] - | \x1288 + | [\x1260-\x1288] | [\x128a-\x128d] - | [\x1290-\x12ae] - | \x12b0 + | [\x1290-\x12b0] | [\x12b2-\x12b5] | [\x12b8-\x12be] | \x12c0 | [\x12c2-\x12c5] - | [\x12c8-\x12ce] - | [\x12d0-\x12d6] - | [\x12d8-\x12ee] - | [\x12f0-\x130e] - | \x1310 + | [\x12c8-\x12d6] + | [\x12d8-\x1310] | [\x1312-\x1315] - | [\x1318-\x131e] - | [\x1320-\x1346] - | [\x1348-\x135a] - | 
[\x13a0-\x13f4] + | [\x1318-\x135a] + | [\x1380-\x138f] + | [\x13a0-\x13f5] + | [\x13f8-\x13fd] | [\x1401-\x166c] - | [\x166f-\x1676] + | [\x166f-\x167f] | [\x1681-\x169a] | [\x16a0-\x16ea] - | [\x16ee-\x16f0] - | [\x1700-\x170c] - | [\x170e-\x1711] - | [\x1720-\x1731] + | [\x16ee-\x16f8] + | [\x1700-\x1711] + | [\x171f-\x1731] | [\x1740-\x1751] | [\x1760-\x176c] | [\x176e-\x1770] | [\x1780-\x17b3] | \x17d7 | \x17dc - | [\x1820-\x1877] + | [\x1820-\x1878] | [\x1880-\x18a8] - | [\x1900-\x191c] + | \x18aa + | [\x18b0-\x18f5] + | [\x1900-\x191e] | [\x1950-\x196d] | [\x1970-\x1974] - | [\x1d00-\x1d6b] - | [\x1e00-\x1e9b] - | [\x1ea0-\x1ef9] - | [\x1f00-\x1f15] + | [\x1980-\x19ab] + | [\x19b0-\x19c9] + | [\x1a00-\x1a16] + | [\x1a20-\x1a54] + | \x1aa7 + | [\x1b05-\x1b33] + | [\x1b45-\x1b4c] + | [\x1b83-\x1ba0] + | [\x1bae-\x1baf] + | [\x1bba-\x1be5] + | [\x1c00-\x1c23] + | [\x1c4d-\x1c4f] + | [\x1c5a-\x1c7d] + | [\x1c80-\x1c88] + | [\x1c90-\x1cba] + | [\x1cbd-\x1cbf] + | [\x1ce9-\x1cec] + | [\x1cee-\x1cf3] + | [\x1cf5-\x1cf6] + | \x1cfa + | [\x1d00-\x1dbf] + | [\x1e00-\x1f15] | [\x1f18-\x1f1d] | [\x1f20-\x1f45] | [\x1f48-\x1f4d] @@ -289,6 +324,7 @@ import Data.Word ( Word8 ) | [\x1ff6-\x1ffc] | \x2071 | \x207f + | [\x2090-\x209c] | \x2102 | \x2107 | [\x210a-\x2113] @@ -297,11 +333,28 @@ import Data.Word ( Word8 ) | \x2124 | \x2126 | \x2128 - | [\x212a-\x2131] - | [\x2133-\x2139] - | [\x213d-\x213f] + | [\x212a-\x2139] + | [\x213c-\x213f] | [\x2145-\x2149] - | [\x2160-\x2183] + | \x214e + | [\x2160-\x2188] + | [\x2c00-\x2ce4] + | [\x2ceb-\x2cee] + | [\x2cf2-\x2cf3] + | [\x2d00-\x2d25] + | \x2d27 + | \x2d2d + | [\x2d30-\x2d67] + | \x2d6f + | [\x2d80-\x2d96] + | [\x2da0-\x2da6] + | [\x2da8-\x2dae] + | [\x2db0-\x2db6] + | [\x2db8-\x2dbe] + | [\x2dc0-\x2dc6] + | [\x2dc8-\x2dce] + | [\x2dd0-\x2dd6] + | [\x2dd8-\x2dde] | [\x3005-\x3007] | [\x3021-\x3029] | [\x3031-\x3035] @@ -310,16 +363,69 @@ import Data.Word ( Word8 ) | [\x309d-\x309f] | [\x30a1-\x30fa] | [\x30fc-\x30ff] - | 
[\x3105-\x312c] + | [\x3105-\x312f] | [\x3131-\x318e] - | [\x31a0-\x31b7] + | [\x31a0-\x31bf] | [\x31f0-\x31ff] - | [\x3400-\x4db5] - | [\x4e00-\x9fa5] - | [\xa000-\xa48c] + | [\x3400-\x4dbf] + | [\x4e00-\xa48c] + | [\xa4d0-\xa4fd] + | [\xa500-\xa60c] + | [\xa610-\xa61f] + | [\xa62a-\xa62b] + | [\xa640-\xa66e] + | [\xa67f-\xa69d] + | [\xa6a0-\xa6ef] + | [\xa717-\xa71f] + | [\xa722-\xa788] + | [\xa78b-\xa7ca] + | [\xa7d0-\xa7d1] + | \xa7d3 + | [\xa7d5-\xa7d9] + | [\xa7f2-\xa801] + | [\xa803-\xa805] + | [\xa807-\xa80a] + | [\xa80c-\xa822] + | [\xa840-\xa873] + | [\xa882-\xa8b3] + | [\xa8f2-\xa8f7] + | \xa8fb + | [\xa8fd-\xa8fe] + | [\xa90a-\xa925] + | [\xa930-\xa946] + | [\xa960-\xa97c] + | [\xa984-\xa9b2] + | \xa9cf + | [\xa9e0-\xa9e4] + | [\xa9e6-\xa9ef] + | [\xa9fa-\xa9fe] + | [\xaa00-\xaa28] + | [\xaa40-\xaa42] + | [\xaa44-\xaa4b] + | [\xaa60-\xaa76] + | \xaa7a + | [\xaa7e-\xaaaf] + | \xaab1 + | [\xaab5-\xaab6] + | [\xaab9-\xaabd] + | \xaac0 + | \xaac2 + | [\xaadb-\xaadd] + | [\xaae0-\xaaea] + | [\xaaf2-\xaaf4] + | [\xab01-\xab06] + | [\xab09-\xab0e] + | [\xab11-\xab16] + | [\xab20-\xab26] + | [\xab28-\xab2e] + | [\xab30-\xab5a] + | [\xab5c-\xab69] + | [\xab70-\xabe2] | [\xac00-\xd7a3] - | [\xf900-\xfa2d] - | [\xfa30-\xfa6a] + | [\xd7b0-\xd7c6] + | [\xd7cb-\xd7fb] + | [\xf900-\xfa6d] + | [\xfa70-\xfad9] | [\xfb00-\xfb06] | [\xfb13-\xfb17] | \xfb1d @@ -350,99 +456,290 @@ import Data.Word ( Word8 ) | [\xffca-\xffcf] | [\xffd2-\xffd7] | [\xffda-\xffdc] - | \xd800 [\xdc00-\xdc0a] - | \xd800 [\xdc0d-\xdc25] - | \xd800 [\xdc28-\xdc39] - | \xd800 [\xdc3c-\xdc3c] - | \xd800 [\xdc3f-\xdc4c] - | \xd800 [\xdc50-\xdc5c] - | \xd800 [\xdc80-\xdcf9] - | \xd800 [\xdf00-\xdf1d] - | \xd800 [\xdf30-\xdf49] - | \xd800 [\xdf80-\xdf9c] - | \xd801 [\xe000-\xe09c] - | \xd802 [\xe400-\xe404] - | \xd802 \x0808 - | \xd802 [\xe40a-\xe434] - | \xd802 [\xe437-\xe437] - | \xd802 \x083c - | \xd802 \x083f - | \xd835 [\xb000-\xb053] - | \xd835 [\xb056-\xb09b] - | \xd835 [\xb09e-\xb09e] - | \xd835 
\xd4a2 - | \xd835 [\xb0a5-\xb0a5] - | \xd835 [\xb0a9-\xb0ab] - | \xd835 [\xb0ae-\xb0b8] - | \xd835 \xd4bb - | \xd835 [\xb0bd-\xb0c2] - | \xd835 [\xb0c5-\xb104] - | \xd835 [\xb107-\xb109] - | \xd835 [\xb10d-\xb113] - | \xd835 [\xb116-\xb11b] - | \xd835 [\xb11e-\xb138] - | \xd835 [\xb13b-\xb13d] - | \xd835 [\xb140-\xb143] - | \xd835 \xd546 - | \xd835 [\xb14a-\xb14f] - | \xd835 [\xb152-\xb2a2] - | \xd835 [\xb2a8-\xb2bf] - | \xd835 [\xb2c2-\xb2d9] - | \xd835 [\xb2dc-\xb2f9] - | \xd835 [\xb2fc-\xb313] - | \xd835 [\xb316-\xb333] - | \xd835 [\xb336-\xb34d] - | \xd835 [\xb350-\xb36d] - | \xd835 [\xb370-\xb387] - | \xd835 [\xb38a-\xb3a7] - | \xd835 [\xb3aa-\xb3c1] - | \xd835 [\xb3c4-\xb3c8] - | \xd840 [\xdc00-\xdffe] - | \xd841 [\xe000-\xe3fe] - | \xd842 [\xe400-\xe7fe] - | \xd843 [\xe800-\xebfe] - | \xd844 [\xec00-\xeffe] - | \xd845 [\xf000-\xf3fe] - | \xd846 [\xf400-\xf7fe] - | \xd847 [\xf800-\xfbfe] - | \xd848 [\xfc00-\xfffe] - | \xd849 [\x0000-\x03fe] - | \xd84a [\x0400-\x07fe] - | \xd84b [\x0800-\x0bfe] - | \xd84c [\x0c00-\x0ffe] - | \xd84d [\x1000-\x13fe] - | \xd84e [\x1400-\x17fe] - | \xd84f [\x1800-\x1bfe] - | \xd850 [\x1c00-\x1ffe] - | \xd851 [\x2000-\x23fe] - | \xd852 [\x2400-\x27fe] - | \xd853 [\x2800-\x2bfe] - | \xd854 [\x2c00-\x2ffe] - | \xd855 [\x3000-\x33fe] - | \xd856 [\x3400-\x37fe] - | \xd857 [\x3800-\x3bfe] - | \xd858 [\x3c00-\x3ffe] - | \xd859 [\x4000-\x43fe] - | \xd85a [\x4400-\x47fe] - | \xd85b [\x4800-\x4bfe] - | \xd85c [\x4c00-\x4ffe] - | \xd85d [\x5000-\x53fe] - | \xd85e [\x5400-\x57fe] - | \xd85f [\x5800-\x5bfe] - | \xd860 [\x5c00-\x5ffe] - | \xd861 [\x6000-\x63fe] - | \xd862 [\x6400-\x67fe] - | \xd863 [\x6800-\x6bfe] - | \xd864 [\x6c00-\x6ffe] - | \xd865 [\x7000-\x73fe] - | \xd866 [\x7400-\x77fe] - | \xd867 [\x7800-\x7bfe] - | \xd868 [\x7c00-\x7ffe] - | \xd869 [\x8000-\x82d5] - | \xd87e [\xd400-\xd61c] - --- XID_CONTINUE unicode character class -@xid_continue + | [\x10000-\x1000b] + | [\x1000d-\x10026] + | [\x10028-\x1003a] + | [\x1003c-\x1003d] + 
| [\x1003f-\x1004d] + | [\x10050-\x1005d] + | [\x10080-\x100fa] + | [\x10140-\x10174] + | [\x10280-\x1029c] + | [\x102a0-\x102d0] + | [\x10300-\x1031f] + | [\x1032d-\x1034a] + | [\x10350-\x10375] + | [\x10380-\x1039d] + | [\x103a0-\x103c3] + | [\x103c8-\x103cf] + | [\x103d1-\x103d5] + | [\x10400-\x1049d] + | [\x104b0-\x104d3] + | [\x104d8-\x104fb] + | [\x10500-\x10527] + | [\x10530-\x10563] + | [\x10570-\x1057a] + | [\x1057c-\x1058a] + | [\x1058c-\x10592] + | [\x10594-\x10595] + | [\x10597-\x105a1] + | [\x105a3-\x105b1] + | [\x105b3-\x105b9] + | [\x105bb-\x105bc] + | [\x10600-\x10736] + | [\x10740-\x10755] + | [\x10760-\x10767] + | [\x10780-\x10785] + | [\x10787-\x107b0] + | [\x107b2-\x107ba] + | [\x10800-\x10805] + | \x10808 + | [\x1080a-\x10835] + | [\x10837-\x10838] + | \x1083c + | [\x1083f-\x10855] + | [\x10860-\x10876] + | [\x10880-\x1089e] + | [\x108e0-\x108f2] + | [\x108f4-\x108f5] + | [\x10900-\x10915] + | [\x10920-\x10939] + | [\x10980-\x109b7] + | [\x109be-\x109bf] + | \x10a00 + | [\x10a10-\x10a13] + | [\x10a15-\x10a17] + | [\x10a19-\x10a35] + | [\x10a60-\x10a7c] + | [\x10a80-\x10a9c] + | [\x10ac0-\x10ac7] + | [\x10ac9-\x10ae4] + | [\x10b00-\x10b35] + | [\x10b40-\x10b55] + | [\x10b60-\x10b72] + | [\x10b80-\x10b91] + | [\x10c00-\x10c48] + | [\x10c80-\x10cb2] + | [\x10cc0-\x10cf2] + | [\x10d00-\x10d23] + | [\x10e80-\x10ea9] + | [\x10eb0-\x10eb1] + | [\x10f00-\x10f1c] + | \x10f27 + | [\x10f30-\x10f45] + | [\x10f70-\x10f81] + | [\x10fb0-\x10fc4] + | [\x10fe0-\x10ff6] + | [\x11003-\x11037] + | [\x11071-\x11072] + | \x11075 + | [\x11083-\x110af] + | [\x110d0-\x110e8] + | [\x11103-\x11126] + | \x11144 + | \x11147 + | [\x11150-\x11172] + | \x11176 + | [\x11183-\x111b2] + | [\x111c1-\x111c4] + | \x111da + | \x111dc + | [\x11200-\x11211] + | [\x11213-\x1122b] + | [\x1123f-\x11240] + | [\x11280-\x11286] + | \x11288 + | [\x1128a-\x1128d] + | [\x1128f-\x1129d] + | [\x1129f-\x112a8] + | [\x112b0-\x112de] + | [\x11305-\x1130c] + | [\x1130f-\x11310] + | [\x11313-\x11328] 
+ | [\x1132a-\x11330] + | [\x11332-\x11333] + | [\x11335-\x11339] + | \x1133d + | \x11350 + | [\x1135d-\x11361] + | [\x11400-\x11434] + | [\x11447-\x1144a] + | [\x1145f-\x11461] + | [\x11480-\x114af] + | [\x114c4-\x114c5] + | \x114c7 + | [\x11580-\x115ae] + | [\x115d8-\x115db] + | [\x11600-\x1162f] + | \x11644 + | [\x11680-\x116aa] + | \x116b8 + | [\x11700-\x1171a] + | [\x11740-\x11746] + | [\x11800-\x1182b] + | [\x118a0-\x118df] + | [\x118ff-\x11906] + | \x11909 + | [\x1190c-\x11913] + | [\x11915-\x11916] + | [\x11918-\x1192f] + | \x1193f + | \x11941 + | [\x119a0-\x119a7] + | [\x119aa-\x119d0] + | \x119e1 + | \x119e3 + | \x11a00 + | [\x11a0b-\x11a32] + | \x11a3a + | \x11a50 + | [\x11a5c-\x11a89] + | \x11a9d + | [\x11ab0-\x11af8] + | [\x11c00-\x11c08] + | [\x11c0a-\x11c2e] + | \x11c40 + | [\x11c72-\x11c8f] + | [\x11d00-\x11d06] + | [\x11d08-\x11d09] + | [\x11d0b-\x11d30] + | \x11d46 + | [\x11d60-\x11d65] + | [\x11d67-\x11d68] + | [\x11d6a-\x11d89] + | \x11d98 + | [\x11ee0-\x11ef2] + | \x11f02 + | [\x11f04-\x11f10] + | [\x11f12-\x11f33] + | \x11fb0 + | [\x12000-\x12399] + | [\x12400-\x1246e] + | [\x12480-\x12543] + | [\x12f90-\x12ff0] + | [\x13000-\x1342f] + | [\x13441-\x13446] + | [\x14400-\x14646] + | [\x16800-\x16a38] + | [\x16a40-\x16a5e] + | [\x16a70-\x16abe] + | [\x16ad0-\x16aed] + | [\x16b00-\x16b2f] + | [\x16b40-\x16b43] + | [\x16b63-\x16b77] + | [\x16b7d-\x16b8f] + | [\x16e40-\x16e7f] + | [\x16f00-\x16f4a] + | \x16f50 + | [\x16f93-\x16f9f] + | [\x16fe0-\x16fe1] + | \x16fe3 + | [\x17000-\x187f7] + | [\x18800-\x18cd5] + | [\x18d00-\x18d08] + | [\x1aff0-\x1aff3] + | [\x1aff5-\x1affb] + | [\x1affd-\x1affe] + | [\x1b000-\x1b122] + | \x1b132 + | [\x1b150-\x1b152] + | \x1b155 + | [\x1b164-\x1b167] + | [\x1b170-\x1b2fb] + | [\x1bc00-\x1bc6a] + | [\x1bc70-\x1bc7c] + | [\x1bc80-\x1bc88] + | [\x1bc90-\x1bc99] + | [\x1d400-\x1d454] + | [\x1d456-\x1d49c] + | [\x1d49e-\x1d49f] + | \x1d4a2 + | [\x1d4a5-\x1d4a6] + | [\x1d4a9-\x1d4ac] + | [\x1d4ae-\x1d4b9] + | \x1d4bb + | 
[\x1d4bd-\x1d4c3] + | [\x1d4c5-\x1d505] + | [\x1d507-\x1d50a] + | [\x1d50d-\x1d514] + | [\x1d516-\x1d51c] + | [\x1d51e-\x1d539] + | [\x1d53b-\x1d53e] + | [\x1d540-\x1d544] + | \x1d546 + | [\x1d54a-\x1d550] + | [\x1d552-\x1d6a5] + | [\x1d6a8-\x1d6c0] + | [\x1d6c2-\x1d6da] + | [\x1d6dc-\x1d6fa] + | [\x1d6fc-\x1d714] + | [\x1d716-\x1d734] + | [\x1d736-\x1d74e] + | [\x1d750-\x1d76e] + | [\x1d770-\x1d788] + | [\x1d78a-\x1d7a8] + | [\x1d7aa-\x1d7c2] + | [\x1d7c4-\x1d7cb] + | [\x1df00-\x1df1e] + | [\x1df25-\x1df2a] + | [\x1e030-\x1e06d] + | [\x1e100-\x1e12c] + | [\x1e137-\x1e13d] + | \x1e14e + | [\x1e290-\x1e2ad] + | [\x1e2c0-\x1e2eb] + | [\x1e4d0-\x1e4eb] + | [\x1e7e0-\x1e7e6] + | [\x1e7e8-\x1e7eb] + | [\x1e7ed-\x1e7ee] + | [\x1e7f0-\x1e7fe] + | [\x1e800-\x1e8c4] + | [\x1e900-\x1e943] + | \x1e94b + | [\x1ee00-\x1ee03] + | [\x1ee05-\x1ee1f] + | [\x1ee21-\x1ee22] + | \x1ee24 + | \x1ee27 + | [\x1ee29-\x1ee32] + | [\x1ee34-\x1ee37] + | \x1ee39 + | \x1ee3b + | \x1ee42 + | \x1ee47 + | \x1ee49 + | \x1ee4b + | [\x1ee4d-\x1ee4f] + | [\x1ee51-\x1ee52] + | \x1ee54 + | \x1ee57 + | \x1ee59 + | \x1ee5b + | \x1ee5d + | \x1ee5f + | [\x1ee61-\x1ee62] + | \x1ee64 + | [\x1ee67-\x1ee6a] + | [\x1ee6c-\x1ee72] + | [\x1ee74-\x1ee77] + | [\x1ee79-\x1ee7c] + | \x1ee7e + | [\x1ee80-\x1ee89] + | [\x1ee8b-\x1ee9b] + | [\x1eea1-\x1eea3] + | [\x1eea5-\x1eea9] + | [\x1eeab-\x1eebb] + | [\x20000-\x2a6df] + | [\x2a700-\x2b739] + | [\x2b740-\x2b81d] + | [\x2b820-\x2cea1] + | [\x2ceb0-\x2ebe0] + | [\x2ebf0-\x2ee5d] + | [\x2f800-\x2fa1d] + | [\x30000-\x3134a] + | [\x31350-\x323af] + +@XID_Continue = [\x0030-\x0039] | [\x0041-\x005a] | \x005f @@ -453,55 +750,53 @@ import Data.Word ( Word8 ) | \x00ba | [\x00c0-\x00d6] | [\x00d8-\x00f6] - | [\x00f8-\x0236] - | [\x0250-\x02c1] + | [\x00f8-\x02c1] | [\x02c6-\x02d1] | [\x02e0-\x02e4] + | \x02ec | \x02ee - | [\x0300-\x0357] - | [\x035d-\x036f] - | \x0386 - | [\x0388-\x038a] + | [\x0300-\x0374] + | [\x0376-\x0377] + | [\x037b-\x037d] + | \x037f + | [\x0386-\x038a] 
| \x038c | [\x038e-\x03a1] - | [\x03a3-\x03ce] - | [\x03d0-\x03f5] - | [\x03f7-\x03fb] - | [\x0400-\x0481] - | [\x0483-\x0486] - | [\x048a-\x04ce] - | [\x04d0-\x04f5] - | [\x04f8-\x04f9] - | [\x0500-\x050f] + | [\x03a3-\x03f5] + | [\x03f7-\x0481] + | [\x0483-\x0487] + | [\x048a-\x052f] | [\x0531-\x0556] | \x0559 - | [\x0561-\x0587] - | [\x0591-\x05a1] - | [\x05a3-\x05b9] - | [\x05bb-\x05bd] + | [\x0560-\x0588] + | [\x0591-\x05bd] | \x05bf | [\x05c1-\x05c2] - | \x05c4 + | [\x05c4-\x05c5] + | \x05c7 | [\x05d0-\x05ea] - | [\x05f0-\x05f2] - | [\x0610-\x0615] - | [\x0621-\x063a] - | [\x0640-\x0658] - | [\x0660-\x0669] + | [\x05ef-\x05f2] + | [\x0610-\x061a] + | [\x0620-\x0669] | [\x066e-\x06d3] | [\x06d5-\x06dc] | [\x06df-\x06e8] | [\x06ea-\x06fc] | \x06ff | [\x0710-\x074a] - | [\x074d-\x074f] - | [\x0780-\x07b1] - | [\x0901-\x0939] - | [\x093c-\x094d] - | [\x0950-\x0954] - | [\x0958-\x0963] + | [\x074d-\x07b1] + | [\x07c0-\x07f5] + | \x07fa + | \x07fd + | [\x0800-\x082d] + | [\x0840-\x085b] + | [\x0860-\x086a] + | [\x0870-\x0887] + | [\x0889-\x088e] + | [\x0898-\x08e1] + | [\x08e3-\x0963] | [\x0966-\x096f] - | [\x0981-\x0983] + | [\x0971-\x0983] | [\x0985-\x098c] | [\x098f-\x0990] | [\x0993-\x09a8] @@ -510,11 +805,13 @@ import Data.Word ( Word8 ) | [\x09b6-\x09b9] | [\x09bc-\x09c4] | [\x09c7-\x09c8] - | [\x09cb-\x09cd] + | [\x09cb-\x09ce] | \x09d7 | [\x09dc-\x09dd] | [\x09df-\x09e3] | [\x09e6-\x09f1] + | \x09fc + | \x09fe | [\x0a01-\x0a03] | [\x0a05-\x0a0a] | [\x0a0f-\x0a10] @@ -527,9 +824,10 @@ import Data.Word ( Word8 ) | [\x0a3e-\x0a42] | [\x0a47-\x0a48] | [\x0a4b-\x0a4d] + | \x0a51 | [\x0a59-\x0a5c] | \x0a5e - | [\x0a66-\x0a74] + | [\x0a66-\x0a75] | [\x0a81-\x0a83] | [\x0a85-\x0a8d] | [\x0a8f-\x0a91] @@ -543,6 +841,7 @@ import Data.Word ( Word8 ) | \x0ad0 | [\x0ae0-\x0ae3] | [\x0ae6-\x0aef] + | [\x0af9-\x0aff] | [\x0b01-\x0b03] | [\x0b05-\x0b0c] | [\x0b0f-\x0b10] @@ -550,12 +849,12 @@ import Data.Word ( Word8 ) | [\x0b2a-\x0b30] | [\x0b32-\x0b33] | [\x0b35-\x0b39] 
- | [\x0b3c-\x0b43] + | [\x0b3c-\x0b44] | [\x0b47-\x0b48] | [\x0b4b-\x0b4d] - | [\x0b56-\x0b57] + | [\x0b55-\x0b57] | [\x0b5c-\x0b5d] - | [\x0b5f-\x0b61] + | [\x0b5f-\x0b63] | [\x0b66-\x0b6f] | \x0b71 | [\x0b82-\x0b83] @@ -567,26 +866,26 @@ import Data.Word ( Word8 ) | [\x0b9e-\x0b9f] | [\x0ba3-\x0ba4] | [\x0ba8-\x0baa] - | [\x0bae-\x0bb5] - | [\x0bb7-\x0bb9] + | [\x0bae-\x0bb9] | [\x0bbe-\x0bc2] | [\x0bc6-\x0bc8] | [\x0bca-\x0bcd] + | \x0bd0 | \x0bd7 - | [\x0be7-\x0bef] - | [\x0c01-\x0c03] - | [\x0c05-\x0c0c] + | [\x0be6-\x0bef] + | [\x0c00-\x0c0c] | [\x0c0e-\x0c10] | [\x0c12-\x0c28] - | [\x0c2a-\x0c33] - | [\x0c35-\x0c39] - | [\x0c3e-\x0c44] + | [\x0c2a-\x0c39] + | [\x0c3c-\x0c44] | [\x0c46-\x0c48] | [\x0c4a-\x0c4d] | [\x0c55-\x0c56] - | [\x0c60-\x0c61] + | [\x0c58-\x0c5a] + | \x0c5d + | [\x0c60-\x0c63] | [\x0c66-\x0c6f] - | [\x0c82-\x0c83] + | [\x0c80-\x0c83] | [\x0c85-\x0c8c] | [\x0c8e-\x0c90] | [\x0c92-\x0ca8] @@ -596,21 +895,20 @@ import Data.Word ( Word8 ) | [\x0cc6-\x0cc8] | [\x0cca-\x0ccd] | [\x0cd5-\x0cd6] - | \x0cde - | [\x0ce0-\x0ce1] + | [\x0cdd-\x0cde] + | [\x0ce0-\x0ce3] | [\x0ce6-\x0cef] - | [\x0d02-\x0d03] - | [\x0d05-\x0d0c] + | [\x0cf1-\x0cf3] + | [\x0d00-\x0d0c] | [\x0d0e-\x0d10] - | [\x0d12-\x0d28] - | [\x0d2a-\x0d39] - | [\x0d3e-\x0d43] + | [\x0d12-\x0d44] | [\x0d46-\x0d48] - | [\x0d4a-\x0d4d] - | \x0d57 - | [\x0d60-\x0d61] + | [\x0d4a-\x0d4e] + | [\x0d54-\x0d57] + | [\x0d5f-\x0d63] | [\x0d66-\x0d6f] - | [\x0d82-\x0d83] + | [\x0d7a-\x0d7f] + | [\x0d81-\x0d83] | [\x0d85-\x0d96] | [\x0d9a-\x0db1] | [\x0db3-\x0dbb] @@ -620,28 +918,22 @@ import Data.Word ( Word8 ) | [\x0dcf-\x0dd4] | \x0dd6 | [\x0dd8-\x0ddf] + | [\x0de6-\x0def] | [\x0df2-\x0df3] | [\x0e01-\x0e3a] | [\x0e40-\x0e4e] | [\x0e50-\x0e59] | [\x0e81-\x0e82] | \x0e84 - | [\x0e87-\x0e88] - | \x0e8a - | \x0e8d - | [\x0e94-\x0e97] - | [\x0e99-\x0e9f] - | [\x0ea1-\x0ea3] + | [\x0e86-\x0e8a] + | [\x0e8c-\x0ea3] | \x0ea5 - | \x0ea7 - | [\x0eaa-\x0eab] - | [\x0ead-\x0eb9] - | [\x0ebb-\x0ebd] + | 
[\x0ea7-\x0ebd] | [\x0ec0-\x0ec4] | \x0ec6 - | [\x0ec8-\x0ecd] + | [\x0ec8-\x0ece] | [\x0ed0-\x0ed9] - | [\x0edc-\x0edd] + | [\x0edc-\x0edf] | \x0f00 | [\x0f18-\x0f19] | [\x0f20-\x0f29] @@ -649,81 +941,87 @@ import Data.Word ( Word8 ) | \x0f37 | \x0f39 | [\x0f3e-\x0f47] - | [\x0f49-\x0f6a] + | [\x0f49-\x0f6c] | [\x0f71-\x0f84] - | [\x0f86-\x0f8b] - | [\x0f90-\x0f97] + | [\x0f86-\x0f97] | [\x0f99-\x0fbc] | \x0fc6 - | [\x1000-\x1021] - | [\x1023-\x1027] - | [\x1029-\x102a] - | [\x102c-\x1032] - | [\x1036-\x1039] - | [\x1040-\x1049] - | [\x1050-\x1059] + | [\x1000-\x1049] + | [\x1050-\x109d] | [\x10a0-\x10c5] - | [\x10d0-\x10f8] - | [\x1100-\x1159] - | [\x115f-\x11a2] - | [\x11a8-\x11f9] - | [\x1200-\x1206] - | [\x1208-\x1246] - | \x1248 + | \x10c7 + | \x10cd + | [\x10d0-\x10fa] + | [\x10fc-\x1248] | [\x124a-\x124d] | [\x1250-\x1256] | \x1258 | [\x125a-\x125d] - | [\x1260-\x1286] - | \x1288 + | [\x1260-\x1288] | [\x128a-\x128d] - | [\x1290-\x12ae] - | \x12b0 + | [\x1290-\x12b0] | [\x12b2-\x12b5] | [\x12b8-\x12be] | \x12c0 | [\x12c2-\x12c5] - | [\x12c8-\x12ce] - | [\x12d0-\x12d6] - | [\x12d8-\x12ee] - | [\x12f0-\x130e] - | \x1310 + | [\x12c8-\x12d6] + | [\x12d8-\x1310] | [\x1312-\x1315] - | [\x1318-\x131e] - | [\x1320-\x1346] - | [\x1348-\x135a] + | [\x1318-\x135a] + | [\x135d-\x135f] | [\x1369-\x1371] - | [\x13a0-\x13f4] + | [\x1380-\x138f] + | [\x13a0-\x13f5] + | [\x13f8-\x13fd] | [\x1401-\x166c] - | [\x166f-\x1676] + | [\x166f-\x167f] | [\x1681-\x169a] | [\x16a0-\x16ea] - | [\x16ee-\x16f0] - | [\x1700-\x170c] - | [\x170e-\x1714] - | [\x1720-\x1734] + | [\x16ee-\x16f8] + | [\x1700-\x1715] + | [\x171f-\x1734] | [\x1740-\x1753] | [\x1760-\x176c] | [\x176e-\x1770] | [\x1772-\x1773] - | [\x1780-\x17b3] - | [\x17b6-\x17d3] + | [\x1780-\x17d3] | \x17d7 | [\x17dc-\x17dd] | [\x17e0-\x17e9] | [\x180b-\x180d] - | [\x1810-\x1819] - | [\x1820-\x1877] - | [\x1880-\x18a9] - | [\x1900-\x191c] + | [\x180f-\x1819] + | [\x1820-\x1878] + | [\x1880-\x18aa] + | [\x18b0-\x18f5] + | 
[\x1900-\x191e] | [\x1920-\x192b] | [\x1930-\x193b] | [\x1946-\x196d] | [\x1970-\x1974] - | [\x1d00-\x1d6b] - | [\x1e00-\x1e9b] - | [\x1ea0-\x1ef9] - | [\x1f00-\x1f15] + | [\x1980-\x19ab] + | [\x19b0-\x19c9] + | [\x19d0-\x19da] + | [\x1a00-\x1a1b] + | [\x1a20-\x1a5e] + | [\x1a60-\x1a7c] + | [\x1a7f-\x1a89] + | [\x1a90-\x1a99] + | \x1aa7 + | [\x1ab0-\x1abd] + | [\x1abf-\x1ace] + | [\x1b00-\x1b4c] + | [\x1b50-\x1b59] + | [\x1b6b-\x1b73] + | [\x1b80-\x1bf3] + | [\x1c00-\x1c37] + | [\x1c40-\x1c49] + | [\x1c4d-\x1c7d] + | [\x1c80-\x1c88] + | [\x1c90-\x1cba] + | [\x1cbd-\x1cbf] + | [\x1cd0-\x1cd2] + | [\x1cd4-\x1cfa] + | [\x1d00-\x1f15] | [\x1f18-\x1f1d] | [\x1f20-\x1f45] | [\x1f48-\x1f4d] @@ -742,13 +1040,15 @@ import Data.Word ( Word8 ) | [\x1fe0-\x1fec] | [\x1ff2-\x1ff4] | [\x1ff6-\x1ffc] + | [\x200c-\x200d] | [\x203f-\x2040] | \x2054 | \x2071 | \x207f + | [\x2090-\x209c] | [\x20d0-\x20dc] | \x20e1 - | [\x20e5-\x20ea] + | [\x20e5-\x20f0] | \x2102 | \x2107 | [\x210a-\x2113] @@ -757,11 +1057,28 @@ import Data.Word ( Word8 ) | \x2124 | \x2126 | \x2128 - | [\x212a-\x2131] - | [\x2133-\x2139] - | [\x213d-\x213f] + | [\x212a-\x2139] + | [\x213c-\x213f] | [\x2145-\x2149] - | [\x2160-\x2183] + | \x214e + | [\x2160-\x2188] + | [\x2c00-\x2ce4] + | [\x2ceb-\x2cf3] + | [\x2d00-\x2d25] + | \x2d27 + | \x2d2d + | [\x2d30-\x2d67] + | \x2d6f + | [\x2d7f-\x2d96] + | [\x2da0-\x2da6] + | [\x2da8-\x2dae] + | [\x2db0-\x2db6] + | [\x2db8-\x2dbe] + | [\x2dc0-\x2dc6] + | [\x2dc8-\x2dce] + | [\x2dd0-\x2dd6] + | [\x2dd8-\x2dde] + | [\x2de0-\x2dff] | [\x3005-\x3007] | [\x3021-\x302f] | [\x3031-\x3035] @@ -770,16 +1087,60 @@ import Data.Word ( Word8 ) | [\x3099-\x309a] | [\x309d-\x309f] | [\x30a1-\x30ff] - | [\x3105-\x312c] + | [\x3105-\x312f] | [\x3131-\x318e] - | [\x31a0-\x31b7] + | [\x31a0-\x31bf] | [\x31f0-\x31ff] - | [\x3400-\x4db5] - | [\x4e00-\x9fa5] - | [\xa000-\xa48c] + | [\x3400-\x4dbf] + | [\x4e00-\xa48c] + | [\xa4d0-\xa4fd] + | [\xa500-\xa60c] + | [\xa610-\xa62b] + | [\xa640-\xa66f] + 
| [\xa674-\xa67d] + | [\xa67f-\xa6f1] + | [\xa717-\xa71f] + | [\xa722-\xa788] + | [\xa78b-\xa7ca] + | [\xa7d0-\xa7d1] + | \xa7d3 + | [\xa7d5-\xa7d9] + | [\xa7f2-\xa827] + | \xa82c + | [\xa840-\xa873] + | [\xa880-\xa8c5] + | [\xa8d0-\xa8d9] + | [\xa8e0-\xa8f7] + | \xa8fb + | [\xa8fd-\xa92d] + | [\xa930-\xa953] + | [\xa960-\xa97c] + | [\xa980-\xa9c0] + | [\xa9cf-\xa9d9] + | [\xa9e0-\xa9fe] + | [\xaa00-\xaa36] + | [\xaa40-\xaa4d] + | [\xaa50-\xaa59] + | [\xaa60-\xaa76] + | [\xaa7a-\xaac2] + | [\xaadb-\xaadd] + | [\xaae0-\xaaef] + | [\xaaf2-\xaaf6] + | [\xab01-\xab06] + | [\xab09-\xab0e] + | [\xab11-\xab16] + | [\xab20-\xab26] + | [\xab28-\xab2e] + | [\xab30-\xab5a] + | [\xab5c-\xab69] + | [\xab70-\xabea] + | [\xabec-\xabed] + | [\xabf0-\xabf9] | [\xac00-\xd7a3] - | [\xf900-\xfa2d] - | [\xfa30-\xfa6a] + | [\xd7b0-\xd7c6] + | [\xd7cb-\xd7fb] + | [\xf900-\xfa6d] + | [\xfa70-\xfad9] | [\xfb00-\xfb06] | [\xfb13-\xfb17] | [\xfb1d-\xfb28] @@ -795,7 +1156,7 @@ import Data.Word ( Word8 ) | [\xfd92-\xfdc7] | [\xfdf0-\xfdf9] | [\xfe00-\xfe0f] - | [\xfe20-\xfe23] + | [\xfe20-\xfe2f] | [\xfe33-\xfe34] | [\xfe4d-\xfe4f] | \xfe71 @@ -814,106 +1175,355 @@ import Data.Word ( Word8 ) | [\xffca-\xffcf] | [\xffd2-\xffd7] | [\xffda-\xffdc] - | \xd800 [\xdc00-\xdc0a] - | \xd800 [\xdc0d-\xdc25] - | \xd800 [\xdc28-\xdc39] - | \xd800 [\xdc3c-\xdc3c] - | \xd800 [\xdc3f-\xdc4c] - | \xd800 [\xdc50-\xdc5c] - | \xd800 [\xdc80-\xdcf9] - | \xd800 [\xdf00-\xdf1d] - | \xd800 [\xdf30-\xdf49] - | \xd800 [\xdf80-\xdf9c] - | \xd801 [\xe000-\xe09c] - | \xd801 [\xe0a0-\xe0a8] - | \xd802 [\xe400-\xe404] - | \xd802 \x0808 - | \xd802 [\xe40a-\xe434] - | \xd802 [\xe437-\xe437] - | \xd802 \x083c - | \xd802 \x083f - | \xd834 [\xad65-\xad68] - | \xd834 [\xad6d-\xad71] - | \xd834 [\xad7b-\xad81] - | \xd834 [\xad85-\xad8a] - | \xd834 [\xadaa-\xadac] - | \xd835 [\xb000-\xb053] - | \xd835 [\xb056-\xb09b] - | \xd835 [\xb09e-\xb09e] - | \xd835 \xd4a2 - | \xd835 [\xb0a5-\xb0a5] - | \xd835 [\xb0a9-\xb0ab] - | \xd835 
[\xb0ae-\xb0b8] - | \xd835 \xd4bb - | \xd835 [\xb0bd-\xb0c2] - | \xd835 [\xb0c5-\xb104] - | \xd835 [\xb107-\xb109] - | \xd835 [\xb10d-\xb113] - | \xd835 [\xb116-\xb11b] - | \xd835 [\xb11e-\xb138] - | \xd835 [\xb13b-\xb13d] - | \xd835 [\xb140-\xb143] - | \xd835 \xd546 - | \xd835 [\xb14a-\xb14f] - | \xd835 [\xb152-\xb2a2] - | \xd835 [\xb2a8-\xb2bf] - | \xd835 [\xb2c2-\xb2d9] - | \xd835 [\xb2dc-\xb2f9] - | \xd835 [\xb2fc-\xb313] - | \xd835 [\xb316-\xb333] - | \xd835 [\xb336-\xb34d] - | \xd835 [\xb350-\xb36d] - | \xd835 [\xb370-\xb387] - | \xd835 [\xb38a-\xb3a7] - | \xd835 [\xb3aa-\xb3c1] - | \xd835 [\xb3c4-\xb3c8] - | \xd835 [\xb3ce-\xb3fe] - | \xd840 [\xdc00-\xdffe] - | \xd841 [\xe000-\xe3fe] - | \xd842 [\xe400-\xe7fe] - | \xd843 [\xe800-\xebfe] - | \xd844 [\xec00-\xeffe] - | \xd845 [\xf000-\xf3fe] - | \xd846 [\xf400-\xf7fe] - | \xd847 [\xf800-\xfbfe] - | \xd848 [\xfc00-\xfffe] - | \xd849 [\x0000-\x03fe] - | \xd84a [\x0400-\x07fe] - | \xd84b [\x0800-\x0bfe] - | \xd84c [\x0c00-\x0ffe] - | \xd84d [\x1000-\x13fe] - | \xd84e [\x1400-\x17fe] - | \xd84f [\x1800-\x1bfe] - | \xd850 [\x1c00-\x1ffe] - | \xd851 [\x2000-\x23fe] - | \xd852 [\x2400-\x27fe] - | \xd853 [\x2800-\x2bfe] - | \xd854 [\x2c00-\x2ffe] - | \xd855 [\x3000-\x33fe] - | \xd856 [\x3400-\x37fe] - | \xd857 [\x3800-\x3bfe] - | \xd858 [\x3c00-\x3ffe] - | \xd859 [\x4000-\x43fe] - | \xd85a [\x4400-\x47fe] - | \xd85b [\x4800-\x4bfe] - | \xd85c [\x4c00-\x4ffe] - | \xd85d [\x5000-\x53fe] - | \xd85e [\x5400-\x57fe] - | \xd85f [\x5800-\x5bfe] - | \xd860 [\x5c00-\x5ffe] - | \xd861 [\x6000-\x63fe] - | \xd862 [\x6400-\x67fe] - | \xd863 [\x6800-\x6bfe] - | \xd864 [\x6c00-\x6ffe] - | \xd865 [\x7000-\x73fe] - | \xd866 [\x7400-\x77fe] - | \xd867 [\x7800-\x7bfe] - | \xd868 [\x7c00-\x7ffe] - | \xd869 [\x8000-\x82d5] - | \xd87e [\xd400-\xd61c] - | \xdb40 [\xdd00-\xddee] - -@ident = @xid_start @xid_continue* + | [\x10000-\x1000b] + | [\x1000d-\x10026] + | [\x10028-\x1003a] + | [\x1003c-\x1003d] + | [\x1003f-\x1004d] + | 
[\x10050-\x1005d] + | [\x10080-\x100fa] + | [\x10140-\x10174] + | \x101fd + | [\x10280-\x1029c] + | [\x102a0-\x102d0] + | \x102e0 + | [\x10300-\x1031f] + | [\x1032d-\x1034a] + | [\x10350-\x1037a] + | [\x10380-\x1039d] + | [\x103a0-\x103c3] + | [\x103c8-\x103cf] + | [\x103d1-\x103d5] + | [\x10400-\x1049d] + | [\x104a0-\x104a9] + | [\x104b0-\x104d3] + | [\x104d8-\x104fb] + | [\x10500-\x10527] + | [\x10530-\x10563] + | [\x10570-\x1057a] + | [\x1057c-\x1058a] + | [\x1058c-\x10592] + | [\x10594-\x10595] + | [\x10597-\x105a1] + | [\x105a3-\x105b1] + | [\x105b3-\x105b9] + | [\x105bb-\x105bc] + | [\x10600-\x10736] + | [\x10740-\x10755] + | [\x10760-\x10767] + | [\x10780-\x10785] + | [\x10787-\x107b0] + | [\x107b2-\x107ba] + | [\x10800-\x10805] + | \x10808 + | [\x1080a-\x10835] + | [\x10837-\x10838] + | \x1083c + | [\x1083f-\x10855] + | [\x10860-\x10876] + | [\x10880-\x1089e] + | [\x108e0-\x108f2] + | [\x108f4-\x108f5] + | [\x10900-\x10915] + | [\x10920-\x10939] + | [\x10980-\x109b7] + | [\x109be-\x109bf] + | [\x10a00-\x10a03] + | [\x10a05-\x10a06] + | [\x10a0c-\x10a13] + | [\x10a15-\x10a17] + | [\x10a19-\x10a35] + | [\x10a38-\x10a3a] + | \x10a3f + | [\x10a60-\x10a7c] + | [\x10a80-\x10a9c] + | [\x10ac0-\x10ac7] + | [\x10ac9-\x10ae6] + | [\x10b00-\x10b35] + | [\x10b40-\x10b55] + | [\x10b60-\x10b72] + | [\x10b80-\x10b91] + | [\x10c00-\x10c48] + | [\x10c80-\x10cb2] + | [\x10cc0-\x10cf2] + | [\x10d00-\x10d27] + | [\x10d30-\x10d39] + | [\x10e80-\x10ea9] + | [\x10eab-\x10eac] + | [\x10eb0-\x10eb1] + | [\x10efd-\x10f1c] + | \x10f27 + | [\x10f30-\x10f50] + | [\x10f70-\x10f85] + | [\x10fb0-\x10fc4] + | [\x10fe0-\x10ff6] + | [\x11000-\x11046] + | [\x11066-\x11075] + | [\x1107f-\x110ba] + | \x110c2 + | [\x110d0-\x110e8] + | [\x110f0-\x110f9] + | [\x11100-\x11134] + | [\x11136-\x1113f] + | [\x11144-\x11147] + | [\x11150-\x11173] + | \x11176 + | [\x11180-\x111c4] + | [\x111c9-\x111cc] + | [\x111ce-\x111da] + | \x111dc + | [\x11200-\x11211] + | [\x11213-\x11237] + | [\x1123e-\x11241] + | 
[\x11280-\x11286] + | \x11288 + | [\x1128a-\x1128d] + | [\x1128f-\x1129d] + | [\x1129f-\x112a8] + | [\x112b0-\x112ea] + | [\x112f0-\x112f9] + | [\x11300-\x11303] + | [\x11305-\x1130c] + | [\x1130f-\x11310] + | [\x11313-\x11328] + | [\x1132a-\x11330] + | [\x11332-\x11333] + | [\x11335-\x11339] + | [\x1133b-\x11344] + | [\x11347-\x11348] + | [\x1134b-\x1134d] + | \x11350 + | \x11357 + | [\x1135d-\x11363] + | [\x11366-\x1136c] + | [\x11370-\x11374] + | [\x11400-\x1144a] + | [\x11450-\x11459] + | [\x1145e-\x11461] + | [\x11480-\x114c5] + | \x114c7 + | [\x114d0-\x114d9] + | [\x11580-\x115b5] + | [\x115b8-\x115c0] + | [\x115d8-\x115dd] + | [\x11600-\x11640] + | \x11644 + | [\x11650-\x11659] + | [\x11680-\x116b8] + | [\x116c0-\x116c9] + | [\x11700-\x1171a] + | [\x1171d-\x1172b] + | [\x11730-\x11739] + | [\x11740-\x11746] + | [\x11800-\x1183a] + | [\x118a0-\x118e9] + | [\x118ff-\x11906] + | \x11909 + | [\x1190c-\x11913] + | [\x11915-\x11916] + | [\x11918-\x11935] + | [\x11937-\x11938] + | [\x1193b-\x11943] + | [\x11950-\x11959] + | [\x119a0-\x119a7] + | [\x119aa-\x119d7] + | [\x119da-\x119e1] + | [\x119e3-\x119e4] + | [\x11a00-\x11a3e] + | \x11a47 + | [\x11a50-\x11a99] + | \x11a9d + | [\x11ab0-\x11af8] + | [\x11c00-\x11c08] + | [\x11c0a-\x11c36] + | [\x11c38-\x11c40] + | [\x11c50-\x11c59] + | [\x11c72-\x11c8f] + | [\x11c92-\x11ca7] + | [\x11ca9-\x11cb6] + | [\x11d00-\x11d06] + | [\x11d08-\x11d09] + | [\x11d0b-\x11d36] + | \x11d3a + | [\x11d3c-\x11d3d] + | [\x11d3f-\x11d47] + | [\x11d50-\x11d59] + | [\x11d60-\x11d65] + | [\x11d67-\x11d68] + | [\x11d6a-\x11d8e] + | [\x11d90-\x11d91] + | [\x11d93-\x11d98] + | [\x11da0-\x11da9] + | [\x11ee0-\x11ef6] + | [\x11f00-\x11f10] + | [\x11f12-\x11f3a] + | [\x11f3e-\x11f42] + | [\x11f50-\x11f59] + | \x11fb0 + | [\x12000-\x12399] + | [\x12400-\x1246e] + | [\x12480-\x12543] + | [\x12f90-\x12ff0] + | [\x13000-\x1342f] + | [\x13440-\x13455] + | [\x14400-\x14646] + | [\x16800-\x16a38] + | [\x16a40-\x16a5e] + | [\x16a60-\x16a69] + | 
[\x16a70-\x16abe] + | [\x16ac0-\x16ac9] + | [\x16ad0-\x16aed] + | [\x16af0-\x16af4] + | [\x16b00-\x16b36] + | [\x16b40-\x16b43] + | [\x16b50-\x16b59] + | [\x16b63-\x16b77] + | [\x16b7d-\x16b8f] + | [\x16e40-\x16e7f] + | [\x16f00-\x16f4a] + | [\x16f4f-\x16f87] + | [\x16f8f-\x16f9f] + | [\x16fe0-\x16fe1] + | [\x16fe3-\x16fe4] + | [\x16ff0-\x16ff1] + | [\x17000-\x187f7] + | [\x18800-\x18cd5] + | [\x18d00-\x18d08] + | [\x1aff0-\x1aff3] + | [\x1aff5-\x1affb] + | [\x1affd-\x1affe] + | [\x1b000-\x1b122] + | \x1b132 + | [\x1b150-\x1b152] + | \x1b155 + | [\x1b164-\x1b167] + | [\x1b170-\x1b2fb] + | [\x1bc00-\x1bc6a] + | [\x1bc70-\x1bc7c] + | [\x1bc80-\x1bc88] + | [\x1bc90-\x1bc99] + | [\x1bc9d-\x1bc9e] + | [\x1cf00-\x1cf2d] + | [\x1cf30-\x1cf46] + | [\x1d165-\x1d169] + | [\x1d16d-\x1d172] + | [\x1d17b-\x1d182] + | [\x1d185-\x1d18b] + | [\x1d1aa-\x1d1ad] + | [\x1d242-\x1d244] + | [\x1d400-\x1d454] + | [\x1d456-\x1d49c] + | [\x1d49e-\x1d49f] + | \x1d4a2 + | [\x1d4a5-\x1d4a6] + | [\x1d4a9-\x1d4ac] + | [\x1d4ae-\x1d4b9] + | \x1d4bb + | [\x1d4bd-\x1d4c3] + | [\x1d4c5-\x1d505] + | [\x1d507-\x1d50a] + | [\x1d50d-\x1d514] + | [\x1d516-\x1d51c] + | [\x1d51e-\x1d539] + | [\x1d53b-\x1d53e] + | [\x1d540-\x1d544] + | \x1d546 + | [\x1d54a-\x1d550] + | [\x1d552-\x1d6a5] + | [\x1d6a8-\x1d6c0] + | [\x1d6c2-\x1d6da] + | [\x1d6dc-\x1d6fa] + | [\x1d6fc-\x1d714] + | [\x1d716-\x1d734] + | [\x1d736-\x1d74e] + | [\x1d750-\x1d76e] + | [\x1d770-\x1d788] + | [\x1d78a-\x1d7a8] + | [\x1d7aa-\x1d7c2] + | [\x1d7c4-\x1d7cb] + | [\x1d7ce-\x1d7ff] + | [\x1da00-\x1da36] + | [\x1da3b-\x1da6c] + | \x1da75 + | \x1da84 + | [\x1da9b-\x1da9f] + | [\x1daa1-\x1daaf] + | [\x1df00-\x1df1e] + | [\x1df25-\x1df2a] + | [\x1e000-\x1e006] + | [\x1e008-\x1e018] + | [\x1e01b-\x1e021] + | [\x1e023-\x1e024] + | [\x1e026-\x1e02a] + | [\x1e030-\x1e06d] + | \x1e08f + | [\x1e100-\x1e12c] + | [\x1e130-\x1e13d] + | [\x1e140-\x1e149] + | \x1e14e + | [\x1e290-\x1e2ae] + | [\x1e2c0-\x1e2f9] + | [\x1e4d0-\x1e4f9] + | [\x1e7e0-\x1e7e6] + | 
[\x1e7e8-\x1e7eb] + | [\x1e7ed-\x1e7ee] + | [\x1e7f0-\x1e7fe] + | [\x1e800-\x1e8c4] + | [\x1e8d0-\x1e8d6] + | [\x1e900-\x1e94b] + | [\x1e950-\x1e959] + | [\x1ee00-\x1ee03] + | [\x1ee05-\x1ee1f] + | [\x1ee21-\x1ee22] + | \x1ee24 + | \x1ee27 + | [\x1ee29-\x1ee32] + | [\x1ee34-\x1ee37] + | \x1ee39 + | \x1ee3b + | \x1ee42 + | \x1ee47 + | \x1ee49 + | \x1ee4b + | [\x1ee4d-\x1ee4f] + | [\x1ee51-\x1ee52] + | \x1ee54 + | \x1ee57 + | \x1ee59 + | \x1ee5b + | \x1ee5d + | \x1ee5f + | [\x1ee61-\x1ee62] + | \x1ee64 + | [\x1ee67-\x1ee6a] + | [\x1ee6c-\x1ee72] + | [\x1ee74-\x1ee77] + | [\x1ee79-\x1ee7c] + | \x1ee7e + | [\x1ee80-\x1ee89] + | [\x1ee8b-\x1ee9b] + | [\x1eea1-\x1eea3] + | [\x1eea5-\x1eea9] + | [\x1eeab-\x1eebb] + | [\x1fbf0-\x1fbf9] + | [\x20000-\x2a6df] + | [\x2a700-\x2b739] + | [\x2b740-\x2b81d] + | [\x2b820-\x2cea1] + | [\x2ceb0-\x2ebe0] + | [\x2ebf0-\x2ee5d] + | [\x2f800-\x2fa1d] + | [\x30000-\x3134a] + | [\x31350-\x323af] + | [\xe0100-\xe01ef] + +-- End of code generated by "scripts/unicode.py". + +-- See https://github.com/rust-lang/rust/blob/ac77e88f7a84e20311f5518e34c806503d586c1c/compiler/rustc_lexer/src/lib.rs#L313-L326 +@id_start = "_" | @XID_Start +@id_continue = @XID_Continue + +@ident = @id_start @id_continue* @raw_ident = r \# @ident @lifetime = \' @ident @@ -944,7 +1554,7 @@ $hexit = [0-9a-fA-F] \' @lit_byte - = b\' ( \\ @byte_escape + = b\' ( \\ @byte_escape | [^\\'\n\t\r] [ \udc00-\udfff ]? ) \' @@ -1020,28 +1630,28 @@ $white+ { \s -> pure (Space Whitespace s) } "/=" { token SlashEqual } "^=" { token CaretEqual } "%=" { token PercentEqual } - - -"@" { token At } -"." { token Dot } -".." { token DotDot } -"..." { token DotDotDot } -"..=" { token DotDotEqual } -"," { token Comma } -";" { token Semicolon } + + +"@" { token At } +"." { token Dot } +".." { token DotDot } +"..." 
{ token DotDotDot } +"..=" { token DotDotEqual } +"," { token Comma } +";" { token Semicolon } ":" { token Colon } "::" { token ModSep } "->" { token RArrow } "<-" { token LArrow } "=>" { token FatArrow } -"(" { token (OpenDelim Paren) } -")" { token (CloseDelim Paren) } +"(" { token (OpenDelim Paren) } +")" { token (CloseDelim Paren) } "[" { token (OpenDelim Bracket) } "]" { token (CloseDelim Bracket) } -"{" { token (OpenDelim Brace) } -"}" { token (CloseDelim Brace) } -"#" { token Pound } -"$" { token Dollar } +"{" { token (OpenDelim Brace) } +"}" { token (CloseDelim Brace) } +"#" { token Pound } +"$" { token Dollar } @lit_integer { \i -> literal (IntegerTok i) } @lit_float { \f -> literal (FloatTok f) } @@ -1070,13 +1680,13 @@ $white+ { \s -> pure (Space Whitespace s) } @ident { \s -> pure (IdentTok (mkIdent s)) } \? { token Question } -@raw_ident { \s -> pure (IdentTok ((mkIdent (drop 2 s)){ raw = True })) } -@ident { \s -> pure (IdentTok (mkIdent s)) } +@raw_ident { \s -> pure (IdentTok ((mkIdent (drop 2 s)){ raw = True })) } +@ident { \s -> pure (IdentTok (mkIdent s)) } @lifetime { \s -> (pure (LifetimeTok (mkIdent (tail s))) :: P Token) } -@outer_doc_line { \c -> pure (Doc (drop 3 c) Outer False) } -@outer_doc_line \r { \c -> pure (Doc (drop 3 (init c)) Outer False) } +@outer_doc_line { \c -> pure (Doc (drop 3 c) Outer False) } +@outer_doc_line \r { \c -> pure (Doc (drop 3 (init c)) Outer False) } @outer_doc_inline / ( [^\*] | \r | \n ) { \_ -> Doc <$> nestedComment <*> pure Outer <*> pure True } @@ -1095,8 +1705,8 @@ token t _ = pure t -- | Given the first part of a literal, try to parse also a suffix. Even if -- the allowed suffixes are very well defined and only valid on integer and -- float literals, we need to put in the same token whatever suffix follows. --- This is for backwards compatibility if Rust decides to ever add suffixes. -literal :: LitTok -> P Token +-- This is for backwards compatibility if Rust decides to ever add suffixes. 
+literal :: LitTok -> P Token literal lit = do pos <- getPosition inp <- getInput @@ -1119,16 +1729,16 @@ rawString n = do case c_m of -- The string was never closed Nothing -> fail "Invalid raw (byte)string" - + -- The string has a chance of being closed Just '"' -> do n' <- greedyChar '#' n if n' == n then pure "" - else (('"' : replicate n' '#') ++) <$> rawString n + else (('"' : replicate n' '#') ++) <$> rawString n -- Just another character... - Just c -> ([c] ++) <$> rawString n + Just c -> ([c] ++) <$> rawString n -- | Consume a full inline comment (which may be nested). nestedComment :: P String @@ -1142,15 +1752,15 @@ nestedComment = go 1 "" Nothing -> fail "Unclosed comment" Just '*' -> do c' <- peekChar - case c' of + case c' of Nothing -> fail "Unclosed comment" Just '/' -> nextChar *> go (n-1) ('/':'*':s) Just _ -> go n ('*':s) Just '/' -> do c' <- peekChar - case c' of + case c' of Nothing -> fail "Unclosed comment" - Just '*' -> nextChar *> go (n+1) ('*':'/':s) + Just '*' -> nextChar *> go (n+1) ('*':'/':s) Just _ -> go n ('/':s) Just c' -> go n (c':s) @@ -1162,7 +1772,7 @@ nextChar :: P (Maybe Char) nextChar = do pos <- getPosition inp <- getInput - if inputStreamEmpty inp + if inputStreamEmpty inp then pure Nothing else let (c,inp') = takeChar inp pos' = alexMove pos c @@ -1173,7 +1783,7 @@ nextChar = do peekChar :: P (Maybe Char) peekChar = do inp <- getInput - if inputStreamEmpty inp + if inputStreamEmpty inp then pure Nothing else let (c,_) = takeChar inp in pure (Just c) @@ -1195,7 +1805,7 @@ lexicalError = do fail ("Lexical error: the character " ++ show c ++ " does not fit here") --- Functions required by Alex +-- Functions required by Alex -- | type passed around by Alex functions (required by Alex) type AlexInput = (Position, -- current position, @@ -1223,7 +1833,7 @@ alexMove pos '\n' = retPos pos alexMove pos '\r' = incOffset pos 1 alexMove pos _ = incPos pos 1 --- | Lexer for one 'Token'. 
The only token this cannot produce is 'Interpolated'. +-- | Lexer for one 'Token'. The only token this cannot produce is 'Interpolated'. lexToken :: P (Spanned Token) lexToken = do tok_maybe <- popToken diff --git a/test/unit-tests/LexerTest.hs b/test/unit-tests/LexerTest.hs index 12e7768..10f796c 100644 --- a/test/unit-tests/LexerTest.hs +++ b/test/unit-tests/LexerTest.hs @@ -15,11 +15,11 @@ import Language.Rust.Data.InputStream lexerSuite :: Test lexerSuite = testGroup "lexer suite" [ commonCode, literals ] --- | This contains some random real-life code fragments. The purpose here is +-- | This contains some random real-life code fragments. The purpose here is -- primarily black-box testing. commonCode :: Test commonCode = testGroup "lexing common code fragments" - [ testCode "let span = $p.span;" + [ testCode "let span = $p.span;" [ IdentTok (mkIdent "let") , Space Whitespace " " , IdentTok (mkIdent "span") @@ -32,7 +32,7 @@ commonCode = testGroup "lexing common code fragments" , IdentTok (mkIdent "span") , Semicolon ] - , testCode "$(p.span),+" + , testCode "$(p.span),+" [ Dollar , OpenDelim Paren , IdentTok (mkIdent "p") @@ -94,7 +94,7 @@ commonCode = testGroup "lexing common code fragments" [ IdentTok (mkIdent "fn") , Space Whitespace " " , IdentTok (mkIdent "ܐ_ܐ") - , OpenDelim Paren + , OpenDelim Paren , CloseDelim Paren , Space Whitespace " " , OpenDelim Brace @@ -122,6 +122,10 @@ commonCode = testGroup "lexing common code fragments" , LiteralTok (IntegerTok "1") Nothing ] + -- Unicode characters that require surrogate pairs to encode in UTF-16. These + -- serve as regression tests for issue #3. + , testCode "𝑂_𝑂" [ IdentTok (mkIdent "𝑂_𝑂") ] + , testCode "𐌝" [ IdentTok (mkIdent "𐌝") ] ]