From fd184b15a5b69e42f50c1ed62e2bb771b43eefcf Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Wed, 28 Aug 2024 15:46:46 -0400 Subject: [PATCH 1/2] Whitespace only --- src/Language/Rust/Parser/Lexer.x | 68 ++++++++++++++++---------------- test/unit-tests/LexerTest.hs | 8 ++-- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/src/Language/Rust/Parser/Lexer.x b/src/Language/Rust/Parser/Lexer.x index e08332b..1b65ac4 100644 --- a/src/Language/Rust/Parser/Lexer.x +++ b/src/Language/Rust/Parser/Lexer.x @@ -18,7 +18,7 @@ bitwise and, and unary reference), @&&&x&&&y@ lexes into 'AmpersandAmpersand', ' @'IdentTok' "x"@, 'AmpersandAmpersand', 'Ampersand', @'IdentTok' "y"@. Although the parser sometimes needs to "break apart" tokens, it never has to think about putting them together. That means it can easily figure out that @&&&x&&&y@ parses as @&(&(&x)) && (&y)@ and not @&(&(&x)) & (&(&y))@ even if -bitwise conjunctions bind more tightly that logical conjunctions. +bitwise conjunctions bind more tightly that logical conjunctions. This sort of amguity where one token need to be broken up by the parser occurs for @@ -27,7 +27,7 @@ This sort of amguity where one token need to be broken up by the parser occurs f * @<<@ in qualified type paths like @FromIterator\<\::Item\>@ * @>>@ in qualified paths like @\\>::Bar@ * @>=@ in equality predicates like @F\=i32@ - * @>>=@ in equality predicates like @F\\>=i32@ + * @>>=@ in equality predicates like @F\\>=i32@ -} module Language.Rust.Parser.Lexer ( @@ -944,7 +944,7 @@ $hexit = [0-9a-fA-F] \' @lit_byte - = b\' ( \\ @byte_escape + = b\' ( \\ @byte_escape | [^\\'\n\t\r] [ \udc00-\udfff ]? ) \' @@ -1020,28 +1020,28 @@ $white+ { \s -> pure (Space Whitespace s) } "/=" { token SlashEqual } "^=" { token CaretEqual } "%=" { token PercentEqual } - - -"@" { token At } -"." { token Dot } -".." { token DotDot } -"..." 
{ token DotDotDot } -"..=" { token DotDotEqual } -"," { token Comma } -";" { token Semicolon } + + +"@" { token At } +"." { token Dot } +".." { token DotDot } +"..." { token DotDotDot } +"..=" { token DotDotEqual } +"," { token Comma } +";" { token Semicolon } ":" { token Colon } "::" { token ModSep } "->" { token RArrow } "<-" { token LArrow } "=>" { token FatArrow } -"(" { token (OpenDelim Paren) } -")" { token (CloseDelim Paren) } +"(" { token (OpenDelim Paren) } +")" { token (CloseDelim Paren) } "[" { token (OpenDelim Bracket) } "]" { token (CloseDelim Bracket) } -"{" { token (OpenDelim Brace) } -"}" { token (CloseDelim Brace) } -"#" { token Pound } -"$" { token Dollar } +"{" { token (OpenDelim Brace) } +"}" { token (CloseDelim Brace) } +"#" { token Pound } +"$" { token Dollar } @lit_integer { \i -> literal (IntegerTok i) } @lit_float { \f -> literal (FloatTok f) } @@ -1070,13 +1070,13 @@ $white+ { \s -> pure (Space Whitespace s) } @ident { \s -> pure (IdentTok (mkIdent s)) } \? { token Question } -@raw_ident { \s -> pure (IdentTok ((mkIdent (drop 2 s)){ raw = True })) } -@ident { \s -> pure (IdentTok (mkIdent s)) } +@raw_ident { \s -> pure (IdentTok ((mkIdent (drop 2 s)){ raw = True })) } +@ident { \s -> pure (IdentTok (mkIdent s)) } @lifetime { \s -> (pure (LifetimeTok (mkIdent (tail s))) :: P Token) } -@outer_doc_line { \c -> pure (Doc (drop 3 c) Outer False) } -@outer_doc_line \r { \c -> pure (Doc (drop 3 (init c)) Outer False) } +@outer_doc_line { \c -> pure (Doc (drop 3 c) Outer False) } +@outer_doc_line \r { \c -> pure (Doc (drop 3 (init c)) Outer False) } @outer_doc_inline / ( [^\*] | \r | \n ) { \_ -> Doc <$> nestedComment <*> pure Outer <*> pure True } @@ -1095,8 +1095,8 @@ token t _ = pure t -- | Given the first part of a literal, try to parse also a suffix. Even if -- the allowed suffixes are very well defined and only valid on integer and -- float literals, we need to put in the same token whatever suffix follows. 
--- This is for backwards compatibility if Rust decides to ever add suffixes. -literal :: LitTok -> P Token +-- This is for backwards compatibility if Rust decides to ever add suffixes. +literal :: LitTok -> P Token literal lit = do pos <- getPosition inp <- getInput @@ -1119,16 +1119,16 @@ rawString n = do case c_m of -- The string was never closed Nothing -> fail "Invalid raw (byte)string" - + -- The string has a chance of being closed Just '"' -> do n' <- greedyChar '#' n if n' == n then pure "" - else (('"' : replicate n' '#') ++) <$> rawString n + else (('"' : replicate n' '#') ++) <$> rawString n -- Just another character... - Just c -> ([c] ++) <$> rawString n + Just c -> ([c] ++) <$> rawString n -- | Consume a full inline comment (which may be nested). nestedComment :: P String @@ -1142,15 +1142,15 @@ nestedComment = go 1 "" Nothing -> fail "Unclosed comment" Just '*' -> do c' <- peekChar - case c' of + case c' of Nothing -> fail "Unclosed comment" Just '/' -> nextChar *> go (n-1) ('/':'*':s) Just _ -> go n ('*':s) Just '/' -> do c' <- peekChar - case c' of + case c' of Nothing -> fail "Unclosed comment" - Just '*' -> nextChar *> go (n+1) ('*':'/':s) + Just '*' -> nextChar *> go (n+1) ('*':'/':s) Just _ -> go n ('/':s) Just c' -> go n (c':s) @@ -1162,7 +1162,7 @@ nextChar :: P (Maybe Char) nextChar = do pos <- getPosition inp <- getInput - if inputStreamEmpty inp + if inputStreamEmpty inp then pure Nothing else let (c,inp') = takeChar inp pos' = alexMove pos c @@ -1173,7 +1173,7 @@ nextChar = do peekChar :: P (Maybe Char) peekChar = do inp <- getInput - if inputStreamEmpty inp + if inputStreamEmpty inp then pure Nothing else let (c,_) = takeChar inp in pure (Just c) @@ -1195,7 +1195,7 @@ lexicalError = do fail ("Lexical error: the character " ++ show c ++ " does not fit here") --- Functions required by Alex +-- Functions required by Alex -- | type passed around by Alex functions (required by Alex) type AlexInput = (Position, -- current position, @@ -1223,7 
+1223,7 @@ alexMove pos '\n' = retPos pos alexMove pos '\r' = incOffset pos 1 alexMove pos _ = incPos pos 1 --- | Lexer for one 'Token'. The only token this cannot produce is 'Interpolated'. +-- | Lexer for one 'Token'. The only token this cannot produce is 'Interpolated'. lexToken :: P (Spanned Token) lexToken = do tok_maybe <- popToken diff --git a/test/unit-tests/LexerTest.hs b/test/unit-tests/LexerTest.hs index 12e7768..445d197 100644 --- a/test/unit-tests/LexerTest.hs +++ b/test/unit-tests/LexerTest.hs @@ -15,11 +15,11 @@ import Language.Rust.Data.InputStream lexerSuite :: Test lexerSuite = testGroup "lexer suite" [ commonCode, literals ] --- | This contains some random real-life code fragments. The purpose here is +-- | This contains some random real-life code fragments. The purpose here is -- primarily black-box testing. commonCode :: Test commonCode = testGroup "lexing common code fragments" - [ testCode "let span = $p.span;" + [ testCode "let span = $p.span;" [ IdentTok (mkIdent "let") , Space Whitespace " " , IdentTok (mkIdent "span") @@ -32,7 +32,7 @@ commonCode = testGroup "lexing common code fragments" , IdentTok (mkIdent "span") , Semicolon ] - , testCode "$(p.span),+" + , testCode "$(p.span),+" [ Dollar , OpenDelim Paren , IdentTok (mkIdent "p") @@ -94,7 +94,7 @@ commonCode = testGroup "lexing common code fragments" [ IdentTok (mkIdent "fn") , Space Whitespace " " , IdentTok (mkIdent "ܐ_ܐ") - , OpenDelim Paren + , OpenDelim Paren , CloseDelim Paren , Space Whitespace " " , OpenDelim Brace From 86a65407cd7c77594f51d7dc748132a551db6965 Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Wed, 28 Aug 2024 14:57:04 -0400 Subject: [PATCH 2/2] Lexer: Properly support Unicode 15.1.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous lexer implementation in `Language.Rust.Parser.Lexer` was broken for Unicode characters with sufficiently large codepoints, as the previous implementation incorrectly attempted to 
port UTF-16–encoded codepoints over to `alex`, which is UTF-8–encoded.

Rather than try to fix the previous implementation (which was based on old
`rustc` code that is no longer used), this ports the lexer to a new
implementation that is based on the Rust `unicode-xid` crate (which is how
modern versions of `rustc` lex Unicode characters). Specifically:

* This adapts `unicode-xid`'s lexer generation script to generate an
  `alex`-based lexer instead of a Rust-based one.
* The new lexer is generated to support codepoints from Unicode 15.1.0. (It is
  unclear which exact Unicode version the previous lexer targeted, but given
  that it was last updated in 2016, it was likely quite an old version.)
* I have verified that the new lexer can lex exotic Unicode characters such as
  `𝑂` and `𐌝` by adding them as regression tests.

Fixes #3.
---
 .gitignore                       |    4 +
 scripts/unicode.py               |  172 ++++
 src/Language/Rust/Parser/Lexer.x | 1432 +++++++++++++++++++++---------
 test/unit-tests/LexerTest.hs     |    4 +
 4 files changed, 1201 insertions(+), 411 deletions(-)
 create mode 100755 scripts/unicode.py

diff --git a/.gitignore b/.gitignore
index 37fb369..165a17d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,7 @@ sample-sources/
 !sample-sources/statement-expressions.rs
 !sample-sources/statements.rs
 !sample-sources/types.rs
+
+# Unicode-related autogenerated files
+DerivedCoreProperties.txt
+UnicodeLexer.x
diff --git a/scripts/unicode.py b/scripts/unicode.py
new file mode 100755
index 0000000..39d954f
--- /dev/null
+++ b/scripts/unicode.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+#
+# Copyright 2011-2015 The Rust Project Developers
+#           2024 Galois Inc.
+#
+# This script was originally created by the Rust Project Developers as part of
+# the `unicode-xid` crate:
+#
+#   https://github.com/unicode-rs/unicode-xid/blob/b3a2718b062da229c0a50d12281de0e5d8e8cff6/scripts/unicode.py
+#
+# See the COPYRIGHT file in the `unicode-xid` crate:
+#
+#   https://github.com/unicode-rs/unicode-xid/blob/b3a2718b062da229c0a50d12281de0e5d8e8cff6/COPYRIGHT
+#
+# Galois Inc. has modified the script to generate an `alex`-based lexer instead
+# of a Rust-based lexer.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+import fileinput, re, os, sys
+
+unicode_version = (15, 1, 0)
+
+preamble = '''-- NOTE: The following code was generated by "scripts/unicode.py", do not edit directly
+--
+-- If you need to update this code, perform the following steps:
+--
+-- 1. (Optional) Update the value of `unicode_version` in "scripts/unicode.py".
+-- 2. Run the "scripts/unicode.py" script.
+-- 3. Copy the code (including the comments) in the autogenerated `UnicodeLexer.x` file.
+-- 4. Replace the existing autogenerated code here.
+'''
+
+postamble = '''-- End of code generated by "scripts/unicode.py".
+'''
+
+def unicode_url(f):
+    """Return the download URL of UCD file `f` for `unicode_version`."""
+    return "https://www.unicode.org/Public/%s.%s.%s/ucd/%s" % (unicode_version + (f,))
+
+def fetch(f):
+    """Download UCD file `f` into the current directory unless already present.
+
+    Exits the script with status 1 if the file is still missing afterwards.
+    """
+    name = os.path.basename(f)
+    if not os.path.exists(name):
+        # -f: fail on HTTP errors instead of saving the server's error page
+        # as the data file; -L: follow redirects.
+        os.system("curl -fL -O %s" % unicode_url(f))
+
+    if not os.path.exists(name):
+        sys.stderr.write("cannot load %s\n" % f)
+        sys.exit(1)
+
+def group_cat(cat):
+    """Collapse codepoints into sorted, inclusive (lo, hi) ranges."""
+    cat_out = []
+    letters = sorted(set(cat))
+    if not letters:
+        # An empty category has no ranges (and nothing to pop below).
+        return cat_out
+    cur_start = letters.pop(0)
+    cur_end = cur_start
+    for letter in letters:
+        assert letter > cur_end, \
+            "cur_end: %s, letter: %s" % (hex(cur_end), hex(letter))
+        if letter == cur_end + 1:
+            cur_end = letter
+        else:
+            cat_out.append((cur_start, cur_end))
+            cur_start = cur_end = letter
+    cat_out.append((cur_start, cur_end))
+    return cat_out
+
+def ungroup_cat(cat):
+    """Expand inclusive (lo, hi) ranges into a flat list of codepoints."""
+    cat_out = []
+    for (lo, hi) in cat:
+        cat_out.extend(range(lo, hi + 1))
+    return cat_out
+
+def format_table_content(f, content, indent):
+    """Write the "|"-separated `content` to `f` as an alex alternation.
+
+    Each alternative goes on its own line, indented by `indent` spaces; the
+    first is introduced by "= " and the rest by "| ". A blank line follows.
+    """
+    pad = " " * indent
+    chunks = content.split("|")
+    lines = [pad + "= " + chunks[0]]
+    lines.extend(pad + "| " + chunk for chunk in chunks[1:])
+    f.write("\n".join(lines) + "\n\n")
+
+def load_properties(f, interestingprops):
+    """Parse UCD property file `f` into a dict of property name -> ranges.
+
+    Only properties named in `interestingprops` are kept (all of them if it is
+    empty). Each value is a sorted list of merged, inclusive (lo, hi) codepoint
+    ranges.
+    """
+    fetch(f)
+    props = {}
+    # Matches "XXXX ; Prop" and "XXXX..YYYY ; Prop". Raw string: "\." and "\w"
+    # are invalid escape sequences in an ordinary string literal.
+    entry = re.compile(r"^ *([0-9A-F]+)(?:\.\.([0-9A-F]+))? *; *(\w+)")
+
+    for line in fileinput.input(os.path.basename(f)):
+        m = entry.match(line)
+        if not m:
+            continue
+        prop = m.group(3)
+        if interestingprops and prop not in interestingprops:
+            continue
+        d_lo = int(m.group(1), 16)
+        d_hi = int(m.group(2) or m.group(1), 16)
+        props.setdefault(prop, []).append((d_lo, d_hi))
+
+    # optimize if possible
+    for prop in props:
+        props[prop] = group_cat(ungroup_cat(props[prop]))
+
+    return props
+
+def escape_char(c):
+    """Format codepoint `c` as an alex hexadecimal escape such as \\x0041."""
+    return "\\x%04x" % c
+
+def emit_table(f, name, t_data):
+    """Write the alex macro `@name` matching any range in `t_data` to `f`."""
+    f.write("@%s\n" % name)
+    alternatives = []
+    for (lo, hi) in t_data:
+        if lo == hi:
+            alternatives.append(escape_char(lo))
+        else:
+            alternatives.append("[%s-%s]" % (escape_char(lo), escape_char(hi)))
+    format_table_content(f, "|".join(alternatives), 2)
+
+def emit_property_module(f, mod, tbl, emit):
+    """Emit one alex macro per property named in `emit`, looked up in `tbl`.
+
+    `mod` is unused; it is kept so existing call sites keep working.
+    """
+    for cat in emit:
+        emit_table(f, cat, tbl[cat])
+
+if __name__ == "__main__":
+    r = "UnicodeLexer.x"
+    if os.path.exists(r):
+        os.remove(r)
+    with open(r, "w") as rf:
+        # write the file's preamble
+        rf.write(preamble)
+
+        # download and parse all the data
+        rf.write('''
+-- Based on Unicode %s.%s.%s, using the following Unicode table:
+-- %s
+
+''' % (unicode_version + (unicode_url("DerivedCoreProperties.txt"),)))
+
+        want_derived = ["XID_Start", "XID_Continue"]
+        derived = load_properties("DerivedCoreProperties.txt", want_derived)
+        emit_property_module(rf, "derived_property", derived, want_derived)
+
+        # write the file's postamble
+        rf.write(postamble)
diff --git a/src/Language/Rust/Parser/Lexer.x b/src/Language/Rust/Parser/Lexer.x
index 1b65ac4..3acbad4 100644
--- a/src/Language/Rust/Parser/Lexer.x
+++ b/src/Language/Rust/Parser/Lexer.x
@@ -56,47 +56,50 @@ import Data.Word ( Word8 ) -- Things to review: -- * improved error messages --- Based heavily on: --- * --- * --- * - } --- XID_START unicode character class -@xid_start +-- NOTE: The following code was generated by "scripts/unicode.py", do not edit directly +-- +-- If you need to update this code, perform the following steps: +-- +-- 1. (Optional) Update the value of `unicode_version` in "scripts/unicode.py". +-- 2. Run the "scripts/unicode.py" script. +-- 3. Copy the code (including the comments) in the autogenerated `UnicodeLexer.x` file. +-- 4. Replace the existing autogenerated code here.
+ +-- Based on Unicode 15.1.0, using the following Unicode table: +-- https://www.unicode.org/Public/15.1.0/ucd/DerivedCoreProperties.txt + +@XID_Start = [\x0041-\x005a] - | "_" | [\x0061-\x007a] | \x00aa | \x00b5 | \x00ba | [\x00c0-\x00d6] | [\x00d8-\x00f6] - | [\x00f8-\x0236] - | [\x0250-\x02c1] + | [\x00f8-\x02c1] | [\x02c6-\x02d1] | [\x02e0-\x02e4] + | \x02ec | \x02ee + | [\x0370-\x0374] + | [\x0376-\x0377] + | [\x037b-\x037d] + | \x037f | \x0386 | [\x0388-\x038a] | \x038c | [\x038e-\x03a1] - | [\x03a3-\x03ce] - | [\x03d0-\x03f5] - | [\x03f7-\x03fb] - | [\x0400-\x0481] - | [\x048a-\x04ce] - | [\x04d0-\x04f5] - | [\x04f8-\x04f9] - | [\x0500-\x050f] + | [\x03a3-\x03f5] + | [\x03f7-\x0481] + | [\x048a-\x052f] | [\x0531-\x0556] | \x0559 - | [\x0561-\x0587] + | [\x0560-\x0588] | [\x05d0-\x05ea] - | [\x05f0-\x05f2] - | [\x0621-\x063a] - | [\x0640-\x064a] + | [\x05ef-\x05f2] + | [\x0620-\x064a] | [\x066e-\x066f] | [\x0671-\x06d3] | \x06d5 @@ -106,13 +109,25 @@ import Data.Word ( Word8 ) | \x06ff | \x0710 | [\x0712-\x072f] - | [\x074d-\x074f] - | [\x0780-\x07a5] + | [\x074d-\x07a5] | \x07b1 + | [\x07ca-\x07ea] + | [\x07f4-\x07f5] + | \x07fa + | [\x0800-\x0815] + | \x081a + | \x0824 + | \x0828 + | [\x0840-\x0858] + | [\x0860-\x086a] + | [\x0870-\x0887] + | [\x0889-\x088e] + | [\x08a0-\x08c9] | [\x0904-\x0939] | \x093d | \x0950 | [\x0958-\x0961] + | [\x0971-\x0980] | [\x0985-\x098c] | [\x098f-\x0990] | [\x0993-\x09a8] @@ -120,9 +135,11 @@ import Data.Word ( Word8 ) | \x09b2 | [\x09b6-\x09b9] | \x09bd + | \x09ce | [\x09dc-\x09dd] | [\x09df-\x09e1] | [\x09f0-\x09f1] + | \x09fc | [\x0a05-\x0a0a] | [\x0a0f-\x0a10] | [\x0a13-\x0a28] @@ -142,6 +159,7 @@ import Data.Word ( Word8 ) | \x0abd | \x0ad0 | [\x0ae0-\x0ae1] + | \x0af9 | [\x0b05-\x0b0c] | [\x0b0f-\x0b10] | [\x0b13-\x0b28] @@ -161,27 +179,34 @@ import Data.Word ( Word8 ) | [\x0b9e-\x0b9f] | [\x0ba3-\x0ba4] | [\x0ba8-\x0baa] - | [\x0bae-\x0bb5] - | [\x0bb7-\x0bb9] + | [\x0bae-\x0bb9] + | \x0bd0 | [\x0c05-\x0c0c] |
[\x0c0e-\x0c10] | [\x0c12-\x0c28] - | [\x0c2a-\x0c33] - | [\x0c35-\x0c39] + | [\x0c2a-\x0c39] + | \x0c3d + | [\x0c58-\x0c5a] + | \x0c5d | [\x0c60-\x0c61] + | \x0c80 | [\x0c85-\x0c8c] | [\x0c8e-\x0c90] | [\x0c92-\x0ca8] | [\x0caa-\x0cb3] | [\x0cb5-\x0cb9] | \x0cbd - | \x0cde + | [\x0cdd-\x0cde] | [\x0ce0-\x0ce1] - | [\x0d05-\x0d0c] + | [\x0cf1-\x0cf2] + | [\x0d04-\x0d0c] | [\x0d0e-\x0d10] - | [\x0d12-\x0d28] - | [\x0d2a-\x0d39] - | [\x0d60-\x0d61] + | [\x0d12-\x0d3a] + | \x0d3d + | \x0d4e + | [\x0d54-\x0d56] + | [\x0d5f-\x0d61] + | [\x0d7a-\x0d7f] | [\x0d85-\x0d96] | [\x0d9a-\x0db1] | [\x0db3-\x0dbb] @@ -192,83 +217,93 @@ import Data.Word ( Word8 ) | [\x0e40-\x0e46] | [\x0e81-\x0e82] | \x0e84 - | [\x0e87-\x0e88] - | \x0e8a - | \x0e8d - | [\x0e94-\x0e97] - | [\x0e99-\x0e9f] - | [\x0ea1-\x0ea3] + | [\x0e86-\x0e8a] + | [\x0e8c-\x0ea3] | \x0ea5 - | \x0ea7 - | [\x0eaa-\x0eab] - | [\x0ead-\x0eb0] + | [\x0ea7-\x0eb0] | \x0eb2 | \x0ebd | [\x0ec0-\x0ec4] | \x0ec6 - | [\x0edc-\x0edd] + | [\x0edc-\x0edf] | \x0f00 | [\x0f40-\x0f47] - | [\x0f49-\x0f6a] - | [\x0f88-\x0f8b] - | [\x1000-\x1021] - | [\x1023-\x1027] - | [\x1029-\x102a] + | [\x0f49-\x0f6c] + | [\x0f88-\x0f8c] + | [\x1000-\x102a] + | \x103f | [\x1050-\x1055] + | [\x105a-\x105d] + | \x1061 + | [\x1065-\x1066] + | [\x106e-\x1070] + | [\x1075-\x1081] + | \x108e | [\x10a0-\x10c5] - | [\x10d0-\x10f8] - | [\x1100-\x1159] - | [\x115f-\x11a2] - | [\x11a8-\x11f9] - | [\x1200-\x1206] - | [\x1208-\x1246] - | \x1248 + | \x10c7 + | \x10cd + | [\x10d0-\x10fa] + | [\x10fc-\x1248] | [\x124a-\x124d] | [\x1250-\x1256] | \x1258 | [\x125a-\x125d] - | [\x1260-\x1286] - | \x1288 + | [\x1260-\x1288] | [\x128a-\x128d] - | [\x1290-\x12ae] - | \x12b0 + | [\x1290-\x12b0] | [\x12b2-\x12b5] | [\x12b8-\x12be] | \x12c0 | [\x12c2-\x12c5] - | [\x12c8-\x12ce] - | [\x12d0-\x12d6] - | [\x12d8-\x12ee] - | [\x12f0-\x130e] - | \x1310 + | [\x12c8-\x12d6] + | [\x12d8-\x1310] | [\x1312-\x1315] - | [\x1318-\x131e] - | [\x1320-\x1346] - | [\x1348-\x135a] - | 
[\x13a0-\x13f4] + | [\x1318-\x135a] + | [\x1380-\x138f] + | [\x13a0-\x13f5] + | [\x13f8-\x13fd] | [\x1401-\x166c] - | [\x166f-\x1676] + | [\x166f-\x167f] | [\x1681-\x169a] | [\x16a0-\x16ea] - | [\x16ee-\x16f0] - | [\x1700-\x170c] - | [\x170e-\x1711] - | [\x1720-\x1731] + | [\x16ee-\x16f8] + | [\x1700-\x1711] + | [\x171f-\x1731] | [\x1740-\x1751] | [\x1760-\x176c] | [\x176e-\x1770] | [\x1780-\x17b3] | \x17d7 | \x17dc - | [\x1820-\x1877] + | [\x1820-\x1878] | [\x1880-\x18a8] - | [\x1900-\x191c] + | \x18aa + | [\x18b0-\x18f5] + | [\x1900-\x191e] | [\x1950-\x196d] | [\x1970-\x1974] - | [\x1d00-\x1d6b] - | [\x1e00-\x1e9b] - | [\x1ea0-\x1ef9] - | [\x1f00-\x1f15] + | [\x1980-\x19ab] + | [\x19b0-\x19c9] + | [\x1a00-\x1a16] + | [\x1a20-\x1a54] + | \x1aa7 + | [\x1b05-\x1b33] + | [\x1b45-\x1b4c] + | [\x1b83-\x1ba0] + | [\x1bae-\x1baf] + | [\x1bba-\x1be5] + | [\x1c00-\x1c23] + | [\x1c4d-\x1c4f] + | [\x1c5a-\x1c7d] + | [\x1c80-\x1c88] + | [\x1c90-\x1cba] + | [\x1cbd-\x1cbf] + | [\x1ce9-\x1cec] + | [\x1cee-\x1cf3] + | [\x1cf5-\x1cf6] + | \x1cfa + | [\x1d00-\x1dbf] + | [\x1e00-\x1f15] | [\x1f18-\x1f1d] | [\x1f20-\x1f45] | [\x1f48-\x1f4d] @@ -289,6 +324,7 @@ import Data.Word ( Word8 ) | [\x1ff6-\x1ffc] | \x2071 | \x207f + | [\x2090-\x209c] | \x2102 | \x2107 | [\x210a-\x2113] @@ -297,11 +333,28 @@ import Data.Word ( Word8 ) | \x2124 | \x2126 | \x2128 - | [\x212a-\x2131] - | [\x2133-\x2139] - | [\x213d-\x213f] + | [\x212a-\x2139] + | [\x213c-\x213f] | [\x2145-\x2149] - | [\x2160-\x2183] + | \x214e + | [\x2160-\x2188] + | [\x2c00-\x2ce4] + | [\x2ceb-\x2cee] + | [\x2cf2-\x2cf3] + | [\x2d00-\x2d25] + | \x2d27 + | \x2d2d + | [\x2d30-\x2d67] + | \x2d6f + | [\x2d80-\x2d96] + | [\x2da0-\x2da6] + | [\x2da8-\x2dae] + | [\x2db0-\x2db6] + | [\x2db8-\x2dbe] + | [\x2dc0-\x2dc6] + | [\x2dc8-\x2dce] + | [\x2dd0-\x2dd6] + | [\x2dd8-\x2dde] | [\x3005-\x3007] | [\x3021-\x3029] | [\x3031-\x3035] @@ -310,16 +363,69 @@ import Data.Word ( Word8 ) | [\x309d-\x309f] | [\x30a1-\x30fa] | [\x30fc-\x30ff] - | 
[\x3105-\x312c] + | [\x3105-\x312f] | [\x3131-\x318e] - | [\x31a0-\x31b7] + | [\x31a0-\x31bf] | [\x31f0-\x31ff] - | [\x3400-\x4db5] - | [\x4e00-\x9fa5] - | [\xa000-\xa48c] + | [\x3400-\x4dbf] + | [\x4e00-\xa48c] + | [\xa4d0-\xa4fd] + | [\xa500-\xa60c] + | [\xa610-\xa61f] + | [\xa62a-\xa62b] + | [\xa640-\xa66e] + | [\xa67f-\xa69d] + | [\xa6a0-\xa6ef] + | [\xa717-\xa71f] + | [\xa722-\xa788] + | [\xa78b-\xa7ca] + | [\xa7d0-\xa7d1] + | \xa7d3 + | [\xa7d5-\xa7d9] + | [\xa7f2-\xa801] + | [\xa803-\xa805] + | [\xa807-\xa80a] + | [\xa80c-\xa822] + | [\xa840-\xa873] + | [\xa882-\xa8b3] + | [\xa8f2-\xa8f7] + | \xa8fb + | [\xa8fd-\xa8fe] + | [\xa90a-\xa925] + | [\xa930-\xa946] + | [\xa960-\xa97c] + | [\xa984-\xa9b2] + | \xa9cf + | [\xa9e0-\xa9e4] + | [\xa9e6-\xa9ef] + | [\xa9fa-\xa9fe] + | [\xaa00-\xaa28] + | [\xaa40-\xaa42] + | [\xaa44-\xaa4b] + | [\xaa60-\xaa76] + | \xaa7a + | [\xaa7e-\xaaaf] + | \xaab1 + | [\xaab5-\xaab6] + | [\xaab9-\xaabd] + | \xaac0 + | \xaac2 + | [\xaadb-\xaadd] + | [\xaae0-\xaaea] + | [\xaaf2-\xaaf4] + | [\xab01-\xab06] + | [\xab09-\xab0e] + | [\xab11-\xab16] + | [\xab20-\xab26] + | [\xab28-\xab2e] + | [\xab30-\xab5a] + | [\xab5c-\xab69] + | [\xab70-\xabe2] | [\xac00-\xd7a3] - | [\xf900-\xfa2d] - | [\xfa30-\xfa6a] + | [\xd7b0-\xd7c6] + | [\xd7cb-\xd7fb] + | [\xf900-\xfa6d] + | [\xfa70-\xfad9] | [\xfb00-\xfb06] | [\xfb13-\xfb17] | \xfb1d @@ -350,99 +456,290 @@ import Data.Word ( Word8 ) | [\xffca-\xffcf] | [\xffd2-\xffd7] | [\xffda-\xffdc] - | \xd800 [\xdc00-\xdc0a] - | \xd800 [\xdc0d-\xdc25] - | \xd800 [\xdc28-\xdc39] - | \xd800 [\xdc3c-\xdc3c] - | \xd800 [\xdc3f-\xdc4c] - | \xd800 [\xdc50-\xdc5c] - | \xd800 [\xdc80-\xdcf9] - | \xd800 [\xdf00-\xdf1d] - | \xd800 [\xdf30-\xdf49] - | \xd800 [\xdf80-\xdf9c] - | \xd801 [\xe000-\xe09c] - | \xd802 [\xe400-\xe404] - | \xd802 \x0808 - | \xd802 [\xe40a-\xe434] - | \xd802 [\xe437-\xe437] - | \xd802 \x083c - | \xd802 \x083f - | \xd835 [\xb000-\xb053] - | \xd835 [\xb056-\xb09b] - | \xd835 [\xb09e-\xb09e] - | \xd835 
\xd4a2 - | \xd835 [\xb0a5-\xb0a5] - | \xd835 [\xb0a9-\xb0ab] - | \xd835 [\xb0ae-\xb0b8] - | \xd835 \xd4bb - | \xd835 [\xb0bd-\xb0c2] - | \xd835 [\xb0c5-\xb104] - | \xd835 [\xb107-\xb109] - | \xd835 [\xb10d-\xb113] - | \xd835 [\xb116-\xb11b] - | \xd835 [\xb11e-\xb138] - | \xd835 [\xb13b-\xb13d] - | \xd835 [\xb140-\xb143] - | \xd835 \xd546 - | \xd835 [\xb14a-\xb14f] - | \xd835 [\xb152-\xb2a2] - | \xd835 [\xb2a8-\xb2bf] - | \xd835 [\xb2c2-\xb2d9] - | \xd835 [\xb2dc-\xb2f9] - | \xd835 [\xb2fc-\xb313] - | \xd835 [\xb316-\xb333] - | \xd835 [\xb336-\xb34d] - | \xd835 [\xb350-\xb36d] - | \xd835 [\xb370-\xb387] - | \xd835 [\xb38a-\xb3a7] - | \xd835 [\xb3aa-\xb3c1] - | \xd835 [\xb3c4-\xb3c8] - | \xd840 [\xdc00-\xdffe] - | \xd841 [\xe000-\xe3fe] - | \xd842 [\xe400-\xe7fe] - | \xd843 [\xe800-\xebfe] - | \xd844 [\xec00-\xeffe] - | \xd845 [\xf000-\xf3fe] - | \xd846 [\xf400-\xf7fe] - | \xd847 [\xf800-\xfbfe] - | \xd848 [\xfc00-\xfffe] - | \xd849 [\x0000-\x03fe] - | \xd84a [\x0400-\x07fe] - | \xd84b [\x0800-\x0bfe] - | \xd84c [\x0c00-\x0ffe] - | \xd84d [\x1000-\x13fe] - | \xd84e [\x1400-\x17fe] - | \xd84f [\x1800-\x1bfe] - | \xd850 [\x1c00-\x1ffe] - | \xd851 [\x2000-\x23fe] - | \xd852 [\x2400-\x27fe] - | \xd853 [\x2800-\x2bfe] - | \xd854 [\x2c00-\x2ffe] - | \xd855 [\x3000-\x33fe] - | \xd856 [\x3400-\x37fe] - | \xd857 [\x3800-\x3bfe] - | \xd858 [\x3c00-\x3ffe] - | \xd859 [\x4000-\x43fe] - | \xd85a [\x4400-\x47fe] - | \xd85b [\x4800-\x4bfe] - | \xd85c [\x4c00-\x4ffe] - | \xd85d [\x5000-\x53fe] - | \xd85e [\x5400-\x57fe] - | \xd85f [\x5800-\x5bfe] - | \xd860 [\x5c00-\x5ffe] - | \xd861 [\x6000-\x63fe] - | \xd862 [\x6400-\x67fe] - | \xd863 [\x6800-\x6bfe] - | \xd864 [\x6c00-\x6ffe] - | \xd865 [\x7000-\x73fe] - | \xd866 [\x7400-\x77fe] - | \xd867 [\x7800-\x7bfe] - | \xd868 [\x7c00-\x7ffe] - | \xd869 [\x8000-\x82d5] - | \xd87e [\xd400-\xd61c] - --- XID_CONTINUE unicode character class -@xid_continue + | [\x10000-\x1000b] + | [\x1000d-\x10026] + | [\x10028-\x1003a] + | [\x1003c-\x1003d] + 
| [\x1003f-\x1004d] + | [\x10050-\x1005d] + | [\x10080-\x100fa] + | [\x10140-\x10174] + | [\x10280-\x1029c] + | [\x102a0-\x102d0] + | [\x10300-\x1031f] + | [\x1032d-\x1034a] + | [\x10350-\x10375] + | [\x10380-\x1039d] + | [\x103a0-\x103c3] + | [\x103c8-\x103cf] + | [\x103d1-\x103d5] + | [\x10400-\x1049d] + | [\x104b0-\x104d3] + | [\x104d8-\x104fb] + | [\x10500-\x10527] + | [\x10530-\x10563] + | [\x10570-\x1057a] + | [\x1057c-\x1058a] + | [\x1058c-\x10592] + | [\x10594-\x10595] + | [\x10597-\x105a1] + | [\x105a3-\x105b1] + | [\x105b3-\x105b9] + | [\x105bb-\x105bc] + | [\x10600-\x10736] + | [\x10740-\x10755] + | [\x10760-\x10767] + | [\x10780-\x10785] + | [\x10787-\x107b0] + | [\x107b2-\x107ba] + | [\x10800-\x10805] + | \x10808 + | [\x1080a-\x10835] + | [\x10837-\x10838] + | \x1083c + | [\x1083f-\x10855] + | [\x10860-\x10876] + | [\x10880-\x1089e] + | [\x108e0-\x108f2] + | [\x108f4-\x108f5] + | [\x10900-\x10915] + | [\x10920-\x10939] + | [\x10980-\x109b7] + | [\x109be-\x109bf] + | \x10a00 + | [\x10a10-\x10a13] + | [\x10a15-\x10a17] + | [\x10a19-\x10a35] + | [\x10a60-\x10a7c] + | [\x10a80-\x10a9c] + | [\x10ac0-\x10ac7] + | [\x10ac9-\x10ae4] + | [\x10b00-\x10b35] + | [\x10b40-\x10b55] + | [\x10b60-\x10b72] + | [\x10b80-\x10b91] + | [\x10c00-\x10c48] + | [\x10c80-\x10cb2] + | [\x10cc0-\x10cf2] + | [\x10d00-\x10d23] + | [\x10e80-\x10ea9] + | [\x10eb0-\x10eb1] + | [\x10f00-\x10f1c] + | \x10f27 + | [\x10f30-\x10f45] + | [\x10f70-\x10f81] + | [\x10fb0-\x10fc4] + | [\x10fe0-\x10ff6] + | [\x11003-\x11037] + | [\x11071-\x11072] + | \x11075 + | [\x11083-\x110af] + | [\x110d0-\x110e8] + | [\x11103-\x11126] + | \x11144 + | \x11147 + | [\x11150-\x11172] + | \x11176 + | [\x11183-\x111b2] + | [\x111c1-\x111c4] + | \x111da + | \x111dc + | [\x11200-\x11211] + | [\x11213-\x1122b] + | [\x1123f-\x11240] + | [\x11280-\x11286] + | \x11288 + | [\x1128a-\x1128d] + | [\x1128f-\x1129d] + | [\x1129f-\x112a8] + | [\x112b0-\x112de] + | [\x11305-\x1130c] + | [\x1130f-\x11310] + | [\x11313-\x11328] 
+ | [\x1132a-\x11330] + | [\x11332-\x11333] + | [\x11335-\x11339] + | \x1133d + | \x11350 + | [\x1135d-\x11361] + | [\x11400-\x11434] + | [\x11447-\x1144a] + | [\x1145f-\x11461] + | [\x11480-\x114af] + | [\x114c4-\x114c5] + | \x114c7 + | [\x11580-\x115ae] + | [\x115d8-\x115db] + | [\x11600-\x1162f] + | \x11644 + | [\x11680-\x116aa] + | \x116b8 + | [\x11700-\x1171a] + | [\x11740-\x11746] + | [\x11800-\x1182b] + | [\x118a0-\x118df] + | [\x118ff-\x11906] + | \x11909 + | [\x1190c-\x11913] + | [\x11915-\x11916] + | [\x11918-\x1192f] + | \x1193f + | \x11941 + | [\x119a0-\x119a7] + | [\x119aa-\x119d0] + | \x119e1 + | \x119e3 + | \x11a00 + | [\x11a0b-\x11a32] + | \x11a3a + | \x11a50 + | [\x11a5c-\x11a89] + | \x11a9d + | [\x11ab0-\x11af8] + | [\x11c00-\x11c08] + | [\x11c0a-\x11c2e] + | \x11c40 + | [\x11c72-\x11c8f] + | [\x11d00-\x11d06] + | [\x11d08-\x11d09] + | [\x11d0b-\x11d30] + | \x11d46 + | [\x11d60-\x11d65] + | [\x11d67-\x11d68] + | [\x11d6a-\x11d89] + | \x11d98 + | [\x11ee0-\x11ef2] + | \x11f02 + | [\x11f04-\x11f10] + | [\x11f12-\x11f33] + | \x11fb0 + | [\x12000-\x12399] + | [\x12400-\x1246e] + | [\x12480-\x12543] + | [\x12f90-\x12ff0] + | [\x13000-\x1342f] + | [\x13441-\x13446] + | [\x14400-\x14646] + | [\x16800-\x16a38] + | [\x16a40-\x16a5e] + | [\x16a70-\x16abe] + | [\x16ad0-\x16aed] + | [\x16b00-\x16b2f] + | [\x16b40-\x16b43] + | [\x16b63-\x16b77] + | [\x16b7d-\x16b8f] + | [\x16e40-\x16e7f] + | [\x16f00-\x16f4a] + | \x16f50 + | [\x16f93-\x16f9f] + | [\x16fe0-\x16fe1] + | \x16fe3 + | [\x17000-\x187f7] + | [\x18800-\x18cd5] + | [\x18d00-\x18d08] + | [\x1aff0-\x1aff3] + | [\x1aff5-\x1affb] + | [\x1affd-\x1affe] + | [\x1b000-\x1b122] + | \x1b132 + | [\x1b150-\x1b152] + | \x1b155 + | [\x1b164-\x1b167] + | [\x1b170-\x1b2fb] + | [\x1bc00-\x1bc6a] + | [\x1bc70-\x1bc7c] + | [\x1bc80-\x1bc88] + | [\x1bc90-\x1bc99] + | [\x1d400-\x1d454] + | [\x1d456-\x1d49c] + | [\x1d49e-\x1d49f] + | \x1d4a2 + | [\x1d4a5-\x1d4a6] + | [\x1d4a9-\x1d4ac] + | [\x1d4ae-\x1d4b9] + | \x1d4bb + | 
[\x1d4bd-\x1d4c3] + | [\x1d4c5-\x1d505] + | [\x1d507-\x1d50a] + | [\x1d50d-\x1d514] + | [\x1d516-\x1d51c] + | [\x1d51e-\x1d539] + | [\x1d53b-\x1d53e] + | [\x1d540-\x1d544] + | \x1d546 + | [\x1d54a-\x1d550] + | [\x1d552-\x1d6a5] + | [\x1d6a8-\x1d6c0] + | [\x1d6c2-\x1d6da] + | [\x1d6dc-\x1d6fa] + | [\x1d6fc-\x1d714] + | [\x1d716-\x1d734] + | [\x1d736-\x1d74e] + | [\x1d750-\x1d76e] + | [\x1d770-\x1d788] + | [\x1d78a-\x1d7a8] + | [\x1d7aa-\x1d7c2] + | [\x1d7c4-\x1d7cb] + | [\x1df00-\x1df1e] + | [\x1df25-\x1df2a] + | [\x1e030-\x1e06d] + | [\x1e100-\x1e12c] + | [\x1e137-\x1e13d] + | \x1e14e + | [\x1e290-\x1e2ad] + | [\x1e2c0-\x1e2eb] + | [\x1e4d0-\x1e4eb] + | [\x1e7e0-\x1e7e6] + | [\x1e7e8-\x1e7eb] + | [\x1e7ed-\x1e7ee] + | [\x1e7f0-\x1e7fe] + | [\x1e800-\x1e8c4] + | [\x1e900-\x1e943] + | \x1e94b + | [\x1ee00-\x1ee03] + | [\x1ee05-\x1ee1f] + | [\x1ee21-\x1ee22] + | \x1ee24 + | \x1ee27 + | [\x1ee29-\x1ee32] + | [\x1ee34-\x1ee37] + | \x1ee39 + | \x1ee3b + | \x1ee42 + | \x1ee47 + | \x1ee49 + | \x1ee4b + | [\x1ee4d-\x1ee4f] + | [\x1ee51-\x1ee52] + | \x1ee54 + | \x1ee57 + | \x1ee59 + | \x1ee5b + | \x1ee5d + | \x1ee5f + | [\x1ee61-\x1ee62] + | \x1ee64 + | [\x1ee67-\x1ee6a] + | [\x1ee6c-\x1ee72] + | [\x1ee74-\x1ee77] + | [\x1ee79-\x1ee7c] + | \x1ee7e + | [\x1ee80-\x1ee89] + | [\x1ee8b-\x1ee9b] + | [\x1eea1-\x1eea3] + | [\x1eea5-\x1eea9] + | [\x1eeab-\x1eebb] + | [\x20000-\x2a6df] + | [\x2a700-\x2b739] + | [\x2b740-\x2b81d] + | [\x2b820-\x2cea1] + | [\x2ceb0-\x2ebe0] + | [\x2ebf0-\x2ee5d] + | [\x2f800-\x2fa1d] + | [\x30000-\x3134a] + | [\x31350-\x323af] + +@XID_Continue = [\x0030-\x0039] | [\x0041-\x005a] | \x005f @@ -453,55 +750,53 @@ import Data.Word ( Word8 ) | \x00ba | [\x00c0-\x00d6] | [\x00d8-\x00f6] - | [\x00f8-\x0236] - | [\x0250-\x02c1] + | [\x00f8-\x02c1] | [\x02c6-\x02d1] | [\x02e0-\x02e4] + | \x02ec | \x02ee - | [\x0300-\x0357] - | [\x035d-\x036f] - | \x0386 - | [\x0388-\x038a] + | [\x0300-\x0374] + | [\x0376-\x0377] + | [\x037b-\x037d] + | \x037f + | [\x0386-\x038a] 
| \x038c | [\x038e-\x03a1] - | [\x03a3-\x03ce] - | [\x03d0-\x03f5] - | [\x03f7-\x03fb] - | [\x0400-\x0481] - | [\x0483-\x0486] - | [\x048a-\x04ce] - | [\x04d0-\x04f5] - | [\x04f8-\x04f9] - | [\x0500-\x050f] + | [\x03a3-\x03f5] + | [\x03f7-\x0481] + | [\x0483-\x0487] + | [\x048a-\x052f] | [\x0531-\x0556] | \x0559 - | [\x0561-\x0587] - | [\x0591-\x05a1] - | [\x05a3-\x05b9] - | [\x05bb-\x05bd] + | [\x0560-\x0588] + | [\x0591-\x05bd] | \x05bf | [\x05c1-\x05c2] - | \x05c4 + | [\x05c4-\x05c5] + | \x05c7 | [\x05d0-\x05ea] - | [\x05f0-\x05f2] - | [\x0610-\x0615] - | [\x0621-\x063a] - | [\x0640-\x0658] - | [\x0660-\x0669] + | [\x05ef-\x05f2] + | [\x0610-\x061a] + | [\x0620-\x0669] | [\x066e-\x06d3] | [\x06d5-\x06dc] | [\x06df-\x06e8] | [\x06ea-\x06fc] | \x06ff | [\x0710-\x074a] - | [\x074d-\x074f] - | [\x0780-\x07b1] - | [\x0901-\x0939] - | [\x093c-\x094d] - | [\x0950-\x0954] - | [\x0958-\x0963] + | [\x074d-\x07b1] + | [\x07c0-\x07f5] + | \x07fa + | \x07fd + | [\x0800-\x082d] + | [\x0840-\x085b] + | [\x0860-\x086a] + | [\x0870-\x0887] + | [\x0889-\x088e] + | [\x0898-\x08e1] + | [\x08e3-\x0963] | [\x0966-\x096f] - | [\x0981-\x0983] + | [\x0971-\x0983] | [\x0985-\x098c] | [\x098f-\x0990] | [\x0993-\x09a8] @@ -510,11 +805,13 @@ import Data.Word ( Word8 ) | [\x09b6-\x09b9] | [\x09bc-\x09c4] | [\x09c7-\x09c8] - | [\x09cb-\x09cd] + | [\x09cb-\x09ce] | \x09d7 | [\x09dc-\x09dd] | [\x09df-\x09e3] | [\x09e6-\x09f1] + | \x09fc + | \x09fe | [\x0a01-\x0a03] | [\x0a05-\x0a0a] | [\x0a0f-\x0a10] @@ -527,9 +824,10 @@ import Data.Word ( Word8 ) | [\x0a3e-\x0a42] | [\x0a47-\x0a48] | [\x0a4b-\x0a4d] + | \x0a51 | [\x0a59-\x0a5c] | \x0a5e - | [\x0a66-\x0a74] + | [\x0a66-\x0a75] | [\x0a81-\x0a83] | [\x0a85-\x0a8d] | [\x0a8f-\x0a91] @@ -543,6 +841,7 @@ import Data.Word ( Word8 ) | \x0ad0 | [\x0ae0-\x0ae3] | [\x0ae6-\x0aef] + | [\x0af9-\x0aff] | [\x0b01-\x0b03] | [\x0b05-\x0b0c] | [\x0b0f-\x0b10] @@ -550,12 +849,12 @@ import Data.Word ( Word8 ) | [\x0b2a-\x0b30] | [\x0b32-\x0b33] | [\x0b35-\x0b39] 
- | [\x0b3c-\x0b43] + | [\x0b3c-\x0b44] | [\x0b47-\x0b48] | [\x0b4b-\x0b4d] - | [\x0b56-\x0b57] + | [\x0b55-\x0b57] | [\x0b5c-\x0b5d] - | [\x0b5f-\x0b61] + | [\x0b5f-\x0b63] | [\x0b66-\x0b6f] | \x0b71 | [\x0b82-\x0b83] @@ -567,26 +866,26 @@ import Data.Word ( Word8 ) | [\x0b9e-\x0b9f] | [\x0ba3-\x0ba4] | [\x0ba8-\x0baa] - | [\x0bae-\x0bb5] - | [\x0bb7-\x0bb9] + | [\x0bae-\x0bb9] | [\x0bbe-\x0bc2] | [\x0bc6-\x0bc8] | [\x0bca-\x0bcd] + | \x0bd0 | \x0bd7 - | [\x0be7-\x0bef] - | [\x0c01-\x0c03] - | [\x0c05-\x0c0c] + | [\x0be6-\x0bef] + | [\x0c00-\x0c0c] | [\x0c0e-\x0c10] | [\x0c12-\x0c28] - | [\x0c2a-\x0c33] - | [\x0c35-\x0c39] - | [\x0c3e-\x0c44] + | [\x0c2a-\x0c39] + | [\x0c3c-\x0c44] | [\x0c46-\x0c48] | [\x0c4a-\x0c4d] | [\x0c55-\x0c56] - | [\x0c60-\x0c61] + | [\x0c58-\x0c5a] + | \x0c5d + | [\x0c60-\x0c63] | [\x0c66-\x0c6f] - | [\x0c82-\x0c83] + | [\x0c80-\x0c83] | [\x0c85-\x0c8c] | [\x0c8e-\x0c90] | [\x0c92-\x0ca8] @@ -596,21 +895,20 @@ import Data.Word ( Word8 ) | [\x0cc6-\x0cc8] | [\x0cca-\x0ccd] | [\x0cd5-\x0cd6] - | \x0cde - | [\x0ce0-\x0ce1] + | [\x0cdd-\x0cde] + | [\x0ce0-\x0ce3] | [\x0ce6-\x0cef] - | [\x0d02-\x0d03] - | [\x0d05-\x0d0c] + | [\x0cf1-\x0cf3] + | [\x0d00-\x0d0c] | [\x0d0e-\x0d10] - | [\x0d12-\x0d28] - | [\x0d2a-\x0d39] - | [\x0d3e-\x0d43] + | [\x0d12-\x0d44] | [\x0d46-\x0d48] - | [\x0d4a-\x0d4d] - | \x0d57 - | [\x0d60-\x0d61] + | [\x0d4a-\x0d4e] + | [\x0d54-\x0d57] + | [\x0d5f-\x0d63] | [\x0d66-\x0d6f] - | [\x0d82-\x0d83] + | [\x0d7a-\x0d7f] + | [\x0d81-\x0d83] | [\x0d85-\x0d96] | [\x0d9a-\x0db1] | [\x0db3-\x0dbb] @@ -620,28 +918,22 @@ import Data.Word ( Word8 ) | [\x0dcf-\x0dd4] | \x0dd6 | [\x0dd8-\x0ddf] + | [\x0de6-\x0def] | [\x0df2-\x0df3] | [\x0e01-\x0e3a] | [\x0e40-\x0e4e] | [\x0e50-\x0e59] | [\x0e81-\x0e82] | \x0e84 - | [\x0e87-\x0e88] - | \x0e8a - | \x0e8d - | [\x0e94-\x0e97] - | [\x0e99-\x0e9f] - | [\x0ea1-\x0ea3] + | [\x0e86-\x0e8a] + | [\x0e8c-\x0ea3] | \x0ea5 - | \x0ea7 - | [\x0eaa-\x0eab] - | [\x0ead-\x0eb9] - | [\x0ebb-\x0ebd] + | 
[\x0ea7-\x0ebd] | [\x0ec0-\x0ec4] | \x0ec6 - | [\x0ec8-\x0ecd] + | [\x0ec8-\x0ece] | [\x0ed0-\x0ed9] - | [\x0edc-\x0edd] + | [\x0edc-\x0edf] | \x0f00 | [\x0f18-\x0f19] | [\x0f20-\x0f29] @@ -649,81 +941,87 @@ import Data.Word ( Word8 ) | \x0f37 | \x0f39 | [\x0f3e-\x0f47] - | [\x0f49-\x0f6a] + | [\x0f49-\x0f6c] | [\x0f71-\x0f84] - | [\x0f86-\x0f8b] - | [\x0f90-\x0f97] + | [\x0f86-\x0f97] | [\x0f99-\x0fbc] | \x0fc6 - | [\x1000-\x1021] - | [\x1023-\x1027] - | [\x1029-\x102a] - | [\x102c-\x1032] - | [\x1036-\x1039] - | [\x1040-\x1049] - | [\x1050-\x1059] + | [\x1000-\x1049] + | [\x1050-\x109d] | [\x10a0-\x10c5] - | [\x10d0-\x10f8] - | [\x1100-\x1159] - | [\x115f-\x11a2] - | [\x11a8-\x11f9] - | [\x1200-\x1206] - | [\x1208-\x1246] - | \x1248 + | \x10c7 + | \x10cd + | [\x10d0-\x10fa] + | [\x10fc-\x1248] | [\x124a-\x124d] | [\x1250-\x1256] | \x1258 | [\x125a-\x125d] - | [\x1260-\x1286] - | \x1288 + | [\x1260-\x1288] | [\x128a-\x128d] - | [\x1290-\x12ae] - | \x12b0 + | [\x1290-\x12b0] | [\x12b2-\x12b5] | [\x12b8-\x12be] | \x12c0 | [\x12c2-\x12c5] - | [\x12c8-\x12ce] - | [\x12d0-\x12d6] - | [\x12d8-\x12ee] - | [\x12f0-\x130e] - | \x1310 + | [\x12c8-\x12d6] + | [\x12d8-\x1310] | [\x1312-\x1315] - | [\x1318-\x131e] - | [\x1320-\x1346] - | [\x1348-\x135a] + | [\x1318-\x135a] + | [\x135d-\x135f] | [\x1369-\x1371] - | [\x13a0-\x13f4] + | [\x1380-\x138f] + | [\x13a0-\x13f5] + | [\x13f8-\x13fd] | [\x1401-\x166c] - | [\x166f-\x1676] + | [\x166f-\x167f] | [\x1681-\x169a] | [\x16a0-\x16ea] - | [\x16ee-\x16f0] - | [\x1700-\x170c] - | [\x170e-\x1714] - | [\x1720-\x1734] + | [\x16ee-\x16f8] + | [\x1700-\x1715] + | [\x171f-\x1734] | [\x1740-\x1753] | [\x1760-\x176c] | [\x176e-\x1770] | [\x1772-\x1773] - | [\x1780-\x17b3] - | [\x17b6-\x17d3] + | [\x1780-\x17d3] | \x17d7 | [\x17dc-\x17dd] | [\x17e0-\x17e9] | [\x180b-\x180d] - | [\x1810-\x1819] - | [\x1820-\x1877] - | [\x1880-\x18a9] - | [\x1900-\x191c] + | [\x180f-\x1819] + | [\x1820-\x1878] + | [\x1880-\x18aa] + | [\x18b0-\x18f5] + | 
[\x1900-\x191e] | [\x1920-\x192b] | [\x1930-\x193b] | [\x1946-\x196d] | [\x1970-\x1974] - | [\x1d00-\x1d6b] - | [\x1e00-\x1e9b] - | [\x1ea0-\x1ef9] - | [\x1f00-\x1f15] + | [\x1980-\x19ab] + | [\x19b0-\x19c9] + | [\x19d0-\x19da] + | [\x1a00-\x1a1b] + | [\x1a20-\x1a5e] + | [\x1a60-\x1a7c] + | [\x1a7f-\x1a89] + | [\x1a90-\x1a99] + | \x1aa7 + | [\x1ab0-\x1abd] + | [\x1abf-\x1ace] + | [\x1b00-\x1b4c] + | [\x1b50-\x1b59] + | [\x1b6b-\x1b73] + | [\x1b80-\x1bf3] + | [\x1c00-\x1c37] + | [\x1c40-\x1c49] + | [\x1c4d-\x1c7d] + | [\x1c80-\x1c88] + | [\x1c90-\x1cba] + | [\x1cbd-\x1cbf] + | [\x1cd0-\x1cd2] + | [\x1cd4-\x1cfa] + | [\x1d00-\x1f15] | [\x1f18-\x1f1d] | [\x1f20-\x1f45] | [\x1f48-\x1f4d] @@ -742,13 +1040,15 @@ import Data.Word ( Word8 ) | [\x1fe0-\x1fec] | [\x1ff2-\x1ff4] | [\x1ff6-\x1ffc] + | [\x200c-\x200d] | [\x203f-\x2040] | \x2054 | \x2071 | \x207f + | [\x2090-\x209c] | [\x20d0-\x20dc] | \x20e1 - | [\x20e5-\x20ea] + | [\x20e5-\x20f0] | \x2102 | \x2107 | [\x210a-\x2113] @@ -757,11 +1057,28 @@ import Data.Word ( Word8 ) | \x2124 | \x2126 | \x2128 - | [\x212a-\x2131] - | [\x2133-\x2139] - | [\x213d-\x213f] + | [\x212a-\x2139] + | [\x213c-\x213f] | [\x2145-\x2149] - | [\x2160-\x2183] + | \x214e + | [\x2160-\x2188] + | [\x2c00-\x2ce4] + | [\x2ceb-\x2cf3] + | [\x2d00-\x2d25] + | \x2d27 + | \x2d2d + | [\x2d30-\x2d67] + | \x2d6f + | [\x2d7f-\x2d96] + | [\x2da0-\x2da6] + | [\x2da8-\x2dae] + | [\x2db0-\x2db6] + | [\x2db8-\x2dbe] + | [\x2dc0-\x2dc6] + | [\x2dc8-\x2dce] + | [\x2dd0-\x2dd6] + | [\x2dd8-\x2dde] + | [\x2de0-\x2dff] | [\x3005-\x3007] | [\x3021-\x302f] | [\x3031-\x3035] @@ -770,16 +1087,60 @@ import Data.Word ( Word8 ) | [\x3099-\x309a] | [\x309d-\x309f] | [\x30a1-\x30ff] - | [\x3105-\x312c] + | [\x3105-\x312f] | [\x3131-\x318e] - | [\x31a0-\x31b7] + | [\x31a0-\x31bf] | [\x31f0-\x31ff] - | [\x3400-\x4db5] - | [\x4e00-\x9fa5] - | [\xa000-\xa48c] + | [\x3400-\x4dbf] + | [\x4e00-\xa48c] + | [\xa4d0-\xa4fd] + | [\xa500-\xa60c] + | [\xa610-\xa62b] + | [\xa640-\xa66f] + 
| [\xa674-\xa67d] + | [\xa67f-\xa6f1] + | [\xa717-\xa71f] + | [\xa722-\xa788] + | [\xa78b-\xa7ca] + | [\xa7d0-\xa7d1] + | \xa7d3 + | [\xa7d5-\xa7d9] + | [\xa7f2-\xa827] + | \xa82c + | [\xa840-\xa873] + | [\xa880-\xa8c5] + | [\xa8d0-\xa8d9] + | [\xa8e0-\xa8f7] + | \xa8fb + | [\xa8fd-\xa92d] + | [\xa930-\xa953] + | [\xa960-\xa97c] + | [\xa980-\xa9c0] + | [\xa9cf-\xa9d9] + | [\xa9e0-\xa9fe] + | [\xaa00-\xaa36] + | [\xaa40-\xaa4d] + | [\xaa50-\xaa59] + | [\xaa60-\xaa76] + | [\xaa7a-\xaac2] + | [\xaadb-\xaadd] + | [\xaae0-\xaaef] + | [\xaaf2-\xaaf6] + | [\xab01-\xab06] + | [\xab09-\xab0e] + | [\xab11-\xab16] + | [\xab20-\xab26] + | [\xab28-\xab2e] + | [\xab30-\xab5a] + | [\xab5c-\xab69] + | [\xab70-\xabea] + | [\xabec-\xabed] + | [\xabf0-\xabf9] | [\xac00-\xd7a3] - | [\xf900-\xfa2d] - | [\xfa30-\xfa6a] + | [\xd7b0-\xd7c6] + | [\xd7cb-\xd7fb] + | [\xf900-\xfa6d] + | [\xfa70-\xfad9] | [\xfb00-\xfb06] | [\xfb13-\xfb17] | [\xfb1d-\xfb28] @@ -795,7 +1156,7 @@ import Data.Word ( Word8 ) | [\xfd92-\xfdc7] | [\xfdf0-\xfdf9] | [\xfe00-\xfe0f] - | [\xfe20-\xfe23] + | [\xfe20-\xfe2f] | [\xfe33-\xfe34] | [\xfe4d-\xfe4f] | \xfe71 @@ -814,106 +1175,355 @@ import Data.Word ( Word8 ) | [\xffca-\xffcf] | [\xffd2-\xffd7] | [\xffda-\xffdc] - | \xd800 [\xdc00-\xdc0a] - | \xd800 [\xdc0d-\xdc25] - | \xd800 [\xdc28-\xdc39] - | \xd800 [\xdc3c-\xdc3c] - | \xd800 [\xdc3f-\xdc4c] - | \xd800 [\xdc50-\xdc5c] - | \xd800 [\xdc80-\xdcf9] - | \xd800 [\xdf00-\xdf1d] - | \xd800 [\xdf30-\xdf49] - | \xd800 [\xdf80-\xdf9c] - | \xd801 [\xe000-\xe09c] - | \xd801 [\xe0a0-\xe0a8] - | \xd802 [\xe400-\xe404] - | \xd802 \x0808 - | \xd802 [\xe40a-\xe434] - | \xd802 [\xe437-\xe437] - | \xd802 \x083c - | \xd802 \x083f - | \xd834 [\xad65-\xad68] - | \xd834 [\xad6d-\xad71] - | \xd834 [\xad7b-\xad81] - | \xd834 [\xad85-\xad8a] - | \xd834 [\xadaa-\xadac] - | \xd835 [\xb000-\xb053] - | \xd835 [\xb056-\xb09b] - | \xd835 [\xb09e-\xb09e] - | \xd835 \xd4a2 - | \xd835 [\xb0a5-\xb0a5] - | \xd835 [\xb0a9-\xb0ab] - | \xd835 
[\xb0ae-\xb0b8] - | \xd835 \xd4bb - | \xd835 [\xb0bd-\xb0c2] - | \xd835 [\xb0c5-\xb104] - | \xd835 [\xb107-\xb109] - | \xd835 [\xb10d-\xb113] - | \xd835 [\xb116-\xb11b] - | \xd835 [\xb11e-\xb138] - | \xd835 [\xb13b-\xb13d] - | \xd835 [\xb140-\xb143] - | \xd835 \xd546 - | \xd835 [\xb14a-\xb14f] - | \xd835 [\xb152-\xb2a2] - | \xd835 [\xb2a8-\xb2bf] - | \xd835 [\xb2c2-\xb2d9] - | \xd835 [\xb2dc-\xb2f9] - | \xd835 [\xb2fc-\xb313] - | \xd835 [\xb316-\xb333] - | \xd835 [\xb336-\xb34d] - | \xd835 [\xb350-\xb36d] - | \xd835 [\xb370-\xb387] - | \xd835 [\xb38a-\xb3a7] - | \xd835 [\xb3aa-\xb3c1] - | \xd835 [\xb3c4-\xb3c8] - | \xd835 [\xb3ce-\xb3fe] - | \xd840 [\xdc00-\xdffe] - | \xd841 [\xe000-\xe3fe] - | \xd842 [\xe400-\xe7fe] - | \xd843 [\xe800-\xebfe] - | \xd844 [\xec00-\xeffe] - | \xd845 [\xf000-\xf3fe] - | \xd846 [\xf400-\xf7fe] - | \xd847 [\xf800-\xfbfe] - | \xd848 [\xfc00-\xfffe] - | \xd849 [\x0000-\x03fe] - | \xd84a [\x0400-\x07fe] - | \xd84b [\x0800-\x0bfe] - | \xd84c [\x0c00-\x0ffe] - | \xd84d [\x1000-\x13fe] - | \xd84e [\x1400-\x17fe] - | \xd84f [\x1800-\x1bfe] - | \xd850 [\x1c00-\x1ffe] - | \xd851 [\x2000-\x23fe] - | \xd852 [\x2400-\x27fe] - | \xd853 [\x2800-\x2bfe] - | \xd854 [\x2c00-\x2ffe] - | \xd855 [\x3000-\x33fe] - | \xd856 [\x3400-\x37fe] - | \xd857 [\x3800-\x3bfe] - | \xd858 [\x3c00-\x3ffe] - | \xd859 [\x4000-\x43fe] - | \xd85a [\x4400-\x47fe] - | \xd85b [\x4800-\x4bfe] - | \xd85c [\x4c00-\x4ffe] - | \xd85d [\x5000-\x53fe] - | \xd85e [\x5400-\x57fe] - | \xd85f [\x5800-\x5bfe] - | \xd860 [\x5c00-\x5ffe] - | \xd861 [\x6000-\x63fe] - | \xd862 [\x6400-\x67fe] - | \xd863 [\x6800-\x6bfe] - | \xd864 [\x6c00-\x6ffe] - | \xd865 [\x7000-\x73fe] - | \xd866 [\x7400-\x77fe] - | \xd867 [\x7800-\x7bfe] - | \xd868 [\x7c00-\x7ffe] - | \xd869 [\x8000-\x82d5] - | \xd87e [\xd400-\xd61c] - | \xdb40 [\xdd00-\xddee] - -@ident = @xid_start @xid_continue* + | [\x10000-\x1000b] + | [\x1000d-\x10026] + | [\x10028-\x1003a] + | [\x1003c-\x1003d] + | [\x1003f-\x1004d] + | 
[\x10050-\x1005d] + | [\x10080-\x100fa] + | [\x10140-\x10174] + | \x101fd + | [\x10280-\x1029c] + | [\x102a0-\x102d0] + | \x102e0 + | [\x10300-\x1031f] + | [\x1032d-\x1034a] + | [\x10350-\x1037a] + | [\x10380-\x1039d] + | [\x103a0-\x103c3] + | [\x103c8-\x103cf] + | [\x103d1-\x103d5] + | [\x10400-\x1049d] + | [\x104a0-\x104a9] + | [\x104b0-\x104d3] + | [\x104d8-\x104fb] + | [\x10500-\x10527] + | [\x10530-\x10563] + | [\x10570-\x1057a] + | [\x1057c-\x1058a] + | [\x1058c-\x10592] + | [\x10594-\x10595] + | [\x10597-\x105a1] + | [\x105a3-\x105b1] + | [\x105b3-\x105b9] + | [\x105bb-\x105bc] + | [\x10600-\x10736] + | [\x10740-\x10755] + | [\x10760-\x10767] + | [\x10780-\x10785] + | [\x10787-\x107b0] + | [\x107b2-\x107ba] + | [\x10800-\x10805] + | \x10808 + | [\x1080a-\x10835] + | [\x10837-\x10838] + | \x1083c + | [\x1083f-\x10855] + | [\x10860-\x10876] + | [\x10880-\x1089e] + | [\x108e0-\x108f2] + | [\x108f4-\x108f5] + | [\x10900-\x10915] + | [\x10920-\x10939] + | [\x10980-\x109b7] + | [\x109be-\x109bf] + | [\x10a00-\x10a03] + | [\x10a05-\x10a06] + | [\x10a0c-\x10a13] + | [\x10a15-\x10a17] + | [\x10a19-\x10a35] + | [\x10a38-\x10a3a] + | \x10a3f + | [\x10a60-\x10a7c] + | [\x10a80-\x10a9c] + | [\x10ac0-\x10ac7] + | [\x10ac9-\x10ae6] + | [\x10b00-\x10b35] + | [\x10b40-\x10b55] + | [\x10b60-\x10b72] + | [\x10b80-\x10b91] + | [\x10c00-\x10c48] + | [\x10c80-\x10cb2] + | [\x10cc0-\x10cf2] + | [\x10d00-\x10d27] + | [\x10d30-\x10d39] + | [\x10e80-\x10ea9] + | [\x10eab-\x10eac] + | [\x10eb0-\x10eb1] + | [\x10efd-\x10f1c] + | \x10f27 + | [\x10f30-\x10f50] + | [\x10f70-\x10f85] + | [\x10fb0-\x10fc4] + | [\x10fe0-\x10ff6] + | [\x11000-\x11046] + | [\x11066-\x11075] + | [\x1107f-\x110ba] + | \x110c2 + | [\x110d0-\x110e8] + | [\x110f0-\x110f9] + | [\x11100-\x11134] + | [\x11136-\x1113f] + | [\x11144-\x11147] + | [\x11150-\x11173] + | \x11176 + | [\x11180-\x111c4] + | [\x111c9-\x111cc] + | [\x111ce-\x111da] + | \x111dc + | [\x11200-\x11211] + | [\x11213-\x11237] + | [\x1123e-\x11241] + | 
[\x11280-\x11286] + | \x11288 + | [\x1128a-\x1128d] + | [\x1128f-\x1129d] + | [\x1129f-\x112a8] + | [\x112b0-\x112ea] + | [\x112f0-\x112f9] + | [\x11300-\x11303] + | [\x11305-\x1130c] + | [\x1130f-\x11310] + | [\x11313-\x11328] + | [\x1132a-\x11330] + | [\x11332-\x11333] + | [\x11335-\x11339] + | [\x1133b-\x11344] + | [\x11347-\x11348] + | [\x1134b-\x1134d] + | \x11350 + | \x11357 + | [\x1135d-\x11363] + | [\x11366-\x1136c] + | [\x11370-\x11374] + | [\x11400-\x1144a] + | [\x11450-\x11459] + | [\x1145e-\x11461] + | [\x11480-\x114c5] + | \x114c7 + | [\x114d0-\x114d9] + | [\x11580-\x115b5] + | [\x115b8-\x115c0] + | [\x115d8-\x115dd] + | [\x11600-\x11640] + | \x11644 + | [\x11650-\x11659] + | [\x11680-\x116b8] + | [\x116c0-\x116c9] + | [\x11700-\x1171a] + | [\x1171d-\x1172b] + | [\x11730-\x11739] + | [\x11740-\x11746] + | [\x11800-\x1183a] + | [\x118a0-\x118e9] + | [\x118ff-\x11906] + | \x11909 + | [\x1190c-\x11913] + | [\x11915-\x11916] + | [\x11918-\x11935] + | [\x11937-\x11938] + | [\x1193b-\x11943] + | [\x11950-\x11959] + | [\x119a0-\x119a7] + | [\x119aa-\x119d7] + | [\x119da-\x119e1] + | [\x119e3-\x119e4] + | [\x11a00-\x11a3e] + | \x11a47 + | [\x11a50-\x11a99] + | \x11a9d + | [\x11ab0-\x11af8] + | [\x11c00-\x11c08] + | [\x11c0a-\x11c36] + | [\x11c38-\x11c40] + | [\x11c50-\x11c59] + | [\x11c72-\x11c8f] + | [\x11c92-\x11ca7] + | [\x11ca9-\x11cb6] + | [\x11d00-\x11d06] + | [\x11d08-\x11d09] + | [\x11d0b-\x11d36] + | \x11d3a + | [\x11d3c-\x11d3d] + | [\x11d3f-\x11d47] + | [\x11d50-\x11d59] + | [\x11d60-\x11d65] + | [\x11d67-\x11d68] + | [\x11d6a-\x11d8e] + | [\x11d90-\x11d91] + | [\x11d93-\x11d98] + | [\x11da0-\x11da9] + | [\x11ee0-\x11ef6] + | [\x11f00-\x11f10] + | [\x11f12-\x11f3a] + | [\x11f3e-\x11f42] + | [\x11f50-\x11f59] + | \x11fb0 + | [\x12000-\x12399] + | [\x12400-\x1246e] + | [\x12480-\x12543] + | [\x12f90-\x12ff0] + | [\x13000-\x1342f] + | [\x13440-\x13455] + | [\x14400-\x14646] + | [\x16800-\x16a38] + | [\x16a40-\x16a5e] + | [\x16a60-\x16a69] + | 
[\x16a70-\x16abe] + | [\x16ac0-\x16ac9] + | [\x16ad0-\x16aed] + | [\x16af0-\x16af4] + | [\x16b00-\x16b36] + | [\x16b40-\x16b43] + | [\x16b50-\x16b59] + | [\x16b63-\x16b77] + | [\x16b7d-\x16b8f] + | [\x16e40-\x16e7f] + | [\x16f00-\x16f4a] + | [\x16f4f-\x16f87] + | [\x16f8f-\x16f9f] + | [\x16fe0-\x16fe1] + | [\x16fe3-\x16fe4] + | [\x16ff0-\x16ff1] + | [\x17000-\x187f7] + | [\x18800-\x18cd5] + | [\x18d00-\x18d08] + | [\x1aff0-\x1aff3] + | [\x1aff5-\x1affb] + | [\x1affd-\x1affe] + | [\x1b000-\x1b122] + | \x1b132 + | [\x1b150-\x1b152] + | \x1b155 + | [\x1b164-\x1b167] + | [\x1b170-\x1b2fb] + | [\x1bc00-\x1bc6a] + | [\x1bc70-\x1bc7c] + | [\x1bc80-\x1bc88] + | [\x1bc90-\x1bc99] + | [\x1bc9d-\x1bc9e] + | [\x1cf00-\x1cf2d] + | [\x1cf30-\x1cf46] + | [\x1d165-\x1d169] + | [\x1d16d-\x1d172] + | [\x1d17b-\x1d182] + | [\x1d185-\x1d18b] + | [\x1d1aa-\x1d1ad] + | [\x1d242-\x1d244] + | [\x1d400-\x1d454] + | [\x1d456-\x1d49c] + | [\x1d49e-\x1d49f] + | \x1d4a2 + | [\x1d4a5-\x1d4a6] + | [\x1d4a9-\x1d4ac] + | [\x1d4ae-\x1d4b9] + | \x1d4bb + | [\x1d4bd-\x1d4c3] + | [\x1d4c5-\x1d505] + | [\x1d507-\x1d50a] + | [\x1d50d-\x1d514] + | [\x1d516-\x1d51c] + | [\x1d51e-\x1d539] + | [\x1d53b-\x1d53e] + | [\x1d540-\x1d544] + | \x1d546 + | [\x1d54a-\x1d550] + | [\x1d552-\x1d6a5] + | [\x1d6a8-\x1d6c0] + | [\x1d6c2-\x1d6da] + | [\x1d6dc-\x1d6fa] + | [\x1d6fc-\x1d714] + | [\x1d716-\x1d734] + | [\x1d736-\x1d74e] + | [\x1d750-\x1d76e] + | [\x1d770-\x1d788] + | [\x1d78a-\x1d7a8] + | [\x1d7aa-\x1d7c2] + | [\x1d7c4-\x1d7cb] + | [\x1d7ce-\x1d7ff] + | [\x1da00-\x1da36] + | [\x1da3b-\x1da6c] + | \x1da75 + | \x1da84 + | [\x1da9b-\x1da9f] + | [\x1daa1-\x1daaf] + | [\x1df00-\x1df1e] + | [\x1df25-\x1df2a] + | [\x1e000-\x1e006] + | [\x1e008-\x1e018] + | [\x1e01b-\x1e021] + | [\x1e023-\x1e024] + | [\x1e026-\x1e02a] + | [\x1e030-\x1e06d] + | \x1e08f + | [\x1e100-\x1e12c] + | [\x1e130-\x1e13d] + | [\x1e140-\x1e149] + | \x1e14e + | [\x1e290-\x1e2ae] + | [\x1e2c0-\x1e2f9] + | [\x1e4d0-\x1e4f9] + | [\x1e7e0-\x1e7e6] + | 
[\x1e7e8-\x1e7eb] + | [\x1e7ed-\x1e7ee] + | [\x1e7f0-\x1e7fe] + | [\x1e800-\x1e8c4] + | [\x1e8d0-\x1e8d6] + | [\x1e900-\x1e94b] + | [\x1e950-\x1e959] + | [\x1ee00-\x1ee03] + | [\x1ee05-\x1ee1f] + | [\x1ee21-\x1ee22] + | \x1ee24 + | \x1ee27 + | [\x1ee29-\x1ee32] + | [\x1ee34-\x1ee37] + | \x1ee39 + | \x1ee3b + | \x1ee42 + | \x1ee47 + | \x1ee49 + | \x1ee4b + | [\x1ee4d-\x1ee4f] + | [\x1ee51-\x1ee52] + | \x1ee54 + | \x1ee57 + | \x1ee59 + | \x1ee5b + | \x1ee5d + | \x1ee5f + | [\x1ee61-\x1ee62] + | \x1ee64 + | [\x1ee67-\x1ee6a] + | [\x1ee6c-\x1ee72] + | [\x1ee74-\x1ee77] + | [\x1ee79-\x1ee7c] + | \x1ee7e + | [\x1ee80-\x1ee89] + | [\x1ee8b-\x1ee9b] + | [\x1eea1-\x1eea3] + | [\x1eea5-\x1eea9] + | [\x1eeab-\x1eebb] + | [\x1fbf0-\x1fbf9] + | [\x20000-\x2a6df] + | [\x2a700-\x2b739] + | [\x2b740-\x2b81d] + | [\x2b820-\x2cea1] + | [\x2ceb0-\x2ebe0] + | [\x2ebf0-\x2ee5d] + | [\x2f800-\x2fa1d] + | [\x30000-\x3134a] + | [\x31350-\x323af] + | [\xe0100-\xe01ef] + +-- End of code generated by "scripts/unicode.py". + +-- See https://github.com/rust-lang/rust/blob/ac77e88f7a84e20311f5518e34c806503d586c1c/compiler/rustc_lexer/src/lib.rs#L313-L326 +@id_start = "_" | @XID_Start +@id_continue = @XID_Continue + +@ident = @id_start @id_continue* @raw_ident = r \# @ident @lifetime = \' @ident diff --git a/test/unit-tests/LexerTest.hs b/test/unit-tests/LexerTest.hs index 445d197..10f796c 100644 --- a/test/unit-tests/LexerTest.hs +++ b/test/unit-tests/LexerTest.hs @@ -122,6 +122,10 @@ commonCode = testGroup "lexing common code fragments" , LiteralTok (IntegerTok "1") Nothing ] + -- Unicode characters that require surrogate pairs to encode in UTF-16. These + -- serve as regression tests for issue #3. + , testCode "𝑂_𝑂" [ IdentTok (mkIdent "𝑂_𝑂") ] + , testCode "𐌝" [ IdentTok (mkIdent "𐌝") ] ]