From 6e2603519903c925006d934d48f86fc072d07b4a Mon Sep 17 00:00:00 2001 From: David Holland Date: Tue, 14 Jan 2025 21:17:48 -0500 Subject: [PATCH 1/4] Lexer: rearrange comment handling. Don't have the lexer match //.*; just match the // part, and extend the same logic that handles dropping the bits between /* and */ to also handle dropping the bits between // and EOL. This requires materializing EOL tokens instead of treating them as whitespace, but that costs ~nothing. Also fix it so we generate explicit messages when there are unclosed comments. A // comment that doesn't have an EOL before EOF generates a warning; a /* comment without a matching */ generates an error. The position for this error is the comment start. (The previous behavior was that unclosed comments commented out the EOF token and that would cause a non-specific "parse error".) Update the lexer call sites to post the messages. There should not be four separate lexer call sites, but that's a problem for a future branch. Add some tests. This includes a new test_parse_errors area akin to the existing test_type_errors. --- intTests/test2181/test.sh | 5 + intTests/test2181/test1.saw | 2 + intTests/test2181/test2.saw | 3 + intTests/test2181/test3.saw | 1 + intTests/test_parse_errors/.gitignore | 3 + intTests/test_parse_errors/lexer001.log.good | 2 + intTests/test_parse_errors/lexer001.saw | 1 + intTests/test_parse_errors/lexer002.log.good | 5 + intTests/test_parse_errors/lexer002.saw | 3 + intTests/test_parse_errors/test.sh | 1 + saw/SAWScript/REPL/Command.hs | 16 ++- src/SAWScript/Import.hs | 31 ++++-- src/SAWScript/Interpreter.hs | 14 ++- src/SAWScript/Lexer.x | 107 +++++++++++++++---- src/SAWScript/Token.hs | 9 +- 15 files changed, 169 insertions(+), 34 deletions(-) create mode 100644 intTests/test2181/test.sh create mode 100644 intTests/test2181/test1.saw create mode 100644 intTests/test2181/test2.saw create mode 100644 intTests/test2181/test3.saw create mode 100644 intTests/test_parse_errors/.gitignore create mode 100644 intTests/test_parse_errors/lexer001.log.good create mode 100644 intTests/test_parse_errors/lexer001.saw create mode 100644 intTests/test_parse_errors/lexer002.log.good create mode 100644 intTests/test_parse_errors/lexer002.saw create mode 100644 intTests/test_parse_errors/test.sh diff --git a/intTests/test2181/test.sh b/intTests/test2181/test.sh new file mode 100644 index 000000000..e4d5a3dac --- /dev/null +++ b/intTests/test2181/test.sh @@ -0,0 +1,5 @@ +set -e + +$SAW test1.saw +$SAW test2.saw +! $SAW test3.saw diff --git a/intTests/test2181/test1.saw b/intTests/test2181/test1.saw new file mode 100644 index 000000000..a2cc0ea49 --- /dev/null +++ b/intTests/test2181/test1.saw @@ -0,0 +1,2 @@ +/* // */ + diff --git a/intTests/test2181/test2.saw b/intTests/test2181/test2.saw new file mode 100644 index 000000000..aa294e3bb --- /dev/null +++ b/intTests/test2181/test2.saw @@ -0,0 +1,3 @@ +// comments do nest + +/* a /* b */ c */ diff --git a/intTests/test2181/test3.saw b/intTests/test2181/test3.saw new file mode 100644 index 000000000..7d5e04660 --- /dev/null +++ b/intTests/test2181/test3.saw @@ -0,0 +1 @@ +/* unclosed comment diff --git a/intTests/test_parse_errors/.gitignore b/intTests/test_parse_errors/.gitignore new file mode 100644 index 000000000..4619758de --- /dev/null +++ b/intTests/test_parse_errors/.gitignore @@ -0,0 +1,3 @@ +*.rawlog +*.log +*.diff diff --git a/intTests/test_parse_errors/lexer001.log.good b/intTests/test_parse_errors/lexer001.log.good new file mode 100644 index 000000000..8ca6fbb9a --- /dev/null +++ b/intTests/test_parse_errors/lexer001.log.good @@ -0,0 +1,2 @@ + Loading file "lexer001.saw" + lexer001.saw:1:47-1:47: Missing newline at end of file diff --git a/intTests/test_parse_errors/lexer001.saw b/intTests/test_parse_errors/lexer001.saw new file mode 100644 index 000000000..2e05ab5ec --- /dev/null +++ b/intTests/test_parse_errors/lexer001.saw @@ -0,0 +1 @@ +// line comment with no newline at end of file \ No newline at end of file diff --git a/intTests/test_parse_errors/lexer002.log.good b/intTests/test_parse_errors/lexer002.log.good new file mode 100644 index 000000000..c4d5b0e6a --- /dev/null +++ b/intTests/test_parse_errors/lexer002.log.good @@ -0,0 +1,5 @@ + Loading file "lexer002.saw" + Stack trace: +lexer002.saw:1:1-1:3: Unclosed block comment + +FAILED diff --git a/intTests/test_parse_errors/lexer002.saw b/intTests/test_parse_errors/lexer002.saw new file mode 100644 index 000000000..bde3ba771 --- /dev/null +++ b/intTests/test_parse_errors/lexer002.saw @@ -0,0 +1,3 @@ +/* + * Unclosed comment + * diff --git a/intTests/test_parse_errors/test.sh b/intTests/test_parse_errors/test.sh new file mode 100644 index 000000000..5fcd79d0a --- /dev/null +++ b/intTests/test_parse_errors/test.sh @@ -0,0 +1 @@ +exec ${TEST_SHELL:-bash} ../support/test-and-diff.sh "$@" diff --git a/saw/SAWScript/REPL/Command.hs b/saw/SAWScript/REPL/Command.hs index 6605fb37f..91f42373e 100644 --- a/saw/SAWScript/REPL/Command.hs +++ b/saw/SAWScript/REPL/Command.hs @@ -165,7 +165,13 @@ typeOfCmd :: String -> REPL () typeOfCmd str | null str = do io $ putStrLn $ "[error] :type requires an argument" | otherwise = - do let tokens = SAWScript.Lexer.lexSAW replFileName (Text.pack str) + do let (tokens, optmsg) = SAWScript.Lexer.lexSAW replFileName (Text.pack str) + case optmsg of + Nothing -> return () + Just (_vrb, pos, msg) -> do + -- XXX wrap printing of positions in the message-printing infrastructure + let msg' = show pos ++ ": " ++ Text.unpack msg + io $ putStrLn msg' expr <- case SAWScript.Parser.parseExpression tokens of Left err -> fail (show err) Right expr -> return expr @@ -247,7 +253,13 @@ caveats: we also hang onto the results and use them to seed the interpreter. -} sawScriptCmd :: String -> REPL () sawScriptCmd str = do - let tokens = SAWScript.Lexer.lexSAW replFileName (Text.pack str) + let (tokens, optmsg) = SAWScript.Lexer.lexSAW replFileName (Text.pack str) + case optmsg of + Nothing -> return () + Just (_vrb, pos, msg) -> do + -- XXX wrap printing of positions in the message-printing infrastructure + let msg' = show pos ++ ": " ++ Text.unpack msg + io $ putStrLn msg' case SAWScript.Parser.parseStmtSemi tokens of Left err -> io $ print err Right stmt -> diff --git a/src/SAWScript/Import.hs b/src/SAWScript/Import.hs index b25fd2627..5e1b800e2 100644 --- a/src/SAWScript/Import.hs +++ b/src/SAWScript/Import.hs @@ -12,26 +12,39 @@ module SAWScript.Import , findAndLoadFile ) where +import qualified Data.Text.IO as TextIO (readFile) +import qualified Data.Text as Text +import Control.Exception +import System.Directory +import SAWScript.Position (Pos) import SAWScript.AST import SAWScript.Lexer (lexSAW) import SAWScript.Options import SAWScript.Parser - -import qualified Data.Text.IO as TextIO (readFile) -import Data.Text (Text) -import System.Directory -import Control.Exception +import SAWScript.Token (Token) loadFile :: Options -> FilePath -> IO [Stmt] loadFile opts fname = do printOutLn opts Info $ "Loading file " ++ show fname ftext <- TextIO.readFile fname - either throwIO return (parseFile fname ftext) + let (tokens, optmsg) = lexSAW fname ftext + case optmsg of + Nothing -> return () + Just (vrb, pos, txt) -> do + -- XXX: the print functions should take care of printing the position + -- (clean this up when we clean out the printing infrastructure) + let txt' = show pos ++ ": " ++ Text.unpack txt + -- XXX: the print functions should also take care of exiting on an error + -- (immediately or later). For now, throw errors and print anything else. + case vrb of + Error -> throwIO $ userError txt' + _ -> printOutLn opts vrb txt' + either throwIO return (parseFile tokens) -parseFile :: FilePath -> Text -> Either ParseError [Stmt] -parseFile fname input = - case parseModule (lexSAW fname input) of +parseFile :: [Token Pos] -> Either ParseError [Stmt] +parseFile tokens = do + case parseModule tokens of Left err -> Left err Right stmts -> Right stmts diff --git a/src/SAWScript/Interpreter.hs b/src/SAWScript/Interpreter.hs index f8eabba88..e09427492 100644 --- a/src/SAWScript/Interpreter.hs +++ b/src/SAWScript/Interpreter.hs @@ -897,8 +897,18 @@ print_value v = do readSchema :: Text -> SS.Schema readSchema str = - case parseSchema (lexSAW "internal" str) of - Left err -> error (show err) + let croak what msg = + error (what ++ " error in builtin " ++ Text.unpack str ++ ": " ++ msg) + tokens = + let (tokens', optmsg) = lexSAW "internal" str in + -- XXX clean this up when we clean out the message printing infrastructure + case optmsg of + Just (Error, _pos, msg) -> croak "Lexer" $ Text.unpack msg + Just (_, _pos, _msg) -> tokens' -- ignore warnings for now + Nothing -> tokens' + in + case parseSchema tokens of + Left err -> croak "Parse" $ show err Right schema -> schema data PrimType diff --git a/src/SAWScript/Lexer.x b/src/SAWScript/Lexer.x index b702ca905..bd0ab252a 100644 --- a/src/SAWScript/Lexer.x +++ b/src/SAWScript/Lexer.x @@ -13,6 +13,7 @@ module SAWScript.Lexer , lexSAW ) where +import SAWScript.Options (Verbosity(..)) import SAWScript.Token import SAWScript.Panic (panic) import SAWScript.Position @@ -22,6 +23,7 @@ import Numeric (readInt) import Data.Char (ord) import qualified Data.Char as Char import Data.List +import Data.Text (Text) import qualified Data.Text as Text import Data.Text (Text) import Data.Word (Word8) @@ -38,7 +40,8 @@ $unispace = \x5 $uniother = \x6 $unitick = \x7 -$whitechar = [\ \t\n\r\f\v $unispace] +$whitechar = [\ \t\r\f\v $unispace] +$gapchar = [$whitechar \n] $special = [\(\)\,\;\[\]\`\{\}] $digit = 0-9 $large = [A-Z $uniupper] @@ -51,7 +54,7 @@ $octit = 0-7 $hexit = [0-9 A-F a-f] $idfirst = [$alpha \_] $idchar = [$alpha $digit $unidigit $unitick \' \_] -$codechar = [$graphic $whitechar] +$codechar = [$graphic $whitechar \n] @reservedid = import|and|let|rec|in|do|if|then|else|as|hiding|typedef |CryptolSetup|JavaSetup|LLVMSetup|MIRSetup|ProofScript|TopLevel|CrucibleSetup @@ -75,7 +78,7 @@ $cntrl = [$large \@\[\\\]\^\_] | SUB | ESC | FS | GS | RS | US | SP | DEL $charesc = [abfnrtv\\\"\'\&] @escape = \\ ($charesc | @ascii | @decimal | o @octal | x @hexadecimal) -@gap = \\ $whitechar+ \\ +@gap = \\ $gapchar+ \\ @string = $graphic # [\"\\] | " " | @escape | @gap @code = ($codechar # \}) | \} ($codechar # \}) @ctype = ($codechar # \|) | \| ($codechar # \}) @@ -83,10 +86,11 @@ $charesc = [abfnrtv\\\"\'\&] sawTokens :- -$white+ ; -"//".* ; -"/*" { plain TCmntS } -"*/" { plain TCmntE } +\n { plain TEOL } +"//" { plain TCommentL } +"/*" { plain TCommentS } +"*/" { plain TCommentE } +$whitechar+ ; @reservedid { plain TReserved } @punct { plain TPunct } @reservedop { plain TOp } @@ -252,23 +256,88 @@ scanTokens filename str = go (startPos, str) in act pos' text : go inp' +-- state for processing comments +data CommentState = CNone | CBlock Pos !Int | CLine + +-- this should cease to be needed once we clean out the message +-- printing infrastructure +type OptMsg = Maybe (Verbosity, Pos, Text) + -- postprocess to drop comments (this allows comments to be nested) -dropComments :: [Token Pos] -> [Token Pos] -dropComments = go 0 - where go :: Int -> [Token Pos] -> [Token Pos] - go _ [] = [] - go !i (TCmntS _ _ : ts) = go (i+1) ts - go !i (TCmntE _ _ : ts) - | i > 0 = go (i-1) ts - go !i (t : ts) - | i /= 0 = go i ts - | True = t : go i ts +dropComments :: [Token Pos] -> ([Token Pos], OptMsg) +dropComments = go CNone + where go :: CommentState -> [Token Pos] -> ([Token Pos], OptMsg) + go state ts = case ts of + [] -> + -- should always see TEOF before we hit the end + panic "Lexer" ["dropComments: tokens ran out"] + TEOF _ _ : _ : _ -> + -- should only see TEOF at the end of the list + panic "Lexer" ["dropComments: misplaced EOF in tokens"] + + t : ts' -> + let take state' = + let (results, optmsg) = go state' ts' in + (t : results, optmsg) + drop state' = go state' ts' + finish = ([t], Nothing) + finishWith msg = ([t], Just msg) + in + case state of + CNone -> case t of + TEOF _ _ -> + -- plain EOF + finish + TCommentS pos _ -> + -- open block-comment + drop (CBlock pos 0) + TCommentL _ _ -> + -- begin line-comment + drop CLine + TEOL _ _ -> + -- ordinary EOL, doesn't need to be seen downstream; drop it + drop CNone + _ -> + -- uncommented token, keep it + take CNone + + CBlock startpos depth -> case t of + TEOF pos _ -> + -- EOF in /**/-comment; unclosed, which is an error + finishWith (Error, startpos, "Unclosed block comment") + TCommentS _ _ -> + -- open nested comment, keep original start position + drop (CBlock startpos (depth + 1)) + TCommentE _ _ + | depth == 0 -> + -- end outer block comment + drop CNone + | otherwise -> + -- end nested block comment + drop (CBlock startpos (depth - 1)) + _ -> + -- anything else in a block comment, drop it + -- (this includes any TCommentLs that come through) + drop state + + CLine -> case t of + TEOF pos _ -> + -- EOF in //-comment; missing a newline + finishWith (Warn, pos, "Missing newline at end of file") + TEOL _ _ -> + -- EOL ending a line comment, drop it + -- (EOLs aren't sent downstream) + drop CNone + _ -> + -- anything else in a line comment, drop it + -- (this includes any TCommentS/TCommentE that appear) + drop CLine -- entry point -lexSAW :: FilePath -> Text -> [Token Pos] +lexSAW :: FilePath -> Text -> ([Token Pos], OptMsg) lexSAW f text = dropComments $ scanTokens f text -- alternate monadic entry point (XXX: does this have any value?) -scan :: Monad m => FilePath -> Text -> m [Token Pos] +scan :: Monad m => FilePath -> Text -> m ([Token Pos], OptMsg) scan f = return . lexSAW f } diff --git a/src/SAWScript/Token.hs b/src/SAWScript/Token.hs index 8a08f5392..c71138338 100644 --- a/src/SAWScript/Token.hs +++ b/src/SAWScript/Token.hs @@ -15,6 +15,9 @@ import SAWScript.Position (Positioned(..)) -- All tokens have tokStr carrying the matched text (even TEOF where -- it doesn't entirely make sense) so that tokStr can be applied to -- any variant of the type. +-- +-- The comment and EOL tokens are internal to the lexer and not seen +-- downstream. data Token p = TVar { tokPos :: p, tokStr :: Text } | TQVar { tokPos :: p, tokStr :: Text, tokVars :: ([Text], Text) } | TLit { tokPos :: p, tokStr :: Text } @@ -25,8 +28,10 @@ data Token p = TVar { tokPos :: p, tokStr :: Text | TReserved { tokPos :: p, tokStr :: Text } | TOp { tokPos :: p, tokStr :: Text } | TNum { tokPos :: p, tokStr :: Text, tokNum :: Integer } - | TCmntS { tokPos :: p, tokStr :: Text } - | TCmntE { tokPos :: p, tokStr :: Text } + | TCommentS { tokPos :: p, tokStr :: Text } + | TCommentE { tokPos :: p, tokStr :: Text } + | TCommentL { tokPos :: p, tokStr :: Text } + | TEOL { tokPos :: p, tokStr :: Text } | TEOF { tokPos :: p, tokStr :: Text } deriving (Show, Functor) From 1b942660e5872d659bb501ef4e486c00702b7e10 Mon Sep 17 00:00:00 2001 From: David Holland Date: Mon, 20 Jan 2025 19:39:10 -0500 Subject: [PATCH 2/4] Add some more tests pursuant to #2189 (interactions of comments and Cryptol blocks) Note: these reflect the current behavior, which as discussed in the issue is not particularly desirable. They should be updated when/if the behavior is improved. --- intTests/test_parse_errors/lexer003.log.good | 3 +++ intTests/test_parse_errors/lexer003.saw | 11 +++++++++++ intTests/test_parse_errors/lexer004.log.good | 3 +++ intTests/test_parse_errors/lexer004.saw | 11 +++++++++++ intTests/test_parse_errors/lexer005.log.good | 5 +++++ intTests/test_parse_errors/lexer005.saw | 11 +++++++++++ intTests/test_parse_errors/lexer006.log.good | 3 +++ intTests/test_parse_errors/lexer006.saw | 15 +++++++++++++++ 8 files changed, 62 insertions(+) create mode 100644 intTests/test_parse_errors/lexer003.log.good create mode 100644 intTests/test_parse_errors/lexer003.saw create mode 100644 intTests/test_parse_errors/lexer004.log.good create mode 100644 intTests/test_parse_errors/lexer004.saw create mode 100644 intTests/test_parse_errors/lexer005.log.good create mode 100644 intTests/test_parse_errors/lexer005.saw create mode 100644 intTests/test_parse_errors/lexer006.log.good create mode 100644 intTests/test_parse_errors/lexer006.saw diff --git a/intTests/test_parse_errors/lexer003.log.good b/intTests/test_parse_errors/lexer003.log.good new file mode 100644 index 000000000..bc84b49cf --- /dev/null +++ b/intTests/test_parse_errors/lexer003.log.good @@ -0,0 +1,3 @@ + Loading file "lexer003.saw" + Parse error at lexer003.saw:9:20-9:22: Unexpected `*/' +FAILED diff --git a/intTests/test_parse_errors/lexer003.saw b/intTests/test_parse_errors/lexer003.saw new file mode 100644 index 000000000..793c5dda6 --- /dev/null +++ b/intTests/test_parse_errors/lexer003.saw @@ -0,0 +1,11 @@ +// Currently comment delimiters in Cryptol blocks are not recognized +// by the saw-script lexer; they're treated as part of the block and +// handled by the Cryptol lexer downstream. This has some odd +// consequences. See issue #2189. +// +// This currently fails with an unexpected end-of-comment because the +// open-comment isn't seen. + +let x = {{ /* 3 }} */; + + diff --git a/intTests/test_parse_errors/lexer004.log.good b/intTests/test_parse_errors/lexer004.log.good new file mode 100644 index 000000000..b67019d2b --- /dev/null +++ b/intTests/test_parse_errors/lexer004.log.good @@ -0,0 +1,3 @@ + Loading file "lexer004.saw" + Parse error at lexer004.saw:9:20-9:21: Unexpected `+' +FAILED diff --git a/intTests/test_parse_errors/lexer004.saw b/intTests/test_parse_errors/lexer004.saw new file mode 100644 index 000000000..30d345b64 --- /dev/null +++ b/intTests/test_parse_errors/lexer004.saw @@ -0,0 +1,11 @@ +// Currently comment delimiters in Cryptol blocks are not recognized +// by the saw-script lexer; they're treated as part of the block and +// handled by the Cryptol lexer downstream. This has some odd +// consequences. See issue #2189. +// +// This currently fails with "unexpected +" because the // isn't +// seen. + +let x = {{ // 3 }} + 4; + + diff --git a/intTests/test_parse_errors/lexer005.log.good b/intTests/test_parse_errors/lexer005.log.good new file mode 100644 index 000000000..9390d8fdc --- /dev/null +++ b/intTests/test_parse_errors/lexer005.log.good @@ -0,0 +1,5 @@ + Loading file "lexer005.saw" + Stack trace: +lexer005.saw:9:9-9:11: Unclosed block comment + +FAILED diff --git a/intTests/test_parse_errors/lexer005.saw b/intTests/test_parse_errors/lexer005.saw new file mode 100644 index 000000000..a2c66ef8e --- /dev/null +++ b/intTests/test_parse_errors/lexer005.saw @@ -0,0 +1,11 @@ +// Currently comment delimiters in Cryptol blocks are not recognized +// by the saw-script lexer; they're treated as part of the block and +// handled by the Cryptol lexer downstream. This has some odd +// consequences. See issue #2189. +// +// This currently fails with an unclosed block comment, because the +// end-comment inside the Cryptol block isn't matched. + +let x = /* {{ */ 2 }}; + + diff --git a/intTests/test_parse_errors/lexer006.log.good b/intTests/test_parse_errors/lexer006.log.good new file mode 100644 index 000000000..82efbab57 --- /dev/null +++ b/intTests/test_parse_errors/lexer006.log.good @@ -0,0 +1,3 @@ + Loading file "lexer006.saw" + Parse error at lexer006.saw:12:1-12:2: Unexpected `+' +FAILED diff --git a/intTests/test_parse_errors/lexer006.saw b/intTests/test_parse_errors/lexer006.saw new file mode 100644 index 000000000..113e20f3c --- /dev/null +++ b/intTests/test_parse_errors/lexer006.saw @@ -0,0 +1,15 @@ +// Currently comment delimiters in Cryptol blocks are not recognized +// by the saw-script lexer; they're treated as part of the block and +// handled by the Cryptol lexer downstream. This has some odd +// consequences. See issue #2189. +// +// This currently fails with "unexpected +" because the whole {{ }} +// is commented out and the comment only ends after _then_ seeing a +// newline. + +let x = 3 // {{ +}} * ++ 5; + + + From 90e186710499dc61ae40ce31f66ec5807d3bd28b Mon Sep 17 00:00:00 2001 From: David Holland Date: Wed, 22 Jan 2025 16:00:56 -0500 Subject: [PATCH 3/4] lexer: Add a bunch of comments --- src/SAWScript/Lexer.x | 25 +++++++++++++++++++++---- src/SAWScript/Token.hs | 22 ++++++++++++++++++++++ 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/src/SAWScript/Lexer.x b/src/SAWScript/Lexer.x index bd0ab252a..6275cd888 100644 --- a/src/SAWScript/Lexer.x +++ b/src/SAWScript/Lexer.x @@ -256,14 +256,31 @@ scanTokens filename str = go (startPos, str) in act pos' text : go inp' --- state for processing comments +-- | State for processing comments. +-- +-- CNone is the ground state (not in a comment) +-- +-- CBlock startpos n is when we're within a block comment starting at +-- startpos, and n is the number of additional nested block comments +-- that have been opened. +-- +-- CLine is when we're in a line (//-type) comment. data CommentState = CNone | CBlock Pos !Int | CLine --- this should cease to be needed once we clean out the message --- printing infrastructure +-- | Type to hold a diagnostic message (error or warning). +-- +-- This should cease to be needed once we clean out the message +-- printing infrastructure. It's here only to shorten the type +-- signatures below, and represents the possibility of having +-- generated a message. type OptMsg = Maybe (Verbosity, Pos, Text) --- postprocess to drop comments (this allows comments to be nested) +-- | Postprocess to drop comments; this allows block comments to nest. +-- +-- Also returns a message (error or warning) along with the updated +-- token list if there's an unclosed comment when we reach EOF. Since +-- we aren't in a monad here and can't print, dispatching the message +-- is the caller's responsibility. dropComments :: [Token Pos] -> ([Token Pos], OptMsg) dropComments = go CNone where go :: CommentState -> [Token Pos] -> ([Token Pos], OptMsg) diff --git a/src/SAWScript/Token.hs b/src/SAWScript/Token.hs index c71138338..ccb914bac 100644 --- a/src/SAWScript/Token.hs +++ b/src/SAWScript/Token.hs @@ -12,12 +12,34 @@ import Data.Text (Text) import SAWScript.Position (Positioned(..)) +-- | Lexer tokens for saw-script. +-- -- All tokens have tokStr carrying the matched text (even TEOF where -- it doesn't entirely make sense) so that tokStr can be applied to -- any variant of the type. -- -- The comment and EOL tokens are internal to the lexer and not seen -- downstream. +-- +-- The tokens are: +-- TVar variable/identifier +-- TQVar qualified variable/identifier +-- TLit string constant +-- TCode Cryptol code block +-- TCType Cryptol type bllock +-- TUnknown Anything else that doesn't fit +-- TPunct Punctuation tokens used in the grammar +-- TReserved All reserved words +-- TOp Punctuation tokens apparently not used in the grammar +-- TNum Number +-- TCommentS Start of a block comment +-- TCommentE End of a block comment +-- TCommentL Start of a line comment +-- EOL End of line +-- EOF End of file/input +-- +-- FUTURE: many of these could stand to be renamed +-- data Token p = TVar { tokPos :: p, tokStr :: Text } | TQVar { tokPos :: p, tokStr :: Text, tokVars :: ([Text], Text) } | TLit { tokPos :: p, tokStr :: Text } From 1595f041ba57bbe44e352729fdeadfc7d3d2cb75 Mon Sep 17 00:00:00 2001 From: David Holland Date: Wed, 22 Jan 2025 16:17:37 -0500 Subject: [PATCH 4/4] Typo in previous --- src/SAWScript/Token.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SAWScript/Token.hs b/src/SAWScript/Token.hs index ccb914bac..da96034d6 100644 --- a/src/SAWScript/Token.hs +++ b/src/SAWScript/Token.hs @@ -35,8 +35,8 @@ import SAWScript.Position (Positioned(..)) -- TCommentS Start of a block comment -- TCommentE End of a block comment -- TCommentL Start of a line comment --- EOL End of line --- EOF End of file/input +-- TEOL End of line +-- TEOF End of file/input -- -- FUTURE: many of these could stand to be renamed --