Skip to content

Commit

Permalink
Merge pull request #2191 from GaloisInc/2181-comments
Browse files Browse the repository at this point in the history
Fix line vs. block comment interaction in the saw-script lexer
  • Loading branch information
sauclovian-g authored Jan 23, 2025
2 parents bb7b50d + 1595f04 commit 97bcdf4
Show file tree
Hide file tree
Showing 23 changed files with 271 additions and 35 deletions.
5 changes: 5 additions & 0 deletions intTests/test2181/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
set -e

$SAW test1.saw
$SAW test2.saw
! $SAW test3.saw
2 changes: 2 additions & 0 deletions intTests/test2181/test1.saw
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/* // */

3 changes: 3 additions & 0 deletions intTests/test2181/test2.saw
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// comments do nest

/* a /* b */ c */
1 change: 1 addition & 0 deletions intTests/test2181/test3.saw
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/* unclosed comment
3 changes: 3 additions & 0 deletions intTests/test_parse_errors/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*.rawlog
*.log
*.diff
2 changes: 2 additions & 0 deletions intTests/test_parse_errors/lexer001.log.good
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Loading file "lexer001.saw"
lexer001.saw:1:47-1:47: Missing newline at end of file
1 change: 1 addition & 0 deletions intTests/test_parse_errors/lexer001.saw
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// line comment with no newline at end of file
5 changes: 5 additions & 0 deletions intTests/test_parse_errors/lexer002.log.good
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Loading file "lexer002.saw"
Stack trace:
lexer002.saw:1:1-1:3: Unclosed block comment

FAILED
3 changes: 3 additions & 0 deletions intTests/test_parse_errors/lexer002.saw
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/*
* Unclosed comment
*
3 changes: 3 additions & 0 deletions intTests/test_parse_errors/lexer003.log.good
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Loading file "lexer003.saw"
Parse error at lexer003.saw:9:20-9:22: Unexpected `*/'
FAILED
11 changes: 11 additions & 0 deletions intTests/test_parse_errors/lexer003.saw
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Currently comment delimiters in Cryptol blocks are not recognized
// by the saw-script lexer; they're treated as part of the block and
// handled by the Cryptol lexer downstream. This has some odd
// consequences. See issue #2189.
//
// This currently fails with an unexpected end-of-comment because the
// open-comment isn't seen.

let x = {{ /* 3 }} */;


3 changes: 3 additions & 0 deletions intTests/test_parse_errors/lexer004.log.good
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Loading file "lexer004.saw"
Parse error at lexer004.saw:9:20-9:21: Unexpected `+'
FAILED
11 changes: 11 additions & 0 deletions intTests/test_parse_errors/lexer004.saw
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Currently comment delimiters in Cryptol blocks are not recognized
// by the saw-script lexer; they're treated as part of the block and
// handled by the Cryptol lexer downstream. This has some odd
// consequences. See issue #2189.
//
// This currently fails with "unexpected +" because the // isn't
// seen.

let x = {{ // 3 }} + 4;


5 changes: 5 additions & 0 deletions intTests/test_parse_errors/lexer005.log.good
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Loading file "lexer005.saw"
Stack trace:
lexer005.saw:9:9-9:11: Unclosed block comment

FAILED
11 changes: 11 additions & 0 deletions intTests/test_parse_errors/lexer005.saw
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Currently comment delimiters in Cryptol blocks are not recognized
// by the saw-script lexer; they're treated as part of the block and
// handled by the Cryptol lexer downstream. This has some odd
// consequences. See issue #2189.
//
// This currently fails with an unclosed block comment, because the
// end-comment inside the Cryptol block isn't matched.

let x = /* {{ */ 2 }};


3 changes: 3 additions & 0 deletions intTests/test_parse_errors/lexer006.log.good
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Loading file "lexer006.saw"
Parse error at lexer006.saw:12:1-12:2: Unexpected `+'
FAILED
15 changes: 15 additions & 0 deletions intTests/test_parse_errors/lexer006.saw
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Currently comment delimiters in Cryptol blocks are not recognized
// by the saw-script lexer; they're treated as part of the block and
// handled by the Cryptol lexer downstream. This has some odd
// consequences. See issue #2189.
//
// This currently fails with "unexpected +" because the whole {{ }}
// is commented out and the comment only ends after _then_ seeing a
// newline.

let x = 3 // {{
}} *
+ 5;



1 change: 1 addition & 0 deletions intTests/test_parse_errors/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
exec ${TEST_SHELL:-bash} ../support/test-and-diff.sh "$@"
16 changes: 14 additions & 2 deletions saw/SAWScript/REPL/Command.hs
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,13 @@ typeOfCmd :: String -> REPL ()
typeOfCmd str
| null str = do io $ putStrLn $ "[error] :type requires an argument"
| otherwise =
do let tokens = SAWScript.Lexer.lexSAW replFileName (Text.pack str)
do let (tokens, optmsg) = SAWScript.Lexer.lexSAW replFileName (Text.pack str)
case optmsg of
Nothing -> return ()
Just (_vrb, pos, msg) -> do
-- XXX wrap printing of positions in the message-printing infrastructure
let msg' = show pos ++ ": " ++ Text.unpack msg
io $ putStrLn msg'
expr <- case SAWScript.Parser.parseExpression tokens of
Left err -> fail (show err)
Right expr -> return expr
Expand Down Expand Up @@ -247,7 +253,13 @@ caveats:
we also hang onto the results and use them to seed the interpreter. -}
sawScriptCmd :: String -> REPL ()
sawScriptCmd str = do
let tokens = SAWScript.Lexer.lexSAW replFileName (Text.pack str)
let (tokens, optmsg) = SAWScript.Lexer.lexSAW replFileName (Text.pack str)
case optmsg of
Nothing -> return ()
Just (_vrb, pos, msg) -> do
-- XXX wrap printing of positions in the message-printing infrastructure
let msg' = show pos ++ ": " ++ Text.unpack msg
io $ putStrLn msg'
case SAWScript.Parser.parseStmtSemi tokens of
Left err -> io $ print err
Right stmt ->
Expand Down
31 changes: 22 additions & 9 deletions src/SAWScript/Import.hs
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,39 @@ module SAWScript.Import
, findAndLoadFile
) where

import qualified Data.Text.IO as TextIO (readFile)
import qualified Data.Text as Text
import Control.Exception
import System.Directory

import SAWScript.Position (Pos)
import SAWScript.AST
import SAWScript.Lexer (lexSAW)
import SAWScript.Options
import SAWScript.Parser

import qualified Data.Text.IO as TextIO (readFile)
import Data.Text (Text)
import System.Directory
import Control.Exception
import SAWScript.Token (Token)

loadFile :: Options -> FilePath -> IO [Stmt]
loadFile opts fname = do
printOutLn opts Info $ "Loading file " ++ show fname
ftext <- TextIO.readFile fname
either throwIO return (parseFile fname ftext)
let (tokens, optmsg) = lexSAW fname ftext
case optmsg of
Nothing -> return ()
Just (vrb, pos, txt) -> do
-- XXX: the print functions should take care of printing the position
-- (clean this up when we clean out the printing infrastructure)
let txt' = show pos ++ ": " ++ Text.unpack txt
-- XXX: the print functions should also take care of exiting on an error
-- (immediately or later). For now, throw errors and print anything else.
case vrb of
Error -> throwIO $ userError txt'
_ -> printOutLn opts vrb txt'
either throwIO return (parseFile tokens)

parseFile :: FilePath -> Text -> Either ParseError [Stmt]
parseFile fname input =
case parseModule (lexSAW fname input) of
parseFile :: [Token Pos] -> Either ParseError [Stmt]
parseFile tokens = do
case parseModule tokens of
Left err -> Left err
Right stmts -> Right stmts

Expand Down
14 changes: 12 additions & 2 deletions src/SAWScript/Interpreter.hs
Original file line number Diff line number Diff line change
Expand Up @@ -897,8 +897,18 @@ print_value v = do

readSchema :: Text -> SS.Schema
readSchema str =
case parseSchema (lexSAW "internal" str) of
Left err -> error (show err)
let croak what msg =
error (what ++ " error in builtin " ++ Text.unpack str ++ ": " ++ msg)
tokens =
let (tokens', optmsg) = lexSAW "internal" str in
-- XXX clean this up when we clean out the message printing infrastructure
case optmsg of
Just (Error, _pos, msg) -> croak "Lexer" $ Text.unpack msg
Just (_, _pos, _msg) -> tokens' -- ignore warnings for now
Nothing -> tokens'
in
case parseSchema tokens of
Left err -> croak "Parse" $ show err
Right schema -> schema

data PrimType
Expand Down
126 changes: 106 additions & 20 deletions src/SAWScript/Lexer.x
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ module SAWScript.Lexer
, lexSAW
) where

import SAWScript.Options (Verbosity(..))
import SAWScript.Token
import SAWScript.Panic (panic)
import SAWScript.Position
Expand All @@ -22,6 +23,7 @@ import Numeric (readInt)
import Data.Char (ord)
import qualified Data.Char as Char
import Data.List
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text (Text)
import Data.Word (Word8)
Expand All @@ -38,7 +40,8 @@ $unispace = \x5
$uniother = \x6
$unitick = \x7

$whitechar = [\ \t\n\r\f\v $unispace]
$whitechar = [\ \t\r\f\v $unispace]
$gapchar = [$whitechar \n]
$special = [\(\)\,\;\[\]\`\{\}]
$digit = 0-9
$large = [A-Z $uniupper]
Expand All @@ -51,7 +54,7 @@ $octit = 0-7
$hexit = [0-9 A-F a-f]
$idfirst = [$alpha \_]
$idchar = [$alpha $digit $unidigit $unitick \' \_]
$codechar = [$graphic $whitechar]
$codechar = [$graphic $whitechar \n]

@reservedid = import|and|let|rec|in|do|if|then|else|as|hiding|typedef
|CryptolSetup|JavaSetup|LLVMSetup|MIRSetup|ProofScript|TopLevel|CrucibleSetup
Expand All @@ -75,18 +78,19 @@ $cntrl = [$large \@\[\\\]\^\_]
| SUB | ESC | FS | GS | RS | US | SP | DEL
$charesc = [abfnrtv\\\"\'\&]
@escape = \\ ($charesc | @ascii | @decimal | o @octal | x @hexadecimal)
@gap = \\ $whitechar+ \\
@gap = \\ $gapchar+ \\
@string = $graphic # [\"\\] | " " | @escape | @gap
@code = ($codechar # \}) | \} ($codechar # \})
@ctype = ($codechar # \|) | \| ($codechar # \})
@num = @decimal | 0[bB] @binary | 0[oO] @octal | 0[xX] @hexadecimal

sawTokens :-

$white+ ;
"//".* ;
"/*" { plain TCmntS }
"*/" { plain TCmntE }
\n { plain TEOL }
"//" { plain TCommentL }
"/*" { plain TCommentS }
"*/" { plain TCommentE }
$whitechar+ ;
@reservedid { plain TReserved }
@punct { plain TPunct }
@reservedop { plain TOp }
Expand Down Expand Up @@ -252,23 +256,105 @@ scanTokens filename str = go (startPos, str)
in
act pos' text : go inp'

-- postprocess to drop comments (this allows comments to be nested)
dropComments :: [Token Pos] -> [Token Pos]
dropComments = go 0
where go :: Int -> [Token Pos] -> [Token Pos]
go _ [] = []
go !i (TCmntS _ _ : ts) = go (i+1) ts
go !i (TCmntE _ _ : ts)
| i > 0 = go (i-1) ts
go !i (t : ts)
| i /= 0 = go i ts
| True = t : go i ts
-- | State for processing comments.
--
-- CNone is the ground state (not in a comment)
--
-- CBlock startpos n is when we're within a block comment starting at
-- startpos, and n is the number of additional nested block comments
-- that have been opened.
--
-- CLine is when we're in a line (//-type) comment.
data CommentState = CNone | CBlock Pos !Int | CLine

-- | Type to hold a diagnostic message (error or warning).
--
-- This should cease to be needed once we clean out the message
-- printing infrastructure. It's here only to shorten the type
-- signatures below, and represents the possibility of having
-- generated a message.
type OptMsg = Maybe (Verbosity, Pos, Text)

-- | Postprocess to drop comments; this allows block comments to nest.
--
-- Also returns a message (error or warning) along with the updated
-- token list if there's an unclosed comment when we reach EOF. Since
-- we aren't in a monad here and can't print, dispatching the message
-- is the caller's responsibility.
dropComments :: [Token Pos] -> ([Token Pos], OptMsg)
dropComments = go CNone
where go :: CommentState -> [Token Pos] -> ([Token Pos], OptMsg)
go state ts = case ts of
[] ->
-- should always see TEOF before we hit the end
panic "Lexer" ["dropComments: tokens ran out"]
TEOF _ _ : _ : _ ->
-- should only see TEOF at the end of the list
panic "Lexer" ["dropComments: misplaced EOF in tokens"]

t : ts' ->
let take state' =
let (results, optmsg) = go state' ts' in
(t : results, optmsg)
drop state' = go state' ts'
finish = ([t], Nothing)
finishWith msg = ([t], Just msg)
in
case state of
CNone -> case t of
TEOF _ _ ->
-- plain EOF
finish
TCommentS pos _ ->
-- open block-comment
drop (CBlock pos 0)
TCommentL _ _ ->
-- begin line-comment
drop CLine
TEOL _ _ ->
-- ordinary EOL, doesn't need to be seen downstream; drop it
drop CNone
_ ->
-- uncommented token, keep it
take CNone

CBlock startpos depth -> case t of
TEOF pos _ ->
-- EOF in /**/-comment; unclosed, which is an error
finishWith (Error, startpos, "Unclosed block comment")
TCommentS _ _ ->
-- open nested comment, keep original start position
drop (CBlock startpos (depth + 1))
TCommentE _ _
| depth == 0 ->
-- end outer block comment
drop CNone
| otherwise ->
-- end nested block comment
drop (CBlock startpos (depth - 1))
_ ->
-- anything else in a block comment, drop it
-- (this includes any TCommentLs that come through)
drop state

CLine -> case t of
TEOF pos _ ->
-- EOF in //-comment; missing a newline
finishWith (Warn, pos, "Missing newline at end of file")
TEOL _ _ ->
-- EOL ending a line comment, drop it
-- (EOLs aren't sent downstream)
drop CNone
_ ->
-- anything else in a line comment, drop it
-- (this includes any TCommentS/TCommentE that appear)
drop CLine

-- entry point
lexSAW :: FilePath -> Text -> [Token Pos]
lexSAW :: FilePath -> Text -> ([Token Pos], OptMsg)
lexSAW f text = dropComments $ scanTokens f text

-- alternate monadic entry point (XXX: does this have any value?)
scan :: Monad m => FilePath -> Text -> m [Token Pos]
scan :: Monad m => FilePath -> Text -> m ([Token Pos], OptMsg)
scan f = return . lexSAW f
}
Loading

0 comments on commit 97bcdf4

Please sign in to comment.