-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTokenise.hs
208 lines (168 loc) · 6.4 KB
/
Tokenise.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
{-# LANGUAGE FlexibleContexts #-}
module Tokenise (tokenise,
public,
let_,
in_,
case_,
of_,
equals,
rightArrow,
plus,
minus,
asterisk,
slash,
leftAngle,
rightAngle,
leftAngleEquals,
rightAngleEquals,
bangEquals,
doubleEquals,
comma,
openParen,
closeParen,
ident,
underscore,
number,
eol) where
import Data.Functor.Identity (Identity)
import Text.Parsec
data Token = Public
| Let
| In
| Case
| Of
| Equals
| RightArrow
| Plus
| Minus
| Asterisk
| Slash
| LeftAngle
| RightAngle
| LeftAngleEquals
| RightAngleEquals
| BangEquals
| DoubleEquals
| Comma
| OpenParan
| CloseParan
| Underscore
| Ident String
| Num Integer
| EOL
| EOF
deriving (Eq, Show)
data TokenPos = TokenPos Token SourcePos
tokenise :: String -> Either String [TokenPos]
tokenise input = case parse tokeniser "" input of
Right r -> Right r
Left e -> Left $ show e
tokeniser =
do skipWSOrComment
toks <- many $ do tok <- choice [try $ symStr "public" Public,
try $ symStr "let" Let,
try $ symStr "in" In,
try $ symStr "case" Case,
try $ symStr "of" Of,
try ident_tok,
try number_tok,
try $ symStr "->" RightArrow,
try $ symStr "<=" LeftAngleEquals,
try $ symStr ">=" RightAngleEquals,
try $ symStr "!=" BangEquals,
try $ symStr "==" DoubleEquals,
sym '=' Equals,
sym '+' Plus,
sym '-' Minus,
sym '*' Asterisk,
sym '/' Slash,
sym '<' LeftAngle,
sym '>' RightAngle,
sym ',' Comma,
sym '(' OpenParan,
sym ')' CloseParan,
sym '_' Underscore,
sym '\n' EOL]
skipWSOrComment
return tok
eof
return toks
ident_tok = do first <- letter
rest <- many (letter <|> (char '_') <|> digit)
pos <- getPosition
return $ (TokenPos (Ident (first:rest)) pos)
number_tok = do num <- many1 digit
pos <- getPosition
return $ (TokenPos (Num (read num)) pos)
skipWSOrComment = skipMany (ws <|> comment)
where ws = oneOf " \t" >> return ()
comment = try $ do _ <- string "--"
_ <- manyTill anyChar endOfLine
return ()
symStr str symbol = do _ <- string str
notFollowedBy (alphaNum <|> char '_')
pos <- getPosition
return $ TokenPos symbol pos
sym :: Stream s m Char => Char -> Token -> ParsecT s u m TokenPos
sym c s = do _ <- char c
pos <- getPosition
return $ TokenPos s pos
----
hlToken t = token showToken nextPos testTok
where testTok (TokenPos x _) = if x == t then Just () else Nothing
public :: Stream s Identity TokenPos => Parsec s u ()
public = hlToken Public
let_ :: Stream s Identity TokenPos => Parsec s u ()
let_ = hlToken Let
in_ :: Stream s Identity TokenPos => Parsec s u ()
in_ = hlToken In
case_ :: Stream s Identity TokenPos => Parsec s u ()
case_ = hlToken Case
of_ :: Stream s Identity TokenPos => Parsec s u ()
of_ = hlToken Of
equals :: Stream s Identity TokenPos => Parsec s u ()
equals = hlToken Equals
rightArrow :: Stream s Identity TokenPos => Parsec s u ()
rightArrow = hlToken RightArrow
plus :: Stream s Identity TokenPos => Parsec s u ()
plus = hlToken Plus
minus :: Stream s Identity TokenPos => Parsec s u ()
minus = hlToken Minus
asterisk :: Stream s Identity TokenPos => Parsec s u ()
asterisk = hlToken Asterisk
slash :: Stream s Identity TokenPos => Parsec s u ()
slash = hlToken Slash
leftAngle :: Stream s Identity TokenPos => Parsec s u ()
leftAngle = hlToken LeftAngle
rightAngle :: Stream s Identity TokenPos => Parsec s u ()
rightAngle = hlToken RightAngle
leftAngleEquals :: Stream s Identity TokenPos => Parsec s u ()
leftAngleEquals = hlToken LeftAngleEquals
rightAngleEquals :: Stream s Identity TokenPos => Parsec s u ()
rightAngleEquals = hlToken RightAngleEquals
bangEquals :: Stream s Identity TokenPos => Parsec s u ()
bangEquals = hlToken BangEquals
doubleEquals :: Stream s Identity TokenPos => Parsec s u ()
doubleEquals = hlToken DoubleEquals
comma :: Stream s Identity TokenPos => Parsec s u ()
comma = hlToken Comma
openParen :: Stream s Identity TokenPos => Parsec s u ()
openParen = hlToken OpenParan
closeParen :: Stream s Identity TokenPos => Parsec s u ()
closeParen = hlToken CloseParan
underscore :: Stream s Identity TokenPos => Parsec s u ()
underscore = hlToken Underscore
ident :: Stream s Identity TokenPos => Parsec s u String
ident = token showToken nextPos testIdent
where testIdent (TokenPos (Ident str) _) = Just str
testIdent _ = Nothing
number :: Stream s Identity TokenPos => Parsec s u Integer
number = token showToken nextPos testNumber
where testNumber (TokenPos (Num num) _) = Just num
testNumber _ = Nothing
eol :: Stream s Identity TokenPos => Parsec s u ()
eol = hlToken EOL
showToken (TokenPos t _) = show t
nextPos (TokenPos _ p) = p
instance Show TokenPos where
show (TokenPos t _) = show t