-
Notifications
You must be signed in to change notification settings - Fork 147
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WIP] feat(std): replace parser lib with more generic version #687
Open
Etherian
wants to merge
8
commits into
gluon-lang:master
Choose a base branch
from
Etherian:new-parser
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 1 commit
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
9ccf284
feat(std): add indexable and streamlike; replace parser
Etherian 84067c2
Merge branch 'master' into new-parser
Etherian db99e31
style(std): cleaned up statet.glu
Etherian adac02b
feat(std): added Alternative implementation for Result
Etherian 390a59f
feat(std): added stuff to parser.glu
Etherian 8abc820
remove indexable and streamlike from parser branch
Etherian 48d845b
fix(std): fixed Result's Alternative implementation
Etherian c738f6c
fix(std): fix Result's Alternative implementation again
Etherian File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
type Indexable ind = { | ||
index : ind a -> Int -> a | ||
} | ||
|
||
let index ?ind : [Indexable ind] -> ind a -> Int -> a = ind.index | ||
|
||
{ | ||
Indexable, | ||
|
||
index, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,295 +1,109 @@ | ||
//! A simple parser combinator library. | ||
//! A simple parser combinator library | ||
|
||
let prelude = import! std.prelude | ||
let { Functor, Applicative, Alternative, Monad } = prelude | ||
let { id, flip } = import! std.function | ||
let { Functor, Applicative, Alternative, Monad, (<>) } = prelude | ||
let { id, flip, (>>), (<<), (|>), (<|) } = import! std.function | ||
|
||
let { Bool } = import! std.bool | ||
let char @ { ? } = import! std.char | ||
let char @ { Char, ? } = import! std.char | ||
let int = import! std.int | ||
let { Result } = import! std.result | ||
let string = import! std.string | ||
let { (<>) } = import! std.prelude | ||
let list @ { List } = import! std.list | ||
let { Option } = import! std.option | ||
|
||
type OffsetString = { start : Int, end : Int, buffer : String } | ||
type Position = Int | ||
type Error = { position : Position, message : String } | ||
type ParseResult a = Result Error { value : a, buffer : OffsetString } | ||
|
||
/// `Parser` is a monad which parses a `String` into structured values | ||
type Parser a = | ||
OffsetString -> ParseResult a | ||
|
||
let parser : Parser a -> Parser a = id | ||
|
||
let functor : Functor Parser = { | ||
map = \f m -> parser (\buffer -> | ||
let result = parser m buffer | ||
match result with | ||
| Ok a -> Ok { value = f a.value, buffer = a.buffer } | ||
| Err err -> Err err) | ||
} | ||
|
||
let { map } = functor | ||
|
||
let applicative : Applicative Parser = { | ||
functor, | ||
|
||
apply = \f m -> parser (\buffer -> | ||
let result1 = parser f buffer | ||
match result1 with | ||
| Ok g -> | ||
let result2 = parser m g.buffer | ||
match result2 with | ||
| Ok a -> Ok { value = g.value a.value, buffer = a.buffer } | ||
| Err err -> Err err | ||
| Err err -> Err err), | ||
|
||
wrap = \value -> parser (\buffer -> Ok { value, buffer }), | ||
} | ||
|
||
let { (*>), (<*), wrap } = import! std.applicative | ||
|
||
let alternative : Alternative Parser = { | ||
applicative, | ||
|
||
or = \l r -> parser (\stream -> | ||
match parser l stream with | ||
| Ok a -> Ok a | ||
| Err _ -> parser r stream), | ||
empty = parser (\stream -> Err { position = stream.start, message = "empty" }), | ||
} | ||
|
||
let { (<|>) } = import! std.alternative | ||
|
||
let monad : Monad Parser = { | ||
applicative, | ||
|
||
flat_map = \f m -> parser (\buffer -> | ||
match parser m buffer with | ||
| Ok a -> parser (f a.value) a.buffer | ||
| Err err -> Err err), | ||
} | ||
|
||
let { flat_map } = import! std.monad | ||
|
||
let uncons stream : OffsetString -> Option { char : Char, rest : OffsetString } = | ||
if stream.start == stream.end then | ||
None | ||
else | ||
let c = string.char_at stream.buffer stream.start | ||
let char_len = char.len_utf8 c | ||
Some { | ||
char = c, | ||
rest = { | ||
start = stream.start + char_len, | ||
end = stream.end, | ||
buffer = stream.buffer, | ||
}, | ||
} | ||
|
||
let update_position c position : Char -> Position -> Position = | ||
position + char.len_utf8 c | ||
|
||
/// Returns `message` as what was expected by `p` | ||
#[infix(left, 0)] | ||
let (<?>) p message : Parser a -> String -> Parser a = | ||
parser (\stream -> | ||
match p stream with | ||
| Ok x -> Ok x | ||
| Err _ -> Err { position = stream.start, message }) | ||
|
||
/// Parses any character. Only errors if the stream is out of input | ||
let any : Parser Char = | ||
parser (\stream -> | ||
match uncons stream with | ||
| Some record -> | ||
let { char, rest } = record | ||
Ok { value = char, buffer = rest } | ||
| None -> Err { position = stream.start, message = "End of stream" }) | ||
|
||
/// Fails the parser with `message` as the cause | ||
let fail message : String -> Parser a = | ||
parser (\stream -> Err { position = stream.start, message }) | ||
|
||
/// Succeeds if `predicate` returns `Some`, fails if `None` is returned | ||
let satisfy_map predicate : (Char -> Option a) -> Parser a = | ||
let f c = | ||
match predicate c with | ||
| Some x -> wrap x | ||
| None -> fail ("Unexpected character " <> char.show.show c) | ||
flat_map f any | ||
|
||
/// Succeeds if `predicate` returns True, fails if `False` is returned | ||
let satisfy predicate : (Char -> Bool) -> Parser Char = | ||
satisfy_map (\c -> if predicate c then Some c else None) | ||
|
||
/// Succeeds if the next token is `expected` | ||
let token expected : Char -> Parser Char = | ||
satisfy (\c -> expected == c) | ||
|
||
/// Succeds if the next token is a letter | ||
let letter : Parser Char = satisfy char.is_alphabetic <?> "letter" | ||
|
||
/// Succeds if the next token is a digit | ||
let digit : Parser Char = satisfy (flip char.is_digit 10) <?> "digit" | ||
|
||
/// Succeds if the next token is alphanumeric | ||
let alpha_num : Parser Char = satisfy char.is_alphanumeric <?> "letter or digit" | ||
|
||
/// Succeds if the next token is a space | ||
let space : Parser Char = token ' ' | ||
|
||
/// Succeds if the next token is a tab | ||
let tab : Parser Char = token '\t' | ||
|
||
/// Parses one or more tokens passing `predicate` and returns the `String` between the start and | ||
/// end of those tokens | ||
let take1 predicate : (Char -> Bool) -> Parser String = | ||
parser (\stream -> | ||
let take_ stream2 = | ||
match uncons stream2 with | ||
| Some record -> | ||
if predicate record.char then take_ record.rest | ||
else if stream.start == stream2.start then | ||
Err { position = stream.start, message = "Unexpected token" } | ||
else Ok { | ||
value = string.slice stream.buffer stream.start stream2.start, | ||
buffer = stream2, | ||
} | ||
| None -> Ok { | ||
value = string.slice stream.buffer stream.start stream.end, | ||
buffer = stream2, | ||
} | ||
take_ stream) | ||
|
||
/// Parses zero or more tokens passing `predicate` and returns the `String` between the start and | ||
/// end of those tokens | ||
let take predicate : (Char -> Bool) -> Parser String = | ||
take1 predicate <|> wrap "" | ||
|
||
/// Parses using `p` and returns the `String` between the start and of what `p` parsed | ||
let recognize p : Parser a -> Parser String = | ||
parser (\stream -> | ||
match parser p stream with | ||
| Ok a -> | ||
Ok { | ||
value = string.slice stream.buffer stream.start a.buffer.start, | ||
buffer = a.buffer, | ||
} | ||
| Err err -> Err err) | ||
|
||
/// Skips over whitespace characters | ||
let spaces = take char.is_whitespace | ||
|
||
/// Creates a parser from a factory function. Useful to prevent mutually recursive parser from looping forever | ||
let lazy_parser f : (() -> Parser a) -> Parser a = | ||
parser (\stream -> f () stream) | ||
|
||
/// Parses `x` between `l` and `r`, returning the result of `x` | ||
let between l r x : Parser a -> Parser b -> Parser c -> Parser c = | ||
l *> x <* r | ||
|
||
rec | ||
/// Parses with `p` zero or more times | ||
let many p : Parser a -> Parser (List a) = | ||
many1 p <|> wrap Nil | ||
|
||
/// Parses with `p` one or more times | ||
let many1 p : Parser a -> Parser (List a) = | ||
do h = p | ||
map (\t -> Cons h t) (many p) | ||
in | ||
rec | ||
/// Parses with `p` zero or more times, ignoring the result of the parser | ||
let skip_many p : Parser a -> Parser () = | ||
skip_many1 p <|> wrap () | ||
/// Parses with `p` one or more times, ignoring the result of the parser | ||
let skip_many1 p : Parser a -> Parser () = | ||
seq p | ||
skip_many p | ||
in | ||
/// Parses one of the characters of `s` | ||
let one_of s : String -> Parser Char = | ||
satisfy (\first -> | ||
let len = string.len s | ||
let one_of_ i = | ||
if i == len then | ||
False | ||
let { Result } = import! std.result | ||
let streamlike @ { Streamlike, ? } = import! std.streamlike | ||
let string @ { String, ? } = import! std.string | ||
|
||
// TODO How handle atom streams generically? Stream-like interface? | ||
// TODO How handle error position generically? Zip stream with enumeration? | ||
type Parser s e a = StateT s (Result e) a | ||
// TODO How have Parser (Result Err) on surface but Streamer type (Option) internally? | ||
// How handle custom error types this way? Do I need to build mini failure lib? | ||
Etherian marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// use implicit argument that user fills in with error type and end case? | ||
let sr_to_psr end sr : e -> Streamer s a -> Parser s e a = | ||
sr >> \opt -> match opt with | ||
| Some next -> Ok next | ||
| None -> Err end | ||
|
||
// terrible idea? | ||
let any_atom : [Streamlike atm srm] -> Parser srm e atm = | ||
sr_to_psr ? uncons // TODO Error handling | ||
|
||
// TODO mixing stream processors and stream adapters does not work | ||
let take n : [Streamlike atm srm] -> Int -> Parser srm e () = | ||
\xs -> Ok {value = (), state = streamlike.take n xs} | ||
|
||
// TODO What error type should library functions return? | ||
Etherian marked this conversation as resolved.
Show resolved
Hide resolved
|
||
let satisfy_map pred : [Streamlike atm srm] -> (atm -> Option a) -> Parser srm e a = | ||
any_atom >>= \atm -> | ||
match pred atm with | ||
| Some a -> wrap a | ||
| None -> // TODO Error handling | ||
|
||
let satisfy pred : [Streamlike atm srm] -> (atm -> Bool) -> Parser srm e atm = | ||
satisfy_map (\c -> if pred c then Some c else None) | ||
|
||
let atom x : [Streamlike atm srm] -> [Eq atm] -> atm -> Parser srm e atm = | ||
satisfy <| (==) x | ||
|
||
rec let token ts : [Streamlike atm srm] -> [Streamlike atm tknsrm] -> [Eq atm] -> tknsrm -> Parser srm e atm = | ||
any_atom >>= \x -> | ||
match any_atom ts with | ||
| Ok {value = t, state = ts'} -> | ||
if x == t then | ||
map (\_ -> ts) (token ts') | ||
else | ||
let c = string.char_at s i | ||
if first == c then True | ||
else one_of_ (i + char.len_utf8 c) | ||
one_of_ 0) | ||
<|> fail ("Expected one of `" <> s <> "`") | ||
Err ? // TODO Error handling | ||
| Err ? -> wrap ts // TODO Error handling: needs to be EndOfStream error | ||
| Err e -> (\_ -> Err e) // unreachable? | ||
|
||
// end - matches end of stream | ||
// take_while1? | ||
// many | ||
// many1 | ||
// skip_many? | ||
// between | ||
// one_of | ||
// sep_by | ||
// sep_by1 | ||
// chain | ||
// chain1 | ||
// parse | ||
|
||
type Position = Int | ||
type OffsetString = { start : Position, end : Position, buffer : String } | ||
type StringParser a = Parser OffsetString ? a // TODO Error handling | ||
|
||
/// Parses at least one element of `parser` separated by `sep` | ||
let sep_by1 parser sep : Parser a -> Parser b -> Parser (List a) = | ||
do x = parser | ||
do xs = many (sep *> parser) | ||
wrap (Cons x xs) | ||
let eolsym c : Char -> Bool = | ||
elem c ['\r', '\n'] | ||
|
||
/// Parses `parser` separated by `sep` | ||
let sep_by parser sep : Parser a -> Parser b -> Parser (List a) = | ||
sep_by1 parser sep <|> wrap Nil | ||
let space : [Streamlike Char srm] -> Parser srm e Char = | ||
atom ' ' | ||
|
||
/// Like `sep_by1` but applies the function returned by `op` on the left fold of successive parses | ||
let chainl1 p op : Parser a -> Parser (a -> a -> a) -> Parser a = | ||
do l = p | ||
let rest x = | ||
( | ||
do f = op | ||
do r = p | ||
rest (f x r)) <|> wrap x | ||
rest l | ||
let eol : [Streamlike Char srm] -> Parser srm e Char = (atom '\r' *> atom '\n') <|> atom '\n' <|> atom '\r' | ||
|
||
/// Like `sep_by` but applies the function returned by `op` on the left fold of successive parses | ||
let chainl p op v : Parser a -> Parser (a -> a -> a) -> a -> Parser a = | ||
chainl1 p op <|> wrap v | ||
let whitespace : [Streamlike Char srm] -> Parser srm e Char = | ||
satisfy char.is_whitespace | ||
|
||
let letter : [Streamlike Char srm] -> Parser srm e Char = | ||
satisfy char.is_alphabetic | ||
|
||
/// Parses `input` using `p` | ||
let parse p input : Parser a -> String -> Result String a = | ||
match p { start = 0, end = string.len input, buffer = input } with | ||
| Ok ok -> Ok ok.value | ||
| Err err -> Err (int.show.show err.position <> ":" <> err.message) | ||
let digit : [Streamlike Char srm] -> Parser srm e Char = | ||
satisfy <| flip char.is_digit 10 | ||
|
||
{ | ||
Position, Error, ParseResult, Parser, | ||
let alphanum : [Streamlike Char srm] -> Parser srm e Char = | ||
letter <|> digit | ||
|
||
functor, applicative, alternative, monad, | ||
// spaces | ||
// whitespaces? | ||
// rest of line (all but line break)? | ||
|
||
parser, | ||
type ByteParser a = // TODO | ||
|
||
any, | ||
between, | ||
token, | ||
many, | ||
many1, | ||
satisfy, | ||
satisfy_map, | ||
spaces, | ||
take1, | ||
take, | ||
lazy_parser, | ||
fail, | ||
recognize, | ||
skip_many, | ||
skip_many1, | ||
one_of, | ||
sep_by, | ||
sep_by1, | ||
chainl1, | ||
chainl, | ||
(<?>), | ||
|
||
alpha_num, | ||
letter, | ||
digit, | ||
space, | ||
tab, | ||
// practice parser implementation | ||
type PsrErr = | ||
| ParseFailure String | ||
| EndOfStream | ||
|
||
parse, | ||
} | ||
type Parser a = Parser [Byte] PsrErr a |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would be nice to represent a position as an associated type however that does not work in gluon (yet). Could maybe parameterize
StreamLike
with a position argument though.