Skip to content

Commit

Permalink
Merge branch 'chanbengz:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
Jaredanwolfgang authored Dec 3, 2024
2 parents 3150edb + a8bd9d1 commit d02e366
Show file tree
Hide file tree
Showing 15 changed files with 183 additions and 100 deletions.
12 changes: 6 additions & 6 deletions src/ast/src/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@ impl fmt::Display for Statement {
impl fmt::Display for Variable {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Variable::VarDeclaration(ident, values, dims) => write!(f, "Variable Declaration: {} = [{}] with dimensions [{}]",
ident,
values.iter().map(|v| format!("{}", v)).collect::<Vec<String>>().join(", "),
Variable::VarDeclaration(ident, value, dims) => write!(f, "Variable Declaration: {} = [{}] with dimensions [{}]",
ident,
value,
dims.iter().map(|d| d.to_string()).collect::<Vec<String>>().join(", ")),
Variable::MemberReference(ident, member) => write!(f, "{}.{}", ident, member),
Variable::FormalParameter(ident, values, dims) => write!(f, "Formal Parameter: {} = [{}] with dimensions [{}]",
ident,
values.iter().map(|v| format!("{}", v)).collect::<Vec<String>>().join(", "),
Variable::FormalParameter(ident, value, dims) => write!(f, "Formal Parameter: {} = [{}] with dimensions [{}]",
ident,
value,
dims.iter().map(|d| d.to_string()).collect::<Vec<String>>().join(", ")),
Variable::VarReference(ident, dims) => write!(f, "{}{}", ident,
dims.iter().map(|d| format!("[{}]", d)).collect::<Vec<String>>().join("")),
Expand Down
12 changes: 6 additions & 6 deletions src/ast/src/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ pub enum Variable {
// (identifier, values, dimensions)
// Variable can be used to declare a variable or reference a variable.
// Variable can be a single value or an array.
VarReference(Box<String>, Box<Vec<CompExpr>>),
VarDeclaration(Box<String>, Box<Vec<Value>>, Box<Vec<CompExpr>>),
VarAssignment(Box<String>, Box<CompExpr>, Box<Vec<CompExpr>>),
VarReference(Box<String>, Box<Vec<CompExpr>>), // varname, offsets
VarDeclaration(Box<String>, Box<Value>, Box<Vec<CompExpr>>), // varname, type, offsets
VarAssignment(Box<String>, Box<CompExpr>, Box<Vec<CompExpr>>), // varname, expr, offsets

// Struct is defined to realize object.
StructReference(Box<String>),
Expand All @@ -38,15 +38,15 @@ pub enum Variable {
StructAssignment(Box<String>, Box<String>, Box<CompExpr>),

MemberReference(Box<String>, Box<String>),
FormalParameter(Box<String>, Box<Vec<Value>>, Box<Vec<usize>>),
FormalParameter(Box<String>, Box<Value>, Box<Vec<usize>>),
Error
}

#[derive(Clone, Debug, PartialEq)]
pub enum Function {
// (identifier, input_params, output_params, body)
FuncReference(Box<String>, Box<Vec<Box<CompExpr>>>),
FuncDeclaration(Box<String>, Box<Vec<Variable>>, Box<Value>, Body),
FuncReference(Box<String>, Vec<Box<CompExpr>>),
FuncDeclaration(Box<String>, Vec<Variable>, Box<Value>, Body),
Error
}

Expand Down
4 changes: 3 additions & 1 deletion src/lexer/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ impl<'input> Iterator for Lexer<'input> {
.map(|(token, span)|
match token {
Ok(token) => Ok((span.start, token, span.end)),
Err(_) => Ok((span.start, Token::Error, span.end)),
Err(LexicalError::UnknownToken) => Ok((span.start, Token::Error, span.end)),
Err(LexicalError::InvalidCharacter(_)) => Ok((span.start, Token::Error, span.end)),
_ => Ok((span.start, Token::Error, span.end)),
})
}
}
8 changes: 5 additions & 3 deletions src/lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ mod test {
fn literals() {
assert_lex(
r#"
true false 0 42 0xDEAD 0Xdead 3.14 .12345 500.1 10.000 'f' '\u2764' "doge to the moon"
true false 0 42 0xDEAD 0Xdead 3.14 .12345 500.1 10.000 'f' "doge to the moon"
"#,
&[
(LiteralBool(true), "true"),
Expand All @@ -143,7 +143,6 @@ mod test {
(LiteralFloat(500.1), "500.1"),
(LiteralFloat(10.000), "10.000"),
(LiteralChar(char::from('f')), "'f'"),
(LiteralChar(char::from('❤')), r"'\u2764'"),
(LiteralString(String::from("doge to the moon")), r#""doge to the moon""#),
][..]
);
Expand Down Expand Up @@ -289,7 +288,7 @@ mod test {
#[test]
fn test_func() {
let source = r#"
int func(int a, int b) { if (a > b) { return a; } else { return b; } }"#;
int func(int a, int b) { int 0_wrong_id; if (a > b) { return a; } else { return b; } }"#;
assert_lex(source, &[
(TypeInt, "int"),
(Identifier(String::from("func")), "func"),
Expand All @@ -301,6 +300,9 @@ mod test {
(Identifier(String::from("b")), "b"),
(RightParen, ")"),
(LeftBrace, "{"),
(TypeInt, "int"),
(Invalid, "0_wrong_id"),
(Semicolon, ";"),
(KeywordIf, "if"),
(LeftParen, "("),
(Identifier(String::from("a")), "a"),
Expand Down
45 changes: 23 additions & 22 deletions src/lexer/src/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,28 +163,8 @@ pub enum Token {
#[regex(r"(0|[1-9][0-9]*)", |lex| lex.slice().parse::<u32>().unwrap())]
#[regex(r"0[xX][0-9a-fA-F]+", process_hex)]
LiteralInt(u32),
#[regex(r"'.'", |lex| {
let slice = lex.slice();
let c = slice.chars().nth(1).unwrap();
if c.is_ascii() {
Ok(c)
} else {
Err(LexicalError::NonAsciiCharacter)
}
})]
#[regex(r"'\\[xu][0-9a-fA-F]{1,6}'", |lex| {
let slice = lex.slice();
let hex_part = &slice[3..slice.len() - 1]; // Extract the hex part after \u
match u32::from_str_radix(hex_part, 16) {
Ok(u) => {
match std::char::from_u32(u) {
Some(c) => Ok(c),
None => return Err(LexicalError::InvalidCharacter(format!("Invalid Unicode character: {}", u))),
}
}
Err(e) => return Err(LexicalError::InvalidCharacter(format!("{:?}", e))),
}
})]

#[token("'", process_char)]
LiteralChar(char),
#[regex(r#""([^"\\]|\\["\\bnfrt]|\\x[0-9a-fA-F]{2}|\\u[a-fA-F0-9]{1,6})*""#, process_string)]
LiteralString(String),
Expand All @@ -199,9 +179,30 @@ pub enum Token {
BlockComment,

// error handling
#[regex(r"(0|[1-9][0-9]*)[a-zA-Z_][a-zA-Z0-9_]*")]
Invalid,
Error
}

/// Lexes the remainder of a character literal once the opening `'` has been matched.
///
/// Searches the unconsumed input for the next `'`, bumps the lexer past it so the
/// token span covers the whole literal (both quotes included), and decodes the
/// text found between the quotes.
///
/// Accepted literal contents:
/// * exactly one byte, i.e. a single ASCII character such as `'f'`;
/// * `\x` followed by hexadecimal digits whose value fits in a `u8`, e.g. `'\x41'`.
///
/// # Errors
/// * `LexicalError::InvalidCharacter` when the content is empty, is a non-ASCII
///   character, is a malformed or out-of-range `\x` escape, or is anything else
///   that is not one of the accepted forms.
/// * `LexicalError::UnexpectedEndOfProgram` when no closing `'` exists in the
///   remaining input (nothing is consumed in that case).
fn process_char(lex: &mut logos::Lexer<Token>) -> Result<char, LexicalError> {
    // `len` is the byte length of the text between the two quotes.
    let len = match lex.remainder().find('\'') {
        Some(len) => len,
        None => return Err(LexicalError::UnexpectedEndOfProgram),
    };

    // Consume the content plus the closing quote.
    lex.bump(len + 1);
    // Drop the opening quote (1 byte) and the closing quote.
    let slice = &lex.slice()[1..len + 1];

    if len == 1 {
        // A one-byte slice of valid UTF-8 is always a single ASCII character.
        return Ok(char::from(slice.as_bytes()[0]));
    }

    // `strip_prefix` never slices by raw byte index, so unlike `&slice[..2]` it
    // cannot panic on an empty literal (`''`) or on a multi-byte character
    // (`'❤'`) where byte 2 is not a char boundary.
    if let Some(hex) = slice.strip_prefix("\\x") {
        // `from_str_radix` tolerates a leading `+`; require pure hex digits so
        // that `'\x+5'` is rejected rather than silently accepted.
        let only_hex_digits = hex.bytes().all(|b| b.is_ascii_hexdigit());
        return match u8::from_str_radix(hex, 16) {
            Ok(byte) if only_hex_digits => Ok(char::from(byte)),
            _ => Err(LexicalError::InvalidCharacter(format!(
                "Invalid hexadecimal character: {}",
                slice
            ))),
        };
    }

    Err(LexicalError::InvalidCharacter(format!("Invalid character: {}", slice)))
}

fn process_hex(lex: &mut logos::Lexer<Token>) -> Result<u32, LexicalError> {
let slice = lex.slice();
let hex = &slice[2..];
Expand Down
29 changes: 22 additions & 7 deletions src/parser/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::fmt::format;
use spl_lexer::tokens::{Token, LexicalError};
use lalrpop_util::ErrorRecovery;

Expand All @@ -6,27 +7,41 @@ pub fn display_error(errors: &Vec<ErrorRecovery<usize, Token, LexicalError>>, in
let mut error_str = Vec::new();
for error in errors {
match &error.error {
lalrpop_util::ParseError::UnrecognizedToken { token, expected, .. } => {
lalrpop_util::ParseError::UnrecognizedToken { token, expected} => {
let expected_str = match &expected[0][1..expected[0].len() - 1] {
";" => "semicolon ';'",
")" => "closing parenthesis ')'",
_ => expected[0].as_str(),
};
let lineno = input[..token.0].lines().count() - ((input.as_bytes()[token.0 - 1] == 32) as usize);
let mut last = token.0;
while last > 0 && (input.as_bytes()[last - 1] == 32 || input.as_bytes()[last - 1] == 9) {
last -= 1;
}
let lineno = input[..token.0].lines().count() - ((input.as_bytes()[last - 1] == 10) as usize);
error_str.push((lineno, format!("Error type B at Line {}: Missing {}\n", lineno,
expected_str).to_owned()));
},
lalrpop_util::ParseError::User { error } => {
match error {
LexicalError::MissingLexeme(l, token, _) => {
let lineno = input[..*l].lines().count();
LexicalError::MissingLexeme(l, token, r) => {
let mut last = *l;
while last > 0 && (input.as_bytes()[last - 1] == 32 || input.as_bytes()[last - 1] == 9) {
last -= 1;
}
let expected_str = match token.as_str() {
"';'" => format!("semicolon {}", token),
"')'" => format!("closing parenthesis {}", token),
"Exp" => format!("{} after {}", token, input[last-1..last].to_owned()),
_ => token.to_owned(),
};
let lineno = input[..*r].lines().count() - ((input.as_bytes()[last - 1] == 10) as usize);
error_str.push((lineno, format!("Error type B at Line {}: Missing {}\n",
lineno, token).to_owned()));
lineno, expected_str).to_owned()));
},
LexicalError::UnknownLexeme(l, _) => {
LexicalError::UnknownLexeme(l, r) => {
let lineno = input[..*l].lines().count();
error_str.push((lineno, format!("Error type A at Line {}: unknown lexeme {}\n",
lineno, input.chars().nth(*l).unwrap())));
lineno, input[*l..*r].to_string()).to_owned()));
},
_ => {}
}
Expand Down
Loading

0 comments on commit d02e366

Please sign in to comment.