From 214b1c186f6a449aa98facb9ce9666d3ec3fdcc5 Mon Sep 17 00:00:00 2001
From: BreakingLead
Date: Sat, 7 Sep 2024 21:41:31 +0800
Subject: [PATCH] `[feat]` Completed lexical analysis
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/bin/main.mbt                | 274 +++++++++++-----------
 src/pass_lex/moon.pkg.json      |   3 +-
 src/pass_lex/pass_parser.mbt    |   3 -
 src/pass_lex/pass_tokenizer.mbt | 388 ++++++++++++++++++++++----------
 src/types/type_token.mbt        |  11 +-
 5 files changed, 415 insertions(+), 264 deletions(-)
 delete mode 100644 src/pass_lex/pass_parser.mbt

diff --git a/src/bin/main.mbt b/src/bin/main.mbt
index 65271c5..3dc8f20 100644
--- a/src/bin/main.mbt
+++ b/src/bin/main.mbt
@@ -5,151 +5,151 @@ fn main {
   @fs.write_to_string("run/tmp.out", result)
 }
 
-// fn main {
-//   let argv = @env.get_args()
-//   let mut file = None
-//   let no_run = Ref::new(false)
-//   let print_ast = Ref::new(false)
-//   let print_knf = Ref::new(false)
-//   let print_each_knf = Ref::new(false)
-//   let print_final_knf = Ref::new(false)
-//   let print_closure_ir = Ref::new(false)
-//   let print_asm = Ref::new(false)
-//   let knf_opt_iters = Ref::new(10)
-//   let run_with_interpreter = Ref::new(false)
-//   @ArgParser.parse(
-//     [
-//       (
-//         "--interpret",
-//         "-i",
-//         @ArgParser.Set(run_with_interpreter),
-//         "Run with interpreter",
-//       ),
-//       ("--print-ast", "", @ArgParser.Set(print_ast), "Print AST"),
-//       ("--no-run", "", @ArgParser.Set(no_run), "Do not run the program"),
-//       ("--print-knf", "", @ArgParser.Set(print_knf), "Print initial KNF"),
-//       (
-//         "--print-each-knf",
-//         "",
-//         @ArgParser.Set(print_each_knf),
-//         "Print each KNF in optimization",
-//       ),
-//       (
-//         "--print-final-knf",
-//         "",
-//         @ArgParser.Set(print_final_knf),
-//         "Print final KNF",
-//       ),
-//       (
-//         "--print-closure-ir",
-//         "",
-//         @ArgParser.Set(print_closure_ir),
-//         "Print closure IR",
-//       ),
-//       ("--print-asm", "", @ArgParser.Set(print_asm), "Print assembly"),
-//       (
-//         "--knf-opt-iters",
-//         "N",
-//         @ArgParser.String(
-//           fn(s) {
-//             let i = @strconv.parse_int?(s)
-//             match i {
-//               Ok(i) => knf_opt_iters.val = i
-//               Err(e) => @util.die("Invalid number")
-//             }
-//           },
-//         ),
-//         "Number of optimization iterations",
-//       ),
-//     ],
-//     fn(s) {
-//       if file.is_empty().not() {
-//         @util.die("multiple files are given")
-//       }
-//       file = Some(s)
-//     },
-//     "",
-//     argv,
-//   )
+fn main_w() -> Unit {
+  let argv = @env.get_args()
+  let mut file = None
+  let no_run = Ref::new(false)
+  let print_ast = Ref::new(false)
+  let print_knf = Ref::new(false)
+  let print_each_knf = Ref::new(false)
+  let print_final_knf = Ref::new(false)
+  let print_closure_ir = Ref::new(false)
+  let print_asm = Ref::new(false)
+  let knf_opt_iters = Ref::new(10)
+  let run_with_interpreter = Ref::new(false)
+  @ArgParser.parse(
+    [
+      (
+        "--interpret",
+        "-i",
+        @ArgParser.Set(run_with_interpreter),
+        "Run with interpreter",
+      ),
+      ("--print-ast", "", @ArgParser.Set(print_ast), "Print AST"),
+      ("--no-run", "", @ArgParser.Set(no_run), "Do not run the program"),
+      ("--print-knf", "", @ArgParser.Set(print_knf), "Print initial KNF"),
+      (
+        "--print-each-knf",
+        "",
+        @ArgParser.Set(print_each_knf),
+        "Print each KNF in optimization",
+      ),
+      (
+        "--print-final-knf",
+        "",
+        @ArgParser.Set(print_final_knf),
+        "Print final KNF",
+      ),
+      (
+        "--print-closure-ir",
+        "",
+        @ArgParser.Set(print_closure_ir),
+        "Print closure IR",
+      ),
+      ("--print-asm", "", @ArgParser.Set(print_asm), "Print assembly"),
+      (
"--knf-opt-iters", + "N", + @ArgParser.String( + fn(s) { + let i = @strconv.parse_int?(s) + match i { + Ok(i) => knf_opt_iters.val = i + Err(e) => @util.die("Invalid number") + } + }, + ), + "Number of optimization iterations", + ), + ], + fn(s) { + if file.is_empty().not() { + @util.die("multiple files are given") + } + file = Some(s) + }, + "", + argv, + ) -// // Input -// let file = if file.is_empty() { -// println("no input file") -// @util.die("no input file") -// } else { -// file.unwrap() -// } -// let contents = @fs.read_to_string(file) -// let typechecked = to_typecheked(contents) -// if print_ast.val { -// println(typechecked.to_string()) -// } + // Input + let file = if file.is_empty() { + println("no input file") + @util.die("no input file") + } else { + file.unwrap() + } + let contents = @fs.read_to_string(file) + let typechecked = to_typecheked(contents) + if print_ast.val { + println(typechecked.to_string()) + } -// // To KNF -// let external_fns = externals() -// let knf_env = @knf.KnfEnv::new(external_fns) -// let knf = knf_env.to_knf(typechecked) -// if print_knf.val { -// println("Initial KNF:") -// println(knf.to_pretty_print()) -// } + // To KNF + let external_fns = externals() + let knf_env = @knf.KnfEnv::new(external_fns) + let knf = knf_env.to_knf(typechecked) + if print_knf.val { + println("Initial KNF:") + println(knf.to_pretty_print()) + } -// // Optimization -// let mut knf = knf -// for i = 0; i < knf_opt_iters.val; i = i + 1 { -// let new_knf = knf_env.opt_pass(knf) -// if new_knf == knf { -// break -// } -// knf = new_knf -// if print_each_knf.val { -// println("KNF Iteration \{i}:") -// println(knf.to_pretty_print()) -// } -// } -// if print_final_knf.val || no_run.val { -// println("Final KNF:") -// println(knf.to_pretty_print()) -// } + // Optimization + let mut knf = knf + for i = 0; i < knf_opt_iters.val; i = i + 1 { + let new_knf = knf_env.opt_pass(knf) + if new_knf == knf { + break + } + knf = new_knf + if print_each_knf.val { + println("KNF Iteration \{i}:") + println(knf.to_pretty_print()) + } + } + if print_final_knf.val || no_run.val { + println("Final KNF:") + println(knf.to_pretty_print()) + } -// // Interpretation -// if run_with_interpreter.val { -// if no_run.val { -// return -// } -// let interpreter = @knf_eval.KnfInterpreter::new() -// add_interpreter_fns(interpreter) -// let _result = match interpreter.eval_full?(knf) { -// Ok(result) => result -// Err(Failure(e)) => { -// println("Error: " + e) -// @util.die("Evaluation error") -// } -// } -// return -// } + // Interpretation + if run_with_interpreter.val { + if no_run.val { + return + } + let interpreter = @knf_eval.KnfInterpreter::new() + add_interpreter_fns(interpreter) + let _result = match interpreter.eval_full?(knf) { + Ok(result) => result + Err(Failure(e)) => { + println("Error: " + e) + @util.die("Evaluation error") + } + } + return + } -// // KNF to closure -// let closure_ir = @closure.knf_program_to_closure( -// knf, -// Map::from_iter(external_fns.iter()), -// ) -// if print_closure_ir.val { -// println("Closure IR:") -// println(closure_ir.to_string()) -// } + // KNF to closure + let closure_ir = @closure.knf_program_to_closure( + knf, + Map::from_iter(external_fns.iter()), + ) + if print_closure_ir.val { + println("Closure IR:") + println(closure_ir.to_string()) + } -// // Fill in the holes here! + // Fill in the holes here! 
 
-//   // Code generation
-//   let real_asm = @riscv.emit(abort("TODO"))
+  // Code generation
+  let real_asm = @riscv.emit(abort("TODO"))
 
-//   // Print asm
-//   for asm in real_asm {
-//     println(asm)
-//     println("")
-//   }
-// }
+  // Print asm
+  for asm in real_asm {
+    println(asm)
+    println("")
+  }
+}
 
 fn to_typecheked(source : String) -> @types.Syntax {
   abort("todo")
diff --git a/src/pass_lex/moon.pkg.json b/src/pass_lex/moon.pkg.json
index e733389..611fd6d 100644
--- a/src/pass_lex/moon.pkg.json
+++ b/src/pass_lex/moon.pkg.json
@@ -1,6 +1,7 @@
 {
   "import": [
     "breakinglead/moonbite/types",
-    "breakinglead/moonbite"
+    "breakinglead/moonbite",
+    "breakinglead/moonbite/util"
   ]
 }
\ No newline at end of file
diff --git a/src/pass_lex/pass_parser.mbt b/src/pass_lex/pass_parser.mbt
deleted file mode 100644
index 26c6a6d..0000000
--- a/src/pass_lex/pass_parser.mbt
+++ /dev/null
@@ -1,3 +0,0 @@
-fn parse(ctx : @moonbite.Ctx) -> @moonbite.Ctx {
-  ctx
-}
diff --git a/src/pass_lex/pass_tokenizer.mbt b/src/pass_lex/pass_tokenizer.mbt
index fb139e8..c5f85f4 100644
--- a/src/pass_lex/pass_tokenizer.mbt
+++ b/src/pass_lex/pass_tokenizer.mbt
@@ -1,5 +1,3 @@
-type StringBuf Array[Char]
-
 fn is_whitespace(c : Char) -> Bool {
   match c {
     '\n' | '\r' | '\t' | ' ' => true
@@ -62,135 +60,285 @@ fn is_numeric(c : Char) -> Bool {
   }
 }
 
-test "random input test cases" {
-  assert_false!(is_whitespace('x'))
-  assert_false!(is_whitespace('y'))
-  assert_false!(is_whitespace('z'))
-  assert_false!(is_whitespace('A'))
-  assert_false!(is_whitespace('B'))
-  assert_false!(is_whitespace('C'))
-  assert_true!(is_whitespace('\n'))
-  assert_true!(is_whitespace('\r'))
-  assert_true!(is_whitespace('\t'))
-  assert_true!(is_whitespace(' '))
-  assert_false!(is_whitespace(':'))
-}
-
-// No idea how this got written
-// Still fixing bugs
-fn split_whitespaces(code : String) -> Iter[String] {
-  let len = code.length()
-  Iter::new(
-    fn(yield) {
-      let mut start = 0
-      let mut curr_len = 0
-      while start < len {
-        if is_whitespace(code[start]) {
-          start += 1
-        } else {
-          curr_len = start
-          while not(is_whitespace(code[start + curr_len])) { // && start + curr_len < len {
-            curr_len += 1
-          } else {
-            if yield(code.substring(~start, end=start + curr_len)) == IterEnd {
-              println("yield end") // do not display anything???
-              break IterEnd
-            }
-            break IterContinue
-          }
+struct Scanner {
+  source : String
+  mut tokens : Array[@types.Token]
+  mut start : Int
+  mut current : Int
+  mut line : Int
+  mut had_error : Bool
+}
+
+fn Scanner::new(source : String) -> Scanner {
+  {
+    source,
+    tokens: Array::new(),
+    start: 0,
+    current: 0,
+    line: 1,
+    had_error: false,
+  }
+}
+
+fn Scanner::error(self : Scanner, message : String) -> Unit {
+  self.had_error = true
+  @util.die(
+    "At line " +
+    self.line.to_string() +
+    " |" +
+    self.source.substring(start=self.start, end=self.current) +
+    "| has error: " +
+    message,
+  )
+}
+
+fn Scanner::is_at_end(self : Scanner) -> Bool {
+  self.current >= self.source.length()
+}
+
+fn Scanner::peek(self : Scanner, ~distance : Int = 0) -> Char {
+  if self.is_at_end() {
+    '\u0000'
+  } else {
+    self.source[self.current + distance]
+  }
+}
+
+fn Scanner::advance(self : Scanner) -> Char {
+  self.current += 1
+  self.source[self.current - 1]
+}
+
+fn Scanner::match_char(self : Scanner, expected : Char) -> Bool {
+  if self.is_at_end() {
+    false
+  } else if self.source[self.current] != expected {
+    false
+  } else {
+    self.current += 1
+    true
+  }
+}
+
+fn Scanner::add(self : Scanner, token_type : @types.Token) -> Unit {
+  self.tokens.push(token_type)
+}
+
+fn Scanner::add_number(self : Scanner) -> Unit {
+  let mut has_dot = false
+  while is_numeric(self.peek()) {
+    self.current += 1
+  }
+  if self.peek() == '.' && is_numeric(self.peek(distance=1)) {
+    has_dot = true
+    self.current += 1 // consume the '.'
+    while is_numeric(self.peek()) {
+      self.current += 1
+    }
+  }
+  if has_dot {
+    let n = self.source.substring(start=self.start, end=self.current)
+    let n = @strconv.parse_double?(n).unwrap()
+    self.add(@types.Token::DOUBLE_LITERAL(n))
+  } else {
+    let n = self.source.substring(start=self.start, end=self.current)
+    let n = @strconv.parse_int?(n).unwrap()
+    self.add(@types.Token::INT_LITERAL(n))
+  }
+}
+
+fn Scanner::add_identifier(self : Scanner) -> Unit {
+  while is_alphabetic(self.peek()) {
+    self.current += 1
+  }
+  let text = self.source.substring(start=self.start, end=self.current)
+  match text {
+    "true" => self.add(@types.Token::TRUE)
+    "false" => self.add(@types.Token::FALSE)
+    "if" => self.add(@types.Token::IF)
+    "else" => self.add(@types.Token::ELSE)
+    "fn" => self.add(@types.Token::FN)
+    "let" => self.add(@types.Token::LET)
+    "Unit" => self.add(@types.Token::UNIT)
+    "Int" => self.add(@types.Token::INT)
+    "Double" => self.add(@types.Token::DOUBLE)
+    "Array" => self.add(@types.Token::ARRAY)
+    _ => self.add(@types.Token::IDENTIFIER(text))
+  }
+}
+
+fn Scanner::scan_token(self : Scanner) -> Unit {
+  let c = self.advance()
+  match c {
+    '(' => self.add(@types.Token::LPAREN)
+    ')' => self.add(@types.Token::RPAREN)
+    '[' => self.add(@types.Token::LBRACKET)
+    ']' => self.add(@types.Token::RBRACKET)
+    '{' => self.add(@types.Token::LCURLYBRACKET)
+    '}' => self.add(@types.Token::RCURLYBRACKET)
+    ':' => self.add(@types.Token::COLON)
+    ';' => self.add(@types.Token::SEMICOLON)
+    ',' => self.add(@types.Token::COMMA)
+    '.' => self.add(@types.Token::DOT)
+    '+' => self.add(@types.Token::ADD)
+    '-' =>
+      if self.match_char('>') {
+        self.add(@types.Token::ARROW)
+      } else {
+        self.add(@types.Token::SUB)
+      }
+    '*' => self.add(@types.Token::MUL)
+    '/' =>
+      if self.match_char('/') {
+        while self.peek() != '\n' && not(self.is_at_end()) {
+          self.current += 1
         }
       } else {
-        // never
-        IterContinue
+        self.add(@types.Token::DIV)
      }
-    },
-  )
+    '=' =>
+      if self.match_char('=') {
+        self.add(@types.Token::EQ)
+      } else {
+        self.add(@types.Token::ASSIGN)
+      }
+    '<' =>
+      if self.match_char('=') {
+        self.add(@types.Token::LE)
+      } else {
+        self.add(@types.Token::LT)
+      }
+    '>' =>
+      if self.match_char('=') {
+        self.add(@types.Token::GE)
+      } else {
+        self.add(@types.Token::GT)
+      }
+    '\n' => self.line += 1
+    '"' => self.error("String literals not supported")
+    c =>
+      if is_whitespace(c) {
+        // do nothing
+      } else if is_numeric(c) {
+        self.add_number()
+      } else if is_alphabetic(c) {
+        self.add_identifier()
+      } else {
+        self.error("Unexpected character")
+        println(c)
+      }
+  }
 }
 
-fn split_whitespaces_easier_version(code : String) -> Array[String] {
-  let res = Array::new()
-  let mut buf = Buffer::new()
-  let mut should_write = false
-  for i in code {
-    if is_whitespace(i) {
-      if should_write {
-        res.push(buf.to_string())
-        buf = Buffer::new()
-      }
-      should_write = false
-      continue
-    } else {
-      should_write = true
-      buf.write_char(i)
-    }
+fn Scanner::scan_tokens(self : Scanner) -> Array[@types.Token] {
+  while not(self.is_at_end()) {
+    self.start = self.current
+    self.scan_token()
   }
-  res
+  return self.tokens
 }
 
-test "split whitespaces" {
-  let a = split_whitespaces_easier_version(
-    " hello world\n why\t it is so::messy ",
+test "tokenize double literal and integer literal" {
+  let scanner = Scanner::new("let x = 123.456; let y = 123;")
+  let tokens = scanner.scan_tokens()
+  assert_eq!(
+    tokens,
+    [
+      @types.Token::LET,
+      @types.Token::IDENTIFIER("x"),
+      @types.Token::ASSIGN,
+      @types.Token::DOUBLE_LITERAL(123.456),
+      @types.Token::SEMICOLON,
+      @types.Token::LET,
+      @types.Token::IDENTIFIER("y"),
+      @types.Token::ASSIGN,
+      @types.Token::INT_LITERAL(123),
+      @types.Token::SEMICOLON,
+    ],
   )
-  assert_eq!(a, ["hello", "world", "why", "it", "is", "so::messy"])
 }
 
-pub fn tokenize(ctx : @moonbite.Ctx) -> @moonbite.Ctx {
-  let code = ctx.code
-  let strings = split_whitespaces_easier_version(code)
-  let tokens = strings.map(
-    fn(i) {
-      match i {
-        "let" => @types.Token::LET
-        "fn" => @types.Token::FN
-        "if" => @types.Token::IF
-        "else" => @types.Token::ELSE
-        "true" => @types.Token::TRUE
-        "false" => @types.Token::FALSE
-        "unit" => @types.Token::UNIT
-        "bool" => @types.Token::BOOL
-        "int" => @types.Token::INT
-        "double" => @types.Token::DOUBLE
-        "array" => @types.Token::ARRAY
-        "not" => @types.Token::NOT
-        "->" => @types.Token::ARROW
-        ":" => @types.Token::COLON
-        ";" => @types.Token::SEMICOLON
-        "," => @types.Token::COMMA
-        "." => @types.Token::DOT
-        "+" => @types.Token::ADD
-        "-" => @types.Token::SUB
-        "*" => @types.Token::MUL
-        "/" => @types.Token::DIV
-        "=" => @types.Token::ASSIGN
-        "==" => @types.Token::EQ
-        "<=" => @types.Token::LE
-        "(" => @types.Token::LPAREN
-        ")" => @types.Token::RPAREN
-        "[" => @types.Token::LBRACKET
-        "]" => @types.Token::RBRACKET
-        "{" => @types.Token::LCURLYBRACKET
-        "}" => @types.Token::RCURLYBRACKET
-        // s =>{
-        //   // identifier
-        //   // [a-zA-Z_][a-zA-Z0-9_]*;
-        //   // if s.iter().all(fn(c) { is_alphabetic(c) || is_numeric(c) }) &&
-        //   //   s.iter().any(is_numeric) &&
-        //   //   is_alphabetic(s.iter().last().unwrap()) {
-        //   //   @types.Token::IDENTIFIER(s)
-        //   // }
-        // }
-        t =>
-          if t.iter().all(fn(c) { is_alphabetic(c) || is_numeric(c) }) &&
-          not(
-            t.iter().any(is_numeric) &&
-            is_alphabetic(t.iter().last().unwrap()),
-          ) {
-            @types.Token::IDENTIFIER(t)
-          } else {
-            @types.Token::NUMBER(t)
-          }
-      }
-    },
-  )
+test "tokenize identifier" {
+  let scanner = Scanner::new("let aaabbb = 123; fn main() { meow();}")
+  let tokens = scanner.scan_tokens()
+  assert_eq!(
+    tokens,
+    [
+      @types.Token::LET,
+      @types.Token::IDENTIFIER("aaabbb"),
+      @types.Token::ASSIGN,
+      @types.Token::INT_LITERAL(123),
+      @types.Token::SEMICOLON,
+      @types.Token::FN,
+      @types.Token::IDENTIFIER("main"),
+      @types.Token::LPAREN,
+      @types.Token::RPAREN,
+      @types.Token::LCURLYBRACKET,
+      @types.Token::IDENTIFIER("meow"),
+      @types.Token::LPAREN,
+      @types.Token::RPAREN,
+      @types.Token::SEMICOLON,
+      @types.Token::RCURLYBRACKET,
+    ],
+  )
+}
+
+test "tokenize if-else statements" {
+  let scanner = Scanner::new("if (x > 0) { print(x); } else { print(-x); }")
+  let tokens = scanner.scan_tokens()
+  assert_eq!(
+    tokens,
+    [
+      @types.Token::IF,
+      @types.Token::LPAREN,
+      @types.Token::IDENTIFIER("x"),
+      @types.Token::GT,
+      @types.Token::INT_LITERAL(0),
+      @types.Token::RPAREN,
+      @types.Token::LCURLYBRACKET,
+      @types.Token::IDENTIFIER("print"),
+      @types.Token::LPAREN,
+      @types.Token::IDENTIFIER("x"),
+      @types.Token::RPAREN,
+      @types.Token::SEMICOLON,
+      @types.Token::RCURLYBRACKET,
+      @types.Token::ELSE,
+      @types.Token::LCURLYBRACKET,
+      @types.Token::IDENTIFIER("print"),
+      @types.Token::LPAREN,
+      @types.Token::SUB,
+      @types.Token::IDENTIFIER("x"),
+      @types.Token::RPAREN,
+      @types.Token::SEMICOLON,
+      @types.Token::RCURLYBRACKET,
+    ],
+  )
+}
+
+test "tokenize >= > <= <" {
+  let scanner = Scanner::new("if x>=0 { if y<0 {} }")
+  let tokens = scanner.scan_tokens()
+  assert_eq!(
+    tokens,
+    [
+      @types.Token::IF,
+      @types.Token::IDENTIFIER("x"),
+      @types.Token::GE,
+      @types.Token::INT_LITERAL(0),
+      @types.Token::LCURLYBRACKET,
+      @types.Token::IF,
+      @types.Token::IDENTIFIER("y"),
+      @types.Token::LT,
+      @types.Token::INT_LITERAL(0),
+      @types.Token::LCURLYBRACKET,
+      @types.Token::RCURLYBRACKET,
+      @types.Token::RCURLYBRACKET,
+    ],
+  )
+}
+
+pub fn tokenize(ctx : @moonbite.Ctx) -> @moonbite.Ctx {
+  let scanner = Scanner::new(ctx.code)
+  let tokens = scanner.scan_tokens()
   { ..ctx, tokens: Some(tokens) }
 }
diff --git a/src/types/type_token.mbt b/src/types/type_token.mbt
index de8b0ef..334a1c9 100644
--- a/src/types/type_token.mbt
+++ b/src/types/type_token.mbt
@@ -11,7 +11,8 @@ pub enum Token {
   ELSE
   FN
   LET
-  NUMBER(String)
+  INT_LITERAL(Int)
+  DOUBLE_LITERAL(Double)
   IDENTIFIER(String)
   DOT
   ADD
@@ -20,7 +21,11 @@ pub enum Token {
   DIV
   ASSIGN
   EQ
+  NE // new
   LE
+  LT // new
+  GE // new
+  GT // new
   LPAREN
   RPAREN
   LBRACKET
@@ -31,5 +36,5 @@ pub enum Token {
   COLON
   SEMICOLON
   COMMA
-  WS
-} derive(Show)
+  UNKNOWN
+} derive(Show, Eq)
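
For reference, below is a minimal usage sketch of the tokenizer added by this patch, written in the same style as the tests in pass_tokenizer.mbt. It assumes only the Scanner API and the @types.Token constructors introduced above; the program text "let z = 1 + 2 * 3;" is purely illustrative.

test "tokenize arithmetic expression (sketch)" {
  // Drive the scanner over a tiny program and compare the full token stream.
  let scanner = Scanner::new("let z = 1 + 2 * 3;")
  let tokens = scanner.scan_tokens()
  assert_eq!(
    tokens,
    [
      @types.Token::LET,
      @types.Token::IDENTIFIER("z"),
      @types.Token::ASSIGN,
      @types.Token::INT_LITERAL(1),
      @types.Token::ADD,
      @types.Token::INT_LITERAL(2),
      @types.Token::MUL,
      @types.Token::INT_LITERAL(3),
      @types.Token::SEMICOLON,
    ],
  )
}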