From 9854c6cdc93e4de9ea748bf14b95532a06e2353d Mon Sep 17 00:00:00 2001 From: Christopher Gagner Date: Sun, 22 Sep 2024 22:51:47 -0400 Subject: [PATCH] Fixed parsers handled EOF. --- moos-parser/src/behavior/behavior.lalrpop | 21 +++++++++------- moos-parser/src/behavior/lexer.rs | 29 ++++++++++++----------- moos-parser/src/moos/lexer.rs | 28 ++++++++++++---------- moos-parser/src/moos/moos.lalrpop | 15 ++++++++---- moos-parser/src/nsplug/lexer.rs | 14 ++++++++--- moos-parser/src/nsplug/nsplug.lalrpop | 27 ++++++++++++--------- 6 files changed, 80 insertions(+), 54 deletions(-) diff --git a/moos-parser/src/behavior/behavior.lalrpop b/moos-parser/src/behavior/behavior.lalrpop index e65845b..a29b90d 100644 --- a/moos-parser/src/behavior/behavior.lalrpop +++ b/moos-parser/src/behavior/behavior.lalrpop @@ -13,9 +13,9 @@ pub Lines: tree::Lines = { } pub Line: Line = { - "EOL" => Line::EndOfLine{line: l.line, index: l.index}, - "EOL" => Line::Comment{comment, line: l.line}, - "EOL" => { + END => Line::EndOfLine{line: l.line, index: l.index}, + END => Line::Comment{comment, line: l.line}, + END => { let e = lalrpop_util::ErrorRecovery { error: lalrpop_util::ParseError::User { error: BehaviorParseError::new_unexpected_assignment(l, r) @@ -35,7 +35,7 @@ pub Line: Line = { } }, SetBlock => <>, - "EOL" => Line::Variable{variable, line: l.line}, + END => Line::Variable{variable, line: l.line}, => { state.errors.push(error); Line::Error{start_line: l.line, end_line: r.line} @@ -108,8 +108,8 @@ Assignments: tree::Assignments = { } Initialize: Line = { - "initialize" "EOL" => Line::Initialize{assignments, deferred: false, line: l.line, range: TokenRange::new_line(l,r).expect("Invalid token range while parsing `initialize`")}, - "initialize_" "EOL" => Line::Initialize{assignments, deferred: true, line: l.line, range: TokenRange::new_line(l,r).expect("Invalid token range while parsing `initialize_`")}, + "initialize" END => Line::Initialize{assignments, deferred: false, line: l.line, range: TokenRange::new_line(l,r).expect("Invalid token range while parsing `initialize`")}, + "initialize_" END => Line::Initialize{assignments, deferred: true, line: l.line, range: TokenRange::new_line(l,r).expect("Invalid token range while parsing `initialize_`")}, } CommentOrEmptyLine: Line = { @@ -154,7 +154,7 @@ BehaviorBlock: Line = { Block: (tree::UnknownBlock, Location, Location) = { "{" "EOL" - "}" "EOL" + "}" END => ( tree::UnknownBlock { open_curly_line: ocl.line, @@ -185,7 +185,7 @@ SetBlock: Line = { "EOL"? "{" "EOL" - "}" "EOL" + "}" END => Line::SetBlock { set_block: tree::SetBlock{ set_block_comment: None, @@ -206,6 +206,10 @@ SetBlock: Line = { }, } +END: () = { + "EOL" => {}, + "EOF" => {}, +} // --------------------------------------------------------------------------- // Token Definitions @@ -221,6 +225,7 @@ extern { QuoteEnd => Token::QuoteEnd, "ValueString" => Token::ValueString(<&'input str>), "EOL" => Token::EOL, + "EOF" => Token::EOF, "EnvVariable" => Token::EnvVariable(<&'input str>), "PartialEnvVariable" => Token::PartialEnvVariable(<&'input str>), "int" => Token::Integer(, <&'input str>), diff --git a/moos-parser/src/behavior/lexer.rs b/moos-parser/src/behavior/lexer.rs index 5714440..d42b4b1 100644 --- a/moos-parser/src/behavior/lexer.rs +++ b/moos-parser/src/behavior/lexer.rs @@ -64,6 +64,7 @@ pub struct Lexer<'input> { trim_start: bool, trim_end: bool, token_queue: TokenQueue<'input>, + reached_eof: bool, } impl<'input> Lexer<'input> { @@ -92,6 +93,7 @@ impl<'input> Lexer<'input> { trim_start: true, trim_end: false, token_queue: TokenQueue::new(), + reached_eof: false, } } @@ -556,8 +558,6 @@ impl<'input> Lexer<'input> { // 9. # Macro => Skip entire line // // Ignore other tokens - - let mut found_new_line = false; while let Some(((i, c), (_ii, cc))) = self.iter.find(|&((_i, c), (_ii, cc))| { c == '\n' || (c == '/' && cc == '/') // Comment @@ -572,9 +572,7 @@ impl<'input> Lexer<'input> { match c { '\n' => { self.tokenize_new_line(i, false); - found_new_line = true; - // Break out of the tokenize for-loop after each line - break; + return; } '/' => self.tokenize_comment(i), c if (c == '$' && cc == '{') => { @@ -600,16 +598,19 @@ impl<'input> Lexer<'input> { } } - if !found_new_line { - // Should only get in here if we have reached the end of the input. - // If so, check that there isn't some straggling unhandled string. - self.trim_end = true; - if let Some((prev_i, unhandled)) = self.get_unhandled_string(self.input.len(), true) { - if !unhandled.is_empty() { - self.scan_keywords_and_values(unhandled, prev_i); - } - self.previous_index = self.get_safe_index(self.input.len()); + // Should only get in here if we have reached the end of the input. + // If so, check that there isn't some straggling unhandled string. + self.trim_end = true; + if let Some((prev_i, unhandled)) = self.get_unhandled_string(self.input.len(), true) { + if !unhandled.is_empty() { + self.scan_keywords_and_values(unhandled, prev_i); } + self.previous_index = self.get_safe_index(self.input.len()); + } + + if !self.reached_eof { + self.push_token(self.input.len(), Token::EOF, self.input.len()); + self.reached_eof = true; } } } diff --git a/moos-parser/src/moos/lexer.rs b/moos-parser/src/moos/lexer.rs index fc43eef..efb9940 100644 --- a/moos-parser/src/moos/lexer.rs +++ b/moos-parser/src/moos/lexer.rs @@ -57,6 +57,7 @@ pub struct Lexer<'input> { trim_start: bool, trim_end: bool, token_queue: TokenQueue<'input>, + reached_eof: bool, } impl<'input> Lexer<'input> { @@ -83,6 +84,7 @@ impl<'input> Lexer<'input> { trim_start: true, trim_end: false, token_queue: TokenQueue::new(), + reached_eof: false, } } @@ -521,7 +523,6 @@ impl<'input> Lexer<'input> { // // Ignore other tokens - let mut found_new_line = false; while let Some(((i, c), (_ii, cc))) = self.iter.find(|&((_i, c), (_ii, cc))| { c == '\n' || (c == '/' && cc == '/') // Comment @@ -534,9 +535,7 @@ impl<'input> Lexer<'input> { match c { '\n' => { self.tokenize_new_line(i, false); - found_new_line = true; - // Break out of the tokenize for-loop after each line - break; + return; } '/' => self.tokenize_comment(i), c if (c == '$' && cc == '{') => { @@ -561,16 +560,19 @@ impl<'input> Lexer<'input> { } } - if !found_new_line { - // Should only get in here if we have reached the end of the input. - // If so, check that there isn't some straggling unhandled string. - self.trim_end = true; - if let Some((prev_i, unhandled)) = self.get_unhandled_string(self.input.len(), true) { - if !unhandled.is_empty() { - self.scan_keywords_and_values(unhandled, prev_i); - } - self.previous_index = self.get_safe_index(self.input.len()); + // Should only get in here if we have reached the end of the input. + // If so, check that there isn't some straggling unhandled string. + self.trim_end = true; + if let Some((prev_i, unhandled)) = self.get_unhandled_string(self.input.len(), true) { + if !unhandled.is_empty() { + self.scan_keywords_and_values(unhandled, prev_i); } + self.previous_index = self.get_safe_index(self.input.len()); + } + + if !self.reached_eof { + self.push_token(self.input.len(), Token::EOF, self.input.len()); + self.reached_eof = true; } } } diff --git a/moos-parser/src/moos/moos.lalrpop b/moos-parser/src/moos/moos.lalrpop index 334d930..1f3e849 100644 --- a/moos-parser/src/moos/moos.lalrpop +++ b/moos-parser/src/moos/moos.lalrpop @@ -13,12 +13,12 @@ pub Lines: tree::Lines = { } pub Line: Line = { - "EOL" => Line::EndOfLine{line: l.line, index: l.index}, - "EOL" => Line::Comment{comment, line: l.line}, + END => Line::EndOfLine{line: l.line, index: l.index}, + END => Line::Comment{comment, line: l.line}, => Line::Assignment{assignment, line: l.line}, Define => <>, ProcessConfig => <>, - "EOL" => Line::Variable{variable, line: l.line}, + END => Line::Variable{variable, line: l.line}, => { state.errors.push(error); Line::Error{start_line: l.line, end_line: r.line} @@ -66,7 +66,7 @@ Comment: tree::Comment = { } Assignment: tree::Assignment = { - "=" "EOL" => tree::Assignment{name, value: value.into(), comment}, + "=" END => tree::Assignment{name, value: value.into(), comment}, } Define: Line = { @@ -95,7 +95,7 @@ ProcessConfig: Line = { "{" "EOL" - "}" "EOL" + "}" END => Line::ProcessConfig { process_config: tree::ProcessConfig{ process_config_comment, @@ -114,6 +114,10 @@ ProcessConfig: Line = { }, } +END: () = { + "EOL" => {}, + "EOF" => {}, +} // --------------------------------------------------------------------------- // Token Definitions @@ -129,6 +133,7 @@ extern { QuoteEnd => Token::QuoteEnd, "ValueString" => Token::ValueString(<&'input str>), "EOL" => Token::EOL, + "EOF" => Token::EOF, "EnvVariable" => Token::EnvVariable(<&'input str>), "PartialEnvVariable" => Token::PartialEnvVariable(<&'input str>), "int" => Token::Integer(, <&'input str>), diff --git a/moos-parser/src/nsplug/lexer.rs b/moos-parser/src/nsplug/lexer.rs index 502c826..feeb5c8 100644 --- a/moos-parser/src/nsplug/lexer.rs +++ b/moos-parser/src/nsplug/lexer.rs @@ -59,6 +59,7 @@ pub struct Lexer<'input> { char_count: usize, start_of_line: bool, token_queue: TokenQueue<'input>, + reached_eof: bool, } impl<'input> Lexer<'input> { @@ -82,6 +83,7 @@ impl<'input> Lexer<'input> { char_count: 0, start_of_line: true, token_queue: TokenQueue::new(), + reached_eof: false, } } @@ -612,8 +614,7 @@ impl<'input> Lexer<'input> { match c { '\n' => { self.tokenize_new_line(i, true); - // Break out of the tokenize for-loop after each line - break; + return; } c if (c == '$' && cc == '(') => { // drop the unhandled tokens before this because we are not @@ -639,13 +640,20 @@ impl<'input> Lexer<'input> { } }); } - '#' => self.tokenize_macro(i), + '#' => { + self.tokenize_macro(i); + return; + } _ => {} } } // NOTE: There could still be tokens to be parse, but we don't care // about them. + if !self.reached_eof { + self.push_token(self.input.len(), Token::EOF, self.input.len()); + self.reached_eof = true; + } } } diff --git a/moos-parser/src/nsplug/nsplug.lalrpop b/moos-parser/src/nsplug/nsplug.lalrpop index a420863..7bdbce8 100644 --- a/moos-parser/src/nsplug/nsplug.lalrpop +++ b/moos-parser/src/nsplug/nsplug.lalrpop @@ -17,7 +17,7 @@ pub Line: Line = { IfNotDef => <>, MacroDefine => <>, MacroInclude => <>, - "EOL" => Line::EndOfLine{line: l.line, index: l.index}, + END => Line::EndOfLine{line: l.line, index: l.index}, UnknownMacro => <>, => Line::Variable{variable, line: l.line}, => { @@ -46,7 +46,7 @@ pub Line: Line = { } MacroDefine: Line = { - WhiteSpace+ "EOL" => + WhiteSpace+ END => Line::Macro{ macro_type: MacroType::Define{definition, range: TokenRange::new_line(ml,mr).unwrap()}, comment: None, @@ -66,7 +66,7 @@ MacroDefinition: MacroDefinition = { } MacroInclude: Line = { - WhiteSpace+ WhiteSpace+ "EOL" => { + WhiteSpace+ WhiteSpace+ END => { Line::Macro{ macro_type: MacroType::Include{ path: tree::IncludePath::VariableStrings(path, TokenRange::new_line(pl,pr).expect("Invalid token range while parsing `IncludePath`")), @@ -78,7 +78,7 @@ MacroInclude: Line = { indent: indent.unwrap_or_default().into(), } }, - WhiteSpace+ WhiteSpace* "EOL" => { + WhiteSpace+ WhiteSpace* END => { Line::Macro{ macro_type: MacroType::Include{ path: tree::IncludePath::VariableStrings(path, TokenRange::new_line(pl,pr).expect("Invalid token range while parsing `IncludePath`")), @@ -91,7 +91,7 @@ MacroInclude: Line = { } }, // Quote - WhiteSpace+ WhiteSpace+ "EOL" => { + WhiteSpace+ WhiteSpace+ END => { Line::Macro{ macro_type: MacroType::Include{ path: tree::IncludePath::Quote(path), @@ -103,7 +103,7 @@ MacroInclude: Line = { indent: indent.unwrap_or_default().into(), } }, - WhiteSpace+ WhiteSpace* "EOL" => { + WhiteSpace+ WhiteSpace* END => { Line::Macro{ macro_type: MacroType::Include{ path: tree::IncludePath::Quote(path), @@ -118,7 +118,7 @@ MacroInclude: Line = { } UnknownMacro: Line = { - DropTokens* "EOL" => { + DropTokens* END => { let e = lalrpop_util::ErrorRecovery { error: lalrpop_util::ParseError::User { error: PlugParseError::new_unknown_macro(l, m) @@ -245,7 +245,7 @@ IfDefBranch: tree::IfDefBranch = { body: body.unwrap_or_default().into(), branch: Box::new(branch), }, - "#else" WhiteSpace* "EOL" "#endif" WhiteSpace* "EOL" => + "#else" WhiteSpace* "EOL" "#endif" WhiteSpace* END => tree::IfDefBranch::Else { line: ml.line, line_end_index: el.index, @@ -257,7 +257,7 @@ IfDefBranch: tree::IfDefBranch = { endif_macro_range: TokenRange::new_line(endl,endr).unwrap(), endif_indent: endif_indent.unwrap_or_default().into(), }, - "#endif" WhiteSpace* "EOL" => + "#endif" WhiteSpace* END => tree::IfDefBranch::EndIf { line: ml.line, line_end_index: el.index, @@ -299,7 +299,7 @@ IfNotDef: Line = { } IfNotDefBranch: tree::IfNotDefBranch = { - "#else" WhiteSpace* "EOL" "#endif" WhiteSpace* "EOL" => + "#else" WhiteSpace* "EOL" "#endif" WhiteSpace* END => tree::IfNotDefBranch::Else { line: ml.line, line_end_index: el.index, @@ -311,7 +311,7 @@ IfNotDefBranch: tree::IfNotDefBranch = { endif_macro_range: TokenRange::new_line(endl,endr).unwrap(), endif_indent: endif_indent.unwrap_or_default().into(), }, - "#endif" WhiteSpace* "EOL" => + "#endif" WhiteSpace* END => tree::IfNotDefBranch::EndIf { line: ml.line, line_end_index: el.index, @@ -328,6 +328,10 @@ IfNotDefClauses: tree::IfNotDefClauses = { }, } +END: () = { + "EOL" => {}, + "EOF" => {}, +} // --------------------------------------------------------------------------- // Token Definitions @@ -342,6 +346,7 @@ extern { QuoteEnd => Token::QuoteEnd, "ValueString" => Token::ValueString(<&'input str>), "EOL" => Token::EOL, + "EOF" => Token::EOF, "PlugVariable" => Token::PlugVariable(<&'input str>), "PartialPlugVariable" => Token::PartialPlugVariable(<&'input str>), "PlugUpperVariable" => Token::PlugUpperVariable(<&'input str>),