Skip to content

Commit

Permalink
[70_9] Parser: improve number_parser and keyword_parser for S7 Scheme
Browse files Browse the repository at this point in the history
## What
+ number_parser: support `#` prefix
+ keyword_parser: support customizing extra chars
+ improve s7-lang.scm

## Why
In S7 Scheme
``` scheme
#b001
#xab1
#o777
#t 
#f

string->number
string?
set!
```

## How to test your changes?
Launch Mogan Research and check `TeXmacs/tests/tm/70_9.tm`
  • Loading branch information
da-liii authored May 15, 2024
1 parent ce0df48 commit 511f393
Show file tree
Hide file tree
Showing 7 changed files with 203 additions and 43 deletions.
12 changes: 5 additions & 7 deletions TeXmacs/plugins/s7/progs/code/s7-lang.scm
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@
(tm-define (parser-feature lan key)
(:require (and (== lan "s7") (== key "keyword")))
`(,(string->symbol key)
(constant
"#t" "#f" "pi")
(extra_chars "?" "-" "!" "*" ">" "=" "<")
(constant "pi")
(declare_type
"define" "set!" "lambda" "define-macro" "define-constant" "let")
"define" "set!" "lambda" "define-macro" "define-constant" "let" "let*")
(keyword
"eq?" "bignum" "length" "append" "procedure-source"
; SRFI-1: List constructors
Expand Down Expand Up @@ -97,14 +97,12 @@

(define (s7-number-suffix)
`(suffix
(imaginary "j" "J")))
(imaginary "i")))

(tm-define (parser-feature lan key)
(:require (and (== lan "s7") (== key "number")))
`(,(string->symbol key)
(bool_features
"prefix_0x" "prefix_0b"
"sci_notation")
(bool_features "prefix_#")
(separator "_")
,(s7-number-suffix)))

Expand Down
104 changes: 104 additions & 0 deletions TeXmacs/tests/tm/70_9.tm
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
<TeXmacs|2.1.2>

<style|<tuple|generic|no-page-numbers|chinese|s7>>

<\body>
<\session|s7|default>
<\output>
S7 Scheme 10.6 (14-Apr-2023)
</output>

<\unfolded-io>
\<gtr\>\
<|unfolded-io>
#t
<|unfolded-io>
#t
</unfolded-io>

<\unfolded-io>
\<gtr\>\
<|unfolded-io>
#f
<|unfolded-io>
#f
</unfolded-io>

<\unfolded-io>
\<gtr\>\
<|unfolded-io>
#o777
<|unfolded-io>
511
</unfolded-io>

<\unfolded-io>
\<gtr\>\
<|unfolded-io>
#b111
<|unfolded-io>
7
</unfolded-io>

<\unfolded-io>
\<gtr\>\
<|unfolded-io>
#xab1
<|unfolded-io>
2737
</unfolded-io>

<\unfolded-io>
\<gtr\>\
<|unfolded-io>
(number-\<gtr\>string 1)
<|unfolded-io>
"1"
</unfolded-io>

<\unfolded-io>
\<gtr\>\
<|unfolded-io>
(string? "1")
<|unfolded-io>
#t
</unfolded-io>

<\unfolded-io>
\<gtr\>\
<|unfolded-io>
(list-ref (list 1 2 3 4) 3)
<|unfolded-io>
4
</unfolded-io>

<\unfolded-io>
\<gtr\>\
<|unfolded-io>
(define x 1)
<|unfolded-io>
1
</unfolded-io>

<\unfolded-io>
\<gtr\>\
<|unfolded-io>
(set! x 2)
<|unfolded-io>
2
</unfolded-io>

<\input>
\<gtr\>\
<|input>
let*
</input>
</session>
</body>

<\initial>
<\collection>
<associate|page-medium|paper>
<associate|page-screen-margin|false>
</collection>
</initial>
36 changes: 30 additions & 6 deletions src/Data/Parser/keyword_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include "keyword_parser.hpp"
#include "analyze.hpp"
#include "converter.hpp"
#include "iterator.hpp"
#include "scheme.hpp"
#include "tree.hpp"
Expand All @@ -19,12 +20,32 @@
keyword_parser_rep::keyword_parser_rep () {
current_keyword= "";
keyword_group = hashmap<string, string> ();
extra_chars = array<char> ();
}

void
keyword_parser_rep::insert_extra_char (char extra_char) {
extra_chars << extra_char;
}

bool
read_keyword (string s, int& i, string& result, array<char> extras) {
int opos= i;
int s_N = N (s);
// a keyword must start with alpha
if (i < s_N && is_alpha (s[i])) i++;
while (i < s_N && (is_alpha (s[i]) || contains (s[i], extras))) {
i++;
}
result= s (opos, i);
return i > opos;
}

bool
keyword_parser_rep::can_parse (string s, int pos) {
string word;
bool hit= read_word (s, pos, word) && keyword_group->contains (word);
bool hit= read_keyword (s, pos, word, extra_chars) &&
keyword_group->contains (word);
if (hit) current_keyword= word;
return hit;
}
Expand All @@ -43,12 +64,15 @@ keyword_parser_rep::use_keywords_of_lang (string lang_code) {
list<tree> l= as_list_tree (eval (get_list_of_keywords_tree));
if (DEBUG_PARSER)
debug_packrat << "Keywords definition of [" << lang_code << "] loaded!\n";
for (int i= 0; i < N (l); i++) {
tree group_words= l[i];
string group = get_label (group_words);
for (int j= 0; j < N (group_words); j++) {
int l_N= N (l);
for (int i= 0; i < l_N; i++) {
tree group_words = l[i];
string group = get_label (group_words);
int group_words_N= N (group_words);
for (int j= 0; j < group_words_N; j++) {
string word= get_label (group_words[j]);
put (word, group);
// number->string is actually number-<gtr>string
put (utf8_to_cork (word), group);
}
}
}
2 changes: 2 additions & 0 deletions src/Data/Parser/keyword_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@ class keyword_parser_rep : public parser_rep {
}

void use_keywords_of_lang (string lang_code);
void insert_extra_char (char extra_char);

private:
void do_parse (string s, int& pos);
hashmap<string, string> keyword_group;
string current_keyword;
array<char> extra_chars;
};

#endif
45 changes: 28 additions & 17 deletions src/Data/Parser/number_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

number_parser_rep::number_parser_rep ()
: PREFIX_0B ("prefix_0b"), PREFIX_0O ("prefix_0o"), PREFIX_0X ("prefix_0x"),
NO_SUFFIX_WITH_BOX ("no_suffix_with_box"),
PREFIX_HASH ("prefix_#"), NO_SUFFIX_WITH_BOX ("no_suffix_with_box"),
SCIENTIFIC_NOTATION ("sci_notation") {
separator= '\0';
}
Expand Down Expand Up @@ -45,34 +45,38 @@ number_parser_rep::parse_decimal (string s, int& pos) {
}

bool
number_parser_rep::can_parse_prefix_0b (string s, int pos) {
return prefix_0b () && pos + 2 < N (s) && s[pos] == '0' &&
(s[pos + 1] == 'b' || s[pos + 1] == 'B');
number_parser_rep::can_parse_prefix_b (string s, int pos) {
return pos + 2 < N (s) && (s[pos + 1] == 'b' || s[pos + 1] == 'B') &&
((prefix_0b () && s[pos] == '0') || (prefix_hash () && s[pos] == '#'));
}

bool
number_parser_rep::can_parse_prefix_0o (string s, int pos) {
return prefix_0o () && pos + 2 < N (s) && s[pos] == '0' &&
(s[pos + 1] == 'o' || s[pos + 1] == 'O');
number_parser_rep::can_parse_prefix_o (string s, int pos) {
return pos + 2 < N (s) && (s[pos + 1] == 'o' || s[pos + 1] == 'O') &&
((prefix_0o () && s[pos] == '0') || (prefix_hash () && s[pos] == '#'));
}

bool
number_parser_rep::can_parse_prefix_0x (string s, int pos) {
return prefix_0x () && pos + 2 < N (s) && s[pos] == '0' &&
(s[pos + 1] == 'x' || s[pos + 1] == 'X');
number_parser_rep::can_parse_prefix_x (string s, int pos) {
return pos + 2 < N (s) && (s[pos + 1] == 'x' || s[pos + 1] == 'X') &&
((prefix_0x () && s[pos] == '0') || (prefix_hash () && s[pos] == '#'));
}

bool
number_parser_rep::can_parse (string s, int pos) {
// check on len >= 3
if (pos + 2 < N (s)) {
if (can_parse_prefix_0b (s, pos) || can_parse_prefix_0x (s, pos) ||
can_parse_prefix_0o (s, pos))
if (can_parse_prefix_b (s, pos) || can_parse_prefix_x (s, pos) ||
can_parse_prefix_o (s, pos))
return true;
}
// check on len >= 2
if (pos + 1 < N (s)) {
if (s[pos] == '.' && is_digit (s[pos + 1])) return true;
// for #t and #f
if (prefix_hash () && s[pos] == '#' &&
(s[pos + 1] == 't' || s[pos + 1] == 'f'))
return true;
}
// finally, check on len >= 1
return pos < N (s) && is_digit (s[pos]);
Expand All @@ -82,22 +86,29 @@ void
number_parser_rep::do_parse (string s, int& pos) {
if (pos >= N (s)) return;

if (!is_digit (s[pos]) &&
if (!is_digit (s[pos]) && !(prefix_hash () && s[pos] == '#') &&
!(s[pos] == '.' && pos + 1 < N (s) && is_digit (s[pos + 1])))
return;

// Start with 0b, 0o, 0x
if (can_parse_prefix_0b (s, pos)) {
// for #t and #f
if (prefix_hash () && pos + 1 < N (s) && s[pos] == '#' &&
(s[pos + 1] == 't' || s[pos + 1] == 'f')) {
pos+= 2;
return;
}

// Start with 0b, 0o, 0x, #b, #o, #x
if (can_parse_prefix_b (s, pos)) {
pos+= 2;
parse_binary (s, pos);
if (no_suffix_with_box ()) return;
}
if (can_parse_prefix_0o (s, pos)) {
if (can_parse_prefix_o (s, pos)) {
pos+= 2;
parse_octal (s, pos);
if (no_suffix_with_box ()) return;
}
if (can_parse_prefix_0x (s, pos)) {
if (can_parse_prefix_x (s, pos)) {
pos+= 2;
parse_hex (s, pos);
if (no_suffix_with_box ()) return;
Expand Down
13 changes: 10 additions & 3 deletions src/Data/Parser/number_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class number_parser_rep : public parser_rep {
string PREFIX_0B;
string PREFIX_0O;
string PREFIX_0X;
string PREFIX_HASH;
string NO_SUFFIX_WITH_BOX;
string SCIENTIFIC_NOTATION;

Expand Down Expand Up @@ -56,6 +57,12 @@ class number_parser_rep : public parser_rep {
else remove_bool_feature (PREFIX_0X);
}

inline bool prefix_hash () { return bool_features->contains (PREFIX_HASH); }
inline void support_prefix_hash (bool param) {
if (param) insert_bool_feature (PREFIX_HASH);
else remove_bool_feature (PREFIX_HASH);
}

inline bool no_suffix_with_box () {
return bool_features->contains (NO_SUFFIX_WITH_BOX);
}
Expand Down Expand Up @@ -92,9 +99,9 @@ class number_parser_rep : public parser_rep {

void do_parse (string s, int& pos);

bool can_parse_prefix_0b (string s, int pos);
bool can_parse_prefix_0o (string s, int pos);
bool can_parse_prefix_0x (string s, int pos);
bool can_parse_prefix_b (string s, int pos);
bool can_parse_prefix_o (string s, int pos);
bool can_parse_prefix_x (string s, int pos);

void parse_binary (string s, int& pos);
void parse_hex (string s, int& pos);
Expand Down
Loading

0 comments on commit 511f393

Please sign in to comment.