chore: split grammar for list into its own file

nlopes · Oct 2, 2024 · fc48ac4 · fc48ac4
1 parent 340479c
commit fc48ac4
Show file tree

Hide file tree

Showing 6 changed files with 83 additions and 63 deletions.
diff --git a/acdc-parser/grammar/asciidoc.pest b/acdc-parser/grammar/asciidoc.pest
@@ -1,8 +1,8 @@
 document = _{
   SOI ~
-  (EMPTY_LINE | comment)* ~
-  (document_header ~ EMPTY_LINE)? ~
+  (NEWLINE | comment)* ~
+  (document_header ~ NEWLINE)? ~
   block* ~
-  (EMPTY_LINE | comment)* ~
+  (NEWLINE | comment)* ~
   EOI
 }
diff --git a/acdc-parser/grammar/block.pest b/acdc-parser/grammar/block.pest
@@ -8,7 +8,7 @@ block = {
 }
 
 section = {
-    section_header_start ~ section_title ~ NEWLINE ~ EMPTY_LINE ~
+    section_header_start ~ section_title ~ NEWLINE{2} ~
     section_content*
 }
 
@@ -19,56 +19,10 @@ section_content = !{ (!(&section_level | EOI | &delimited_block) ~ ANY)+ }
 section_level_symbol = _{ ("=" | "#") }
 
 section_level = { section_level_symbol{2,6} }
-section_title = { (!NEWLINE ~ ANY)* }
-
-// Lists
-list = {
-    (anchor | attribute_list | blocktitle)* ~
-    (
-        unordered_list |
-        ordered_list |
-        description_list
-    ) ~ NEWLINE*
-}
-
-list_item = ${
-    (
-        list_multiline |
-        (continuation ~ delimited_block)
-    )+
-}
-
-empty_lines = _{ NEWLINE{2,}+ | (NEWLINE ~ EOI) }
-
-list_title = { "." ~ !("." | SPACE) ~ (!NEWLINE ~ ANY)+ ~ (NEWLINE | EOI) }
-
-list_multiline = ${ ((!(NEWLINE{2,} | EOI | NEWLINE ~ (unordered_level | ordered_level_token))  ~ ANY) | other_list_inline)+ }
-other_list_inline = @{ (!empty_lines ~ !EOI ~ !list_inline ~ !(NEWLINE ~ (unordered_level | ordered_level_token )) ~ !(continuation ~ delimited_block) ~ ANY)+ }
-
-continuation = { NEWLINE ~ "+" ~ NEWLINE }
-
-// TODO(nlopes): only support one line for now
-unordered_list = { unordered_list_item+ }
-unordered_list_item = { unordered_level ~ SPACE ~ list_item ~ (NEWLINE | EOI) }
-unordered_level = { ("*"+ | "-"+) }
-
-ordered_list = { ordered_list_item+ }
-ordered_list_item = { ordered_level_token ~ SPACE ~ checklist_item? ~ list_item ~ (NEWLINE | EOI) }
-ordered_level_token = _{ ordered_level_number? ~ ordered_level }
-ordered_level_number = { ASCII_DIGIT+ }
-ordered_level = { "."+ }
-
-checklist_item = _{ "[" ~ (checklist_item_checked | checklist_item_unchecked) ~ "]" ~ SPACE }
-checklist_item_checked = !{ "X" | "x" | "*" }
-checklist_item_unchecked = !{ SPACE }
-
-description_term = { (!"::" ~ (!NEWLINE ~ ANY))+ ~ "::"}
-description_list = { (description_term ~ SPACE ~ list_item)+ }
-
-
-list_inline = { (!NEWLINE ~ ANY)* }
+section_title = { ONE_CHAR* }
 
 // Paragraphs
 paragraph = {
-    (!((EMPTY_LINE{2,}) | EOI) ~ ANY)+ ~ NEWLINE*
+    (anchor | attribute_list | blocktitle)* ~
+    (!((NEWLINE{2,}) | EOI) ~ ANY)+ ~ NEWLINE*
 }
diff --git a/acdc-parser/grammar/core.pest b/acdc-parser/grammar/core.pest
@@ -2,10 +2,10 @@ SPACE = _{ " " }
 TAB = _{ "\t" }
 WSPACE = _{ SPACE | TAB }
 NEWLINE = _{ "\n" }
-EMPTY_LINE = _{ WSPACE* ~ NEWLINE }
+EMPTY_LINES = _{ NEWLINE{2,}+ | (NEWLINE ~ EOI) }
+ONE_CHAR = _{ !NEWLINE ~ ANY }
 
-empty_header = { "" ~ NEWLINE }
-comment = { "//" ~ (!NEWLINE ~ ANY)* ~ NEWLINE }
+comment = { "//" ~ ONE_CHAR* ~ NEWLINE }
 
 id = @{ id_start_char ~ id_subsequent_char* }
 
@@ -25,14 +25,17 @@ inline_anchor = {
 }
 
 blocktitle = _{ "." ~ !("." | SPACE) ~ title ~ NEWLINE }
-title = { (!NEWLINE ~ ANY)+ }
+title = { ONE_CHAR+ }
 
-attribute_list = { inline_attribute_list ~ NEWLINE }
+attribute_list = { (role_list | inline_attribute_list) ~ NEWLINE }
 
 inline_attribute_list = _{
-  "[" ~ (attribute ~ ("," ~ attribute)* )? ~ "]"
+    "[" ~ (attribute ~ ("," ~ attribute)* )? ~ "]"
 }
 
+role_list = { "[" ~ "." ~ role ~ ("." ~ role)* ~ "]" }
+role = { attribute_value }
+
 // https://docs.asciidoctor.org/asciidoc/latest/attributes/names-and-values/#user-defined
 //
 // Although uppercase characters are permitted in an attribute name, the name is converted

diff --git a/acdc-parser/grammar/document.pest b/acdc-parser/grammar/document.pest
@@ -10,10 +10,10 @@ document_attribute = { document_attribute_entry }
 // last colon and capture everything up to it as the title and everything after it as the
 // subtitle. So I'm just going to ignore it for now and instead do it in the parser.
 document_title_token = @{ ("=" | "#") ~ SPACE ~ document_title ~ NEWLINE }
-document_title = !{ (!NEWLINE ~ ANY)+ }
+document_title = !{ ONE_CHAR+ }
 
 document_attribute_entry = _{ ":" ~ attribute_name ~ ":" ~ (SPACE ~ document_attribute_value)? ~ NEWLINE }
-document_attribute_value = !{ (!NEWLINE ~ ANY)* }
+document_attribute_value = !{ ONE_CHAR* }
 
 author_revision = _{
   (author_line ~ revision_line?)?
@@ -41,7 +41,7 @@ name_part = _{
 }
 
 optional_author_email = _{ WSPACE* ~ "<" ~ author_email ~ ">" }
-author_email = { (!">" ~ !NEWLINE ~ ANY)+ }
+author_email = { (!">" ~ ONE_CHAR)+ }
 
 revision_line = {
   revision_number ~
@@ -56,4 +56,4 @@ optional_revision_date = _{ ("," ~ WSPACE ~ revision_date )? }
 revision_date = { (!NEWLINE ~ !":" ~ ANY)+ }
 
 optional_revision_remark = _{ (":" ~ WSPACE ~ revision_remark)? }
-revision_remark = { (!NEWLINE ~ ANY)+ }
+revision_remark = { ONE_CHAR+ }
diff --git a/acdc-parser/grammar/list.pest b/acdc-parser/grammar/list.pest
@@ -0,0 +1,42 @@
+// Lists
+list = {
+    (anchor | attribute_list | blocktitle)* ~
+    (
+        unordered_list |
+        ordered_list |
+        description_list
+    ) ~ NEWLINE*
+}
+
+list_item = ${
+    (
+        list_multiline |
+        (continuation ~ delimited_block)
+    )+
+}
+
+list_title = { "." ~ !("." | SPACE) ~ ONE_CHAR+ ~ (NEWLINE | EOI) }
+list_multiline = ${ ((!(NEWLINE{2,} | EOI | NEWLINE ~ (unordered_level | ordered_level_token))  ~ ANY) | other_list_inline)+ }
+other_list_inline = @{ (!EMPTY_LINES ~ !EOI ~ !list_inline ~ !(NEWLINE ~ (unordered_level | ordered_level_token )) ~ !(continuation ~ delimited_block) ~ ANY)+ }
+
+continuation = { NEWLINE ~ "+" ~ NEWLINE }
+
+// TODO(nlopes): only support one line for now
+unordered_list = { unordered_list_item+ }
+unordered_list_item = { unordered_level ~ SPACE ~ list_item ~ (NEWLINE | EOI) }
+unordered_level = { ("*"+ | "-"+) }
+
+ordered_list = { ordered_list_item+ }
+ordered_list_item = { ordered_level_token ~ SPACE ~ checklist_item? ~ list_item ~ (NEWLINE | EOI) }
+ordered_level_token = _{ ordered_level_number? ~ ordered_level }
+ordered_level_number = { ASCII_DIGIT+ }
+ordered_level = { "."+ }
+
+checklist_item = _{ "[" ~ (checklist_item_checked | checklist_item_unchecked) ~ "]" ~ SPACE }
+checklist_item_checked = !{ "X" | "x" | "*" }
+checklist_item_unchecked = !{ SPACE }
+
+description_term = { (!"::" ~ ONE_CHAR)+ ~ "::"}
+description_list = { (description_term ~ SPACE ~ list_item)+ }
+
+list_inline = { ONE_CHAR* }
diff --git a/acdc-parser/src/lib.rs b/acdc-parser/src/lib.rs
@@ -20,6 +20,7 @@ pub struct PestParser;
 #[derive(Parser, Debug)]
 #[grammar = "../grammar/block.pest"]
 #[grammar = "../grammar/core.pest"]
+#[grammar = "../grammar/list.pest"]
 #[grammar = "../grammar/delimited.pest"]
 #[grammar = "../grammar/document.pest"]
 #[grammar = "../grammar/asciidoc.pest"]
@@ -701,6 +702,26 @@ mod tests {
         }
     }
 
+    /// A paragraph preceded by a role shorthand line (`[.text-center]`)
+    /// must be accepted by the parser.
+    ///
+    /// The test asserts on the parse result rather than dumping it and
+    /// panicking, so it only fails when parsing itself fails.
+    #[test]
+    fn test_blah() {
+        // Keep the input flush-left: leading spaces inside the literal would be
+        // part of the document and change what the grammar sees.
+        let input = "[.text-center]
+This text is centered, so it must be important.
+";
+        let result = PestParser.parse(input);
+        assert!(
+            result.is_ok(),
+            "role-annotated paragraph should parse: {:?}",
+            result.err()
+        );
+    }
+
     #[test]
     fn test_mdbasics_adoc() {
         // let parser = PestParser;