Skip to content

Commit

Permalink
chore: split grammar for list into its own file
Browse files Browse the repository at this point in the history
  • Loading branch information
nlopes committed Oct 2, 2024
1 parent 340479c commit fc48ac4
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 63 deletions.
6 changes: 3 additions & 3 deletions acdc-parser/grammar/asciidoc.pest
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
document = _{
SOI ~
(EMPTY_LINE | comment)* ~
(document_header ~ EMPTY_LINE)? ~
(NEWLINE | comment)* ~
(document_header ~ NEWLINE)? ~
block* ~
(EMPTY_LINE | comment)* ~
(NEWLINE | comment)* ~
EOI
}
54 changes: 4 additions & 50 deletions acdc-parser/grammar/block.pest
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ block = {
}

section = {
section_header_start ~ section_title ~ NEWLINE ~ EMPTY_LINE ~
section_header_start ~ section_title ~ NEWLINE{2} ~
section_content*
}

Expand All @@ -19,56 +19,10 @@ section_content = !{ (!(&section_level | EOI | &delimited_block) ~ ANY)+ }
section_level_symbol = _{ ("=" | "#") }

section_level = { section_level_symbol{2,6} }
section_title = { (!NEWLINE ~ ANY)* }

// Lists
list = {
(anchor | attribute_list | blocktitle)* ~
(
unordered_list |
ordered_list |
description_list
) ~ NEWLINE*
}

list_item = ${
(
list_multiline |
(continuation ~ delimited_block)
)+
}

empty_lines = _{ NEWLINE{2,}+ | (NEWLINE ~ EOI) }

list_title = { "." ~ !("." | SPACE) ~ (!NEWLINE ~ ANY)+ ~ (NEWLINE | EOI) }

list_multiline = ${ ((!(NEWLINE{2,} | EOI | NEWLINE ~ (unordered_level | ordered_level_token)) ~ ANY) | other_list_inline)+ }
other_list_inline = @{ (!empty_lines ~ !EOI ~ !list_inline ~ !(NEWLINE ~ (unordered_level | ordered_level_token )) ~ !(continuation ~ delimited_block) ~ ANY)+ }

continuation = { NEWLINE ~ "+" ~ NEWLINE }

// TODO(nlopes): only support one line for now
unordered_list = { unordered_list_item+ }
unordered_list_item = { unordered_level ~ SPACE ~ list_item ~ (NEWLINE | EOI) }
unordered_level = { ("*"+ | "-"+) }

ordered_list = { ordered_list_item+ }
ordered_list_item = { ordered_level_token ~ SPACE ~ checklist_item? ~ list_item ~ (NEWLINE | EOI) }
ordered_level_token = _{ ordered_level_number? ~ ordered_level }
ordered_level_number = { ASCII_DIGIT+ }
ordered_level = { "."+ }

checklist_item = _{ "[" ~ (checklist_item_checked | checklist_item_unchecked) ~ "]" ~ SPACE }
checklist_item_checked = !{ "X" | "x" | "*" }
checklist_item_unchecked = !{ SPACE }

description_term = { (!"::" ~ (!NEWLINE ~ ANY))+ ~ "::"}
description_list = { (description_term ~ SPACE ~ list_item)+ }


list_inline = { (!NEWLINE ~ ANY)* }
section_title = { ONE_CHAR* }

// Paragraphs
paragraph = {
(!((EMPTY_LINE{2,}) | EOI) ~ ANY)+ ~ NEWLINE*
(anchor | attribute_list | blocktitle)* ~
(!((NEWLINE{2,}) | EOI) ~ ANY)+ ~ NEWLINE*
}
15 changes: 9 additions & 6 deletions acdc-parser/grammar/core.pest
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ SPACE = _{ " " }
TAB = _{ "\t" }
WSPACE = _{ SPACE | TAB }
NEWLINE = _{ "\n" }
EMPTY_LINE = _{ WSPACE* ~ NEWLINE }
// Silent rule: either a run of two or more NEWLINEs, or a single NEWLINE that is
// immediately followed by end of input.
// NOTE(review): `NEWLINE{2,}` is already greedy, so the trailing `+` looks redundant —
// confirm before simplifying.
EMPTY_LINES = _{ NEWLINE{2,}+ | (NEWLINE ~ EOI) }
// Silent rule: exactly one character that is not a NEWLINE.
ONE_CHAR = _{ !NEWLINE ~ ANY }

empty_header = { "" ~ NEWLINE }
comment = { "//" ~ (!NEWLINE ~ ANY)* ~ NEWLINE }
comment = { "//" ~ ONE_CHAR* ~ NEWLINE }

id = @{ id_start_char ~ id_subsequent_char* }

Expand All @@ -25,14 +25,17 @@ inline_anchor = {
}

blocktitle = _{ "." ~ !("." | SPACE) ~ title ~ NEWLINE }
title = { (!NEWLINE ~ ANY)+ }
title = { ONE_CHAR+ }

attribute_list = { inline_attribute_list ~ NEWLINE }
attribute_list = { (role_list | inline_attribute_list) ~ NEWLINE }

inline_attribute_list = _{
"[" ~ (attribute ~ ("," ~ attribute)* )? ~ "]"
"[" ~ (attribute ~ ("," ~ attribute)* )? ~ "]"
}

// Role shorthand in square brackets: "[", then one or more roles each introduced by
// ".", then "]" — e.g. `[.text-center]`.
// NOTE(review): `role` delegates to `attribute_value` (defined elsewhere); if that rule
// can itself consume ".", the `("." ~ role)*` repetition will never see a second role —
// confirm.
role_list = { "[" ~ "." ~ role ~ ("." ~ role)* ~ "]" }
role = { attribute_value }

// https://docs.asciidoctor.org/asciidoc/latest/attributes/names-and-values/#user-defined
//
// Although uppercase characters are permitted in an attribute name, the name is converted
Expand Down
8 changes: 4 additions & 4 deletions acdc-parser/grammar/document.pest
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ document_attribute = { document_attribute_entry }
// last colon and capture everything up to it as the title and everything after it as the
// subtitle. So I'm just going to ignore it for now and instead do it in the parser.
document_title_token = @{ ("=" | "#") ~ SPACE ~ document_title ~ NEWLINE }
document_title = !{ (!NEWLINE ~ ANY)+ }
document_title = !{ ONE_CHAR+ }

document_attribute_entry = _{ ":" ~ attribute_name ~ ":" ~ (SPACE ~ document_attribute_value)? ~ NEWLINE }
document_attribute_value = !{ (!NEWLINE ~ ANY)* }
document_attribute_value = !{ ONE_CHAR* }

author_revision = _{
(author_line ~ revision_line?)?
Expand Down Expand Up @@ -41,7 +41,7 @@ name_part = _{
}

optional_author_email = _{ WSPACE* ~ "<" ~ author_email ~ ">" }
author_email = { (!">" ~ !NEWLINE ~ ANY)+ }
author_email = { (!">" ~ ONE_CHAR)+ }

revision_line = {
revision_number ~
Expand All @@ -56,4 +56,4 @@ optional_revision_date = _{ ("," ~ WSPACE ~ revision_date )? }
revision_date = { (!NEWLINE ~ !":" ~ ANY)+ }

optional_revision_remark = _{ (":" ~ WSPACE ~ revision_remark)? }
revision_remark = { (!NEWLINE ~ ANY)+ }
revision_remark = { ONE_CHAR+ }
42 changes: 42 additions & 0 deletions acdc-parser/grammar/list.pest
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Lists
// A list block: any number of leading anchors, attribute lists or block titles,
// then exactly one of the three list kinds (tried in the order written), then any
// number of trailing NEWLINEs.
list = {
(anchor | attribute_list | blocktitle)* ~
(
unordered_list |
ordered_list |
description_list
) ~ NEWLINE*
}

// Body of a single list item (compound-atomic, `$`): one or more chunks, where each
// chunk is either a run of item text (`list_multiline`) or a `continuation` marker
// followed by a delimited block.
list_item = ${
(
list_multiline |
(continuation ~ delimited_block)
)+
}

// A title line: "." that is not followed by another "." or a SPACE, then one or more
// non-newline characters, terminated by NEWLINE or end of input.
// NOTE(review): `list` uses `blocktitle` rather than this rule, and nothing else in
// this file references it — check whether it is still needed.
list_title = { "." ~ !("." | SPACE) ~ ONE_CHAR+ ~ (NEWLINE | EOI) }
// Text of a list item, possibly spanning several lines (compound-atomic, `$`).
// Consumes one character at a time and stops before: two or more consecutive NEWLINEs,
// end of input, or a NEWLINE that is followed by an unordered/ordered list marker.
// `other_list_inline` is tried only when that first alternative does not match.
list_multiline = ${ ((!(NEWLINE{2,} | EOI | NEWLINE ~ (unordered_level | ordered_level_token)) ~ ANY) | other_list_inline)+ }
// Atomic (`@`) fallback used by `list_multiline`: one or more characters, each guarded
// by a chain of negative lookaheads (EMPTY_LINES, EOI, list_inline, NEWLINE + list
// marker, continuation + delimited block).
// NOTE(review): `list_inline` is `ONE_CHAR*`, which succeeds on the empty string, so
// `!list_inline` appears to fail at every position and this rule can never match —
// confirm and either drop that lookahead or the rule.
other_list_inline = @{ (!EMPTY_LINES ~ !EOI ~ !list_inline ~ !(NEWLINE ~ (unordered_level | ordered_level_token )) ~ !(continuation ~ delimited_block) ~ ANY)+ }

// List continuation marker: a line containing only "+", i.e. NEWLINE, "+", NEWLINE.
continuation = { NEWLINE ~ "+" ~ NEWLINE }

// Unordered (bulleted) lists.
// TODO(nlopes): only support one line for now
// An unordered list is one or more consecutive unordered items.
unordered_list = { unordered_list_item+ }
// One item: the bullet marker, a single SPACE, the item body, then NEWLINE or end of input.
unordered_list_item = { unordered_level ~ SPACE ~ list_item ~ (NEWLINE | EOI) }
// Bullet marker: a run of "*" or a run of "-" (the two characters are not mixed in one marker).
unordered_level = { ("*"+ | "-"+) }

// Ordered (numbered) lists: one or more consecutive ordered items.
ordered_list = { ordered_list_item+ }
// One item: the marker, a single SPACE, an optional checklist box, the item body,
// then NEWLINE or end of input.
// NOTE(review): `checklist_item` is accepted here but not in `unordered_list_item`;
// AsciiDoc checklists are normally written on unordered (`*` / `-`) items — confirm
// this placement is intentional.
ordered_list_item = { ordered_level_token ~ SPACE ~ checklist_item? ~ list_item ~ (NEWLINE | EOI) }
// Silent wrapper for the marker: optional explicit number followed by the dot run
// (e.g. `.`, `..`, `1.`).
ordered_level_token = _{ ordered_level_number? ~ ordered_level }
// Explicit item number: one or more ASCII digits.
ordered_level_number = { ASCII_DIGIT+ }
// One or more "." characters.
ordered_level = { "."+ }

// Checklist box (silent): "[", a checked or unchecked mark, "]", then a single SPACE.
checklist_item = _{ "[" ~ (checklist_item_checked | checklist_item_unchecked) ~ "]" ~ SPACE }
// Checked mark: one of "X", "x" or "*".
checklist_item_checked = !{ "X" | "x" | "*" }
// Unchecked mark: exactly one SPACE, i.e. `[ ]`.
checklist_item_unchecked = !{ SPACE }

// Term of a description list entry: one or more non-newline characters, stopping at
// the first "::", followed by that "::" delimiter (the delimiter is part of the match).
description_term = { (!"::" ~ ONE_CHAR)+ ~ "::"}
// One or more entries, each being a term, a single SPACE, and the item body.
description_list = { (description_term ~ SPACE ~ list_item)+ }

// Zero or more non-newline characters; note that this also matches the empty string.
list_inline = { ONE_CHAR* }
21 changes: 21 additions & 0 deletions acdc-parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pub struct PestParser;
#[derive(Parser, Debug)]
#[grammar = "../grammar/block.pest"]
#[grammar = "../grammar/core.pest"]
#[grammar = "../grammar/list.pest"]
#[grammar = "../grammar/delimited.pest"]
#[grammar = "../grammar/document.pest"]
#[grammar = "../grammar/asciidoc.pest"]
Expand Down Expand Up @@ -701,6 +702,26 @@ mod tests {
}
}

/// Smoke test: a paragraph preceded by a role shorthand (`[.text-center]`) must be
/// accepted by the parser.
///
/// The previous version ended in an unconditional `panic!()` (a debugging leftover used
/// to force `dbg!` output to be shown), which made the test fail even when parsing
/// succeeded.
// TODO: add a description-list case whose terms carry inline anchors
// (e.g. `[[cpu,CPU]]Central Processing Unit (CPU)::`).
#[test]
fn test_blah() {
    // Same input as before, written with explicit `\n` escapes so the literal does not
    // depend on source indentation.
    let input = "[.text-center]\nThis text is centered, so it must be important.\n";
    let _parsed = PestParser
        .parse(input)
        .expect("paragraph with a leading role list should parse");
}

#[test]
fn test_mdbasics_adoc() {
// let parser = PestParser;
Expand Down

0 comments on commit fc48ac4

Please sign in to comment.