Skip to content

Commit

Permalink
Multiline strings fixes (#643)
Browse files Browse the repository at this point in the history
* scanner: allow multiline strings to end with "" or ''

* parser: trim all whitespaces after \ in multiline
  • Loading branch information
pelletier authored Oct 28, 2021
1 parent c871a61 commit 39f893a
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 8 deletions.
18 changes: 15 additions & 3 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -570,13 +570,25 @@ func (p *parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er
// When the last non-whitespace character on a line is an unescaped \,
// it will be trimmed along with all whitespace (including newlines) up
// to the next non-whitespace character or closing delimiter.
if token[i+1] == '\n' || (token[i+1] == '\r' && token[i+2] == '\n') {
i++ // skip the \

isLastNonWhitespaceOnLine := false
j := 1
findEOLLoop:
for ; j < len(token)-3-i; j++ {
switch token[i+j] {
case ' ', '\t':
continue
case '\n':
isLastNonWhitespaceOnLine = true
}
break findEOLLoop
}
if isLastNonWhitespaceOnLine {
i += j
for ; i < len(token)-3; i++ {
c := token[i]
if !(c == '\n' || c == '\r' || c == ' ' || c == '\t') {
i--

break
}
}
Expand Down
50 changes: 47 additions & 3 deletions scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,30 @@ func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {
// mll-char = %x09 / %x20-26 / %x28-7E / non-ascii
// mll-quotes = 1*2apostrophe
for i := 3; i < len(b); {
if b[i] == '\'' && scanFollowsMultilineLiteralStringDelimiter(b[i:]) {
return b[:i+3], b[i+3:], nil
if scanFollowsMultilineLiteralStringDelimiter(b[i:]) {
i += 3

// At this point we have found 3 apostrophes, and i is the
// index of the byte after the third one. The scanner
// needs to be eager, because up to 2 extra apostrophes
// can be accepted as content at the end of the string.

if i >= len(b) || b[i] != '\'' {
return b[:i], b[i:], nil
}
i++

if i >= len(b) || b[i] != '\'' {
return b[:i], b[i:], nil
}
i++

if i < len(b) && b[i] == '\'' {
return nil, nil, newDecodeError(b[i-3:i+1], "''' not allowed in multiline literal string")
}

return b[:i], b[i:], nil
}
size := utf8ValidNext(b[i:])
if size == 0 {
Expand Down Expand Up @@ -201,7 +223,29 @@ loop:
switch b[i] {
case '"':
if scanFollowsMultilineBasicStringDelimiter(b[i:]) {
return b[:i+3], escaped, b[i+3:], nil
i += 3

// At this point we have found 3 quotation marks, and i is the
// index of the byte after the third one. The scanner
// needs to be eager, because up to 2 extra quotation marks
// can be accepted as content at the end of the string.

if i >= len(b) || b[i] != '"' {
return b[:i], escaped, b[i:], nil
}
i++

if i >= len(b) || b[i] != '"' {
return b[:i], escaped, b[i:], nil
}
i++

if i < len(b) && b[i] == '"' {
return nil, escaped, nil, newDecodeError(b[i-3:i+1], `""" not allowed in multiline basic string`)
}

return b[:i], escaped, b[i:], nil
}
case '\\':
if len(b) < i+2 {
Expand Down
4 changes: 4 additions & 0 deletions toml_testgen_support_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"encoding/json"
"testing"

"github.com/pelletier/go-toml/v2"
"github.com/pelletier/go-toml/v2/testsuite"
"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -37,6 +38,9 @@ func testgenValid(t *testing.T, input string, jsonRef string) {

err := testsuite.Unmarshal([]byte(input), &doc)
if err != nil {
if de, ok := err.(*toml.DecodeError); ok {
t.Logf("%s\n%s", err, de)
}
t.Fatalf("failed parsing toml: %s", err)
}
j, err := testsuite.ValueToTaggedJSON(doc)
Expand Down
2 changes: 0 additions & 2 deletions toml_testgen_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1379,14 +1379,12 @@ func TestTOMLTest_Valid_String_Escapes(t *testing.T) {
}

// TestTOMLTest_Valid_String_MultilineQuotes hands testgenValid a TOML document
// and the tagged-JSON reference it is expected to decode to. The document
// exercises quotes inside multiline strings: one or two apostrophes / quotation
// marks directly after the opening delimiter and directly before the closing
// one, the same with surrounding spaces, and the "other" delimiter kind
// embedded in the string (mismatch1, mismatch2).
//
// NOTE(review): t.Skip("FIXME") is shown as the first statement, which would
// stop the test before testgenValid is reached. This hunk is reported as
// deletions only, so that line is presumably the one being removed — confirm.
func TestTOMLTest_Valid_String_MultilineQuotes(t *testing.T) {
t.Skip("FIXME")
input := "# Make sure that quotes inside multiline strings are allowed, including right\n# after the opening '''/\"\"\" and before the closing '''/\"\"\"\n\nlit_one = ''''one quote''''\nlit_two = '''''two quotes'''''\nlit_one_space = ''' 'one quote' '''\nlit_two_space = ''' ''two quotes'' '''\n\none = \"\"\"\"one quote\"\"\"\"\ntwo = \"\"\"\"\"two quotes\"\"\"\"\"\none_space = \"\"\" \"one quote\" \"\"\"\ntwo_space = \"\"\" \"\"two quotes\"\" \"\"\"\n\nmismatch1 = \"\"\"aaa'''bbb\"\"\"\nmismatch2 = '''aaa\"\"\"bbb'''\n"
jsonRef := "{\n \"lit_one\": {\n \"type\": \"string\",\n \"value\": \"'one quote'\"\n },\n \"lit_one_space\": {\n \"type\": \"string\",\n \"value\": \" 'one quote' \"\n },\n \"lit_two\": {\n \"type\": \"string\",\n \"value\": \"''two quotes''\"\n },\n \"lit_two_space\": {\n \"type\": \"string\",\n \"value\": \" ''two quotes'' \"\n },\n \"mismatch1\": {\n \"type\": \"string\",\n \"value\": \"aaa'''bbb\"\n },\n \"mismatch2\": {\n \"type\": \"string\",\n \"value\": \"aaa\\\"\\\"\\\"bbb\"\n },\n \"one\": {\n \"type\": \"string\",\n \"value\": \"\\\"one quote\\\"\"\n },\n \"one_space\": {\n \"type\": \"string\",\n \"value\": \" \\\"one quote\\\" \"\n },\n \"two\": {\n \"type\": \"string\",\n \"value\": \"\\\"\\\"two quotes\\\"\\\"\"\n },\n \"two_space\": {\n \"type\": \"string\",\n \"value\": \" \\\"\\\"two quotes\\\"\\\" \"\n }\n}\n"
testgenValid(t, input, jsonRef)
}

func TestTOMLTest_Valid_String_Multiline(t *testing.T) {
t.Skip("FIXME")
input := "# NOTE: this file includes some literal tab characters.\n\nmultiline_empty_one = \"\"\"\"\"\"\nmultiline_empty_two = \"\"\"\n\"\"\"\nmultiline_empty_three = \"\"\"\\\n \"\"\"\nmultiline_empty_four = \"\"\"\\\n \\\n \\ \n \"\"\"\n\nequivalent_one = \"The quick brown fox jumps over the lazy dog.\"\nequivalent_two = \"\"\"\nThe quick brown \\\n\n\n fox jumps over \\\n the lazy dog.\"\"\"\n\nequivalent_three = \"\"\"\\\n The quick brown \\\n fox jumps over \\\n the lazy dog.\\\n \"\"\"\n\nwhitespace-after-bs = \"\"\"\\\n The quick brown \\\n fox jumps over \\ \n the lazy dog.\\\t\n \"\"\"\n\nno-space = \"\"\"a\\\n b\"\"\"\n\nkeep-ws-before = \"\"\"a \t\\\n b\"\"\"\n\nescape-bs-1 = \"\"\"a \\\\\nb\"\"\"\n\nescape-bs-2 = \"\"\"a \\\\\\\nb\"\"\"\n\nescape-bs-3 = \"\"\"a \\\\\\\\\n b\"\"\"\n"
jsonRef := "{\n \"equivalent_one\": {\n \"type\": \"string\",\n \"value\": \"The quick brown fox jumps over the lazy dog.\"\n },\n \"equivalent_three\": {\n \"type\": \"string\",\n \"value\": \"The quick brown fox jumps over the lazy dog.\"\n },\n \"equivalent_two\": {\n \"type\": \"string\",\n \"value\": \"The quick brown fox jumps over the lazy dog.\"\n },\n \"escape-bs-1\": {\n \"type\": \"string\",\n \"value\": \"a \\\\\\nb\"\n },\n \"escape-bs-2\": {\n \"type\": \"string\",\n \"value\": \"a \\\\b\"\n },\n \"escape-bs-3\": {\n \"type\": \"string\",\n \"value\": \"a \\\\\\\\\\n b\"\n },\n \"keep-ws-before\": {\n \"type\": \"string\",\n \"value\": \"a \\tb\"\n },\n \"multiline_empty_four\": {\n \"type\": \"string\",\n \"value\": \"\"\n },\n \"multiline_empty_one\": {\n \"type\": \"string\",\n \"value\": \"\"\n },\n \"multiline_empty_three\": {\n \"type\": \"string\",\n \"value\": \"\"\n },\n \"multiline_empty_two\": {\n \"type\": \"string\",\n \"value\": \"\"\n },\n \"no-space\": {\n \"type\": \"string\",\n \"value\": \"ab\"\n },\n \"whitespace-after-bs\": {\n \"type\": \"string\",\n \"value\": \"The quick brown fox jumps over the lazy dog.\"\n }\n}\n"
testgenValid(t, input, jsonRef)
Expand Down

0 comments on commit 39f893a

Please sign in to comment.