From bee295616a220dba57c4998cc20a32e2ad6b1315 Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Tue, 28 Mar 2023 01:27:01 +0000 Subject: [PATCH 1/5] Add printer for comments example test --- unstable/parser.go | 4 +-- unstable/parser_test.go | 70 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/unstable/parser.go b/unstable/parser.go index 571f4e95..de260adb 100644 --- a/unstable/parser.go +++ b/unstable/parser.go @@ -49,8 +49,6 @@ func NewParserError(highlight []byte, format string, args ...interface{}) error // For performance reasons, go-toml doesn't make a copy of the input bytes to // the parser. Make sure to copy all the bytes you need to outlive the slice // given to the parser. -// -// The parser doesn't provide nodes for comments yet, nor for whitespace. type Parser struct { data []byte builder builder @@ -58,6 +56,8 @@ type Parser struct { left []byte err error first bool + + KeepComments bool } // Data returns the slice provided to the last call to Reset. diff --git a/unstable/parser_test.go b/unstable/parser_test.go index 5e201cda..186564bd 100644 --- a/unstable/parser_test.go +++ b/unstable/parser_test.go @@ -448,6 +448,76 @@ func TestParser_AST_DateTimes(t *testing.T) { } } +func ExampleParserAllComments() { + doc := `# Top of the document comment. +# Optional, any amount of lines. + +# Above table. +[table] # Next to table. +# Above simple value. +key = "value" # Next to simple value. +# Below simple value. + +# Some comment alone. + +# Multiple comments, on multiple lines. + +# Above inline table. +name = { first = "Tom", last = "Preston-Werner" } # Next to inline table. +# Below inline table. + +# Above array. +array = [ 1, 2, 3 ] # Next to one-line array. +# Below array. + +# Above multi-line array. +key5 = [ # Next to start of inline array. + # Second line before array content. + 1, # Next to first element. + # After first element. + 2, + 3, # Next to last element + # After last element. +] # Next to end of array. +# Below multi-line array. + +# Before array table. +[[products]] # Next to array table. +# After array table. +` + + p := &Parser{} + p.Reset([]byte(doc)) + printTree(p) + + // Output: + // yo +} + +func printIndentf(i int, format string, args ...interface{}) { + fmt.Printf("%s%s", strings.Repeat(" ", i), fmt.Sprintf(format, args...)) +} + +func printGeneric(indent int, e *Node) { + if e == nil { + return + } + printIndentf(indent, "%s [%s]\n", e.Kind, e.Data) + printGeneric(indent+1, e.Child()) + printGeneric(indent, e.Next()) +} + +func printTree(p *Parser) { + for p.NextExpression() { + e := p.Expression() + fmt.Println("---") + printGeneric(0, e) + } + if err := p.Error(); err != nil { + panic(err) + } +} + func ExampleParser() { doc := ` hello = "world" From 28d4ed8259b0e79e119d47c585705d8c7ebabf53 Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Tue, 28 Mar 2023 19:16:59 +0000 Subject: [PATCH 2/5] Example of AST with comments --- unstable/parser.go | 85 ++++++++++++++++++++++++++++++++--------- unstable/parser_test.go | 85 +++++++++++++++++++++++++++++++++++++++-- unstable/scanner.go | 1 - 3 files changed, 150 insertions(+), 21 deletions(-) diff --git a/unstable/parser.go b/unstable/parser.go index de260adb..d315391f 100644 --- a/unstable/parser.go +++ b/unstable/parser.go @@ -155,6 +155,19 @@ func (p *Parser) parseNewline(b []byte) ([]byte, error) { return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0]) } +func (p *Parser) parseComment(b []byte) (reference, []byte, error) { + ref := invalidReference + data, rest, err := scanComment(b) + if p.KeepComments && err == nil { + ref = p.builder.Push(Node{ + Kind: Comment, + Raw: p.Range(data), + Data: data, + }) + } + return ref, rest, err +} + func (p *Parser) parseExpression(b []byte) (reference, []byte, error) { // expression = ws [ comment ] // expression =/ ws keyval ws [ comment ] @@ -168,7 +181,7 @@ func (p *Parser) parseExpression(b []byte) (reference, []byte, error) { } if b[0] == '#' { - _, rest, err := scanComment(b) + ref, rest, err := p.parseComment(b) return ref, rest, err } @@ -190,7 +203,8 @@ func (p *Parser) parseExpression(b []byte) (reference, []byte, error) { b = p.parseWhitespace(b) if len(b) > 0 && b[0] == '#' { - _, rest, err := scanComment(b) + cref, rest, err := p.parseComment(b) + p.builder.Chain(ref, cref) return ref, rest, err } @@ -471,17 +485,33 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) { Kind: Array, }) + // First indicates whether the parser is looking for the first element + // (non-comment) of the array. first := true - var lastChild reference + lastChild := invalidReference + + addChild := func(valueRef reference) { + if lastChild == invalidReference { + p.builder.AttachChild(parent, valueRef) + } else { + p.builder.Chain(lastChild, valueRef) + } + lastChild = valueRef + } var err error for len(b) > 0 { - b, err = p.parseOptionalWhitespaceCommentNewline(b) + cref := invalidReference + cref, b, err = p.parseOptionalWhitespaceCommentNewline(b) if err != nil { return parent, nil, err } + if cref != invalidReference { + addChild(cref) + } + if len(b) == 0 { return parent, nil, NewParserError(arrayStart[:1], "array is incomplete") } @@ -496,10 +526,13 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) { } b = b[1:] - b, err = p.parseOptionalWhitespaceCommentNewline(b) + cref, b, err = p.parseOptionalWhitespaceCommentNewline(b) if err != nil { return parent, nil, err } + if cref != invalidReference { + addChild(cref) + } } else if !first { return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas") } @@ -515,17 +548,16 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) { return parent, nil, err } - if first { - p.builder.AttachChild(parent, valueRef) - } else { - p.builder.Chain(lastChild, valueRef) - } - lastChild = valueRef + addChild(valueRef) - b, err = p.parseOptionalWhitespaceCommentNewline(b) + cref, b, err = p.parseOptionalWhitespaceCommentNewline(b) if err != nil { return parent, nil, err } + if cref != invalidReference { + addChild(cref) + } + first = false } @@ -534,15 +566,34 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) { return parent, rest, err } -func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error) { +func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) (reference, []byte, error) { + rootCommentRef := invalidReference + latestCommentRef := invalidReference + + addComment := func(ref reference) { + if rootCommentRef == invalidReference { + rootCommentRef = ref + } else if latestCommentRef == invalidReference { + p.builder.AttachChild(rootCommentRef, ref) + latestCommentRef = ref + } else { + p.builder.Chain(latestCommentRef, ref) + latestCommentRef = ref + } + } + for len(b) > 0 { var err error b = p.parseWhitespace(b) if len(b) > 0 && b[0] == '#' { - _, b, err = scanComment(b) + var ref reference + ref, b, err = p.parseComment(b) if err != nil { - return nil, err + return invalidReference, nil, err + } + if ref != invalidReference { + addComment(ref) } } @@ -553,14 +604,14 @@ func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error) if b[0] == '\n' || b[0] == '\r' { b, err = p.parseNewline(b) if err != nil { - return nil, err + return invalidReference, nil, err } } else { break } } - return b, nil + return rootCommentRef, b, nil } func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) { diff --git a/unstable/parser_test.go b/unstable/parser_test.go index 186564bd..414a12b6 100644 --- a/unstable/parser_test.go +++ b/unstable/parser_test.go @@ -448,7 +448,7 @@ func TestParser_AST_DateTimes(t *testing.T) { } } -func ExampleParserAllComments() { +func ExampleParser_comments() { doc := `# Top of the document comment. # Optional, any amount of lines. @@ -475,6 +475,7 @@ key5 = [ # Next to start of inline array. # Second line before array content. 1, # Next to first element. # After first element. + # Before second element. 2, 3, # Next to last element # After last element. @@ -486,12 +487,90 @@ key5 = [ # Next to start of inline array. # After array table. ` - p := &Parser{} + p := &Parser{ + KeepComments: true, + } p.Reset([]byte(doc)) printTree(p) // Output: - // yo + // --- + // Comment [# Top of the document comment.] + // --- + // Comment [# Optional, any amount of lines.] + // --- + // Comment [# Above table.] + // --- + // Table [] + // Key [table] + // Comment [# Next to table.] + // --- + // Comment [# Above simple value.] + // --- + // KeyValue [] + // String [value] + // Key [key] + // Comment [# Next to simple value.] + // --- + // Comment [# Below simple value.] + // --- + // Comment [# Some comment alone.] + // --- + // Comment [# Multiple comments, on multiple lines.] + // --- + // Comment [# Above inline table.] + // --- + // KeyValue [] + // InlineTable [] + // KeyValue [] + // String [Tom] + // Key [first] + // KeyValue [] + // String [Preston-Werner] + // Key [last] + // Key [name] + // Comment [# Next to inline table.] + // --- + // Comment [# Below inline table.] + // --- + // Comment [# Above array.] + // --- + // KeyValue [] + // Array [] + // Integer [1] + // Integer [2] + // Integer [3] + // Key [array] + // Comment [# Next to one-line array.] + // --- + // Comment [# Below array.] + // --- + // Comment [# Above multi-line array.] + // --- + // KeyValue [] + // Array [] + // Comment [# Next to start of inline array.] + // Comment [# Second line before array content.] + // Integer [1] + // Comment [# Next to first element.] + // Comment [# After first element.] + // Comment [# Before second element.] + // Integer [2] + // Integer [3] + // Comment [# Next to last element] + // Comment [# After last element.] + // Key [key5] + // Comment [# Next to end of array.] + // --- + // Comment [# Below multi-line array.] + // --- + // Comment [# Before array table.] + // --- + // ArrayTable [] + // Key [products] + // Comment [# Next to array table.] + // --- + // Comment [# After array table.] } func printIndentf(i int, format string, args ...interface{}) { diff --git a/unstable/scanner.go b/unstable/scanner.go index af22ebbe..0512181d 100644 --- a/unstable/scanner.go +++ b/unstable/scanner.go @@ -151,7 +151,6 @@ func scanWhitespace(b []byte) ([]byte, []byte) { return b, b[len(b):] } -//nolint:unparam func scanComment(b []byte) ([]byte, []byte, error) { // comment-start-symbol = %x23 ; # // non-ascii = %x80-D7FF / %xE000-10FFFF From b167cf5acb99907250e4e6ce951cd3c17be13ab2 Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Tue, 28 Mar 2023 19:41:09 +0000 Subject: [PATCH 3/5] Document and make example self-contained --- unstable/parser_test.go | 54 +++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/unstable/parser_test.go b/unstable/parser_test.go index 414a12b6..caa5671f 100644 --- a/unstable/parser_test.go +++ b/unstable/parser_test.go @@ -448,6 +448,15 @@ func TestParser_AST_DateTimes(t *testing.T) { } } +// This example demonstrates how to parse a TOML document and preserving +// comments. Comments are stored in the AST as Comment nodes. This example +// displays the structure of the full AST generated by the parser using the +// following structure: +// +// 1. Each root-level expression is separated by three dashes. +// 2. Bytes associated to a node are displayed in square brackets. +// 3. Siblings have the same indentation. +// 4. Children of a node are indented one level. func ExampleParser_comments() { doc := `# Top of the document comment. # Optional, any amount of lines. @@ -487,6 +496,27 @@ key5 = [ # Next to start of inline array. # After array table. ` + var printGeneric func(indent int, e *Node) + printGeneric = func(indent int, e *Node) { + if e == nil { + return + } + fmt.Printf("%s%s", strings.Repeat(" ", indent), fmt.Sprintf("%s [%s]\n", e.Kind, e.Data)) + printGeneric(indent+1, e.Child()) + printGeneric(indent, e.Next()) + } + + printTree := func(p *Parser) { + for p.NextExpression() { + e := p.Expression() + fmt.Println("---") + printGeneric(0, e) + } + if err := p.Error(); err != nil { + panic(err) + } + } + p := &Parser{ KeepComments: true, } @@ -573,30 +603,6 @@ key5 = [ # Next to start of inline array. // Comment [# After array table.] } -func printIndentf(i int, format string, args ...interface{}) { - fmt.Printf("%s%s", strings.Repeat(" ", i), fmt.Sprintf(format, args...)) -} - -func printGeneric(indent int, e *Node) { - if e == nil { - return - } - printIndentf(indent, "%s [%s]\n", e.Kind, e.Data) - printGeneric(indent+1, e.Child()) - printGeneric(indent, e.Next()) -} - -func printTree(p *Parser) { - for p.NextExpression() { - e := p.Expression() - fmt.Println("---") - printGeneric(0, e) - } - if err := p.Error(); err != nil { - panic(err) - } -} - func ExampleParser() { doc := ` hello = "world" From 5274f6f2af8a4d8b3f74568665a410ddefd6dc19 Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Tue, 28 Mar 2023 20:21:52 +0000 Subject: [PATCH 4/5] Expose and print position in document --- unstable/parser.go | 38 ++++++++++++ unstable/parser_test.go | 126 ++++++++++++++++++++-------------------- 2 files changed, 102 insertions(+), 62 deletions(-) diff --git a/unstable/parser.go b/unstable/parser.go index d315391f..6202cfe3 100644 --- a/unstable/parser.go +++ b/unstable/parser.go @@ -142,6 +142,44 @@ func (p *Parser) Error() error { return p.err } +// Position describes a position in the input. +type Position struct { + // Number of bytes from the beginning of the input. + Offset int + // Line number, starting at 1. + Line int + // Column number, starting at 1. + Column int +} + +// Shape describes the position of a range in the input. +type Shape struct { + Start Position + End Position +} + +func (p *Parser) position(b []byte) Position { + offset := danger.SubsliceOffset(p.data, b) + + lead := p.data[:offset] + + return Position{ + Offset: offset, + Line: bytes.Count(lead, []byte{'\n'}) + 1, + Column: len(lead) - bytes.LastIndex(lead, []byte{'\n'}), + } +} + +// Shape returns the shape of the given range in the input. Will +// panic if the range is not a subslice of the input. +func (p *Parser) Shape(r Range) Shape { + raw := p.Raw(r) + return Shape{ + Start: p.position(raw), + End: p.position(raw[r.Length:]), + } +} + func (p *Parser) parseNewline(b []byte) ([]byte, error) { if b[0] == '\n' { return b[1:], nil diff --git a/unstable/parser_test.go b/unstable/parser_test.go index caa5671f..42e62928 100644 --- a/unstable/parser_test.go +++ b/unstable/parser_test.go @@ -496,21 +496,23 @@ key5 = [ # Next to start of inline array. # After array table. ` - var printGeneric func(indent int, e *Node) - printGeneric = func(indent int, e *Node) { + var printGeneric func(*Parser, int, *Node) + printGeneric = func(p *Parser, indent int, e *Node) { if e == nil { return } - fmt.Printf("%s%s", strings.Repeat(" ", indent), fmt.Sprintf("%s [%s]\n", e.Kind, e.Data)) - printGeneric(indent+1, e.Child()) - printGeneric(indent, e.Next()) + s := p.Shape(e.Raw) + x := fmt.Sprintf("%d:%d->%d:%d (%d->%d)", s.Start.Line, s.Start.Column, s.End.Line, s.End.Column, s.Start.Offset, s.End.Offset) + fmt.Printf("%-25s | %s%s [%s]\n", x, strings.Repeat(" ", indent), e.Kind, e.Data) + printGeneric(p, indent+1, e.Child()) + printGeneric(p, indent, e.Next()) } printTree := func(p *Parser) { for p.NextExpression() { e := p.Expression() fmt.Println("---") - printGeneric(0, e) + printGeneric(p, 0, e) } if err := p.Error(); err != nil { panic(err) @@ -525,82 +527,82 @@ key5 = [ # Next to start of inline array. // Output: // --- - // Comment [# Top of the document comment.] + // 1:1->1:31 (0->30) | Comment [# Top of the document comment.] // --- - // Comment [# Optional, any amount of lines.] + // 2:1->2:33 (31->63) | Comment [# Optional, any amount of lines.] // --- - // Comment [# Above table.] + // 4:1->4:15 (65->79) | Comment [# Above table.] // --- - // Table [] - // Key [table] - // Comment [# Next to table.] + // 1:1->1:1 (0->0) | Table [] + // 5:2->5:7 (81->86) | Key [table] + // 5:9->5:25 (88->104) | Comment [# Next to table.] // --- - // Comment [# Above simple value.] + // 6:1->6:22 (105->126) | Comment [# Above simple value.] // --- - // KeyValue [] - // String [value] - // Key [key] - // Comment [# Next to simple value.] + // 1:1->1:1 (0->0) | KeyValue [] + // 7:7->7:14 (133->140) | String [value] + // 7:1->7:4 (127->130) | Key [key] + // 7:15->7:38 (141->164) | Comment [# Next to simple value.] // --- - // Comment [# Below simple value.] + // 8:1->8:22 (165->186) | Comment [# Below simple value.] // --- - // Comment [# Some comment alone.] + // 10:1->10:22 (188->209) | Comment [# Some comment alone.] // --- - // Comment [# Multiple comments, on multiple lines.] + // 12:1->12:40 (211->250) | Comment [# Multiple comments, on multiple lines.] // --- - // Comment [# Above inline table.] + // 14:1->14:22 (252->273) | Comment [# Above inline table.] // --- - // KeyValue [] - // InlineTable [] - // KeyValue [] - // String [Tom] - // Key [first] - // KeyValue [] - // String [Preston-Werner] - // Key [last] - // Key [name] - // Comment [# Next to inline table.] + // 1:1->1:1 (0->0) | KeyValue [] + // 15:8->15:9 (281->282) | InlineTable [] + // 1:1->1:1 (0->0) | KeyValue [] + // 15:18->15:23 (291->296) | String [Tom] + // 15:10->15:15 (283->288) | Key [first] + // 1:1->1:1 (0->0) | KeyValue [] + // 15:32->15:48 (305->321) | String [Preston-Werner] + // 15:25->15:29 (298->302) | Key [last] + // 15:1->15:5 (274->278) | Key [name] + // 15:51->15:74 (324->347) | Comment [# Next to inline table.] // --- - // Comment [# Below inline table.] + // 16:1->16:22 (348->369) | Comment [# Below inline table.] // --- - // Comment [# Above array.] + // 18:1->18:15 (371->385) | Comment [# Above array.] // --- - // KeyValue [] - // Array [] - // Integer [1] - // Integer [2] - // Integer [3] - // Key [array] - // Comment [# Next to one-line array.] + // 1:1->1:1 (0->0) | KeyValue [] + // 1:1->1:1 (0->0) | Array [] + // 1:1->1:1 (0->0) | Integer [1] + // 1:1->1:1 (0->0) | Integer [2] + // 1:1->1:1 (0->0) | Integer [3] + // 19:1->19:6 (386->391) | Key [array] + // 19:21->19:46 (406->431) | Comment [# Next to one-line array.] // --- - // Comment [# Below array.] + // 20:1->20:15 (432->446) | Comment [# Below array.] // --- - // Comment [# Above multi-line array.] + // 22:1->22:26 (448->473) | Comment [# Above multi-line array.] // --- - // KeyValue [] - // Array [] - // Comment [# Next to start of inline array.] - // Comment [# Second line before array content.] - // Integer [1] - // Comment [# Next to first element.] - // Comment [# After first element.] - // Comment [# Before second element.] - // Integer [2] - // Integer [3] - // Comment [# Next to last element] - // Comment [# After last element.] - // Key [key5] - // Comment [# Next to end of array.] + // 1:1->1:1 (0->0) | KeyValue [] + // 1:1->1:1 (0->0) | Array [] + // 23:10->23:42 (483->515) | Comment [# Next to start of inline array.] + // 24:3->24:38 (518->553) | Comment [# Second line before array content.] + // 1:1->1:1 (0->0) | Integer [1] + // 25:6->25:30 (559->583) | Comment [# Next to first element.] + // 26:3->26:25 (586->608) | Comment [# After first element.] + // 27:3->27:27 (611->635) | Comment [# Before second element.] + // 1:1->1:1 (0->0) | Integer [2] + // 1:1->1:1 (0->0) | Integer [3] + // 29:6->29:28 (646->668) | Comment [# Next to last element] + // 30:3->30:24 (671->692) | Comment [# After last element.] + // 23:1->23:5 (474->478) | Key [key5] + // 31:3->31:26 (695->718) | Comment [# Next to end of array.] // --- - // Comment [# Below multi-line array.] + // 32:1->32:26 (719->744) | Comment [# Below multi-line array.] // --- - // Comment [# Before array table.] + // 34:1->34:22 (746->767) | Comment [# Before array table.] // --- - // ArrayTable [] - // Key [products] - // Comment [# Next to array table.] + // 1:1->1:1 (0->0) | ArrayTable [] + // 35:3->35:11 (770->778) | Key [products] + // 35:14->35:36 (781->803) | Comment [# Next to array table.] // --- - // Comment [# After array table.] + // 36:1->36:21 (804->824) | Comment [# After array table.] } func ExampleParser() { From ad9ccbebceeea757b9140b3fca300247bee32be4 Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Wed, 29 Mar 2023 17:35:32 +0000 Subject: [PATCH 5/5] Don't chain invalid node --- unstable/parser.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/unstable/parser.go b/unstable/parser.go index 6202cfe3..a8eb0529 100644 --- a/unstable/parser.go +++ b/unstable/parser.go @@ -242,7 +242,9 @@ func (p *Parser) parseExpression(b []byte) (reference, []byte, error) { if len(b) > 0 && b[0] == '#' { cref, rest, err := p.parseComment(b) - p.builder.Chain(ref, cref) + if cref != invalidReference { + p.builder.Chain(ref, cref) + } return ref, rest, err }