Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Comments support in unstable/Parser #860

Merged
merged 5 commits into from
May 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 110 additions & 19 deletions unstable/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,15 @@ func NewParserError(highlight []byte, format string, args ...interface{}) error
// For performance reasons, go-toml doesn't make a copy of the input bytes to
// the parser. Make sure to copy all the bytes you need to outlive the slice
// given to the parser.
//
// The parser doesn't provide nodes for comments yet, nor for whitespace.
type Parser struct {
data []byte
builder builder
ref reference
left []byte
err error
first bool

KeepComments bool
}

// Data returns the slice provided to the last call to Reset.
Expand Down Expand Up @@ -142,6 +142,44 @@ func (p *Parser) Error() error {
return p.err
}

// Position describes a position in the input.
type Position struct {
// Number of bytes from the beginning of the input.
Offset int
// Line number, starting at 1.
Line int
// Column number, starting at 1.
Column int
}

// Shape describes the position of a range in the input.
type Shape struct {
Start Position
End Position
}

func (p *Parser) position(b []byte) Position {
offset := danger.SubsliceOffset(p.data, b)

lead := p.data[:offset]

return Position{
Offset: offset,
Line: bytes.Count(lead, []byte{'\n'}) + 1,
Column: len(lead) - bytes.LastIndex(lead, []byte{'\n'}),
}
}

// Shape returns the shape of the given range in the input. Will
// panic if the range is not a subslice of the input.
func (p *Parser) Shape(r Range) Shape {
raw := p.Raw(r)
return Shape{
Start: p.position(raw),
End: p.position(raw[r.Length:]),
}
}

func (p *Parser) parseNewline(b []byte) ([]byte, error) {
if b[0] == '\n' {
return b[1:], nil
Expand All @@ -155,6 +193,19 @@ func (p *Parser) parseNewline(b []byte) ([]byte, error) {
return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0])
}

func (p *Parser) parseComment(b []byte) (reference, []byte, error) {
ref := invalidReference
data, rest, err := scanComment(b)
if p.KeepComments && err == nil {
ref = p.builder.Push(Node{
Kind: Comment,
Raw: p.Range(data),
Data: data,
})
}
return ref, rest, err
}

func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
// expression = ws [ comment ]
// expression =/ ws keyval ws [ comment ]
Expand All @@ -168,7 +219,7 @@ func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
}

if b[0] == '#' {
_, rest, err := scanComment(b)
ref, rest, err := p.parseComment(b)
return ref, rest, err
}

Expand All @@ -190,7 +241,10 @@ func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
b = p.parseWhitespace(b)

if len(b) > 0 && b[0] == '#' {
_, rest, err := scanComment(b)
cref, rest, err := p.parseComment(b)
if cref != invalidReference {
p.builder.Chain(ref, cref)
}
return ref, rest, err
}

Expand Down Expand Up @@ -471,17 +525,33 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
Kind: Array,
})

// First indicates whether the parser is looking for the first element
// (non-comment) of the array.
first := true

var lastChild reference
lastChild := invalidReference

addChild := func(valueRef reference) {
if lastChild == invalidReference {
p.builder.AttachChild(parent, valueRef)
} else {
p.builder.Chain(lastChild, valueRef)
}
lastChild = valueRef
}

var err error
for len(b) > 0 {
b, err = p.parseOptionalWhitespaceCommentNewline(b)
cref := invalidReference
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
if err != nil {
return parent, nil, err
}

if cref != invalidReference {
addChild(cref)
}

if len(b) == 0 {
return parent, nil, NewParserError(arrayStart[:1], "array is incomplete")
}
Expand All @@ -496,10 +566,13 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
}
b = b[1:]

b, err = p.parseOptionalWhitespaceCommentNewline(b)
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
if err != nil {
return parent, nil, err
}
if cref != invalidReference {
addChild(cref)
}
} else if !first {
return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas")
}
Expand All @@ -515,17 +588,16 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
return parent, nil, err
}

if first {
p.builder.AttachChild(parent, valueRef)
} else {
p.builder.Chain(lastChild, valueRef)
}
lastChild = valueRef
addChild(valueRef)

b, err = p.parseOptionalWhitespaceCommentNewline(b)
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
if err != nil {
return parent, nil, err
}
if cref != invalidReference {
addChild(cref)
}

first = false
}

Expand All @@ -534,15 +606,34 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
return parent, rest, err
}

func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error) {
func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) (reference, []byte, error) {
rootCommentRef := invalidReference
latestCommentRef := invalidReference

addComment := func(ref reference) {
if rootCommentRef == invalidReference {
rootCommentRef = ref
} else if latestCommentRef == invalidReference {
p.builder.AttachChild(rootCommentRef, ref)
latestCommentRef = ref
} else {
p.builder.Chain(latestCommentRef, ref)
latestCommentRef = ref
}
}

for len(b) > 0 {
var err error
b = p.parseWhitespace(b)

if len(b) > 0 && b[0] == '#' {
_, b, err = scanComment(b)
var ref reference
ref, b, err = p.parseComment(b)
if err != nil {
return nil, err
return invalidReference, nil, err
}
if ref != invalidReference {
addComment(ref)
}
}

Expand All @@ -553,14 +644,14 @@ func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error)
if b[0] == '\n' || b[0] == '\r' {
b, err = p.parseNewline(b)
if err != nil {
return nil, err
return invalidReference, nil, err
}
} else {
break
}
}

return b, nil
return rootCommentRef, b, nil
}

func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
Expand Down
157 changes: 157 additions & 0 deletions unstable/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,163 @@ func TestParser_AST_DateTimes(t *testing.T) {
}
}

// This example demonstrates how to parse a TOML document and preserving
// comments. Comments are stored in the AST as Comment nodes. This example
// displays the structure of the full AST generated by the parser using the
// following structure:
//
// 1. Each root-level expression is separated by three dashes.
// 2. Bytes associated to a node are displayed in square brackets.
// 3. Siblings have the same indentation.
// 4. Children of a node are indented one level.
func ExampleParser_comments() {
doc := `# Top of the document comment.
# Optional, any amount of lines.

# Above table.
[table] # Next to table.
# Above simple value.
key = "value" # Next to simple value.
# Below simple value.

# Some comment alone.

# Multiple comments, on multiple lines.

# Above inline table.
name = { first = "Tom", last = "Preston-Werner" } # Next to inline table.
# Below inline table.

# Above array.
array = [ 1, 2, 3 ] # Next to one-line array.
# Below array.

# Above multi-line array.
key5 = [ # Next to start of inline array.
# Second line before array content.
1, # Next to first element.
# After first element.
# Before second element.
2,
3, # Next to last element
# After last element.
] # Next to end of array.
# Below multi-line array.

# Before array table.
[[products]] # Next to array table.
# After array table.
`

var printGeneric func(*Parser, int, *Node)
printGeneric = func(p *Parser, indent int, e *Node) {
if e == nil {
return
}
s := p.Shape(e.Raw)
x := fmt.Sprintf("%d:%d->%d:%d (%d->%d)", s.Start.Line, s.Start.Column, s.End.Line, s.End.Column, s.Start.Offset, s.End.Offset)
fmt.Printf("%-25s | %s%s [%s]\n", x, strings.Repeat(" ", indent), e.Kind, e.Data)
printGeneric(p, indent+1, e.Child())
printGeneric(p, indent, e.Next())
}

printTree := func(p *Parser) {
for p.NextExpression() {
e := p.Expression()
fmt.Println("---")
printGeneric(p, 0, e)
}
if err := p.Error(); err != nil {
panic(err)
}
}

p := &Parser{
KeepComments: true,
}
p.Reset([]byte(doc))
printTree(p)

// Output:
// ---
// 1:1->1:31 (0->30) | Comment [# Top of the document comment.]
// ---
// 2:1->2:33 (31->63) | Comment [# Optional, any amount of lines.]
// ---
// 4:1->4:15 (65->79) | Comment [# Above table.]
// ---
// 1:1->1:1 (0->0) | Table []
// 5:2->5:7 (81->86) | Key [table]
// 5:9->5:25 (88->104) | Comment [# Next to table.]
// ---
// 6:1->6:22 (105->126) | Comment [# Above simple value.]
// ---
// 1:1->1:1 (0->0) | KeyValue []
// 7:7->7:14 (133->140) | String [value]
// 7:1->7:4 (127->130) | Key [key]
// 7:15->7:38 (141->164) | Comment [# Next to simple value.]
// ---
// 8:1->8:22 (165->186) | Comment [# Below simple value.]
// ---
// 10:1->10:22 (188->209) | Comment [# Some comment alone.]
// ---
// 12:1->12:40 (211->250) | Comment [# Multiple comments, on multiple lines.]
// ---
// 14:1->14:22 (252->273) | Comment [# Above inline table.]
// ---
// 1:1->1:1 (0->0) | KeyValue []
// 15:8->15:9 (281->282) | InlineTable []
// 1:1->1:1 (0->0) | KeyValue []
// 15:18->15:23 (291->296) | String [Tom]
// 15:10->15:15 (283->288) | Key [first]
// 1:1->1:1 (0->0) | KeyValue []
// 15:32->15:48 (305->321) | String [Preston-Werner]
// 15:25->15:29 (298->302) | Key [last]
// 15:1->15:5 (274->278) | Key [name]
// 15:51->15:74 (324->347) | Comment [# Next to inline table.]
// ---
// 16:1->16:22 (348->369) | Comment [# Below inline table.]
// ---
// 18:1->18:15 (371->385) | Comment [# Above array.]
// ---
// 1:1->1:1 (0->0) | KeyValue []
// 1:1->1:1 (0->0) | Array []
// 1:1->1:1 (0->0) | Integer [1]
// 1:1->1:1 (0->0) | Integer [2]
// 1:1->1:1 (0->0) | Integer [3]
// 19:1->19:6 (386->391) | Key [array]
// 19:21->19:46 (406->431) | Comment [# Next to one-line array.]
// ---
// 20:1->20:15 (432->446) | Comment [# Below array.]
// ---
// 22:1->22:26 (448->473) | Comment [# Above multi-line array.]
// ---
// 1:1->1:1 (0->0) | KeyValue []
// 1:1->1:1 (0->0) | Array []
// 23:10->23:42 (483->515) | Comment [# Next to start of inline array.]
// 24:3->24:38 (518->553) | Comment [# Second line before array content.]
// 1:1->1:1 (0->0) | Integer [1]
// 25:6->25:30 (559->583) | Comment [# Next to first element.]
// 26:3->26:25 (586->608) | Comment [# After first element.]
// 27:3->27:27 (611->635) | Comment [# Before second element.]
// 1:1->1:1 (0->0) | Integer [2]
// 1:1->1:1 (0->0) | Integer [3]
// 29:6->29:28 (646->668) | Comment [# Next to last element]
// 30:3->30:24 (671->692) | Comment [# After last element.]
// 23:1->23:5 (474->478) | Key [key5]
// 31:3->31:26 (695->718) | Comment [# Next to end of array.]
// ---
// 32:1->32:26 (719->744) | Comment [# Below multi-line array.]
// ---
// 34:1->34:22 (746->767) | Comment [# Before array table.]
// ---
// 1:1->1:1 (0->0) | ArrayTable []
// 35:3->35:11 (770->778) | Key [products]
// 35:14->35:36 (781->803) | Comment [# Next to array table.]
// ---
// 36:1->36:21 (804->824) | Comment [# After array table.]
}

func ExampleParser() {
doc := `
hello = "world"
Expand Down
Loading