Skip to content

Commit

Permalink
syntax: better column numbers for escapes inside backquotes
Browse files Browse the repository at this point in the history
Backslashes that escape characters inside backquote command substitutions
are deduplicated, so the extra backslashes are not included in any literal values,
but they still need to be counted for the sake of column numbers.

With the input

    last=`echo \\$demo`

we used to think that the literal with Go string value "\\$demo",
deduplicated from the input "\\\\$demo", went from column 12 to 18.
This can't be, since the input literal was seven bytes and not six,
even if we end up parsing the literal as six bytes after deduplicating.
We now correctly parse the column positions as 12 and 19.

Fixes #1028.
  • Loading branch information
mvdan committed Dec 26, 2023
1 parent bcbe8ac commit 28117db
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 1 deletion.
2 changes: 2 additions & 0 deletions syntax/filetests_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4631,6 +4631,8 @@ func recursiveSanityCheck(tb testing.TB, src string, v any) {
// ended by semicolon
case endOff > 0 && src[endOff-1] == '&':
// ended by & or |&
case end == '\\' && src[endOff+1] == '`':
// ended by an escaped backquote
default:
tb.Errorf("Unexpected Stmt.End() %d %q in %q",
endOff, end, src)
Expand Down
8 changes: 7 additions & 1 deletion syntax/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ retry:
}
if p.openBquotes > 0 && bquotes < p.openBquotes &&
p.bsp < len(p.bs) && bquoteEscaped(p.bs[p.bsp]) {
// We turn backquote command substitutions into $(),
// so we remove the extra backslashes needed by the backquotes.
// For good position information, we still include them in p.w.
bquotes++
goto retry
}
Expand All @@ -102,7 +105,7 @@ retry:
if p.litBs != nil {
p.litBs = append(p.litBs, b)
}
p.w, p.r = 1, rune(b)
p.w, p.r = 1+bquotes, rune(b)
return p.r
}
if !utf8.FullRune(p.bs[p.bsp:]) {
Expand Down Expand Up @@ -827,6 +830,9 @@ func (p *Parser) newLit(r rune) {
func (p *Parser) endLit() (s string) {
if p.r == utf8.RuneSelf || p.r == escNewl {
s = string(p.litBs)
} else if p.r == '`' && p.w > 1 {
// If we ended at a nested and escaped backquote, litBs does not include the backslash.
s = string(p.litBs[:len(p.litBs)-1])
} else {
s = string(p.litBs[:len(p.litBs)-p.w])
}
Expand Down
18 changes: 18 additions & 0 deletions syntax/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"sync"
"testing"

qt "github.com/frankban/quicktest"
"github.com/google/go-cmp/cmp"
)

Expand Down Expand Up @@ -2529,3 +2530,20 @@ func TestIsIncomplete(t *testing.T) {
})
}
}

// TestBackquotesPos checks the value and the source positions of a literal
// that contains an escaped backslash inside a backquote command substitution.
func TestBackquotesPos(t *testing.T) {
	// The shell source is seven bytes: a backquote, two backslashes, "foo",
	// and the closing backquote.
	in := "`\\\\foo`"
	p := NewParser()
	f, err := p.Parse(strings.NewReader(in), "")
	qt.Assert(t, err, qt.IsNil)
	cmdSubst := f.Stmts[0].Cmd.(*CallExpr).Args[0].Parts[0].(*CmdSubst)
	lit := cmdSubst.Stmts[0].Cmd.(*CallExpr).Args[0].Parts[0].(*Lit)

	// The backslash has to be doubled inside backquotes; the literal value
	// should hold the deduplicated form, a single backslash followed by "foo".
	// Comparing lit.Value against itself would pass for any value at all.
	qt.Assert(t, lit.Value, qt.Equals, "\\foo")
	// Note that the value and the positions of literals with escape sequences
	// inside backquote command substitutions do not line up byte for byte:
	// the extra backslash is dropped from the value, but it still counts
	// towards the columns. The value '\foo' has length 4, while the positions
	// cover the five input bytes '\\foo' from column 2 up to column 7.
	qt.Assert(t, lit.ValuePos.String(), qt.Equals, "1:2")
	qt.Assert(t, lit.ValueEnd.String(), qt.Equals, "1:7")
}

0 comments on commit 28117db

Please sign in to comment.