Skip to content

Commit

Permalink
Merge pull request #89 from platinummonkey/url_query_paramaters_fix
Browse files Browse the repository at this point in the history
fixes URL sanitization with more than one query parameter
  • Loading branch information
David Kitchen authored Aug 29, 2019
2 parents d9ddfec + 29035b5 commit 1c05eea
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 3 deletions.
107 changes: 104 additions & 3 deletions sanitize.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,98 @@ func (p *Policy) SanitizeReader(r io.Reader) *bytes.Buffer {
return p.sanitize(r)
}

const escapedURLChars = "'<>\"\r"

func escapeUrlComponent(val string) string {
w := bytes.NewBufferString("")
i := strings.IndexAny(val, escapedURLChars)
for i != -1 {
if _, err := w.WriteString(val[:i]); err != nil {
return w.String()
}
var esc string
switch val[i] {
case '\'':
// "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5.
esc = "&#39;"
case '<':
esc = "&lt;"
case '>':
esc = "&gt;"
case '"':
// "&#34;" is shorter than "&quot;".
esc = "&#34;"
case '\r':
esc = "&#13;"
default:
panic("unrecognized escape character")
}
val = val[i+1:]
if _, err := w.WriteString(esc); err != nil {
return w.String()
}
i = strings.IndexAny(val, escapedURLChars)
}
w.WriteString(val)
return w.String()
}

func sanitizedUrl(val string) (string, error) {
u, err := url.Parse(val)
if err != nil {
return "", err
}
// sanitize the url query params
sanitizedQueryValues := make(url.Values, 0)
queryValues := u.Query()
for k, vals := range queryValues {
sk := html.EscapeString(k)
for _, v := range vals {
sv := escapeUrlComponent(v)
sanitizedQueryValues.Set(sk, sv)
}
}
u.RawQuery = sanitizedQueryValues.Encode()
// u.String() will also sanitize host/scheme/user/pass
return u.String(), nil
}

func (p *Policy) writeLinkableBuf(buff *bytes.Buffer, token *html.Token) {
// do not escape multiple query parameters
tokenBuff := bytes.NewBufferString("")
tokenBuff.WriteString("<")
tokenBuff.WriteString(token.Data)
for _, attr := range token.Attr {
tokenBuff.WriteByte(' ')
tokenBuff.WriteString(attr.Key)
tokenBuff.WriteString(`="`)
switch attr.Key {
case "href", "src":
u, ok := p.validURL(attr.Val)
if !ok {
tokenBuff.WriteString(html.EscapeString(attr.Val))
continue
}
u, err := sanitizedUrl(u)
if err == nil {
tokenBuff.WriteString(u)
} else {
// fallthrough
tokenBuff.WriteString(html.EscapeString(attr.Val))
}
default:
// re-apply
tokenBuff.WriteString(html.EscapeString(attr.Val))
}
tokenBuff.WriteByte('"')
}
if token.Type == html.SelfClosingTagToken {
tokenBuff.WriteString("/")
}
tokenBuff.WriteString(">")
buff.WriteString(tokenBuff.String())
}

// Performs the actual sanitization process.
func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {

Expand Down Expand Up @@ -150,7 +242,6 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
}
break
}

if len(token.Attr) != 0 {
token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
}
Expand All @@ -167,7 +258,12 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
}

if !skipElementContent {
buff.WriteString(token.String())
// do not escape multiple query parameters
if linkable(token.Data) {
p.writeLinkableBuf(&buff, &token)
} else {
buff.WriteString(token.String())
}
}

case html.EndTagToken:
Expand Down Expand Up @@ -226,7 +322,12 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
}

if !skipElementContent {
buff.WriteString(token.String())
// do not escape multiple query parameters
if linkable(token.Data) {
p.writeLinkableBuf(&buff, &token)
} else {
buff.WriteString(token.String())
}
}

case html.TextToken:
Expand Down
12 changes: 12 additions & 0 deletions sanitize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,14 @@ func TestLinks(t *testing.T) {
in: `<a href="?q=1">`,
expected: `<a href="?q=1" rel="nofollow">`,
},
{
in: `<a href="?q=1&r=2">`,
expected: `<a href="?q=1&r=2" rel="nofollow">`,
},
{
in: `<a href="?q=1&r=2&s=:foo@">`,
expected: `<a href="?q=1&r=2&s=%3Afoo%40" rel="nofollow">`,
},
{
in: `<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==" alt="Red dot" />`,
expected: `<img alt="Red dot"/>`,
Expand All @@ -135,6 +143,10 @@ func TestLinks(t *testing.T) {
in: `<img src="giraffe.gif" />`,
expected: `<img src="giraffe.gif"/>`,
},
{
in: `<img src="giraffe.gif?height=500&width=500" />`,
expected: `<img src="giraffe.gif?height=500&width=500"/>`,
},
}

p := UGCPolicy()
Expand Down

0 comments on commit 1c05eea

Please sign in to comment.