From b65ae7cbe1972b06a2556dac68abd6b4e971849e Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Fri, 25 Dec 2020 23:32:45 +0800 Subject: [PATCH 1/3] Fix the bug that sanitizedUrl will change the order of the link --- sanitize.go | 83 +++++++++++++++++++++++++++++++++++++++++------- sanitize_test.go | 23 ++++++++++++++ 2 files changed, 95 insertions(+), 11 deletions(-) diff --git a/sanitize.go b/sanitize.go index 103f39f..fdf199b 100644 --- a/sanitize.go +++ b/sanitize.go @@ -122,22 +122,83 @@ func escapeUrlComponent(val string) string { return w.String() } -func sanitizedUrl(val string) (string, error) { +// Query represents a query +type Query struct { + Key string + Value string +} + +func parseQuery(query string) (values []Query, err error) { + for query != "" { + key := query + if i := strings.IndexAny(key, "&;"); i >= 0 { + key, query = key[:i], key[i+1:] + } else { + query = "" + } + if key == "" { + continue + } + value := "" + if i := strings.Index(key, "="); i >= 0 { + key, value = key[:i], key[i+1:] + } + key, err1 := url.QueryUnescape(key) + if err1 != nil { + if err == nil { + err = err1 + } + continue + } + value, err1 = url.QueryUnescape(value) + if err1 != nil { + if err == nil { + err = err1 + } + continue + } + values = append(values, Query{ + Key: key, + Value: value, + }) + } + return values, err +} + +func encodeQueries(queries []Query) string { + var b strings.Builder + for i, query := range queries { + b.WriteString(query.Key) + b.WriteString("=") + b.WriteString(query.Value) + if i < len(queries)-1 { + b.WriteString("&") + } + } + return b.String() +} + +func sanitizedURL(val string) (string, error) { u, err := url.Parse(val) if err != nil { return "", err } // sanitize the url query params - sanitizedQueryValues := make(url.Values, 0) - queryValues := u.Query() - for k, vals := range queryValues { - sk := html.EscapeString(k) - for _, v := range vals { - sv := v - sanitizedQueryValues.Add(sk, sv) - } + sanitizedQueries := make([]Query, 0) + // we use parseQuery but not u.Values to keep the order not change because + // u.Values is a map which has a random order. + queryValues, err := parseQuery(u.RawQuery) + if err != nil { + return "", err + } + for _, query := range queryValues { + sk := html.EscapeString(query.Key) + sanitizedQueries = append(sanitizedQueries, Query{ + Key: sk, + Value: query.Value, + }) } - u.RawQuery = sanitizedQueryValues.Encode() + u.RawQuery = encodeQueries(sanitizedQueries) // u.String() will also sanitize host/scheme/user/pass return u.String(), nil } @@ -158,7 +219,7 @@ func (p *Policy) writeLinkableBuf(buff *bytes.Buffer, token *html.Token) { tokenBuff.WriteString(html.EscapeString(attr.Val)) continue } - u, err := sanitizedUrl(u) + u, err := sanitizedURL(u) if err == nil { tokenBuff.WriteString(u) } else { diff --git a/sanitize_test.go b/sanitize_test.go index 5f3c049..bc2dfaa 100644 --- a/sanitize_test.go +++ b/sanitize_test.go @@ -1678,3 +1678,26 @@ func TestIssue85NoReferrer(t *testing.T) { } wg.Wait() } + +func TestSanitizedURL(t *testing.T) { + tests := []test{ + { + in: `http://abc.com?d=1&a=2&a=3`, + expected: `http://abc.com?d=1&a=2&a=3`, + }, + } + + for _, theTest := range tests { + res, err := sanitizedURL(theTest.in) + if err != nil { + t.Errorf("sanitizedURL returned error: %v", err) + } + if theTest.expected != res { + t.Errorf( + "test failed;\ninput : %s\nexpected: %s", + theTest.in, + theTest.expected, + ) + } + } +} From 1d32e801b72d364867404052bd34041eee4a0fec Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Fri, 25 Dec 2020 23:35:17 +0800 Subject: [PATCH 2/3] Fix comment --- sanitize.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sanitize.go b/sanitize.go index fdf199b..03b171a 100644 --- a/sanitize.go +++ b/sanitize.go @@ -185,8 +185,8 @@ func sanitizedURL(val string) (string, error) { } // sanitize the url query params sanitizedQueries := make([]Query, 0) - // we use parseQuery but not u.Values to keep the order not change because - // u.Values is a map which has a random order. + // we use parseQuery but not u.Query to keep the order not change because + // url.Values is a map which has a random order. queryValues, err := parseQuery(u.RawQuery) if err != nil { return "", err From ca34796141e8d95265c47804b69fdd77c4412306 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sun, 27 Dec 2020 23:44:28 +0800 Subject: [PATCH 3/3] Add missed QueryEscape on encode and also simplified the codes --- sanitize.go | 18 +++++++----------- sanitize_test.go | 15 ++++++++++++++- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/sanitize.go b/sanitize.go index 03b171a..a58333a 100644 --- a/sanitize.go +++ b/sanitize.go @@ -168,9 +168,9 @@ func parseQuery(query string) (values []Query, err error) { func encodeQueries(queries []Query) string { var b strings.Builder for i, query := range queries { - b.WriteString(query.Key) + b.WriteString(url.QueryEscape(query.Key)) b.WriteString("=") - b.WriteString(query.Value) + b.WriteString(url.QueryEscape(query.Value)) if i < len(queries)-1 { b.WriteString("&") } @@ -183,22 +183,18 @@ func sanitizedURL(val string) (string, error) { if err != nil { return "", err } - // sanitize the url query params - sanitizedQueries := make([]Query, 0) + // we use parseQuery but not u.Query to keep the order not change because // url.Values is a map which has a random order. queryValues, err := parseQuery(u.RawQuery) if err != nil { return "", err } - for _, query := range queryValues { - sk := html.EscapeString(query.Key) - sanitizedQueries = append(sanitizedQueries, Query{ - Key: sk, - Value: query.Value, - }) + // sanitize the url query params + for i, query := range queryValues { + queryValues[i].Key = html.EscapeString(query.Key) } - u.RawQuery = encodeQueries(sanitizedQueries) + u.RawQuery = encodeQueries(queryValues) // u.String() will also sanitize host/scheme/user/pass return u.String(), nil } diff --git a/sanitize_test.go b/sanitize_test.go index bc2dfaa..2a79a59 100644 --- a/sanitize_test.go +++ b/sanitize_test.go @@ -1685,6 +1685,18 @@ func TestSanitizedURL(t *testing.T) { in: `http://abc.com?d=1&a=2&a=3`, expected: `http://abc.com?d=1&a=2&a=3`, }, + { + in: `http://abc.com?d=1 2&a=2&a=3`, + expected: `http://abc.com?d=1+2&a=2&a=3`, + }, + { + in: `http://abc.com?d=1/2&a=2&a=3`, + expected: `http://abc.com?d=1%2F2&a=2&a=3`, + }, + { + in: `http://abc.com?=1&a=2&a=3`, + expected: `http://abc.com?%26lt%3Bd%26gt%3B=1&a=2&a=3`, + }, } for _, theTest := range tests { @@ -1694,9 +1706,10 @@ func TestSanitizedURL(t *testing.T) { } if theTest.expected != res { t.Errorf( - "test failed;\ninput : %s\nexpected: %s", + "test failed;\ninput : %s\nexpected: %s, actual: %s", theTest.in, theTest.expected, + res, ) } }