diff --git a/src/builtin.jq b/src/builtin.jq
index 2b8263c7ed..51606a2a60 100644
--- a/src/builtin.jq
+++ b/src/builtin.jq
@@ -197,6 +197,60 @@ def ascii_downcase:
 def ascii_upcase:
   explode | map( if 97 <= . and . <= 122 then . - 32 else . end) | implode;
 
+# url_decode: inverse of @uri.
+# Input:  a string that may contain percent-encoded octets ("%XX").
+# Output: the input with every run of "%XX" escapes replaced by the text whose
+#         UTF-8 encoding those octets spell out; everything else is unchanged.
+# The octets are not validated: malformed or truncated UTF-8 is not detected.
+def url_decode:
+  # Value (0..15) of the hex digit whose codepoint is the input. Only called on
+  # digits matched by the regex below, so the last branch means "a".."f".
+  def unhex:
+    if 48 <= . and . <= 57 then . - 48 elif 65 <= . and . <= 70 then . - 55 else . - 87 end;
+
+  # Input:  codepoints of a run "%XX%XX..." (three codepoints per octet).
+  # Output: array of the octet values.
+  def bytes:
+    def loop($i):
+      if $i >= length then empty else 16 * (.[$i+1] | unhex) + (.[$i+2] | unhex), loop($i+3) end;
+    [loop(0)];
+
+  # Input: array of UTF-8 octets. Output: array of codepoints.
+  # The lead octet picks the sequence length: >= 240 four octets, >= 224 three,
+  # >= 192 two, otherwise one.
+  def codepoints:
+    def loop($i):
+      if $i >= length then empty
+      elif .[$i] >= 240 then (.[$i+3]-128) + 64*(.[$i+2]-128) + 4096*(.[$i+1]-128) + 262144*(.[$i]-240), loop($i+4)
+      elif .[$i] >= 224 then (.[$i+2]-128) + 64*(.[$i+1]-128) + 4096*(.[$i]-224), loop($i+3)
+      elif .[$i] >= 192 then (.[$i+1]-128) + 64*(.[$i]-192), loop($i+2)
+      else .[$i], loop($i+1)
+      end;
+    [loop(0)];
+
+  # Note that URL-encoding implies percent-encoded UTF-8 octets, so we have to
+  # manually reassemble these into codepoints for implode.
+  # The group must be named "m": the replacement filter receives the named
+  # captures as an object, and `.m` reads the matched run from it.
+  gsub("(?<m>(?:%[0-9a-fA-F]{2})+)"; .m | explode | bytes | codepoints | implode);
+
+# url_encode: @uri, then additionally percent-encodes the characters !'()*
+# (named to pair with url_decode).
+def url_encode:
+  # Truthy when the input codepoint is one of: ! ' ( ) *
+  # (index returns a position, and 0 is truthy in jq; a miss yields null).
+  def recode: . as $c | [33,39,40,41,42] | index($c);
+  # Codepoint of the uppercase hex digit for a value 0..15.
+  def hex: if . < 10 then 48 + . else 55 + . end;
+  @uri
+  | explode
+  # The five codepoints are 33..42, so each escape is "%2" plus one hex digit:
+  # 37 ==> "%", 50 ==> "2", (. - 32) ==> value of the final digit.
+  | map( if recode then (37, 50, ((. - 32) | hex)) else . end )
+  | implode;
+
+
+
 
 # Streaming utilities
 def truncate_stream(stream): . as $n | null | stream | . as $input | if (.[0]|length) > $n then setpath([0];$input[0][$n:]) else empty end;
diff --git a/tests/jq.test b/tests/jq.test
index c5c8cf0630..4eb0875c7d 100644
--- a/tests/jq.test
+++ b/tests/jq.test
@@ -2091,3 +2091,35 @@ try ("foobar" | .[1.5]) catch .
 null
 "Cannot index string with number"
 
+# url_encode/url_decode
+url_encode
+"="
+"%3D"
+
+url_encode
+"á"
+"%C3%A1"
+
+url_encode | url_decode
+"á"
+"á"
+
+url_decode
+"http%3A%2F%2Ffoo%20bar%2F"
+"http://foo bar/"
+
+(@uri | url_decode) == (url_encode | url_decode)
+"http://foo bar/"
+true
+
+@uri | url_decode | url_encode
+"è"
+"%C3%A8"
+
+url_decode
+"a%20b%E2%82%ACc"
+"a b€c"
+
+url_decode
+"%f0%9F%98%80"
+"😀"