From df67b6b74d2a297752ed8cefee2a7e49d8c2791b Mon Sep 17 00:00:00 2001
From: Rodrigo Coffani
Date: Thu, 30 Nov 2023 07:21:12 -0300
Subject: [PATCH 1/3] feat: url_decode and url_encode

---
 src/builtin.jq | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/src/builtin.jq b/src/builtin.jq
index 2b8263c7ed..187740573b 100644
--- a/src/builtin.jq
+++ b/src/builtin.jq
@@ -197,6 +197,43 @@ def ascii_downcase:
 def ascii_upcase:
   explode | map( if 97 <= . and . <= 122 then . - 32 else . end) | implode;
 
+# inverse of @uri in jq
+def url_decode:
+  # The helper function converts the input string written in the given
+  # "base" to an integer
+  def to_i(base):
+    explode
+    | reverse
+    | map(if 65 <= . and . <= 90 then . + 32 else . end) # downcase
+    | map(if . > 96 then . - 87 else . - 48 end) # "a" ~ 97 => 10 ~ 87
+    | reduce .[] as $c
+        # base: [power, ans]
+        ([1,0]; (.[0] * base) as $b | [$b, .[1] + (.[0] * $c)]) | .[1];
+
+  . as $in
+  | length as $length
+  | [0, ""] # i, answer
+  | until ( .[0] >= $length;
+      .[0] as $i
+      | if $in[$i:$i+1] == "%"
+        then [ $i + 3, .[1] + ([$in[$i+1:$i+3] | to_i(16)] | implode) ]
+        else [ $i + 1, .[1] + $in[$i:$i+1] ]
+        end)
+  | .[1]; # answer
+
+# reimplementation of @uri in jq to maintain a similar name to url_decode
+def url_encode:
+  # The helper function checks whether the input corresponds to one of the characters: !'()*
+  def recode: . as $c | [33,39,40,41,42] | index($c);
+  def hex: if . < 10 then 48 + . else 55 + . end;
+  @uri
+  | explode
+  # 37 ==> "%", 50 ==> "2"
+  | map( if recode then (37, 50, ((. - 32) | hex)) else . end )
+  | implode;
+
+
+
 
 # Streaming utilities
 def truncate_stream(stream): . as $n | null | stream | . as $input | if (.[0]|length) > $n then setpath([0];$input[0][$n:]) else empty end;

From 26d11ae48a8947203a9c1c192c46c35dc0d509b1 Mon Sep 17 00:00:00 2001
From: Rodrigo Coffani
Date: Thu, 30 Nov 2023 07:42:00 -0300
Subject: [PATCH 2/3] feat: tests for url_encode/url_decode

---
 tests/jq.test | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tests/jq.test b/tests/jq.test
index c5c8cf0630..f876020926 100644
--- a/tests/jq.test
+++ b/tests/jq.test
@@ -2091,3 +2091,19 @@ try ("foobar" | .[1.5]) catch .
 null
 "Cannot index string with number"
 
+# url_encode/url_decode
+url_encode
+"="
+"%3D"
+
+url_encode
+"á"
+"%C3%A1"
+
+url_decode
+"http%3A%2F%2Ffoo%20bar%2F"
+"http://foo bar/"
+
+(@uri | url_decode) == (url_encode | url_decode)
+"http://foo bar/"
+true

From c5f8a445a1ce92b10aed7fc31181f97722f3498d Mon Sep 17 00:00:00 2001
From: Rodrigo Coffani
Date: Thu, 30 Nov 2023 08:44:12 -0300
Subject: [PATCH 3/3] feat: new function and tests

---
Reviewer note: the gsub pattern below depends on the named capture group "m";
the replacement expression reads each matched run of %XX escapes as .m.

 src/builtin.jq | 40 ++++++++++++++++++++--------------------
 tests/jq.test  |  8 ++++++++
 2 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/src/builtin.jq b/src/builtin.jq
index 187740573b..51606a2a60 100644
--- a/src/builtin.jq
+++ b/src/builtin.jq
@@ -199,27 +199,27 @@ def ascii_upcase:
 
 # inverse of @uri in jq
 def url_decode:
-  # The helper function converts the input string written in the given
-  # "base" to an integer
-  def to_i(base):
-    explode
-    | reverse
-    | map(if 65 <= . and . <= 90 then . + 32 else . end) # downcase
-    | map(if . > 96 then . - 87 else . - 48 end) # "a" ~ 97 => 10 ~ 87
-    | reduce .[] as $c
-        # base: [power, ans]
-        ([1,0]; (.[0] * base) as $b | [$b, .[1] + (.[0] * $c)]) | .[1];
+  def unhex:
+    if 48 <= . and . <= 57 then . - 48 elif 65 <= . and . <= 70 then . - 55 else . - 87 end;
 
-  . as $in
-  | length as $length
-  | [0, ""] # i, answer
-  | until ( .[0] >= $length;
-      .[0] as $i
-      | if $in[$i:$i+1] == "%"
-        then [ $i + 3, .[1] + ([$in[$i+1:$i+3] | to_i(16)] | implode) ]
-        else [ $i + 1, .[1] + $in[$i:$i+1] ]
-        end)
-  | .[1]; # answer
+  def bytes:
+    def loop($i):
+      if $i >= length then empty else 16 * (.[$i+1] | unhex) + (.[$i+2] | unhex), loop($i+3) end;
+    [loop(0)];
+
+  def codepoints:
+    def loop($i):
+      if $i >= length then empty
+      elif .[$i] >= 240 then (.[$i+3]-128) + 64*(.[$i+2]-128) + 4096*(.[$i+1]-128) + 262144*(.[$i]-240), loop($i+4)
+      elif .[$i] >= 224 then (.[$i+2]-128) + 64*(.[$i+1]-128) + 4096*(.[$i]-224), loop($i+3)
+      elif .[$i] >= 192 then (.[$i+1]-128) + 64*(.[$i]-192), loop($i+2)
+      else .[$i], loop($i+1)
+      end;
+    [loop(0)];
+
+  # Note that URL-encoding implies percent-encoded UTF-8 octets, so we have to
+  # manually reassemble these into codepoints for implode
+  gsub("(?<m>(?:%[0-9a-fA-F]{2})+)"; .m | explode | bytes | codepoints | implode);
 
 # reimplementation of @uri in jq to maintain a similar name to url_decode
 def url_encode:
diff --git a/tests/jq.test b/tests/jq.test
index f876020926..4eb0875c7d 100644
--- a/tests/jq.test
+++ b/tests/jq.test
@@ -2100,6 +2100,10 @@ url_encode
 "á"
 "%C3%A1"
 
+url_encode | url_decode
+"á"
+"á"
+
 url_decode
 "http%3A%2F%2Ffoo%20bar%2F"
 "http://foo bar/"
@@ -2107,3 +2111,7 @@ url_decode
 (@uri | url_decode) == (url_encode | url_decode)
 "http://foo bar/"
 true
+
+@uri | url_decode | url_encode
+"è"
+"%C3%A8"