diff --git a/Makefile.am b/Makefile.am
index 0b4b81e78e..a183477fde 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -141,7 +141,7 @@ endif
### Tests (make check)
-TESTS = tests/mantest tests/jqtest tests/shtest tests/utf8test tests/base64test
+TESTS = tests/mantest tests/jqtest tests/shtest tests/utf8test tests/base64test tests/uritest
if !WIN32
TESTS += tests/optionaltest
endif
@@ -218,7 +218,6 @@ EXTRA_DIST = $(DOC_FILES) $(man_MANS) $(TESTS) $(TEST_LOG_COMPILER) \
jq.1.prebuilt jq.spec src/lexer.c src/lexer.h src/parser.c \
src/parser.h src/version.h src/builtin.jq scripts/version \
libjq.pc \
- tests/base64.test tests/jq-f-test.sh tests/jq.test \
tests/modules/a.jq tests/modules/b/b.jq tests/modules/c/c.jq \
tests/modules/c/d.jq tests/modules/data.json \
tests/modules/home1/.jq tests/modules/home2/.jq/g.jq \
@@ -232,7 +231,7 @@ EXTRA_DIST = $(DOC_FILES) $(man_MANS) $(TESTS) $(TEST_LOG_COMPILER) \
tests/onig.supp tests/local.supp \
tests/setup tests/torture/input0.json \
tests/optional.test tests/man.test tests/manonig.test \
- tests/jq.test tests/onig.test tests/base64.test \
+ tests/jq.test tests/onig.test tests/base64.test tests/uri.test \
tests/utf8-truncate.jq tests/jq-f-test.sh \
tests/no-main-program.jq tests/yes-main-program.jq
diff --git a/docs/content/manual/dev/manual.yml b/docs/content/manual/dev/manual.yml
index 2ec138fc42..90bd033064 100644
--- a/docs/content/manual/dev/manual.yml
+++ b/docs/content/manual/dev/manual.yml
@@ -2141,6 +2141,11 @@ sections:
Applies percent-encoding, by mapping all reserved URI
characters to a `%XX` sequence.
+ * `@urid`:
+
+ The inverse of `@uri`, applies percent-decoding, by mapping
+ all `%XX` sequences to their corresponding URI characters.
+
* `@csv`:
The input must be an array, and it is rendered as CSV
diff --git a/jq.1.prebuilt b/jq.1.prebuilt
index 151868fddf..553b63fc15 100644
--- a/jq.1.prebuilt
+++ b/jq.1.prebuilt
@@ -1,5 +1,5 @@
.
-.TH "JQ" "1" "July 2024" "" ""
+.TH "JQ" "1" "August 2024" "" ""
.
.SH "NAME"
\fBjq\fR \- Command\-line JSON processor
@@ -2330,6 +2330,12 @@ Applies HTML/XML escaping, by mapping the characters \fB<>&\'"\fR to their entit
Applies percent\-encoding, by mapping all reserved URI characters to a \fB%XX\fR sequence\.
.
.TP
+\fB@urid\fR:
+.
+.IP
+The inverse of \fB@uri\fR, applies percent\-decoding, by mapping all \fB%XX\fR sequences to their corresponding URI characters\.
+.
+.TP
\fB@csv\fR:
.
.IP
diff --git a/src/builtin.c b/src/builtin.c
index e39975b0a0..69e9b07214 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -657,6 +657,48 @@ static jv f_format(jq_state *jq, jv input, jv fmt) {
}
jv_free(input);
return line;
+ } else if (!strcmp(fmt_s, "urid")) {
+ jv_free(fmt);
+ input = f_tostring(jq, input);
+
+ jv line = jv_string("");
+ const char *errmsg = "is not a valid uri encoding";
+ const char *s = jv_string_value(input);
+ while (*s) {
+ if (*s != '%') {
+ line = jv_string_append_buf(line, s++, 1);
+ } else {
+ unsigned char unicode[4] = {0};
+ int b = 0;
+        // the leading 1 bits of the first octet tell how many octets the UTF-8 sequence spans
+ // (https://datatracker.ietf.org/doc/html/rfc3629#section-3)
+ while (b == 0 || (b < 4 && unicode[0] >> 7 & 1 && unicode[0] >> (7-b) & 1)) {
+ if (*(s++) != '%') {
+ jv_free(line);
+ return type_error(input, errmsg);
+ }
+ for (int i=0; i<2; i++) {
+ unicode[b] <<= 4;
+ char c = *(s++);
+ if ('0' <= c && c <= '9') unicode[b] |= c - '0';
+ else if ('a' <= c && c <= 'f') unicode[b] |= c - 'a' + 10;
+ else if ('A' <= c && c <= 'F') unicode[b] |= c - 'A' + 10;
+ else {
+ jv_free(line);
+ return type_error(input, errmsg);
+ }
+ }
+ b++;
+ }
+ if (!jvp_utf8_is_valid((const char *)unicode, (const char *)unicode+b)) {
+ jv_free(line);
+ return type_error(input, errmsg);
+ }
+ line = jv_string_append_buf(line, (const char *)unicode, b);
+ }
+ }
+ jv_free(input);
+ return line;
} else if (!strcmp(fmt_s, "sh")) {
jv_free(fmt);
if (jv_get_kind(input) != JV_KIND_ARRAY)
diff --git a/tests/jq.test b/tests/jq.test
index d249bc1936..88cd5d8b9f 100644
--- a/tests/jq.test
+++ b/tests/jq.test
@@ -61,7 +61,7 @@ null
null
"interpolation"
-@text,@json,([1,.]|@csv,@tsv),@html,@uri,@sh,(@base64|.,@base64d)
+@text,@json,([1,.]|@csv,@tsv),@html,(@uri|.,@urid),@sh,(@base64|.,@base64d)
"!()<>&'\"\t"
"!()<>&'\"\t"
"\"!()<>&'\\\"\\t\""
@@ -69,6 +69,7 @@ null
"1\t!()<>&'\"\\t"
"!()<>&'"\t"
"%21%28%29%3C%3E%26%27%22%09"
+"!()<>&'\"\t"
"'!()<>&'\\''\"\t'"
"ISgpPD4mJyIJ"
"!()<>&'\"\t"
@@ -86,6 +87,10 @@ null
"\u03bc"
"%CE%BC"
+@urid
+"%CE%BC"
+"\u03bc"
+
@html "\(.)"
""
"<script>hax</script>"
diff --git a/tests/uri.test b/tests/uri.test
new file mode 100644
index 0000000000..de10244463
--- /dev/null
+++ b/tests/uri.test
@@ -0,0 +1,38 @@
+# Tests are groups of three lines: program, input, expected output
+# Blank lines and lines starting with # are ignored
+
+@uri
+"<>&'\"\t"
+"%3C%3E%26%27%22%09"
+
+# decoding encoded output results in same text
+(@uri|@urid)
+"<>&'\"\t"
+"<>&'\"\t"
+
+# testing variable length unicode characters
+@uri
+"a \u03bc \u2230 \ud83d\ude0e"
+"a%20%CE%BC%20%E2%88%B0%20%F0%9F%98%8E"
+
+@urid
+"a%20%CE%BC%20%E2%88%B0%20%F0%9F%98%8E"
+"a \u03bc \u2230 \ud83d\ude0e"
+
+### invalid uri strings
+
+# lead octet F0 announces a 4-octet UTF-8 sequence, but only 3 octets are given
+. | try @urid catch .
+"%F0%93%81"
+"string (\"%F0%93%81\") is not a valid uri encoding"
+
+# invalid hex value ('FX')
+. | try @urid catch .
+"%FX%9F%98%8E"
+"string (\"%FX%9F%98%8E\") is not a valid uri encoding"
+
+# continuation octets of a UTF-8 sequence must have the form 10xxxxxx;
+# 'C0' = 11000000 does not, so the sequence is invalid
+. | try @urid catch .
+"%F0%C0%81%8E"
+"string (\"%F0%C0%81%8E\") is not a valid uri encoding"
diff --git a/tests/uritest b/tests/uritest
new file mode 100755
index 0000000000..1d2642c510
--- /dev/null
+++ b/tests/uritest
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+. "${0%/*}/setup" "$@"
+
+$VALGRIND $Q $JQ -L "$mods" --run-tests $JQTESTDIR/uri.test