Skip to content

Commit

Permalink
fixing issue 861 (#863)
Browse files Browse the repository at this point in the history
  • Loading branch information
lemire authored Jan 31, 2025
1 parent 1492acb commit ac78801
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 4 deletions.
77 changes: 73 additions & 4 deletions include/ada/url_search_params-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,79 @@ inline void url_search_params::remove(const std::string_view key,
}

inline void url_search_params::sort() {
  // Keys are ordered by their UTF-16 code units, not by their UTF-8 bytes and
  // not by code point. A character above U+FFFF becomes a surrogate pair in
  // the range 0xD800-0xDFFF and therefore sorts *before* U+E000-U+FFFF, which
  // a plain byte-wise comparison gets wrong. The key bytes are decoded as
  // UTF-8 and transcoded on the fly, one code unit at a time, so no temporary
  // UTF-16 string is built. The sort is stable: pairs whose keys compare equal
  // keep their relative order.
  std::ranges::stable_sort(params, [](const key_value_pair &lhs,
                                      const key_value_pair &rhs) {
    // Streams the UTF-16 code units of a UTF-8 encoded key.
    struct code_unit_reader {
      std::string_view input;
      size_t pos = 0;
      // Second half of a surrogate pair that still has to be handed out.
      // Zero means "none": a real low surrogate is always >= 0xDC00.
      uint32_t pending_low_surrogate = 0;

      // True once every byte is consumed and no low surrogate is pending.
      bool done() const {
        return pos >= input.size() && pending_low_surrogate == 0;
      }

      // Byte at pos + offset, or 0 when the key ends in a truncated
      // multi-byte sequence. This keeps malformed input from reading past
      // the end of the key; well-formed input never hits the fallback.
      uint32_t byte_at(size_t offset) const {
        const size_t index = pos + offset;
        return index < input.size() ? static_cast<uint8_t>(input[index]) : 0u;
      }

      // Returns the next UTF-16 code unit. Must not be called when done().
      uint32_t next() {
        if (pending_low_surrogate != 0) {
          const uint32_t low = pending_low_surrogate;
          pending_low_surrogate = 0;
          return low;
        }

        const uint32_t lead = byte_at(0);
        if (lead <= 0x7F) {  // 1 byte: ASCII
          pos += 1;
          return lead;
        }
        if (lead <= 0xDF) {  // 2 bytes
          const uint32_t unit = ((lead & 0x1F) << 6) | (byte_at(1) & 0x3F);
          pos += 2;
          return unit;
        }
        if (lead <= 0xEF) {  // 3 bytes
          const uint32_t unit = ((lead & 0x0F) << 12) |
                                ((byte_at(1) & 0x3F) << 6) |
                                (byte_at(2) & 0x3F);
          pos += 3;
          return unit;
        }

        // 4 bytes: the code point is above U+FFFF, so it is split into a
        // surrogate pair. The high surrogate is returned now and the low
        // surrogate on the following call.
        uint32_t code_point = ((lead & 0x07) << 18) |
                              ((byte_at(1) & 0x3F) << 12) |
                              ((byte_at(2) & 0x3F) << 6) |
                              (byte_at(3) & 0x3F);
        pos += 4;
        code_point -= 0x10000;
        pending_low_surrogate =
            static_cast<uint16_t>(0xDC00 + (code_point & 0x3FF));
        return static_cast<uint16_t>(0xD800 + (code_point >> 10));
      }
    };

    code_unit_reader left{lhs.first};
    code_unit_reader right{rhs.first};

    while (!left.done() && !right.done()) {
      const uint32_t left_unit = left.next();
      const uint32_t right_unit = right.next();
      if (left_unit != right_unit) {
        return left_unit < right_unit;
      }
    }
    // All shared code units matched: lhs is smaller only when it is a strict
    // prefix of rhs, i.e. rhs still has code units left.
    return !right.done();
  });
}

inline url_search_params_keys_iter url_search_params::get_keys() {
Expand Down
23 changes: 23 additions & 0 deletions tests/url_search_params.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,26 @@ TEST(url_search_params, test_character_set) {
}
SUCCEED();
}

// Taken from
// https://github.com/web-platform-tests/wpt/blob/d5085f61e2d949bc9fb24b04f4c6a47bdf6d3be9/url/urlsearchparams-sort.any.js#L11
TEST(url_search_params, sort_unicode_code_units) {
  // Two valueless keys: U+FB03 (UTF-8: EF AC 83) followed by U+1F308
  // (UTF-8: F0 9F 8C 88). In UTF-16, U+1F308 starts with the high surrogate
  // 0xD83C, which is smaller than 0xFB03, so it is expected first after
  // sorting even though its UTF-8 bytes compare greater.
  ada::url_search_params params("\xef\xac\x83&\xf0\x9f\x8c\x88");
  params.sort();
  ASSERT_EQ(params.size(), 2);

  auto sorted_keys = params.get_keys();
  ASSERT_EQ(sorted_keys.next(), "\xf0\x9f\x8c\x88");
  ASSERT_EQ(sorted_keys.next(), "\xef\xac\x83");
  SUCCEED();
}

TEST(url_search_params, sort_unicode_code_units_edge_case) {
  // The first key is U+1F308 followed by U+FB03; the second key is U+1F308
  // alone, i.e. a strict prefix of the first. The comparison therefore has to
  // get past a complete surrogate pair on both sides before one key runs out,
  // and the shorter key is expected first.
  ada::url_search_params params(
      "\xf0\x9f\x8c\x88\xef\xac\x83&\xf0\x9f\x8c\x88");
  params.sort();
  ASSERT_EQ(params.size(), 2);

  auto sorted_keys = params.get_keys();
  ASSERT_EQ(sorted_keys.next(), "\xf0\x9f\x8c\x88");
  ASSERT_EQ(sorted_keys.next(), "\xf0\x9f\x8c\x88\xef\xac\x83");
  SUCCEED();
}

0 comments on commit ac78801

Please sign in to comment.