From 6c7efc4de87efb1cbdfe2acbaa5def1d1d5fa8a5 Mon Sep 17 00:00:00 2001 From: Uli Schlachter Date: Mon, 29 Nov 2021 18:20:54 +0100 Subject: [PATCH] Switch from pcre to pcre2 (#4684) The issue at [0] was opened and I just took a stab at it. I have no prior experience with pcre and pcre2, but using [1,2] I hacked together something that seems to work. Next, Michael told me to turn that patch/hack into a PR, so here we are. The dependency in meson.build now uses version:'>=10', but this is more a random guess than actual knowledge. There was a while loop in regex_new() that dealt with an error when pcre was not compiled with UTF-8 support. This loop uses a magic constant of 32 for the error code. I just dropped this loop, because I was just writing a hack and did not intend to turn this into a PR. Also, a quick "grep 32 /usr/include/pcre.h" does not find anything useful, so... *shrug* pcre_study() was removed without replacement, so the corresponding code is also simply removed. Testing done: The test suite passes for me. YMMV. 
[0]: https://github.com/i3/i3/issues/4682 [1]: https://www.pcre.org/current/doc/html/pcre2api.html [2]: https://www.pcre.org/current/doc/html/pcre2demo.html Signed-off-by: Uli Schlachter Fixes: https://github.com/i3/i3/issues/4682 --- include/data.h | 7 ++++--- meson.build | 2 +- src/regex.c | 39 ++++++++++++++++----------------------- 3 files changed, 21 insertions(+), 27 deletions(-) diff --git a/include/data.h b/include/data.h index 95acd66da..293d14efd 100644 --- a/include/data.h +++ b/include/data.h @@ -9,11 +9,13 @@ */ #pragma once +#define PCRE2_CODE_UNIT_WIDTH 8 + #define SN_API_NOT_YET_FROZEN 1 #include <libsn/sn-launcher.h> #include <xcb/randr.h> -#include <pcre.h> +#include <pcre2.h> #include <sys/time.h> #include <cairo/cairo.h> @@ -248,8 +250,7 @@ struct Startup_Sequence { */ struct regex { char *pattern; - pcre *regex; - pcre_extra *extra; + pcre2_code *regex; }; /** diff --git a/meson.build b/meson.build index 27fc9fb58..df2d86e0c 100644 --- a/meson.build +++ b/meson.build @@ -316,7 +316,7 @@ xcb_util_xrm_dep = dependency('xcb-xrm', method: 'pkg-config') xkbcommon_dep = dependency('xkbcommon', method: 'pkg-config') xkbcommon_x11_dep = dependency('xkbcommon-x11', method: 'pkg-config') yajl_dep = dependency('yajl', method: 'pkg-config') -libpcre_dep = dependency('libpcre', version: '>=8.10', method: 'pkg-config') +libpcre_dep = dependency('libpcre2-8', version: '>=10', method: 'pkg-config') cairo_dep = dependency('cairo', version: '>=1.14.4', method: 'pkg-config') pangocairo_dep = dependency('pangocairo', method: 'pkg-config') glib_dep = dependency('glib-2.0', method: 'pkg-config') diff --git a/src/regex.c b/src/regex.c index 8f039157f..66ae51135 100644 --- a/src/regex.c +++ b/src/regex.c @@ -20,34 +20,23 @@ * */ struct regex *regex_new(const char *pattern) { - const char *error; - int errorcode, offset; + int errorcode; + PCRE2_SIZE offset; struct regex *re = scalloc(1, sizeof(struct regex)); re->pattern = sstrdup(pattern); - int options = PCRE_UTF8; + uint32_t options = PCRE2_UTF; /* We use PCRE_UCP so that \B, \b, \D, \d, \S, \s, 
\W, \w and some POSIX * character classes play nicely with Unicode */ - options |= PCRE_UCP; - while (!(re->regex = pcre_compile2(pattern, options, &errorcode, &error, &offset, NULL))) { - /* If the error is that PCRE was not compiled with UTF-8 support we - * disable it and try again */ - if (errorcode == 32) { - options &= ~PCRE_UTF8; - continue; - } - ELOG("PCRE regular expression compilation failed at %d: %s\n", - offset, error); + options |= PCRE2_UCP; + if (!(re->regex = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED, options, &errorcode, &offset, NULL))) { + PCRE2_UCHAR buffer[256]; + pcre2_get_error_message(errorcode, buffer, sizeof(buffer)); + ELOG("PCRE regular expression compilation failed at %lu: %s\n", + offset, buffer); regex_free(re); return NULL; } - re->extra = pcre_study(re->regex, 0, &error); - /* If an error happened, we print the error message, but continue. - * Studying the regular expression leads to faster matching, but it’s not - * absolutely necessary. 
*/ - if (error) { - ELOG("PCRE regular expression studying failed: %s\n", error); - } return re; } @@ -60,7 +49,6 @@ void regex_free(struct regex *regex) { return; FREE(regex->pattern); FREE(regex->regex); - FREE(regex->extra); FREE(regex); } @@ -71,17 +59,22 @@ void regex_free(struct regex *regex) { * */ bool regex_matches(struct regex *regex, const char *input) { + pcre2_match_data *match_data; int rc; + match_data = pcre2_match_data_create_from_pattern(regex->regex, NULL); + /* We use strlen() because pcre_exec() expects the length of the input * string in bytes */ - if ((rc = pcre_exec(regex->regex, regex->extra, input, strlen(input), 0, 0, NULL, 0)) == 0) { + rc = pcre2_match(regex->regex, (PCRE2_SPTR)input, strlen(input), 0, 0, match_data, NULL); + pcre2_match_data_free(match_data); + if (rc > 0) { LOG("Regular expression \"%s\" matches \"%s\"\n", regex->pattern, input); return true; } - if (rc == PCRE_ERROR_NOMATCH) { + if (rc == PCRE2_ERROR_NOMATCH) { LOG("Regular expression \"%s\" does not match \"%s\"\n", regex->pattern, input); return false;