forked from python/cpython
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
gh-119396: Optimize unicode_repr() (python#119617)
Use stringlib to specialize unicode_repr() for each string kind (UCS1, UCS2, UCS4).

Benchmark:

+-------------------------------------+---------+----------------------+
| Benchmark                           | ref     | change2              |
+=====================================+=========+======================+
| repr('abc')                         | 100 ns  | 103 ns: 1.02x slower |
+-------------------------------------+---------+----------------------+
| repr('a' * 100)                     | 369 ns  | 369 ns: 1.00x slower |
+-------------------------------------+---------+----------------------+
| repr(('a' + squote) * 100)          | 1.21 us | 946 ns: 1.27x faster |
+-------------------------------------+---------+----------------------+
| repr(('a' + nl) * 100)              | 1.23 us | 907 ns: 1.36x faster |
+-------------------------------------+---------+----------------------+
| repr(dquote + ('a' + squote) * 100) | 1.08 us | 858 ns: 1.25x faster |
+-------------------------------------+---------+----------------------+
| Geometric mean                      | (ref)   | 1.16x faster         |
+-------------------------------------+---------+----------------------+
- Loading branch information
Showing
4 changed files
with
131 additions
and
102 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
/* stringlib: repr() implementation */ | ||
|
||
#ifndef STRINGLIB_FASTSEARCH_H | ||
#error must include "stringlib/fastsearch.h" before including this module | ||
#endif | ||
|
||
|
||
static void | ||
STRINGLIB(repr)(PyObject *unicode, Py_UCS4 quote, | ||
STRINGLIB_CHAR *odata) | ||
{ | ||
Py_ssize_t isize = PyUnicode_GET_LENGTH(unicode); | ||
const void *idata = PyUnicode_DATA(unicode); | ||
int ikind = PyUnicode_KIND(unicode); | ||
|
||
*odata++ = quote; | ||
for (Py_ssize_t i = 0; i < isize; i++) { | ||
Py_UCS4 ch = PyUnicode_READ(ikind, idata, i); | ||
|
||
/* Escape quotes and backslashes */ | ||
if ((ch == quote) || (ch == '\\')) { | ||
*odata++ = '\\'; | ||
*odata++ = ch; | ||
continue; | ||
} | ||
|
||
/* Map special whitespace to '\t', \n', '\r' */ | ||
if (ch == '\t') { | ||
*odata++ = '\\'; | ||
*odata++ = 't'; | ||
} | ||
else if (ch == '\n') { | ||
*odata++ = '\\'; | ||
*odata++ = 'n'; | ||
} | ||
else if (ch == '\r') { | ||
*odata++ = '\\'; | ||
*odata++ = 'r'; | ||
} | ||
|
||
/* Map non-printable US ASCII to '\xhh' */ | ||
else if (ch < ' ' || ch == 0x7F) { | ||
*odata++ = '\\'; | ||
*odata++ = 'x'; | ||
*odata++ = Py_hexdigits[(ch >> 4) & 0x000F]; | ||
*odata++ = Py_hexdigits[ch & 0x000F]; | ||
} | ||
|
||
/* Copy ASCII characters as-is */ | ||
else if (ch < 0x7F) { | ||
*odata++ = ch; | ||
} | ||
|
||
/* Non-ASCII characters */ | ||
else { | ||
/* Map Unicode whitespace and control characters | ||
(categories Z* and C* except ASCII space) | ||
*/ | ||
if (!Py_UNICODE_ISPRINTABLE(ch)) { | ||
*odata++ = '\\'; | ||
/* Map 8-bit characters to '\xhh' */ | ||
if (ch <= 0xff) { | ||
*odata++ = 'x'; | ||
*odata++ = Py_hexdigits[(ch >> 4) & 0x000F]; | ||
*odata++ = Py_hexdigits[ch & 0x000F]; | ||
} | ||
/* Map 16-bit characters to '\uxxxx' */ | ||
else if (ch <= 0xffff) { | ||
*odata++ = 'u'; | ||
*odata++ = Py_hexdigits[(ch >> 12) & 0xF]; | ||
*odata++ = Py_hexdigits[(ch >> 8) & 0xF]; | ||
*odata++ = Py_hexdigits[(ch >> 4) & 0xF]; | ||
*odata++ = Py_hexdigits[ch & 0xF]; | ||
} | ||
/* Map 21-bit characters to '\U00xxxxxx' */ | ||
else { | ||
*odata++ = 'U'; | ||
*odata++ = Py_hexdigits[(ch >> 28) & 0xF]; | ||
*odata++ = Py_hexdigits[(ch >> 24) & 0xF]; | ||
*odata++ = Py_hexdigits[(ch >> 20) & 0xF]; | ||
*odata++ = Py_hexdigits[(ch >> 16) & 0xF]; | ||
*odata++ = Py_hexdigits[(ch >> 12) & 0xF]; | ||
*odata++ = Py_hexdigits[(ch >> 8) & 0xF]; | ||
*odata++ = Py_hexdigits[(ch >> 4) & 0xF]; | ||
*odata++ = Py_hexdigits[ch & 0xF]; | ||
} | ||
} | ||
/* Copy characters as-is */ | ||
else { | ||
*odata++ = ch; | ||
} | ||
} | ||
} | ||
*odata = quote; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters