Skip to content

Commit

Permalink
upgrade unicode db to 6.3.0 (closes #19221)
Browse files Browse the repository at this point in the history
  • Loading branch information
benjaminp committed Oct 10, 2013
1 parent f7102c1 commit 7da8059
Show file tree
Hide file tree
Showing 8 changed files with 17,195 additions and 17,153 deletions.
8 changes: 4 additions & 4 deletions Doc/library/unicodedata.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@

This module provides access to the Unicode Character Database (UCD) which
defines character properties for all Unicode characters. The data contained in
this database is compiled from the `UCD version 6.2.0
<http://www.unicode.org/Public/6.2.0/ucd>`_.
this database is compiled from the `UCD version 6.3.0
<http://www.unicode.org/Public/6.3.0/ucd>`_.

The module uses the same names and symbols as defined by Unicode
Standard Annex #44, `"Unicode Character Database"
Expand Down Expand Up @@ -166,6 +166,6 @@ Examples:

.. rubric:: Footnotes

.. [#] http://www.unicode.org/Public/6.2.0/ucd/NameAliases.txt
.. [#] http://www.unicode.org/Public/6.3.0/ucd/NameAliases.txt
.. [#] http://www.unicode.org/Public/6.2.0/ucd/NamedSequences.txt
.. [#] http://www.unicode.org/Public/6.3.0/ucd/NamedSequences.txt
2 changes: 1 addition & 1 deletion Lib/test/test_unicodedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
class UnicodeMethodsTest(unittest.TestCase):

# update this, if the database changes
expectedchecksum = 'bf7a78f1a532421b5033600102e23a92044dbba9'
expectedchecksum = 'e74e878de71b6e780ffac271785c3cb58f6251f3'

def test_method_checksum(self):
h = hashlib.sha1()
Expand Down
2 changes: 2 additions & 0 deletions Misc/NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ Projected release date: 2013-10-20
Core and Builtins
-----------------

- Issue #19221: Upgrade Unicode database to version 6.3.0.

- Issue #16742: The result of the C callback PyOS_ReadlineFunctionPointer must
now be a string allocated by PyMem_RawMalloc() or PyMem_RawRealloc() (or NULL
if an error occurred), instead of a string allocated by PyMem_Malloc() or
Expand Down
4 changes: 2 additions & 2 deletions Modules/unicodedata.c
Original file line number Diff line number Diff line change
Expand Up @@ -1322,10 +1322,10 @@ PyDoc_STRVAR(unicodedata_docstring,
"This module provides access to the Unicode Character Database which\n\
defines character properties for all Unicode characters. The data in\n\
this database is based on the UnicodeData.txt file version\n\
6.0.0 which is publicly available from ftp://ftp.unicode.org/.\n\
6.3.0 which is publicly available from ftp://ftp.unicode.org/.\n\
\n\
The module uses the same names and symbols as defined by the\n\
UnicodeData File Format 6.0.0 (see\n\
UnicodeData File Format 6.3.0 (see\n\
http://www.unicode.org/reports/tr44/tr44-6.html).");


Expand Down
1,576 changes: 792 additions & 784 deletions Modules/unicodedata_db.h

Large diffs are not rendered by default.

32,738 changes: 16,386 additions & 16,352 deletions Modules/unicodename_db.h

Large diffs are not rendered by default.

14 changes: 6 additions & 8 deletions Objects/unicodetype_db.h
Original file line number Diff line number Diff line change
Expand Up @@ -1589,7 +1589,7 @@ static unsigned short index2[] = {
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0,
0, 0, 0, 55, 55, 55, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21,
21, 21, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 5, 0, 0, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
25, 25, 25, 5, 21, 0, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 96, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 7, 8,
Expand Down Expand Up @@ -1801,7 +1801,7 @@ static unsigned short index2[] = {
25, 25, 25, 25, 25, 25, 25, 25, 5, 5, 5, 96, 5, 5, 5, 5, 55, 25, 0, 0, 7,
8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
25, 25, 25, 2, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0,
25, 25, 25, 21, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 96,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
Expand All @@ -1828,7 +1828,7 @@ static unsigned short index2[] = {
7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 132, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 25, 25, 18, 18, 18, 0, 0, 5, 5, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 25, 25, 18, 18, 25, 0, 0, 5, 5, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 18, 25, 18, 25,
Expand Down Expand Up @@ -1915,7 +1915,7 @@ static unsigned short index2[] = {
5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 6, 3, 3, 21, 21, 21, 21, 21, 2, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 18, 18, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 18, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 21,
21, 21, 21, 21, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 245, 95, 0, 0,
21, 21, 21, 21, 0, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 245, 95, 0, 0,
246, 247, 248, 249, 250, 251, 5, 5, 5, 5, 5, 95, 245, 26, 22, 23, 246,
247, 248, 249, 250, 251, 5, 5, 5, 5, 5, 0, 95, 95, 95, 95, 95, 95, 95,
95, 95, 95, 95, 95, 95, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
Expand Down Expand Up @@ -2925,9 +2925,6 @@ static unsigned short index2[] = {
double _PyUnicode_ToNumeric(Py_UCS4 ch)
{
switch (ch) {
case 0x12456:
case 0x12457:
return (double) -1.0;
case 0x0F33:
return (double) -1.0/2.0;
case 0x0030:
Expand Down Expand Up @@ -3383,6 +3380,7 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch)
case 0x12435:
case 0x1244A:
case 0x12450:
case 0x12456:
case 0x12459:
case 0x1D361:
case 0x1D7D0:
Expand Down Expand Up @@ -3539,6 +3537,7 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch)
case 0x1243B:
case 0x1244B:
case 0x12451:
case 0x12457:
case 0x1D362:
case 0x1D7D1:
case 0x1D7DB:
Expand Down Expand Up @@ -4294,7 +4293,6 @@ int _PyUnicode_IsWhitespace(const Py_UCS4 ch)
case 0x0085:
case 0x00A0:
case 0x1680:
case 0x180E:
case 0x2000:
case 0x2001:
case 0x2002:
Expand Down
4 changes: 2 additions & 2 deletions Tools/unicode/makeunicodedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
VERSION = "3.2"

# The Unicode Database
UNIDATA_VERSION = "6.2.0"
UNIDATA_VERSION = "6.3.0"
UNICODE_DATA = "UnicodeData%s.txt"
COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
Expand Down Expand Up @@ -68,7 +68,7 @@

BIDIRECTIONAL_NAMES = [ "", "L", "LRE", "LRO", "R", "AL", "RLE", "RLO",
"PDF", "EN", "ES", "ET", "AN", "CS", "NSM", "BN", "B", "S", "WS",
"ON" ]
"ON", "LRI", "RLI", "FSI", "PDI" ]

EASTASIANWIDTH_NAMES = [ "F", "H", "W", "Na", "A", "N" ]

Expand Down

0 comments on commit 7da8059

Please sign in to comment.