Skip to content

Commit

Permalink
Improvements to unicode database
Browse files: browse the repository at this point in the history
  • Loading branch information
GrieferAtWork committed Nov 5, 2023
1 parent 3d65697 commit 4b8936e
Show file tree
Hide file tree
Showing 5 changed files with 538 additions and 1,853 deletions.
28 changes: 18 additions & 10 deletions kos/cpp.hint
Original file line number Diff line number Diff line change
Expand Up @@ -627,14 +627,14 @@


// <hybrid/pointer.h>
#define __HYBRID_FUNCPTR32(return,cc,name,args) return (cc *name)args
#define __HYBRID_FUNCPTR64(return,cc,name,args) return (cc *name)args
#define FUNCPTR32(return,cc,name,args) return (cc *name)args
#define FUNCPTR64(return,cc,name,args) return (cc *name)args
#define __HYBRID_PTR32(T) T *
#define __HYBRID_PTR64(T) T *
#define PTR32(T) T *
#define PTR64(T) T *
#define __HYBRID_FUNCPTR32(return,cc,name,args) return (cc*name)args
#define __HYBRID_FUNCPTR64(return,cc,name,args) return (cc*name)args
#define FUNCPTR32(return,cc,name,args) return (cc*name)args
#define FUNCPTR64(return,cc,name,args) return (cc*name)args
#define __HYBRID_PTR32(T) T*
#define __HYBRID_PTR64(T) T*
#define PTR32(T) T*
#define PTR64(T) T*



Expand Down Expand Up @@ -707,7 +707,7 @@
// <hybrid/sequence/[...].h>
#define __HYBRID_Q_STRUCT struct
#define __HYBRID_Q_CLASS class
#define LIST_HEAD(n,t) struct n{struct t *lh_first;}
#define LIST_HEAD(n,t) struct n{struct t*lh_first;}
#define LIST_HEAD_P(T) struct{T*lh_first;}
#define LIST_ENTRY(t) struct{struct t*le_next,**le_prev;}
#define LIST_ENTRY_P(T) struct{T*le_next,**le_prev;}
Expand All @@ -723,7 +723,7 @@
#define LIST_P_FOREACH(...) for(;;)
#define LIST_P_FOREACH_P(...) for(;;)

#define SLIST_HEAD(n,t) struct n{struct t *slh_first;}
#define SLIST_HEAD(n,t) struct n{struct t*slh_first;}
#define SLIST_HEAD_P(T) struct{T*slh_first;}
#define SLIST_ENTRY(t) struct{struct t*sle_next;}
#define SLIST_ENTRY_P(T) struct{T*sle_next;}
Expand Down Expand Up @@ -903,6 +903,14 @@
#define __CDECL_OR_DEFAULT
#define __HYBRID_LIBATOMIC_DECL
#define __HYBRID_LIBATOMIC_CC
#define ____IMPL_DO_FFS(...) {}
#define ____IMPL_DO_CLZ(...) {}
#define ____IMPL_DO_CTZ(...) {}
#define ____IMPL_DO_POPCOUNT(...) {}
#define ____IMPL_DO_PARITY(...) {}
#define ____IMPL_DO_CLRSB(...) {}
#define ____IMPL_DO_PDEP(...) {}
#define ____IMPL_DO_PEXT(...) {}
#define BSEARCH(...) for(;;)
#define BSEARCH_EX(...) for(;;)
#define BSEARCH_RANGE(...) for(;;)
Expand Down
11 changes: 5 additions & 6 deletions kos/src/libc/user/unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,10 @@ PRIVATE struct unifold const unicode_fold_descriptors[UNICODE_FOLD_COUNT];
PRIVATE unidigit_t const unicode_digits[UNICODE_DIGIT_COUNT];
#define UNICODE_DESCRIPTOR_COUNT 42
PRIVATE struct __unitraits const unicode_descriptors[UNICODE_DESCRIPTOR_COUNT];
#define UNICODE_COUNT 0x10fffe
#define UNICODE_COUNT_VALID 0x110000
#define UNICODE_SHIFT 6
#define UNICODE_TAB1_MASK 0x3f
#define UNICODE_DESCRIPTOR_INDEX(ch) unicode_tab2[(unicode_tab1[(uint32_t)(ch) >> 6] << 6) + ((uint8_t)(ch) & 0x3f)]
#define UNICODE_COUNT 0xe01f0
#define UNICODE_SHIFT 7
#define UNICODE_TAB1_MASK 0x7f
#define UNICODE_DESCRIPTOR_INDEX(ch) unicode_tab2[(unicode_tab1[(uint32_t)(ch) >> 7] << 7) + ((uint8_t)(ch) & 0x7f)]
#define UNICODE_DESCRIPTOR(ch) unicode_descriptors[UNICODE_DESCRIPTOR_INDEX(ch)]
#define unicode_default_descriptor unicode_descriptors[0]
PRIVATE uint16_t const unicode_tab1[42];
Expand All @@ -99,7 +98,7 @@ INTERN ATTR_SECTION(".text.crt.unicode.UTF") ATTR_CONST ATTR_RETNONNULL struct _
NOTHROW(LIBCCALL libc___unicode_descriptor)(char32_t ch)
/*[[[body:libc___unicode_descriptor]]]*/
{
if likely(ch < UNICODE_COUNT_VALID)
if likely(ch < UNICODE_COUNT)
return &UNICODE_DESCRIPTOR(ch);
return &unicode_default_descriptor;
}
Expand Down
1 change: 1 addition & 0 deletions kos/src/libc/user/unicode/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
UnicodeData.txt
DerivedCoreProperties.txt
CaseFolding.txt
!db.dat
14 changes: 6 additions & 8 deletions kos/src/libc/user/unicode/ctype.dee
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,11 @@ function sizeofGreatestIntegerInBytes(values: {int...}): int {
@@>> local a, b, shift = return...;
@@>> tab[i] == b[(a[(unsigned)i >> shift] << shift) + (i & ((1 << shift)-1))];
@@HINT: If no mapping exists to optimize for this, shift will equal `0'
function splitTables(tab: {int: int}, errorValue: int) {
function splitTables(tab: {int: int}, errorValue: int): ({int...}, {int...}, int) {
local maxShift = 0;
local table = List();
for (local key, value: tab) {
if (key > #table)
if (key >= #table)
table.resize(key + 1, errorValue);
table[key] = value;
}
Expand Down Expand Up @@ -232,7 +232,7 @@ for (local line: readLines("UnicodeData.txt")) {
local generalCategory = fields[2];
local biDiCategory = fields[4];

if (generalCategory in ["Cc", "Cf", "Cs", "Co", "Cn"])
if (generalCategory in ["Cc", "Cf", "Cs", /*"Co",*/ "Cn"])
flags |= UNICODE_ISCNTRL;
if (biDiCategory == "B" || generalCategory in ["Zl", "Zp"]) {
flags |= UNICODE_ISLF;
Expand Down Expand Up @@ -302,7 +302,6 @@ for (local line: readLines("UnicodeData.txt")) {
}



/* Parse DerivedCoreProperties.txt
* Source: https://ftp.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt */
local DerivedCoreProperties: {int: HashSet with string} = Dict();
Expand Down Expand Up @@ -594,13 +593,12 @@ function generateUnicodeDatabase() {
}

local tab1_mask = ((1 << shift) - 1);
local validCount = #unicode_tab1 << shift;
print("#define UNICODE_COUNT ", ((ord2DescIndex.keys > ...) + 1).hex());
print("#define UNICODE_COUNT_VALID ", validCount.hex());
local maxOrd = (ord2DescIndex.keys > ...);
print("#define UNICODE_COUNT ", (maxOrd + 1).hex());
print("#define UNICODE_SHIFT ", shift);
print("#define UNICODE_TAB1_MASK ", tab1_mask.hex());
print("#define UNICODE_DESCRIPTOR_INDEX(ch) ",
"unicode_tab2[(unicode_tab1[(", cTypeForInteger(validCount - 1), ")(ch) >> ", shift, "] << ", shift,
"unicode_tab2[(unicode_tab1[(", cTypeForInteger(maxOrd), ")(ch) >> ", shift, "] << ", shift,
") + ((", cTypeForInteger(tab1_mask), ")(ch) & ", tab1_mask.hex(), ")]");
print("#define UNICODE_DESCRIPTOR(ch) unicode_descriptors[UNICODE_DESCRIPTOR_INDEX(ch)]");
print("#define unicode_default_descriptor unicode_descriptors[", desc2Index[NULL_DESCRIPTOR], "]");
Expand Down
Loading

0 comments on commit 4b8936e

Please sign in to comment.