forFudan · forFudan · Feb 11, 2025
diff --git a/.gitignore b/.gitignore
@@ -17,8 +17,10 @@
 
 */beta/archived/*.dict.yaml
 
-# Ignore zip files
+# Ignore specific types of files
 *.zip
+*.pdf
+*.png
 
 # Ignore distributions
 /dist

diff --git a/lua/yuhao/yuhao_autocompletion_filter_conditional.lua b/lua/yuhao/yuhao_autocompletion_filter_conditional.lua
diff --git a/lua/yuhao/yuhao_chaifen.lua b/lua/yuhao/yuhao_chaifen.lua
@@ -7,6 +7,7 @@ https://github.com/Ace-Who/rime-xuma/blob/master/schema/lua/ace/xuma_spelling.lu
 更新:
 - 20240919: 將詞語拆分中的菱形改爲全角波浪號.
 - 20240921: 更改默認的注解等級.
+- 20250210: 修正了一些顯示錯誤.
 ---------------------------------------------------------------------------
 ]]
 
@@ -189,11 +190,12 @@ local function xform(s)
       :gsub(',', ' · ')
       :gsub(' ·  ·  · ', ' · ')
       :gsub(' ·  · ', ' · ')
+      :gsub(' ·  · ', ' · ')
+      :gsub('〔 · 〕', '')
       :gsub('〔〕', '')
       :gsub('〔 · ', "〔")
 end
 
-
 local function parse_spll(str)
   -- Handle spellings like "{于下}{四点}丶"(for 求) where some radicals are
   -- represented by characters in braces.

diff --git a/lua/yuhao/yuhao_charset_filter.lua b/lua/yuhao/yuhao_charset_filter.lua
@@ -42,6 +42,7 @@ So a lua filter would be helpful to filter the frequently used
 20240819: 前置常用漢字功能只對 CJK 内的漢字生效.
 20240820: 前置常用漢字功能只在輸入爲四碼時方纔生效.
 20240908: 加入前置極常用繁簡漢字功能.
+20250210: 詞語中只要有一個字符在常用字符集内,就前置之.
 ------------------------------------------------------------------------
 ]]
 
@@ -64,7 +65,7 @@ local function yuhao_charset_prioritizer(input, env, option, charset)
     end
     if length_of_input_is_4 then
         for cand in input:iter() do
-            local is_charset_or_not_cjk = core.string_is_in_set(cand.text, charset)
+            local is_charset_or_not_cjk = core.string_is_intersected_with_set(cand.text, charset)
             -- 二種情況顯示字符: (1) 極常用 (2) 過濾器關閉
             if is_charset_or_not_cjk or not switch_on then
                 yield(cand)

diff --git a/lua/yuhao/yuhao_charsets.lua b/lua/yuhao/yuhao_charsets.lua
@@ -6,7 +6,7 @@ Author: 朱宇浩 (forFudan) <[email protected]>
 Github: https://github.com/forFudan/
 Purpose: 列舉常用漢字字符集
 版權聲明：
-專爲宇浩輸入法製作 <https://yuhao.forfudan.com>
+專爲宇浩輸入法製作 <https://shurufa.com>
 轉載請保留作者名和出處
 Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International
 --------------------------------------------------------------------------------

diff --git a/lua/yuhao/yuhao_core.lua b/lua/yuhao/yuhao_core.lua
@@ -11,13 +11,15 @@ Purpose: 宇浩輸入法的 RIME lua 提供核心函數
 Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International
 --------------------------------------------------------------------------------
 版本：
-20230418: 寫成 `set_from_str`, `is_subset`.
-20240107: 寫成 `is_intersected`.
-20240512: 重構函數, 寫成 `len_of_set`, `string_is_in_set`,
-    `char_is_in_unicode_blocks`, `string_is_in_unicode_blocks`
-    `string_is_in_charset_or_not_in_cjk`
-20240514: 增加 `string_starts_with`.
+20230418: 寫成 `set_from_str()`, `is_subset()`.
+20240107: 寫成 `is_intersected()`.
+20240512: 重構函數, 寫成 `len_of_set()`, `string_is_in_set()`,
+    `char_is_in_unicode_blocks()`, `string_is_in_unicode_blocks()`,
+    `string_is_in_charset_or_not_in_cjk()`
+20240514: 增加 `string_starts_with()`.
 20240919: 更新對於 CJK 區塊的定義, 加入西夏文和契丹小字等.
+20250210: 增加 `string_is_intersected_with_set()`.
+20250211: 更新對於 CJK 區塊的定義, 加入韓文音節等.
 --------------------------------------------------------------------------------
 ]]
 
@@ -94,7 +96,18 @@ function core.is_intersected(set1, set2)
     return false
 end
 
-core.cjk_blocks = {       -- CJK 區塊(非符號區)
+--- Check whether a string contains any character that is in `set`.
+---@param text string  text to inspect; converted via `core.set_from_str`
+---@param set table    set passed to `core.is_intersected` for comparison
+---@return boolean
+function core.string_is_intersected_with_set(text, set)
+    -- Turn the text into a set, then test that set against `set`.
+    return core.is_intersected(core.set_from_str(text), set)
+end
+
+core.cjk_blocks = { -- CJK 區塊(非符號區)
+    -- { 0x3000, 0x303F },   -- 中日韓符號和標點
+
     { 0x4E00,  0x9FFF },  -- 中日韓統一表意文字
     { 0x3400,  0x4DBF },  -- 中日韓統一表意文字擴展區A
     { 0x20000, 0x323AF }, -- 中日韓統一表意文字擴展區B到擴展區H
@@ -109,24 +122,30 @@ core.cjk_blocks = {       -- CJK 區塊(非符號區)
     { 0x2F800, 0x2FA1F }, -- 中日韓兼容表意文字補充
     { 0x3190,  0x319F },  -- 漢文訓讀
 
+    { 0xE000,  0xF8FF },  -- 私用區 宇浩字根在此區
+
     { 0x2FF0,  0x2FFF },  -- 表意文字描述字符
-    -- { 0x3000, 0x303F },   -- 中日韓符號和標點
     { 0x3200,  0x32FF },  -- 中日韓帶圈字符及月份
     { 0x1F200, 0x1F2FF }, -- 帶圈表意文字補充
     { 0x1F000, 0x1F02F }, -- 麻將牌
     { 0x2600,  0x26FF },  -- 雜項符號(太極兩儀四象八卦)
     { 0x4DC0,  0x4DFF },  -- 易經六十四卦
     { 0x1D300, 0x1D35F }, -- 太玄經卦爻
 
+    { 0x3040,  0x309F },  -- 平假名
+    { 0x30A0,  0x30FF },  -- 片假名
+    { 0x1B000, 0x1B0FF }, -- 補充假名
+    { 0x1B100, 0x1B12F }, -- 假名擴展
+    { 0xAC00,  0xD7AF },  -- 諺文音節
+    { 0x1100,  0x11FF },  -- 諺文字母
+    { 0x3130,  0x318F },  -- 諺文兼容字母
+    { 0xA960,  0xA97F },  -- 諺文字母擴展-A
+    { 0xD7B0,  0xD7FF },  -- 諺文字母擴展-B
+
     { 0x17000, 0x187FF }, -- 西夏文
     { 0x18800, 0x18AFF }, -- 西夏文部件
     { 0x18D00, 0x18D7F }, -- 西夏文補充
     { 0x18B00, 0x18CFF }, -- 契丹小字
-
-    { 0xE000,  0xF8FF },  -- 私用區 宇浩字根在此區
-
-    { 0x1B000, 0x1B0FF }, -- 補充假名
-    { 0x1B100, 0x1B12F }, -- 假名擴展
 }
 
 --- 判斷一個字符是不是在一組 Unicode 區位中