Skip to content

Commit

Permalink
Add more simp words to yuhao schema
Browse files Browse the repository at this point in the history
  • Loading branch information
forFudan committed Sep 15, 2023
1 parent df4856a commit 09b7810
Show file tree
Hide file tree
Showing 33 changed files with 63,533 additions and 46,800 deletions.
692 changes: 0 additions & 692 deletions beta/generator/宇浩一二簡設置簡體.csv

This file was deleted.

681 changes: 0 additions & 681 deletions beta/generator/宇浩一二簡設置繁體.csv

This file was deleted.

682 changes: 0 additions & 682 deletions beta/generator/宇浩一二簡設置臺灣.csv

This file was deleted.

97 changes: 97 additions & 0 deletions beta/hotfix/lua/yuhao/yuhao_postpone_full_code.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
-- 名稱: yuhao_postpone_full_code.lua
-- 原作者: Ace-Who <https://github.com/Ace-Who/rime-xuma/>
-- 原代碼介紹:
-- 出现重码时,将全码匹配且有简码的「单字」「适当」后置。
-- 目前的实现方式,原理适用于所有使用规则简码的形码方案。

-- 修改: forFudan
-- 版本: 20230103
-- 修改介紹:
-- 根據宇浩輸入法更新字根列表。

local radstr = "也不亡尚穴韋甲屮丌鬼巛丶户用爪石非僉巳儿酉雨乃生马电豸馬囗禺了矛尸丅面食寸幺瓦壬足麻齒乙骨又米冊爿末西王古讠人毛世丨止母{shuxia}丰自艮士合禾曰广见上灬〇𬺰𠂤缶七牛卯刀文千扌瓜阝斤風气魚衤工厶龰欠攴宀彡見丂竹罒烏目至艹𠂇二丬已方兀一木之八且臣矢乚卩鸟犬牙弓疒糸向山匚{sui}戊{suw}廴夕土田黽丷凡貝饣鱼刂大豕弋亦门巾長示片車犭耳夫羽𧘇水飛亠黑未戈小礻火㗊虎爾三车𡗗辛鬥鹵冖口手氵辰言白虫尤心入高龶臼殳舟卜走立來鹿子辶彐纟丿身贝申皿其匕乌亍皮早十日而{nuyx}〢歹甫羊革夂予干亥隹月己丁彳咼钅力門女川长亻乂巴夭舌九几冂金厂由鳥𫝀⺄㇂丩⺶𠕁龴⻟𫩏𠂒〣冎髟ソ𠂎㇞⼌マ釒⼓丄𣥂𡈼𡿨⻍⺂ッ卄乀丆戶⻎訁⼹𫠠⺧𠃌コ䒑𰃦⺮氺卝戸匸𰁜⼁𩙿𠃊习乜忄𭕄㇍兀㐄𠂊𠥓勹冫丱乁𰀁𤣩爫⼅㇇⼂廾㇣㇈⺈𠘧𠀎𥫗㔾㇕㇀龷⺆凵Γ𠂉𫶧㇜耂ス㇒𦥑糹𡭔⺼攵⼃𰆊飠𠃎ユ⺊𥝌㇏⺍⾻乛⻗虍卅ュ㐅⻞⼶㇝卌𠁼⺬𠆢尢⺌⺁𠃍𠃋龵⺥㇉𧰨𠄌朩𠤎镸⼢牜亅㇅癶𠂆⻊巜𠄎𠃑𱼀𠃜⾅覀䶹キヰ⺋⺝𠘨⼫⺕夊𰀪⺩𠂭𧾷⺀"

-- Filter initialiser: reads the schema configuration once and caches the
-- values the filter needs on `env`.
--   env.code_rvdb : ReverseDb opened from 'build/<schema_name/code>.reverse.bin'
--   env.his_inp   : value of 'history/input'
--   env.delimiter : value of 'speller/delimiter'
--   env.max_index : 'yuhao_postpone_full_code/lua/max_index', 3 when unset
local function init(env)
  local cfg = env.engine.schema.config

  local dict_name = cfg:get_string('schema_name/code')
  local rvdb_path = 'build/' .. dict_name .. '.reverse.bin'
  env.code_rvdb = ReverseDb(rvdb_path)

  env.his_inp = cfg:get_string('history/input')
  env.delimiter = cfg:get_string('speller/delimiter')

  local max_index = cfg:get_int('yuhao_postpone_full_code/lua/max_index')
  if not max_index then
    max_index = 3
  end
  env.max_index = max_index
end

-- Given a space-separated list of codes, return the first code (a run of
-- lowercase letters) that is a proper prefix of some code in the list, i.e. a
-- short code for which a longer code also exists. Returns nothing when no such
-- code is found.
local function get_short(codestr)
  -- Leading space lets every code, including the first, be matched as ' code'.
  local padded = ' ' .. codestr
  for candidate in string.gmatch(padded, '%l+') do
    local longer_pattern = ' ' .. candidate .. '%l+'
    local longer_at = string.find(padded, longer_pattern)
    if longer_at ~= nil then
      return candidate
    end
  end
end

-- Decide whether a candidate was typed with its full code although a shorter
-- code exists for it.
--
-- Parameters:
--   cand : candidate; `cand:get_genuine()`, `cand.start` and `cand._end` are used.
--   env  : filter environment; `env.engine.context.input`, `env.delimiter`,
--          `env.his_inp` and `env.code_rvdb` are read.
-- Returns two values `drop, is_comp`:
--   drop    : truthy when the text has a short code and the typed code is that
--             short code followed by more lowercase letters.
--   is_comp : true when the typed code is not one of the codes the reverse
--             dictionary lists for the text (not an exact match).
-- Returns nothing for radicals typed with more than two letters and for the
-- history translator's trigger input.
local function has_short_and_is_full(cand, env)
  -- 'completion' and 'sentence' candidates are not exact matches. The type is
  -- read from cand:get_genuine() because a simplifier overwrites the type with
  -- 'simplified'. Checking the type first is not required; it is only a small
  -- performance shortcut.
  local cand_gen = cand:get_genuine()
  if cand_gen.type == 'completion' or cand_gen.type == 'sentence' then
    return false, true
  end

  local input = env.engine.context.input
  local cand_input = input:sub(cand.start + 1, cand._end)

  -- Strip delimiter characters from the typed code. Punctuation is escaped so
  -- that characters such as '^', '%', ']' or '-' are matched literally inside
  -- the character class, and a missing or empty delimiter setting is skipped
  -- instead of raising an error.
  local delimiter = env.delimiter
  if type(delimiter) == 'string' and delimiter ~= '' then
    local escaped = delimiter:gsub('%p', '%%%0')
    cand_input = cand_input:gsub('[' .. escaped .. ']', '')
  end

  -- Radicals may carry special extended codes; those are not treated as full
  -- codes and are never postponed.
  if cand_input:len() > 2 and radstr:find(cand_gen.text, 1, true) then
    return
  end

  -- Candidates produced for the history translator's input are not postponed.
  if cand_input == env.his_inp then return end

  local codestr = env.code_rvdb:lookup(cand_gen.text) or ''
  -- Plain (non-pattern) search for the typed code as a whole word in the list.
  local is_comp = not
      string.find(' ' .. codestr .. ' ', ' ' .. cand_input .. ' ', 1, true)
  local short = not is_comp and get_short(codestr)
  -- Exclude the case where a short code exists but the typed code is an
  -- irregular code that does not extend it.
  return short and cand_input:find('^' .. short .. '%l+'), is_comp
end

-- Filter body: when the "yuhao_postpone_full_code" option is on, moves single
-- characters that were typed with their full code (although a short code
-- exists) behind the candidates that follow them, up to position
-- `env.max_index`. With the option off, every candidate is passed through.
--
-- Parameters:
--   input : translation; candidates are read with `input:iter()`.
--   env   : filter environment; `env.engine.context` and `env.max_index` are read.
-- Every candidate is emitted exactly once through `yield`.
local function filter(input, env)
  local context = env.engine.context
  if not context:get_option("yuhao_postpone_full_code") then
    for cand in input:iter() do yield(cand) end
    return
  end

  -- Implementation note: target candidates are not moved back; instead they
  -- are held in `held_cands` while the non-target candidates are emitted first.
  local held_cands = {}
  local done_drop = false
  local pos = 1

  -- Emit the held candidates and empty the list so that they can never be
  -- emitted a second time.
  local function flush_held()
    for _, held in ipairs(held_cands) do yield(held) end
    held_cands = {}
  end

  -- Todo: when computing pos, account for duplicate candidates that the
  -- uniquifier may merge.
  for cand in input:iter() do
    if done_drop then
      yield(cand)
    else
      -- Postponing never crosses env.max_index nor these kinds of candidates:
      -- 1) candidates matching only part of the input, produced when a
      --    top-up ("顶功") schema uses script_translator; e.g. typing "otu"
      --    with the caret after "u" gives a candidate coded "ot". Filling the
      --    three- and four-letter slots of the table removes such candidates.
      -- 2) completion candidates allowed by such a schema's phrase translator.
      -- 3) the padding candidate of such a schema. The original note names
      --    the full-width space U+3000 while the literal compared was an ASCII
      --    space, so both are accepted.
      local is_bad_script_cand = cand._end < context.caret_pos
      local drop, is_comp = has_short_and_is_full(cand, env)
      local is_padding = cand.text == ' ' or cand.text == '\u{3000}'
      if pos >= env.max_index
          or is_bad_script_cand or is_comp or is_padding then
        flush_held()
        done_drop = true
        yield(cand)
      else
        -- utf8.len returns nil for invalid UTF-8; such text is passed through
        -- instead of raising a comparison error.
        local char_count = utf8.len(cand.text)
        -- Exactly matched phrases (more than one character) are not postponed.
        if not drop or not char_count or char_count > 1 then
          yield(cand)
          pos = pos + 1
        else
          held_cands[#held_cands + 1] = cand
        end
      end
    end
  end
  -- Candidates still held when the input ends go last.
  flush_held()
end

-- Module export: `init` prepares the per-schema state on `env`, `func` is the
-- candidate filter itself. rime.lua assigns this table to the global
-- `yuhao_postpone_full_code`, which the schema references as
-- `lua_filter@yuhao_postpone_full_code`.
return { init = init, func = filter }
12 changes: 12 additions & 0 deletions beta/hotfix/rime.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@

-- Yuhao input method: expose the Lua components as globals so that schema
-- entries such as `lua_filter@yuhao_postpone_full_code` can name them.

-- The character filter module provides two separate filters.
yuhao_char_filter = require("yuhao/yuhao_char_filter")
yuhao_char_first = yuhao_char_filter.yuhao_char_first
yuhao_char_only = yuhao_char_filter.yuhao_char_only

-- Modules registered under their own module name, loaded in this order.
for _, component in ipairs({
  "yuhao_single_char_only_for_full_code",
  "yuhao_postpone_full_code",
  "yuhao_autocompletion_filter",
  "yuhao_helper",
}) do
  _G[component] = require("yuhao/" .. component)
end

-- The chaifen module provides both a filter and a processor.
local chaifen = require("yuhao/yuhao_chaifen")
yuhao_chaifen = chaifen.filter
yuhao_chaifen_processor = chaifen.processor
16 changes: 16 additions & 0 deletions beta/hotfix/yuhao.custom.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# 功能開關一鍵配置

patch:
# schema/name: 宇浩·简体 # 方案名
speller/auto_select: true # 四碼唯一自動上屏
translator/enable_completion: true # 輸入預測(逐碼提示)
fixed/enable_completion: true # 輸入預測(逐碼提示)
# menu/page_size: 5 # 每頁候選數量
style/horizontal: false # 橫排候選欄
style/inline_preedit: true # 候選欄内置於輸入界面
style/preedit_type: composition # 候選顯示字母(composition)还是候選(preview)
# 以下開關,0爲第一個,1爲第二個,2爲第三個
switches/@0/reset: 1 # [只出常用字, 常用字前置, 全字集原排序]
switches/@1/reset: 0 # [字詞同出, 全碼出單]
switches/@2/reset: 0 # [原始排序, 全碼後置]
switches/@3/reset: 0 # [〇註解, 一重註解, 二重註解, 三重註解]
260 changes: 260 additions & 0 deletions beta/hotfix/yuhao.schema.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
# Rime schema settings
# encoding: utf-8

schema:
schema_id: yuhao
name: 宇浩繁简通·简体简码
version: 20230522
author:
- 發明人 朱宇浩
- Rime方案 <https://zhuyuhao.com/yuhao/>
- 官方QQ群 735728797
description: |
宇浩输入法,繁简通打,以简化汉字为主设置简码,故名为「开来学」。
默認設定:
- 常用字前置,不屏蔽生僻字。
- 開啓輸入預測。
- 不顯示拆分提示,通過 / 鍵 或 Control+Shift+C 開啓。
- Z 鍵反查拼音
- 按"help"或"bang"提示快捷鍵和官方網站。
dependencies:
- yuhao_pinyin
- yuhao_chaifen

switches:
- options:
- yuhao_char_only
- yuhao_char_first
- cjk
states: [只出常用字, 常用字前置, 全字集原排序]
reset: 1
# Control+Shift+O: 只出常用字
# Control+Shift+I: 常用字前置
- name: yuhao_single_char_only_for_full_code
states: ["字詞同出", "全码出单"]
reset: 0
# Control+Shift+D
- name: yuhao_postpone_full_code
reset: 0
states: [原始排序, 全碼後置]
# Control+Shift+H
- options:
- yuhao_chaifen.off
- yuhao_chaifen.lv1
- yuhao_chaifen.lv2
- yuhao_chaifen.lv3
states: [〇註解, 一重註解, 二重註解, 三重註解]
reset: 0
- name: yuhao_autocompletion_filter
reset: 0
states: [輸入預測, 精確匹配]
# Control+Shift+Y
- name: traditionalization
states: [简保持, 简转繁]
reset: 0
# Control+Shift+F
- name: simplification
reset: 0
states: [繁保持, 繁轉簡]
# Control+Shift+J
- name: ascii_punct
states: [。,, .,]
reset: 0
- name: ascii_mode
states: [中文, 西文]
reset: 0
- name: full_shape
states: ["半角", "全角"]
reset: 0

engine:
processors:
- ascii_composer
- recognizer
- lua_processor@yuhao_chaifen_processor
- key_binder
- speller
- punctuator
- selector
- navigator
- express_editor
segmentors:
- ascii_segmentor
- matcher
- affix_segmentor@zaoci # 用户造詞
- abc_segmentor
- punct_segmentor
- fallback_segmentor
translators:
- punct_translator
- history_translator@history
- reverse_lookup_translator
- table_translator@fixed
- table_translator
- lua_translator@yuhao_helper # 幫助文檔
- "table_translator@zaoci" # 用户造詞
filters:
- lua_filter@yuhao_autocompletion_filter
- lua_filter@yuhao_single_char_only_for_full_code
- lua_filter@yuhao_char_first
- lua_filter@yuhao_char_only
- lua_filter@yuhao_postpone_full_code
- simplifier@traditionalize
- simplifier@simplify
- lua_filter@yuhao_chaifen
- uniquifier

traditionalize:
tags: [abc, reverse_lookup]
option_name: traditionalization
opencc_config: s2t.json
tips: all
comment_format:
- xform/^/〔/
- xform/$/〕/

simplify:
tags: [abc, reverse_lookup]
option_name: simplification
opencc_config: t2s.json
tips: all
comment_format:
- xform/^/〔/
- xform/$/〕/

yuhao_postpone_full_code:
tags: [abc]
lua:
max_index: 3 # 最大後置位置,默認爲 3

schema_name:
code: yuhao
spelling: yuhao_chaifen

yuhao_chaifen:
lua:
switch_key: "Control+c"
cycle_key: "Shift+Control+C"

speller:
alphabet: zyxwvutsrqponmlkjihgfedcba
initials: zyxwvutsrqponmlkjihgfedcba
delimiter: "`"
max_code_length: 4
auto_select: true
algebra:
- "derive/^([a-y])[a-y]([a-y]*)$/$1z$2/" # Az AzC AzCD
- "derive/^([a-y]+)[a-y]([a-y]*)$/$1z$2/" # ABz ABzD ABCz AzC AzCD
- "derive/^([a-y]+)[a-y]$/$1z/" # ABz ABCz
# - "derive/^([a-y]+)[a-y]{2}$/$1zz/" # ABzz Azz
# - "derive/^([a-y])[a-y]{2}([a-y])$/$1zz$2/" # AzzD
# - "derive/^([a-y])[a-y]{3}$/$1zzz/" # Azzz
# - "derive/^([a-y])[a-y]([a-y])[a-y]$/$1z$2z/" # AzCz

translator:
dictionary: yuhao
prism: yuhao
db_class: tabledb
enable_completion: true
enable_sentence: false
enable_user_dict: true
enable_encoder: true
encode_commit_history: false # 对连续上屏的词自动成词
max_phrase_length: 4 # 自动成词的最大词长
preedit_format: []
comment_format:
- 'xform/^~/ /'
disable_user_dict_for_patterns:
- "^z.*$"

zaoci:
__include: translator
initial_quality: 0
enable_sentence: true
enable_user_dict: true
prefix: '`'
tips: "〔用户造詞〕"
tag: zaoci

# 固定词库词的权重,防止自动调频
fixed:
__include: translator
enable_user_dict: false
enable_sentence: false
initial_quality: 1000000

history:
input: z
size: 1
initial_quality: 1

reverse_lookup:
dictionary: yuhao_pinyin
prefix: "z"
tips: 〔全拼反查〕
closing_tips: 〔反查关闭〕
preedit_format:
- xform/([nl])v/$1ü/
- xform/([nl])ue/$1üe/
- xform/([jqxy])v/$1u/

punctuator:
import_preset: symbols_yuhao
half_shape:
"%": "%"
"*": "*"
"\\": ""
"[": ""
"]": ""
"{": ""
"}": ""
"/": "/"
"|": "|"

key_binder:
bindings:
- { when: always, accept: "Control+period", toggle: ascii_punct }
- { when: always, accept: "Control+Shift+J", toggle: simplification }
- { when: always, accept: "Control+Shift+F", toggle: traditionalization }
- { when: always, accept: "Control+Shift+O", toggle: yuhao_char_only } # 常用字過濾
- { when: always, accept: "Control+Shift+I", toggle: yuhao_char_first } # 常用字前置
- { when: has_menu, accept: "0", toggle: yuhao_char_only } # 常用字過濾
- {
when: always,
accept: "Control+Shift+D",
toggle: yuhao_single_char_only_for_full_code,
} # 全码出单
# - { when: always, accept: "Control+Shift+C", toggle: chaifen } # 拆分
- { when: has_menu, accept: "/", toggle: yuhao_chaifen.off } # 拆分
# - { when: always, accept: "Control+Shift+G", toggle: guji } # 反查
- {
when: always,
accept: "Control+Shift+H",
toggle: yuhao_postpone_full_code,
} # 全碼後置
- {
when: always,
accept: "Control+Shift+Y",
toggle: yuhao_autocompletion_filter,
} # 輸入預測
- { when: has_menu, accept: semicolon, send: 2 } # 分号次选
- { when: has_menu, accept: apostrophe, send: 3 } # 引号三选
- { when: has_menu, accept: Tab, send: 4 } # 製表符四选
- { when: has_menu, accept: minus, send: Page_Up } #減號上翻頁
- { when: has_menu, accept: equal, send: Page_Down } #等號下翻頁

recognizer:
import_preset: default
patterns:
uppercase: "^(?![`;]).*[A-Z][-_+.'0-9A-Za-z]*$"
reverse_lookup: "^z([a-z]+?)*$"
zaoci: "^[a-y]*`[a-y`]*$"
punct: '^/([0-9]0?|[A-Za-z]+)?$'

style:
horizontal: false
inline_preedit: true
preedit_type: preview

# menu:
# page_size: 5
Loading

0 comments on commit 09b7810

Please sign in to comment.