From 7f8966e55839d7b589b5adf20cf36855ff94695d Mon Sep 17 00:00:00 2001 From: Jack Rueter Date: Sat, 16 Nov 2024 23:13:26 +0200 Subject: [PATCH] Add more words missing 3652 unique forms --- src/fst/morphology/affixes/nouns.lexc | 16 +++--- src/fst/morphology/affixes/quantifiers.lexc | 10 ++-- src/fst/morphology/affixes/verbs.lexc | 50 +++++++++++++++++++ src/fst/morphology/phonology.twolc | 4 +- .../morphology/stems/adjectives_newwords.lexc | 1 + src/fst/morphology/stems/nouns_newwords.lexc | 34 ++++++++++++- src/fst/morphology/stems/numerals.lexc | 5 +- src/fst/morphology/stems/verbs_newwords.lexc | 2 + 8 files changed, 109 insertions(+), 13 deletions(-) diff --git a/src/fst/morphology/affixes/nouns.lexc b/src/fst/morphology/affixes/nouns.lexc index 3f3f8f3b..35b7a45f 100644 --- a/src/fst/morphology/affixes/nouns.lexc +++ b/src/fst/morphology/affixes/nouns.lexc @@ -180,12 +180,12 @@ LEXICON N_PERT1 ! pertʼ:pert !pertin !pertid :ʼ SG-NOM-SUF ; -+Sg+Ill:%>hä K ; -:%>ä SG-OBLIQUE ; ++Sg+Ill:ʼ%>he K ; +:i SG-OBLIQUE ; R ; ! xxx check ! Plural -:%>ä PL-NOM-SUF ; -:%>ä PL-ACC-SUF ; +:i PL-NOM-SUF ; +:i PL-ACC-SUF ; : PL-OBLIQUE ; @@ -350,7 +350,8 @@ LEXICON N_SEIBAZ ! seibaz:seib !!€ d: +N+Pl+Par :az SG-NOM-SUF ; :as SG-PAR-SUF_t ; -:ha SG-OBLIQUE ; ++Sg+Ill:haze K ; +:ha SG-OBLIQUE_NO_DERIV/ILL/PAR ; R ; ! xxx check ! Plural :ha PL-NOM-SUF ; @@ -420,7 +421,8 @@ LEXICON N_VEDEKAZ ! vedekaz:vedeka !!€ d: +N+Pl+Par :z SG-NOM-SUF ; :s SG-PAR-SUF_t ; -:ha SG-OBLIQUE ; ++Sg+Ill:haze K ; +:ha SG-OBLIQUE_NO_DERIV/ILL/PAR ; R ; ! xxx check ! Plural :ha PL-NOM-SUF ; @@ -637,6 +639,7 @@ LEXICON N_SAMAL !samal:samal syncope !!€ n: +N+Sg+Gen !!€ d: +N+Pl+Par : SG-NOM-SUF ; +: SG-PAR-SUF_t ; :%^RmVow%^DEVOICE%>o SG-OBLIQUE ; R ; ! xxx check ! Plural @@ -771,6 +774,7 @@ LEXICON N_ine/izhen/isht/izhehe/izhid !!€ d: +N+Pl+Par :ne SG-NOM-SUF ; :š SG-PAR-SUF_t ; ++Sg+Ill:žhe K ;!CHECKME 2024-11-16 :že SG-OBLIQUE ; R ; ! xxx check ! 
Plural diff --git a/src/fst/morphology/affixes/quantifiers.lexc b/src/fst/morphology/affixes/quantifiers.lexc index 86caf21a..09bec478 100644 --- a/src/fst/morphology/affixes/quantifiers.lexc +++ b/src/fst/morphology/affixes/quantifiers.lexc @@ -3,13 +3,17 @@ LEXICON NUM_KAKS1 !! **LEXICON @LEXNAME@** +Sg+Nom:ksʼ K ; -+Sg+Gen:hten K ; ++Sg:hte OBLIQUE_CASES_FOR_PRONOUNS ; LEXICON NUM_YKS1 !! **LEXICON @LEXNAME@** -+Sg+Nom:ks' K ; -+Sg+Gen:hten K ; ++Sg+Nom:ksʼ K ; ++Sg:hte OBLIQUE_CASES_FOR_PRONOUNS ; +LEXICON NUM_UEHESA +!! **LEXICON @LEXNAME@** ++Sg+Nom: K ; ++Sg: OBLIQUE_CASES_FOR_PRONOUNS ; !LEXICON NUM_NORUZ !!! **LEXICON @LEXNAME@** diff --git a/src/fst/morphology/affixes/verbs.lexc b/src/fst/morphology/affixes/verbs.lexc index bd262855..90d04de0 100644 --- a/src/fst/morphology/affixes/verbs.lexc +++ b/src/fst/morphology/affixes/verbs.lexc @@ -625,6 +625,30 @@ LEXICON V_2Syl_DVta/eb/i/DVkaha !! preterite stem vowel : ACT_IND_PRT ;! gets i +LEXICON V_1Syl_DVta/ub/i/DVkaha +!! @LEXNAME@ = seišta:seiž +! V_?? +!! preceding vowel always required for affix +:u V-VowelStem-PRS ; +!! refl-ind-prs, act-imprt-sg2, act-ind-prs, ind-sg-conneg, NomAg ++Der+Der/NomAg+N:u N_KACUI ;! this will need its own work with ‹e› present stems20241109 + +!! sometimes requires preceding vowel +:u ACT_COND_type ; +: ACT_PRFPRC_type_nu ; + +: VNONFIN_m ; !does not include -matoi 20241109 + +!!consonant stem if there is one takes both te, tes,... and kaha, koi +:%^DEVOICE V-ConsonantStem_t/k ; +! refl-imprt-sg2, refl-ind-prs-sg-conneg + +!! problems with tta, ta, da +:%^DEVOICE INF_ta ; !pagišta + +!! preterite stem vowel +:u ACT_IND_PRT ;! gets i + LEXICON V_1Syl_DVta/eb/i/DVkaha !! @LEXNAME@ = pesta:pez only for verbs with devoicing ! V_PESTA @@ -649,6 +673,31 @@ LEXICON V_1Syl_DVta/eb/i/DVkaha !! preterite stem vowel : ACT_IND_PRT ;! gets i +LEXICON V_1Syl_DVta/eb/i/ggaha +!! @LEXNAME@ = sülʼkta:sülʼg only for verbs with devoicing +! V_?? +!!
preceding vowel always required for affix +:e V-VowelStem-PRS ; +!! refl-ind-prs, act-imprt-sg2, act-ind-prs, ind-sg-conneg, NomAg ++Der+Der/NomAg+N:i N_TEGII ;! this will need its own work with ‹e› present stems20241109 + +!! sometimes requires preceding vowel +: ACT_COND_type ; +: ACT_PRFPRC_type_nu ; + +:e VNONFIN_m ; !does not include -matoi 20241109 + +!!consonant stem if there is one takes both te, tes,... and kaha, koi +:%^DEVOICE V-ConsonantStem_te/tud/tas ; !DV devoice +: V-ConsonantStem_gaha/goi ; +! refl-imprt-sg2, refl-ind-prs-sg-conneg + +!! problems with tta, ta, da +:%^DEVOICE INF_ta ; + +!! preterite stem vowel +: ACT_IND_PRT ;! gets i + LEXICON V_kta/gub/gui/ggaha !! @LEXNAME@ = kirkta:kir ! V_?? @@ -1281,6 +1330,7 @@ LEXICON V-VowelStem-PRS ACT_IND_PRT_SG_CONNEG ; +Der+Der/V:%>škande ACT_IND_PRS ;!inchoative? ++Der+Der/V:%>škande REFL_IND_PRS ;!inchoative? +Der+Der/V:%>škanz ACT_IND_PRT ;!inchoative? +Der+Der/V:%>ška V-ConsonantStem_t/k ;!inchoative? CHECKME 20241110 diff --git a/src/fst/morphology/phonology.twolc b/src/fst/morphology/phonology.twolc index 5c6cf5a6..961738bb 100644 --- a/src/fst/morphology/phonology.twolc +++ b/src/fst/morphology/phonology.twolc @@ -230,8 +230,8 @@ V1:Vx <=> Vx Cns:* %> h _ ; !! **@RULENAME@** vauged: vauktan Cx:Cy <=> _ [%{eØ%}: |%{uØ%}: ] Cns:CnsVoiceless %^RmVow: %^DEVOICE: %> ; Cns:CnsVoiceless [ %{eØ%}: | %{uØ%}: ] _ %^RmVow: %^DEVOICE: %> ; - [i | r | n | Vow:] _ (%^PEN: %^DEVOICE:) (%^RmVow:) %^DEVOICE: ; - [i | r | n | Vow:] _ Vow: Cns: %^PEN: %^DEVOICE: ; + [Vow:] _ (%^PEN: %^DEVOICE:) (%^RmVow:) %^DEVOICE: ; + [i | l (ʼ) | n | r | Vow:] _ Vow: Cns: %^PEN: %^DEVOICE: ; where Cx in (g d) Cy in (k t) matched ; !! 
oiged+A+Sg+Gen: __right/oikea__ diff --git a/src/fst/morphology/stems/adjectives_newwords.lexc b/src/fst/morphology/stems/adjectives_newwords.lexc index 87e2d3cc..8101a29e 100644 --- a/src/fst/morphology/stems/adjectives_newwords.lexc +++ b/src/fst/morphology/stems/adjectives_newwords.lexc @@ -86,6 +86,7 @@ aigvotte+A:aigvo A_IN_TTE ; kaikenvuitte+A:kaikenvui A_IN_TTE ; tozioiged+A:tozi#oiged A_OIGED ; huiged+A:huiged A_OIGED ; +selʼged+A:selʼged A_OIGED ; lämoinrusked+A:lämoin#rusked A_RUSKED ; purpuranrusked+A:purpuranrusked A_RUSKED ; päivänvauged+A:päivänvauged A_OIGED ; diff --git a/src/fst/morphology/stems/nouns_newwords.lexc b/src/fst/morphology/stems/nouns_newwords.lexc index 95617ba7..7d98e045 100644 --- a/src/fst/morphology/stems/nouns_newwords.lexc +++ b/src/fst/morphology/stems/nouns_newwords.lexc @@ -297,4 +297,36 @@ nalog+N:nalog N_MARJ ; naižjumal+N:naižjumal N_MARJ ; noid+N:noid N_POIG ; orj+N:orj N_0/an/ad/aha/id ; -oza+N:oz N_MUNA ; \ No newline at end of file +oza+N:oz N_MUNA ; +semen+N:semen N_SAMAL ; +smokvanpu+N:smokvanpu N_PU ; +soba+N:sob N_MUNA ; +sorm+N:sorm N_SARV ; +soton+N:soton N_POIG ; +spasib+N:spasib N_JAUH ; +starin+N:starin N_MARJ ; +sud+N:sud N_POIG ; +suim+N:suim N_POIG ; +suimpertʼ+N:suimpert N_PERT1 ; +süvüzʼ+N:süvüd N_NORUZ1 ; +taba+N:tab N_SANA ; +rahvazkogo+N:rahvazkogo N_PU ; +razbainik+N:razbainik N_MARJ ; +ridanik+N:ridanik N_MARJ ; +kalanik+N:kalanik N_MARJ ; +praznik+N:praznik N_MARJ ; +jälʼgnik+N:jälʼgnik N_MARJ ; +kesknik+N:kesknik N_MARJ ; +matknik+N:matknik N_MARJ ; +merimatknik+N:merimatknik N_MARJ ; +nahknik+N:nahknik N_MARJ ; +nužnik+N:nužnik N_MARJ ; +nägudez+N:nägudez N_ALUZ ; +padanik+N:padanik N_MARJ ; +pagenik+N:pagenik N_MARJ ; +pagan+N:pagan N_MARJ ; +torasebranik+N:torasebranik N_MARJ ; +türmnik+N:türmnik N_MARJ ; +verijälʼgnik+N:verijälʼgnik N_MARJ ; +vozʼpraznik+N:vozʼpraznik N_MARJ ; +käskištonopendai+N:käskištonopenda N_KACUI ; \ No newline at end of file diff --git
a/src/fst/morphology/stems/numerals.lexc b/src/fst/morphology/stems/numerals.lexc index 0f15fdf1..01159a12 100644 --- a/src/fst/morphology/stems/numerals.lexc +++ b/src/fst/morphology/stems/numerals.lexc @@ -20,7 +20,10 @@ LEXICON Numerals +Use/Circ: ISOLATED-NUMEXP ; ! for ½ etc. ! LEXICON NUM_CARD_ALL -yksi+Num:yksi # ; +kahesa+Num:kahesa NUM_UEHESA ; +kaksʼ+Num:ka NUM_KAKS1 ; +ühesa+Num:ühesa NUM_UEHESA ; +üksʼ+Num:ü NUM_YKS1 ; seičeme+Num:seičeme NUM_SEICHEME ; LEXICON NUM_SEICHEME !seičeme:seičem diff --git a/src/fst/morphology/stems/verbs_newwords.lexc b/src/fst/morphology/stems/verbs_newwords.lexc index 3261d550..b3342284 100644 --- a/src/fst/morphology/stems/verbs_newwords.lexc +++ b/src/fst/morphology/stems/verbs_newwords.lexc @@ -952,6 +952,8 @@ vinkta+V:ving V_kta/gub/gui/ggaha ; činkta+V:čing V_kta/gub/gui/ggaha ; štarkta+V:štarg V_kta/gub/gui/ggaha ; !#V_kuda/ub/ui/Q ;_haukkuda+V:hauk V_kuda/ub/ui/Q ; +seišta+V:seiž V_1Syl_DVta/ub/i/DVkaha ; +sülʼkta+V:sülʼg V_1Syl_DVta/eb/i/ggaha ; börišta+V:böriž V_2Syl_DVta/eb/i/DVkaha ; hogišta+V:hogiž V_2Syl_DVta/eb/i/DVkaha ; jurišta+V:juriž V_2Syl_DVta/eb/i/DVkaha ;