Skip to content

Commit

Permalink
Add more words
Browse files Browse the repository at this point in the history
missing 3652 unique forms
  • Loading branch information
rueter committed Nov 16, 2024
1 parent c0e9f25 commit 7f8966e
Show file tree
Hide file tree
Showing 8 changed files with 109 additions and 13 deletions.
16 changes: 10 additions & 6 deletions src/fst/morphology/affixes/nouns.lexc
Original file line number Diff line number Diff line change
Expand Up @@ -180,12 +180,12 @@ LEXICON N_PERT1 ! pertʼ:pert
!pertin
!pertid
:ʼ SG-NOM-SUF ;
+Sg+Ill:%>hä K ;
:%>ä SG-OBLIQUE ;
+Sg+Ill:ʼ%>he K ;
:i SG-OBLIQUE ;
R ; ! xxx check
! Plural
:%>ä PL-NOM-SUF ;
:%>ä PL-ACC-SUF ;
:i PL-NOM-SUF ;
:i PL-ACC-SUF ;
: PL-OBLIQUE ;


Expand Down Expand Up @@ -350,7 +350,8 @@ LEXICON N_SEIBAZ ! seibaz:seib
!!€ d: +N+Pl+Par
:az SG-NOM-SUF ;
:as SG-PAR-SUF_t ;
:ha SG-OBLIQUE ;
+Sg+Ill:haze K ;
:ha SG-OBLIQUE_NO_DERIV/ILL/PAR ;
R ; ! xxx check
! Plural
:ha PL-NOM-SUF ;
Expand Down Expand Up @@ -420,7 +421,8 @@ LEXICON N_VEDEKAZ ! vedekaz:vedeka
!!€ d: +N+Pl+Par
:z SG-NOM-SUF ;
:s SG-PAR-SUF_t ;
:ha SG-OBLIQUE ;
+Sg+Ill:haze K ;
:ha SG-OBLIQUE_NO_DERIV/ILL/PAR ;
R ; ! xxx check
! Plural
:ha PL-NOM-SUF ;
Expand Down Expand Up @@ -637,6 +639,7 @@ LEXICON N_SAMAL !samal:samal syncope
!!€ n: +N+Sg+Gen
!!€ d: +N+Pl+Par
: SG-NOM-SUF ;
: SG-PAR-SUF_t ;
:%^RmVow%^DEVOICE%>o SG-OBLIQUE ;
R ; ! xxx check
! Plural
Expand Down Expand Up @@ -771,6 +774,7 @@ LEXICON N_ine/izhen/isht/izhehe/izhid
!!€ d: +N+Pl+Par
:ne SG-NOM-SUF ;
:š SG-PAR-SUF_t ;
+Sg+Ill:žhe K ;!CHECKME 2024-11-16
:že SG-OBLIQUE ;
R ; ! xxx check
! Plural
Expand Down
10 changes: 7 additions & 3 deletions src/fst/morphology/affixes/quantifiers.lexc
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,17 @@
LEXICON NUM_KAKS1
!! **LEXICON @LEXNAME@**
! Endings for a numeral whose lexical stem stops before the ks/ht alternation
! (NUM_CARD_ALL enters it as kaksʼ+Num:ka, so the stem arriving here is "ka").
+Sg+Nom:ksʼ K ;  ! nominative singular: stem + ksʼ
+Sg+Gen:hten K ;  ! genitive singular written out in full: stem + hten
+Sg:hte OBLIQUE_CASES_FOR_PRONOUNS ;  ! oblique stem: stem + hte; remaining tags/endings presumably come from the continuation lexicon
! NOTE(review): if OBLIQUE_CASES_FOR_PRONOUNS also yields +Gen, the explicit +Sg+Gen line duplicates it — confirm only one path is intended.

LEXICON NUM_YKS1
!! **LEXICON @LEXNAME@**
! Endings for a numeral entered with a one-letter stem
! (NUM_CARD_ALL enters it as üksʼ+Num:ü, so the stem arriving here is "ü").
! NOTE(review): the two +Sg+Nom lines below differ only in the apostrophe
! (ASCII ' versus modifier letter ʼ); the lemma uses ʼ — confirm whether the ASCII variant should remain.
+Sg+Nom:ks' K ;
+Sg+Gen:hten K ;  ! genitive singular written out in full: stem + hten
+Sg+Nom:ksʼ K ;  ! nominative singular: stem + ksʼ
+Sg:hte OBLIQUE_CASES_FOR_PRONOUNS ;  ! oblique stem: stem + hte; remaining tags/endings presumably come from the continuation lexicon

LEXICON NUM_UEHESA
!! **LEXICON @LEXNAME@**
! Endings for numerals whose full form is already the stem
! (NUM_CARD_ALL sends kahesa and ühesa here with the whole word as stem).
+Sg+Nom: K ;  ! nominative singular: nothing is added to the stem
+Sg: OBLIQUE_CASES_FOR_PRONOUNS ;  ! oblique forms attach directly to the unchanged stem

!LEXICON NUM_NORUZ
!!! **LEXICON @LEXNAME@**
Expand Down
50 changes: 50 additions & 0 deletions src/fst/morphology/affixes/verbs.lexc
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,30 @@ LEXICON V_2Syl_DVta/eb/i/DVkaha
!! preterite stem vowel
: ACT_IND_PRT ;! gets i

LEXICON V_1Syl_DVta/ub/i/DVkaha
!! @LEXNAME@ = seišta:seiž
! V_??
! Verb class whose present, conditional and preterite stems take the vowel u,
! and whose consonant stem and infinitive carry the %^DEVOICE trigger.
!! preceding vowel always required for affix
:u V-VowelStem-PRS ;
!! refl-ind-prs, act-imprt-sg2, act-ind-prs, ind-sg-conneg, NomAg
+Der+Der/NomAg+N:u N_KACUI ;! this will need its own work with ‹e› present stems20241109
! NOTE(review): the ‹e› remark above also appears in the e-stem lexicon; this class adds u — confirm the remark applies here.

!! sometimes requires preceding vowel
:u ACT_COND_type ;
: ACT_PRFPRC_type_nu ;

: VNONFIN_m ; !does not include -matoi 20241109

!!consonant stem if there is one takes both te, tes,... and kaha, koi
:%^DEVOICE V-ConsonantStem_t/k ;
! refl-imprt-sg2, refl-ind-prs-sg-conneg

!! problems with tta, ta, da
:%^DEVOICE INF_ta ; !pagišta

!! preterite stem vowel
! Unlike the /eb/ lexicons in this file, which pass an empty string to ACT_IND_PRT, this class supplies u here.
:u ACT_IND_PRT ;! gets i

LEXICON V_1Syl_DVta/eb/i/DVkaha
!! @LEXNAME@ = pesta:pez only for verbs with devoicing
! V_PESTA
Expand All @@ -649,6 +673,31 @@ LEXICON V_1Syl_DVta/eb/i/DVkaha
!! preterite stem vowel
: ACT_IND_PRT ;! gets i

LEXICON V_1Syl_DVta/eb/i/ggaha
!! @LEXNAME@ = sülʼkta:sülʼg only for verbs with devoicing
! V_??
! NOTE(review): example taken from the verbs_newwords entry sülʼkta+V:sülʼg;
! pesta:pez (V_PESTA) is the example of the sibling lexicon V_1Syl_DVta/eb/i/DVkaha — confirm.
!! preceding vowel always required for affix
:e V-VowelStem-PRS ;
!! refl-ind-prs, act-imprt-sg2, act-ind-prs, ind-sg-conneg, NomAg
+Der+Der/NomAg+N:i N_TEGII ;! this will need its own work with ‹e› present stems20241109

!! sometimes requires preceding vowel
: ACT_COND_type ;
: ACT_PRFPRC_type_nu ;

:e VNONFIN_m ; !does not include -matoi 20241109

!!consonant stem if there is one takes both te, tes,... and kaha, koi
! Split in two here: the te/tud/tas endings get the devoicing trigger, the gaha/goi endings do not.
:%^DEVOICE V-ConsonantStem_te/tud/tas ; !DV devoice
: V-ConsonantStem_gaha/goi ;
! refl-imprt-sg2, refl-ind-prs-sg-conneg

!! problems with tta, ta, da
:%^DEVOICE INF_ta ;

!! preterite stem vowel
: ACT_IND_PRT ;! gets i

LEXICON V_kta/gub/gui/ggaha
!! @LEXNAME@ = kirkta:kir
! V_??
Expand Down Expand Up @@ -1281,6 +1330,7 @@ LEXICON V-VowelStem-PRS
ACT_IND_PRT_SG_CONNEG ;

+Der+Der/V:%>škande ACT_IND_PRS ;!inchoative?
+Der+Der/V:%>škande REFL_IND_PRS ;!inchoative?
+Der+Der/V:%>škanz ACT_IND_PRT ;!inchoative?
+Der+Der/V:%>ška V-ConsonantStem_t/k ;!inchoative? CHECKME 20241110

Expand Down
4 changes: 2 additions & 2 deletions src/fst/morphology/phonology.twolc
Original file line number Diff line number Diff line change
Expand Up @@ -230,8 +230,8 @@ V1:Vx <=> Vx Cns:* %> h _ ;
!! **@RULENAME@** vauged: vauktan
Cx:Cy <=> _ [%{eØ%}: |%{uØ%}: ] Cns:CnsVoiceless %^RmVow: %^DEVOICE: %> ;
Cns:CnsVoiceless [ %{eØ%}: | %{uØ%}: ] _ %^RmVow: %^DEVOICE: %> ;
[i | r | n | Vow:] _ (%^PEN: %^DEVOICE:) (%^RmVow:) %^DEVOICE: ;
[i | r | n | Vow:] _ Vow: Cns: %^PEN: %^DEVOICE: ;
[Vow:] _ (%^PEN: %^DEVOICE:) (%^RmVow:) %^DEVOICE: ;
[i | l (ʼ) | n | r | Vow:] _ Vow: Cns: %^PEN: %^DEVOICE: ;
where Cx in (g d)
Cy in (k t) matched ;
!! oiged+A+Sg+Gen: __right/oikea__
Expand Down
1 change: 1 addition & 0 deletions src/fst/morphology/stems/adjectives_newwords.lexc
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ aigvotte+A:aigvo A_IN_TTE ;
kaikenvuitte+A:kaikenvui A_IN_TTE ;
tozioiged+A:tozi#oiged A_OIGED ;
huiged+A:huiged A_OIGED ;
selʼged+A:selʼged A_OIGED ;
lämoinrusked+A:lämoin#rusked A_RUSKED ;
purpuranrusked+A:purpuranrusked A_RUSKED ;
päivänvauged+A:päivänvauged A_OIGED ;
Expand Down
34 changes: 33 additions & 1 deletion src/fst/morphology/stems/nouns_newwords.lexc
Original file line number Diff line number Diff line change
Expand Up @@ -297,4 +297,36 @@ nalog+N:nalog N_MARJ ;
naižjumal+N:naižjumal N_MARJ ;
noid+N:noid N_POIG ;
orj+N:orj N_0/an/ad/aha/id ;
oza+N:oz N_MUNA ;
oza+N:oz N_MUNA ;
semen+N:semen N_samal ; ! NOTE(review): continuation is spelled N_samal, but the affix lexicon shown in nouns.lexc is N_SAMAL; lexc names are case-sensitive — confirm N_samal exists
smokvanpu+N:smokvanpu N_PU ;
soba+N:sob N_MUNA ;
sorm+N:sorm N_SARV ;
soton+N:soton N_POIG ;
spasib+N:spasib N_JAUH ;
starin+N:starin N_MARJ ;
sud+N:sud N_POIG ;
suim+N:suim N_POIG ;
suimpertʼ+N:suimpert N_PERT1 ;
süvüzʼ+N:süvüd N_NORUZ1 ;
taba+N:tab N_SANA ;
rahvazkogo+N:rahvazkogo N_PU ;
razbainik+N:razbainik N_MARJ ;
ridanik+N:ridanik N_MARJ ;
kalanik+N:kalanik N_MARJ ;
praznik+N:praznik N_MARJ ;
jälʼgnik+N:jälʼgnik N_MARJ ;
kesknik+N:kesknik N_MARJ ;
matknik+N:matknik N_MARJ ;
merimatknik+N:merimatknik N_MARJ ;
nahknik+N:nahknik N_MARJ ;
nužnik+N:nužnik N_MARJ ;
nägudez+N:nägudez N_ALUZ ;
padanik+N:padanik N_MARJ ;
pagenik+N:pagenik N_MARJ ;
pagan+N:pagan N_MARJ ;
torasebranik+N:torasebranik N_MARJ ;
türmnik+N:türmnik N_MARJ ;
verijälʼgnik+N:verijälʼgnik N_MARJ ;
vozʼpraznik+N:vozʼpraznik N_MARJ ;
käskištonopendai+N:käskištonopenda N_KACUI ;
5 changes: 4 additions & 1 deletion src/fst/morphology/stems/numerals.lexc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ LEXICON Numerals
+Use/Circ: ISOLATED-NUMEXP ; ! for ½ etc. !

! Cardinal numeral stems; each entry is lemma+Num:stem followed by the
! inflection lexicon that supplies its endings.
LEXICON NUM_CARD_ALL
yksi+Num:yksi # ;  ! goes straight to # with no inflection lexicon. NOTE(review): confirm this entry should stay alongside üksʼ
kahesa+Num:kahesa NUM_UEHESA ;  ! whole word is the stem
kaksʼ+Num:ka NUM_KAKS1 ;  ! short stem; NUM_KAKS1 adds ksʼ / hte-
ühesa+Num:ühesa NUM_UEHESA ;  ! whole word is the stem
üksʼ+Num:ü NUM_YKS1 ;  ! short stem; NUM_YKS1 adds ksʼ / hte-
seičeme+Num:seičeme NUM_SEICHEME ;

LEXICON NUM_SEICHEME !seičeme:seičem
Expand Down
2 changes: 2 additions & 0 deletions src/fst/morphology/stems/verbs_newwords.lexc
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,8 @@ vinkta+V:ving V_kta/gub/gui/ggaha ;
činkta+V:čing V_kta/gub/gui/ggaha ;
štarkta+V:štarg V_kta/gub/gui/ggaha ;
!#V_kuda/ub/ui/Q ;_haukkuda+V:hauk V_kuda/ub/ui/Q ;
seišta+V:seiž V_1Syl_DVta/ub/i/DVkaha ;
sülʼkta+V:sülʼg V_1Syl_DVta/eb/i/ggaha ;
börišta+V:böriž V_2Syl_DVta/eb/i/DVkaha ;
hogišta+V:hogiž V_2Syl_DVta/eb/i/DVkaha ;
jurišta+V:juriž V_2Syl_DVta/eb/i/DVkaha ;
Expand Down

0 comments on commit 7f8966e

Please sign in to comment.