forked from cltl/OpenDutchWordnet
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlemma.py
124 lines (91 loc) · 3.35 KB
/
lemma.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
from collections import defaultdict
class Lemma():
    '''
    methods to access and manipulate the resource, lemma based

    Every method iterates over self.les_get_generator(), which is not
    defined in this class; it is presumably supplied by the class that
    combines Lemma with the rest of the resource API (verify against
    the caller). Each yielded object must offer get_lemma(), get_pos()
    and get_sense_number().
    '''
    def __init__(self):
        pass

    def lemmas_generator(self, pos=None):
        '''
        return mapping of every lemma to its number of Le instances

        Despite the name, this builds and returns a complete dict,
        not a generator.

        @type  pos: str
        @param pos: noun | verb.
        Default is None, then no filtering is performed.

        @rtype: collections.defaultdict
        @return: mapping lemma -> number of Le instances with that lemma
        (only those whose part of speech equals pos, when pos is given)
        '''
        lemmas = defaultdict(int)
        for le_obj in self.les_get_generator():
            # a falsy pos (None, '') disables the part of speech filter
            if pos and le_obj.get_pos() != pos:
                continue
            lemmas[le_obj.get_lemma()] += 1
        return lemmas

    def lemma_get_generator(self, lemma, pos=None, ignore_case=False):
        '''
        return list of Le class instances for a lemma

        Despite the name, the result is a fully built list (callers
        such as lemma_num_senses take its len()).

        @type  lemma: str
        @param lemma: lemma

        @type  pos: str
        @param pos: noun | verb.
        Default is None, then no filtering is performed.

        @type  ignore_case: bool
        @param ignore_case: True | False (added by AN).
        Default is False. If truthy, lemmas are compared after
        lowercasing both sides; any falsy value keeps the exact,
        case sensitive comparison.

        @rtype: list
        @return: list of Le class instances, in iteration order
        '''
        # lowercase the query once instead of once per candidate
        target = lemma.lower() if ignore_case else lemma

        les = []
        for le_obj in self.les_get_generator():
            can_lemma = le_obj.get_lemma()
            if ignore_case:
                can_lemma = can_lemma.lower()
            if can_lemma != target:
                continue
            # a falsy pos (None, '') disables the part of speech filter
            if pos and le_obj.get_pos() != pos:
                continue
            les.append(le_obj)
        return les

    def lemma_num_senses(self, lemma, pos=None, ignore_case=False):
        '''
        return number of senses

        @type  lemma: str
        @param lemma: lemma

        @type  pos: str
        @param pos: noun | verb.
        Default is None, then no filtering is performed.

        @type  ignore_case: bool
        @param ignore_case: passed on to lemma_get_generator.
        Default is False (case sensitive).

        @rtype: int
        @return: number of senses
        '''
        return len(self.lemma_get_generator(lemma, pos, ignore_case))

    def lemma_highest_sense_number(self, lemma, pos=None, ignore_case=False):
        '''
        return highest sense number of le instances of lemma

        @type  lemma: str
        @param lemma: lemma

        @type  pos: str
        @param pos: noun | verb.
        Default is None, then no filtering is performed.

        @type  ignore_case: bool
        @param ignore_case: passed on to lemma_get_generator.
        Default is False (case sensitive).

        @rtype: int
        @return: highest sense number, 0 if no le instance matches

        @raise ValueError: if a sense number can not be converted by int()
        '''
        matches = self.lemma_get_generator(lemma, pos, ignore_case)
        # seed with 0 so that "no match" (and any value below 0)
        # yields 0 instead of max() failing on an empty sequence
        return max([0] + [int(le_obj.get_sense_number())
                          for le_obj in matches])