-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpart2.py
56 lines (44 loc) · 1.62 KB
/
part2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from pathlib import Path
from sharedFunctions import estEmissions
def predictSentiments(emissions, testfile, outputfile="dev.p2.out"):
    """
    Predicts sentiments using argmax(emission)
    Each non-blank input line is treated as one word: it is stripped,
    lower-cased, and written back as "<word> <tag>".
    Blank (or whitespace-only) lines are written as empty lines.
    Words with no positive emission under any tag get the tag whose
    "#UNK#" emission is highest ("O" if no tag has a positive one).
    If no outputfile given, saves labelled file as dev.p2.out
    @param emissions: output from estEmissions function; used here as a
                      mapping {tag: {word: emission probability}}
    @param testfile: input file with unlabelled text
    @param outputfile: name of file to save the output of labelled text
    """
    # find best #UNK# for later use
    unkTag = "O"
    unkP = 0
    for tag, wordProbs in emissions.items():
        # .get: a tag without an "#UNK#" entry simply cannot win
        p = wordProbs.get("#UNK#", 0)
        if p > unkP:
            # remember the running maximum, otherwise the last tag with a
            # positive probability would win instead of the best one
            unkP = p
            unkTag = tag

    # NOTE(review): both files use the platform default encoding; confirm
    # this matches how estEmissions reads the training data.
    with open(testfile) as f, open(outputfile, "w") as out:
        for line in f:
            word = line.strip().lower()
            if not word:
                # sentence separator: keep it as an empty line
                out.write("\n")
                continue

            # find most likely tag for word
            bestP = 0
            bestTag = ""
            for tag, wordProbs in emissions.items():
                p = wordProbs.get(word, 0)
                if p > bestP:
                    bestP = p
                    bestTag = tag

            # word never seen with a positive emission: use the #UNK# tag
            if bestTag == "":
                bestTag = unkTag
            out.write("{} {}\n".format(word, bestTag))
# main
# For every dataset folder: estimate emissions from its "train" file, label
# its "dev.in" file and write the predictions next to it as "dev.p2.out".
datasets = ["EN", "FR", "CN", "SG"]
for ds in datasets:
    datafolder = Path(ds)
    trainFile, testFile, outputFile = (
        datafolder / name for name in ("train", "dev.in", "dev.p2.out")
    )
    emissions = estEmissions(trainFile)
    predictSentiments(emissions, testFile, outputFile)
    print(f"Output: {outputFile}")
print("Done!")