-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompress_normalise_jp_test.py
executable file
·62 lines (50 loc) · 2.23 KB
/
compress_normalise_jp_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env python3
import unittest,imp,sys,os
from pdb import set_trace
import compress_normalise_jp
import mecabtools
# Reload the modules under test right after importing them -- presumably so
# that edits are picked up when this file is re-run inside a long-lived
# interpreter session.
# importlib.reload is the supported replacement for imp.reload: the imp module
# is deprecated and no longer exists on Python 3.12+.
# NOTE(review): the top-of-file `import imp` is now unused and should be
# removed, otherwise the file still fails to import on Python 3.12+.
import importlib
importlib.reload(compress_normalise_jp)
importlib.reload(mecabtools)
# (raw sentence, expected output line) pairs.  The first element is written to
# the input corpus; the second is compared with the orth string of the
# corresponding sentence in the processed output.
TestPairs = [
    # reru and seru
    (
        'なにが彼をそうさせるのか考えられない',
        '何 が 彼 を そう s a せ る の か 考え られ な い',
    ),
    # unique rendering
    (
        'そのキュウリがからくなる',
        'その 胡瓜 が 辛 く なr u',
    ),
    # exemplar
    (
        'そういわれても',
        'そう 言w a れ て も',
    ),
    # kana rendering
    (
        'サッパリとしたリンゴ',
        'さっぱり と s i た りんご',
    ),
    # suffix-kana change
    (
        'サボったらどうなるかな',
        'さぼr t たら どう なr u かな',
    ),
    # elongation
    # NOTE(review): unlike the other cases, this expected line has no spaces
    # between tokens -- confirm that this is intended.
    (
        'どーなってもいーや',
        'どうなってもいいや',
    ),
    # orthographic variant (切り込み -> 切込み)
    (
        '深い切り込みを入れる',
        '深 い 切込み を 入れ る',
    ),
]
class TestCompressNormalise(unittest.TestCase):
    """End-to-end check of ``compress_normalise_jp.main0`` against ``TestPairs``.

    The raw sentences are written to a corpus file, ``main0`` is run on that
    file, and the orth string of every sentence read back from the
    ``.compressed.normed.mecab`` file is compared with the expected line.
    """

    def setUp(self):
        """Write the raw test sentences to disk and record the resource paths."""
        # Fixed data locations; these directories must already exist.
        ProcDataDir = '/links/processedData/mecabStdJp'
        RawDataDir = '/links/rawData/mecabStdJp'
        self.testpairs = TestPairs
        self.explines = [Pair[1] for Pair in TestPairs]
        self.testorgsents = [Pair[0] for Pair in TestPairs]
        self.testfp = os.path.join(ProcDataDir, 'corpora/compress_normalise_test.txt')
        # One sentence per line, newline-terminated.
        # NOTE(review): the file is written with the platform default
        # encoding -- confirm it matches what main0 reads.
        with open(self.testfp, 'tw') as FSw:
            FSw.write('\n'.join(self.testorgsents) + '\n')
        self.dicloc = os.path.join(RawDataDir, 'dics')
        self.exemplarfp = os.path.join(self.dicloc, 'exemplars.txt')

    def test_compress_normalise(self):
        """Run ``main0`` on the corpus and compare each output line with the expectation."""
        # No debugger breakpoint here: a set_trace() call would halt any
        # non-interactive test run.
        compress_normalise_jp.main0(
            self.testfp,
            self.dicloc,
            ExemplarFP=self.exemplarfp,
            Debug=1,
        )
        # The result is expected next to the input, with the input's
        # extension replaced by '.compressed.normed.mecab'.
        OutFP = os.path.splitext(self.testfp)[0] + '.compressed.normed.mecab'
        # assertTrue rather than a bare assert, which is stripped under -O.
        self.assertTrue(os.path.isfile(OutFP), 'expected output file not found: ' + OutFP)
        ResultNewLines = [
            Sent.stringify_orths()
            for Sent in mecabtools.mecabfile2mecabsents(OutFP)
        ]
        # Show the complete diff when the lists differ.
        self.maxDiff = None
        self.assertEqual(self.explines, ResultNewLines)
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()