-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_unisent_multilingual.py
55 lines (37 loc) · 1.75 KB
/
test_unisent_multilingual.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
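# Setup notes (notebook-style install commands, kept for reference).
# The version specifier is quoted so the shell does not treat ">" as an
# output redirection.
#!pip uninstall --quiet --yes tensorflow
#!pip install --quiet tensorflow-gpu
#!pip install --quiet tensorflow-hub
#!pip install "tensorflow_text>=2.0.0rc0"
#!pip install -q pyyaml h5py
#pip install tensorflow tensorflow-hub "tensorflow_text>=2.0.0rc0" pyyaml h5py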
def measure_lines(measures):
    """Yield the tab-separated report lines for a dict of experiment results.

    Args:
        measures: mapping of metric name to a result object whose element 0
            holds the values for the SAME comparisons and whose element 1
            holds the values for the DIFF comparisons.

    Yields:
        The header line, then one newline-terminated line per value in the
        form ``metric<TAB>value<TAB>SAME|DIFF``. Metrics appear in the
        mapping's iteration order, SAME values before DIFF values.
    """
    yield "\t".join(["metric", "measure", "which_comparisons"]) + "\n"
    for metric, results in measures.items():
        for label, values in (("SAME", results[0]), ("DIFF", results[1])):
            for value in values:
                yield "{}\t{}\t{}\n".format(metric, value, label)


if __name__ == "__main__":
    # Heavy third-party imports happen only when the file is run as a script.
    import numpy as np
    import tensorflow as tf
    import tensorflow_hub as hub
    # NOTE(review): not referenced by name below; presumably imported for its
    # side effect of registering the text ops the multilingual encoder needs
    # (see the model's TF Hub page) -- confirm before removing.
    import tensorflow_text

    unisent_multilingual = hub.load(
        "https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/3"
    )

    # Explicit names instead of a star import; kept after the model load so
    # the execution order matches the original script.
    from test_textsim import (
        angdist,
        distance,
        load_data,
        run_experiment,
        score,
        sqrtdist,
    )

    def embed(sentence):
        """Encode one sentence with the loaded model; return a NumPy array."""
        return unisent_multilingual([sentence]).numpy()

    # (metric name, distance function, value passed as `inverse`)
    experiments = [
        ("multi-unisent-angdist", angdist, False),
        ("multi-unisent-cosine", distance.cosine, True),
        ("multi-unisent-sqrt", sqrtdist, True),
    ]

    measures = {}
    SAME, DIFF = load_data("./data/test_STS2017en-en.txt")
    print("Multilingual universal sentence encodings")
    for metric_name, dist_fn, inverse in experiments:
        results = run_experiment(SAME, DIFF, embed, dist_fn, inverse=inverse)
        measures[metric_name] = results
        print(score(results[0], results[1]))

    # The file is tab-separated despite the .csv extension; the name is kept
    # so anything that reads this path keeps working.
    with open("output_multilingual_unisent.csv", "w") as fh:
        fh.writelines(measure_lines(measures))