eval.py
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
import numpy as np
import argparse


# Given gold and prediction dicts (review id -> {"labels": [...], "domain": ..., "id": ...}),
# compute the macro F1 over all labels pooled across reviews.
def evaluate(gold, pred):
    rids = list(gold.keys())
    return f1_score([l for r in rids for l in gold[r]["labels"]],
                    [l for r in rids for l in pred[r]["labels"]],
                    average="macro")


# Compute the confusion matrix over all labels pooled across reviews.
def confusion(gold, pred):
    rids = list(gold.keys())
    return confusion_matrix([l for r in rids for l in gold[r]["labels"]],
                            [l for r in rids for l in pred[r]["labels"]])


# Compute the macro F1 for each domain separately, and the mean across domains.
def eval_across_domains(gold, pred):
    domains = set(g["domain"] for g in gold.values())
    rid_to_domain = {rid: g["domain"] for rid, g in gold.items()}
    per_domain = {d: evaluate(dict(filter(lambda x: x[1]["domain"] == d, gold.items())),
                              dict(filter(lambda x: rid_to_domain[x[1]["id"]] == d, pred.items())))
                  for d in domains}
    return per_domain, np.mean(list(per_domain.values()))
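
The file imports argparse, but no CLI entry point appears in the lines shown. A minimal usage sketch is appended below; the --gold/--pred flags and the JSON layout (review id mapped to an object with "labels", "domain", and "id" fields) are assumptions inferred from how evaluate() and eval_across_domains() index their inputs, not part of the original file.

# Hypothetical CLI wrapper (not in the original file): flags and JSON schema are assumptions.
import json

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--gold", required=True, help="path to gold-label JSON file")
    parser.add_argument("--pred", required=True, help="path to prediction JSON file")
    args = parser.parse_args()

    # Load both files as dicts keyed by review id.
    with open(args.gold) as f:
        gold = json.load(f)
    with open(args.pred) as f:
        pred = json.load(f)

    per_domain, mean_f1 = eval_across_domains(gold, pred)
    print("macro F1 over all reviews:", evaluate(gold, pred))
    for domain, score in sorted(per_domain.items()):
        print(f"  {domain}: {score:.4f}")
    print("mean macro F1 across domains:", mean_f1)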