Skip to content

Commit

Permalink
fix: Fix dataset, script
Browse files Browse the repository at this point in the history
Signed-off-by: Diwank Singh Tomer <[email protected]>
  • Loading branch information
creatorrr committed Jan 30, 2021
1 parent 07e975e commit e3eb463
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 53 deletions.
40 changes: 1 addition & 39 deletions scripts/benchmark/ChatbotCorpus.json
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,6 @@
"templates": [
"how i can get from ENTITYGPE to ENTITYGPE",
"can you give me a connection from ENTITYGPE to ENTITYGPE?",
"depart in ENTITYGPE, i assume",
"when is the next ENTITYGPE leaving from ENTITYGPE?",
"when does the ENTITYNOUN come at ENTITYGPE",
"when is the ENTITYPRODUCT from ENTITYGPE?"
Expand Down Expand Up @@ -1522,7 +1521,6 @@
"match": "ENTITYNOUN in ENTITYGPE",
"templates": [
"ENTITYNOUN in ENTITYGPE",
"depart in ENTITYGPE, i assume",
"how do i get from ENTITYGPE to ENTITYGPE?",
"how can i get from ENTITYGPE to ENTITYGPE as ENTITYNOUN as possible?",
"can you give me a connection from ENTITYGPE to ENTITYGPE?",
Expand Down Expand Up @@ -2587,36 +2585,6 @@
"how can i get from ENTITYGPE to ENTITYGPE?"
]
},
{
"entities": [
{
"text": "garching",
"entityType": "StationStart",
"holmesIdentifier": "ENTITYGPE"
}
],
"training": false,
"inputs": [
"I assume you leave in garching.",
"I assume that you leave in garching.",
"I think you leave in garching.",
"I assume that they leave in garching.",
"I assume they leave in garching.",
"I assume that you depart in garching.",
"I assume you depart in garching.",
"I assume you leave in garching",
"depart in garching, i assume"
],
"match": "depart in ENTITYGPE, i assume",
"templates": [
"depart in ENTITYGPE, i assume",
"or depart from ENTITYGPE",
"is there a ENTITYPRODUCT from ENTITYGPE to ENTITYGPE?",
"can you find a ENTITYPRODUCT from ENTITYGPE to ENTITYGPE?",
"ENTITYGPE to ENTITYGPE",
"how do i get from ENTITYGPE to ENTITYGPE?"
]
},
{
"entities": [
{
Expand Down Expand Up @@ -3434,8 +3402,7 @@
"how can i get from ENTITYGPE to ENTITYGPE?",
"when does the ENTITYNOUN starts at ENTITYGPE?",
"when will the ENTITYNOUN depart from ENTITYGPE?",
"when does the ENTITYNOUN leaves at ENTITYGPE?",
"depart in ENTITYGPE, i assume"
"when does the ENTITYNOUN leaves at ENTITYGPE?"
]
},
{
Expand Down Expand Up @@ -3754,7 +3721,6 @@
"how can i get from ENTITYGPE to ENTITYGPE?",
"when does the ENTITYNOUN departs at ENTITYGPE?",
"when is the ENTITYPRODUCT leaving in ENTITYGPE",
"depart in ENTITYGPE, i assume",
"when is the ENTITYNOUN in ENTITYGPE?"
]
},
Expand Down Expand Up @@ -4238,7 +4204,6 @@
"templates": [
"how can i get from ENTITYGPE to ENTITYGPE?",
"how can i get to ENTITYGPE?",
"depart in ENTITYGPE, i assume",
"when does the ENTITYPRODUCT leaving in ENTITYGPE",
"when is the ENTITYNOUN in ENTITYGPE?",
"ENTITYNOUN from ENTITYGPE"
Expand Down Expand Up @@ -5188,7 +5153,6 @@
"match": "when is the ENTITYNOUN leaving from ENTITYGPE?",
"templates": [
"when is the ENTITYNOUN leaving from ENTITYGPE?",
"depart in ENTITYGPE, i assume",
"how to get from ENTITYGPE to ENTITYGPE",
"ENTITYGPE to ENTITYGPE",
"what's the ENTITYNOUN way from ENTITYGPE to ENTITYGPE?",
Expand Down Expand Up @@ -6156,7 +6120,6 @@
"from ENTITYGPE to ENTITYGPE",
"when is the ENTITYNOUN from ENTITYGPE to ENTITYGPE",
"when does the ENTITYNOUN starts at ENTITYGPE?",
"depart in ENTITYGPE, i assume",
"when the ENTITYNOUN in ENTITYGPE is leaving?",
"when does the ENTITYNOUN leaves at ENTITYGPE?"
]
Expand Down Expand Up @@ -6439,7 +6402,6 @@
"from ENTITYGPE to ENTITYGPE",
"take me from ENTITYGPE to ENTITYGPE",
"ENTITYNOUN from ENTITYGPE",
"depart in ENTITYGPE, i assume",
"when does the ENTITYNOUN departes from ENTITYGPE?"
]
}
Expand Down
37 changes: 23 additions & 14 deletions scripts/benchmark/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import holmes_extractor as holmes
import patches.holmes_extractor.semantics as semantics
import ChatbotCorpus
from textdistance import cosine


logger = logging.getLogger("holmes_extractor")
Expand Down Expand Up @@ -75,24 +76,31 @@ def match_phrase(self, search_phrases, phrase):
return self.manager.match_search_phrases_against(entry=phrase)


def check_result(results, all_matches):
def is_match(phrase1, phrase2, threshold=0.75):
    """Report whether two phrases are similar enough to count as a match.

    The score comes from ``cosine.normalized_similarity(phrase1, phrase2)``
    (``cosine`` is imported from ``textdistance``). The phrases match when
    that score is greater than or equal to ``threshold``.

    Args:
        phrase1: First phrase to compare.
        phrase2: Second phrase to compare.
        threshold: Minimum score (inclusive) required for a match.

    Returns:
        The result of comparing the similarity score against ``threshold``.
    """
    return cosine.normalized_similarity(phrase1, phrase2) >= threshold

def check_result(results, all_matches, threshold=0.75):
res = []
for result in results:
matches = result.get("word_matches", [])
if not matches:
continue

res.append(
any(
[
" ".join(
[
m["search_phrase_word"].lower()
for m in matches
]
)
== match.lower()
for match in all_matches
is_match(
" ".join(
[
m["search_phrase_word"].lower()
for m in matches
]
),
match.lower(),
threshold=threshold
) for match in all_matches
]
)
)
Expand Down Expand Up @@ -139,9 +147,9 @@ def create_search_phrases(model, phrases):
return result


def main(paraphrase: bool = True, large_model: bool = False, sentences: int = 0):
model = Model("tuner007/pegasus_paraphrase")
model.prepare()
def main(paraphrase: bool = True, large_model: bool = False, sentences: int = 0, threshold: float = 0.75):
model = paraphrase and Model("tuner007/pegasus_paraphrase")
paraphrase and model.prepare()

logger.info("-" * 120)
success, count, errors, non_empty_count = 0, 0, 0, 0
Expand Down Expand Up @@ -171,7 +179,8 @@ def main(paraphrase: bool = True, large_model: bool = False, sentences: int = 0)
errors += 1
logger.exception(e)
continue
if check_result(result, create_search_phrases(model, [sentence.match])):
search_phrases = create_search_phrases(model, [sentence.match]) if paraphrase else [sentence.match]
if check_result(result, search_phrases, threshold=threshold):
success += 1
break
count += 1
Expand Down
9 changes: 9 additions & 0 deletions scripts/benchmark/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
abydos==0.5.0
backcall==0.2.0
beautifulsoup4==4.9.3
beautifultable==1.0.1
blis==0.2.4
boto3==1.16.62
botocore==1.19.62
Expand All @@ -9,6 +11,9 @@ chardet==4.0.0
click==7.1.2
cymem==2.0.5
decorator==4.4.2
deprecation==2.1.0
en-core-web-lg==2.1.0
en-core-web-md==2.1.0
falcon==2.0.0
filelock==3.0.12
holmes-extractor==2.2.1
Expand All @@ -18,6 +23,7 @@ ipython==7.19.0
ipython-genutils==0.2.0
isodate==0.6.0
jedi==0.18.0
jellyfish==0.8.2
jmespath==0.10.0
joblib==1.0.0
json-sempai==0.4.0
Expand All @@ -40,6 +46,8 @@ ptyprocess==0.7.0
Pygments==2.7.4
pyparsing==2.4.7
python-dateutil==2.8.1
python-Levenshtein==0.12.1
pyxDamerauLevenshtein==1.6.1
rdflib==5.0.0
regex==2020.11.13
requests==2.25.1
Expand All @@ -53,6 +61,7 @@ sklearn==0.0
soupsieve==2.1
spacy==2.1.0
srsly==1.0.5
textdistance==4.2.1
thinc==7.0.8
threadpoolctl==2.1.0
tokenizers==0.9.4
Expand Down

0 comments on commit e3eb463

Please sign in to comment.