Skip to content

Commit

Permalink
fix:standardize_lang
Browse files Browse the repository at this point in the history
  • Loading branch information
JarbasAl committed Oct 16, 2024
1 parent 8b46ed8 commit 0e7a2db
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 12 deletions.
2 changes: 2 additions & 0 deletions ovos_padatious/intent_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import inspect
import json
import os
import time

from ovos_padatious import padaos
import sys
Expand Down Expand Up @@ -249,6 +250,7 @@ def train(self, debug=True, force=False, single_thread=False, timeout=20):
timeout=timeout
), daemon=True)
self.train_thread.start()
time.sleep(0.5)
self.train_thread.join(timeout)

self.must_train = False
Expand Down
37 changes: 26 additions & 11 deletions ovos_padatious/opm.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from ovos_utils.log import LOG
from ovos_utils.xdg_utils import xdg_data_home
from ovos_plugin_manager.templates.pipeline import PipelinePlugin, IntentMatch

from langcodes import closest_match

class PadatiousMatcher:
"""Matcher class to avoid redundancy in padatious intent matching."""
Expand Down Expand Up @@ -249,17 +249,32 @@ def calc_intent(self, utterances: List[str], lang: str = None,
return None

lang = lang or self.lang
lang = standardize_lang_tag(lang)

lang = self._get_closest_lang(lang)
if lang is None: # no intents registered for this lang
return None

sess = SessionManager.get(message)
# TODO - allow close langs, match dialects
if lang in self.containers:
intent_container = self.containers.get(lang)
intents = [_calc_padatious_intent(utt, intent_container, sess)
for utt in utterances]
intents = [i for i in intents if i is not None]
# select best
if intents:
return max(intents, key=lambda k: k.conf)

intent_container = self.containers.get(lang)
intents = [_calc_padatious_intent(utt, intent_container, sess)
for utt in utterances]
intents = [i for i in intents if i is not None]
# select best
if intents:
return max(intents, key=lambda k: k.conf)

def _get_closest_lang(self, lang: str) -> Optional[str]:
    """Pick the registered language that is closest to ``lang``.

    The candidates are the keys of ``self.containers``. ``lang`` is first
    passed through ``standardize_lang_tag`` and then compared against the
    candidates with ``langcodes.closest_match``, which yields the best
    candidate together with its distance from the requested tag.

    Args:
        lang: the requested language tag.

    Returns:
        The best matching key of ``self.containers`` when its distance is
        below 10, otherwise ``None`` (also ``None`` when no containers
        are registered at all).
    """
    # Nothing registered -> nothing can match.
    if not self.containers:
        return None

    wanted = standardize_lang_tag(lang)
    registered = list(self.containers.keys())
    best, distance = closest_match(wanted, registered)

    # Distance scale, see
    # https://langcodes-hickford.readthedocs.io/en/sphinx/index.html#distance-values
    #   0      -> same language, possibly after filling in values and
    #             normalizing
    #   1 - 3  -> minor regional difference
    #   4 - 10 -> significant but unproblematic regional difference
    # Only distances strictly below 10 are accepted.
    return best if distance < 10 else None

def shutdown(self):
self.bus.remove('padatious:register_intent', self.register_intent)
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
fann2>=1.0.7, < 1.1.0
xxhash
ovos-plugin-manager>=0.0.26
ovos-workshop>=0.1.7,<2.0.0
ovos-workshop>=0.1.7,<2.0.0
langcodes

0 comments on commit 0e7a2db

Please sign in to comment.