Skip to content

Commit

Permalink
feat: implement algorithm for better string matching (#124)
Browse files Browse the repository at this point in the history
Algorithm works as follows:
- Matches the full answer string against the guess
- Matches the answer string up to its first non-alphanumeric character against the guess
- Matches the first N words of the answer against the guess, where N is the number of words in the guess
  • Loading branch information
NiceAesth authored Dec 31, 2023
1 parent 7c29cd4 commit 49714eb
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 2 deletions.
3 changes: 1 addition & 2 deletions src/classes/osudle.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,9 @@ def check_guess(self, message: str) -> bool:
return (
message.channel is self.interaction.channel
and not message.author.bot
and humanizer.fuzzy_string_match(
and humanizer.song_title_match(
message.content,
self.current_beatmapset.title,
permit_low_match=False,
)
)

Expand Down
12 changes: 12 additions & 0 deletions src/common/humanizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from datetime import timedelta

from common.regex import alphanumeric_rx
from thefuzz import fuzz

FUZZY_MATCH_THRESHOLD = 80
Expand Down Expand Up @@ -87,3 +88,14 @@ def milliseconds_to_duration(position: float) -> str:
def fuzzy_string_match(str1: str, str2: str, permit_low_match: bool = False) -> bool:
    """Return whether two strings are similar enough to count as a match.

    Both strings are case-folded before being scored with ``fuzz.ratio``,
    so the comparison ignores case.

    Args:
        str1: First string to compare.
        str2: Second string to compare.
        permit_low_match: When true, the score is checked against
            ``FUZZY_MATCH_THRESHOLD_LOW`` instead of
            ``FUZZY_MATCH_THRESHOLD``.

    Returns:
        ``True`` if the similarity score is at least the selected threshold.
    """
    if permit_low_match:
        minimum_score = FUZZY_MATCH_THRESHOLD_LOW
    else:
        minimum_score = FUZZY_MATCH_THRESHOLD
    similarity = fuzz.ratio(str1.casefold(), str2.casefold())
    return similarity >= minimum_score


def song_title_match(guess: str, answer: str) -> bool:
    """Return whether ``guess`` is close enough to the song title ``answer``.

    The guess is compared with ``fuzzy_string_match`` against three forms of
    the answer and is accepted as soon as one of them matches:

    1. the full answer;
    2. the part of the answer that precedes the first character matched by
       ``alphanumeric_rx``;
    3. the first N whitespace-separated words of the answer, where N is the
       number of words in the guess.

    Args:
        guess: Text submitted by the player.
        answer: The title being guessed.

    Returns:
        ``True`` if any of the three comparisons matches. A guess that is
        empty or consists only of whitespace never matches.
    """
    # ``split()`` (no argument) ignores repeated/leading/trailing whitespace,
    # so the word count is not inflated by stray spaces.
    guess_words = guess.split()
    if not guess_words:
        # A blank guess must never win. The answer prefix below is empty
        # whenever the title starts with a character matched by
        # ``alphanumeric_rx``, and an empty-vs-empty comparison may score as
        # a perfect match depending on the fuzzy-matching backend.
        return False

    title_prefix = alphanumeric_rx.split(answer)[0]
    leading_words = " ".join(answer.split()[: len(guess_words)])

    # ``any`` over a generator keeps the original short-circuit order.
    return any(
        fuzzy_string_match(guess, candidate)
        for candidate in (answer, title_prefix, leading_words)
    )
1 change: 1 addition & 0 deletions src/common/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@
# Matches an http(s) link to a user profile on osu.ppy.sh or lazer.ppy.sh,
# in either the /u/<id> or /users/<id> form. The named groups capture the
# subdomain ("domain") and the numeric id ("userid").
user_link_rx = re.compile(
r"(https?)://(?P<domain>osu|lazer)\.ppy\.sh/u(sers)?/(?P<userid>[0-9]+)",
)
# Matches any single character other than an ASCII letter or digit, so
# whitespace and punctuation match as well.
alphanumeric_rx = re.compile(r"[^a-zA-Z0-9]")

0 comments on commit 49714eb

Please sign in to comment.