Skip to content

Commit

Permalink
fix punc model
Browse files Browse the repository at this point in the history
  • Loading branch information
LauraGPT committed Jan 13, 2024
1 parent 49e8e9d commit ccac6ce
Showing 1 changed file with 0 additions and 20 deletions.
20 changes: 0 additions & 20 deletions funasr/datasets/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,26 +664,6 @@ def __init__(
if self.seg_jieba:
jieba.load_userdict(seg_dict_file)

@classmethod
def split_words(cls, text: str):
    """Split *text* into words, isolating every non-ASCII character.

    The text is first split on whitespace. Inside each whitespace-free
    segment, consecutive ASCII characters are grouped into a single word,
    while every non-ASCII character (e.g. a CJK ideograph) becomes a word
    of its own.

    Args:
        text: The input string to tokenize.

    Returns:
        A list of word strings in their original order. Empty or
        whitespace-only input yields an empty list.
    """
    words = []
    for seg in text.split():
        # `seg` contains no whitespace; collect the current ASCII run here.
        ascii_run = []
        for ch in seg:
            # Code points below 0x80 are exactly the characters that UTF-8
            # encodes in one byte. Comparing the code point avoids encoding
            # each character and cannot raise on lone surrogates.
            if ord(ch) < 0x80:
                ascii_run.append(ch)
                continue
            # A non-ASCII character ends any pending ASCII word and is
            # emitted as a standalone word.
            if ascii_run:
                words.append("".join(ascii_run))
                ascii_run = []
            words.append(ch)
        # Flush the trailing ASCII word of this segment, if any.
        if ascii_run:
            words.append("".join(ascii_run))
    return words

@classmethod
def isEnglish(cls, text:str):
Expand Down

0 comments on commit ccac6ce

Please sign in to comment.