From ccac6ceea98a1bcc7c06e4c6e010159f850f32cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B8=B8=E9=9B=81?= Date: Sat, 13 Jan 2024 22:35:20 +0800 Subject: [PATCH] fix punc model --- funasr/datasets/preprocessor.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/funasr/datasets/preprocessor.py b/funasr/datasets/preprocessor.py index 966cc9449..f3b7d43ee 100644 --- a/funasr/datasets/preprocessor.py +++ b/funasr/datasets/preprocessor.py @@ -664,26 +664,6 @@ def __init__( if self.seg_jieba: jieba.load_userdict(seg_dict_file) - @classmethod - def split_words(cls, text: str): - words = [] - segs = text.split() - for seg in segs: - # There is no space in seg. - current_word = "" - for c in seg: - if len(c.encode()) == 1: - # This is an ASCII char. - current_word += c - else: - # This is a Chinese char. - if len(current_word) > 0: - words.append(current_word) - current_word = "" - words.append(c) - if len(current_word) > 0: - words.append(current_word) - return words @classmethod def isEnglish(cls, text:str):