From ccac6ceea98a1bcc7c06e4c6e010159f850f32cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B8=B8=E9=9B=81?= <zhifu.gzf@alibaba-inc.com>
Date: Sat, 13 Jan 2024 22:35:20 +0800
Subject: [PATCH] fix punc model

---
 funasr/datasets/preprocessor.py | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/funasr/datasets/preprocessor.py b/funasr/datasets/preprocessor.py
index 966cc9449..f3b7d43ee 100644
--- a/funasr/datasets/preprocessor.py
+++ b/funasr/datasets/preprocessor.py
@@ -664,26 +664,6 @@ def __init__(
         if self.seg_jieba:
             jieba.load_userdict(seg_dict_file)
 
-    @classmethod
-    def split_words(cls, text: str):
-        words = []
-        segs = text.split()
-        for seg in segs:
-            # There is no space in seg.
-            current_word = ""
-            for c in seg:
-                if len(c.encode()) == 1:
-                    # This is an ASCII char.
-                    current_word += c
-                else:
-                    # This is a Chinese char.
-                    if len(current_word) > 0:
-                        words.append(current_word)
-                        current_word = ""
-                    words.append(c)
-            if len(current_word) > 0:
-                words.append(current_word)
-        return words
 
     @classmethod
     def isEnglish(cls, text:str):