Skip to content

Commit

Permalink
Bug fix for punctuation (punc) and UMAP
Browse files (browse the repository at this point in the history)
  • Loading branch information
R1ckShi committed Jan 23, 2024
1 parent 2c3183b commit ae4dcee
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 3 deletions.
2 changes: 2 additions & 0 deletions funasr/models/campplus/cluster_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def __init__(self,
self.metric = metric

def __call__(self, X):
from umap.umap_ import UMAP
umap_X = umap.UMAP(
n_neighbors=self.n_neighbors,
min_dist=0.0,
Expand Down Expand Up @@ -156,6 +157,7 @@ def forward(self, X, **params):
if X.shape[0] < 20:
return np.zeros(X.shape[0], dtype='int')
if X.shape[0] < 2048 or k is not None:
# unexpected corner case
labels = self.spectral_cluster(X, k)
else:
labels = self.umap_hdbscan_cluster(X)
Expand Down
4 changes: 2 additions & 2 deletions funasr/models/ct_transformer/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,17 +336,17 @@ def inference(self,
elif new_mini_sentence[-1] != "。" and new_mini_sentence[-1] != "?" and len(new_mini_sentence[-1].encode())!=1:
new_mini_sentence_out = new_mini_sentence + "。"
new_mini_sentence_punc_out = new_mini_sentence_punc[:-1] + [self.sentence_end_id]
if len(punctuations): punctuations[-1] = 2
elif new_mini_sentence[-1] != "." and new_mini_sentence[-1] != "?" and len(new_mini_sentence[-1].encode())==1:
new_mini_sentence_out = new_mini_sentence + "."
new_mini_sentence_punc_out = new_mini_sentence_punc[:-1] + [self.sentence_end_id]

if len(punctuations): punctuations[-1] = 2
# keep a punctuations array for punc segment
if punc_array is None:
punc_array = punctuations
else:
punc_array = torch.cat([punc_array, punctuations], dim=0)
result_i = {"key": key[0], "text": new_mini_sentence_out, "punc_array": punc_array}
results.append(result_i)

return results, meta_data

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
# "protobuf",
"tqdm",
"hdbscan",
"umap",
"umap_learn",
"jaconv",
"hydra-core>=1.3.2",
],
Expand Down

0 comments on commit ae4dcee

Please sign in to comment.