forked from shibing624/text2vec
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcomputing_embeddings_demo.py
50 lines (41 loc) · 1.74 KB
/
computing_embeddings_demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description:
This basic example loads a pre-trained model from the web and uses it to
generate sentence embeddings for a given list of sentences.
"""
import sys
sys.path.append('..')
from text2vec import SentenceModel, EncoderType
from text2vec import Word2Vec
def compute_emb(model):
# Embed a list of sentences
sentences = [
'卡',
'银行卡',
'如何更换花呗绑定银行卡',
'花呗更改绑定银行卡',
'This framework generates embeddings for each input sentence',
'Sentences are passed as a list of string.',
'The quick brown fox jumps over the lazy dog.'
]
sentence_embeddings = model.encode(sentences, show_progress_bar=True)
print(type(sentence_embeddings), sentence_embeddings.shape)
# The result is a list of sentence embeddings as numpy arrays
for sentence, embedding in zip(sentences, sentence_embeddings):
print("Sentence:", sentence)
print("Embedding shape:", embedding.shape)
print("Embedding head:", embedding[:10])
print()
if __name__ == "__main__":
# 中文句向量模型(CoSENT),中文语义匹配任务推荐,支持fine-tune继续训练
t2v_model = SentenceModel("shibing624/text2vec-base-chinese")
print(t2v_model)
compute_emb(t2v_model)
# 支持多语言的句向量模型(Sentence-BERT),英文语义匹配任务推荐,支持fine-tune继续训练
sbert_model = SentenceModel("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
compute_emb(sbert_model)
# 中文词向量模型(word2vec),中文字面匹配任务和冷启动适用
w2v_model = Word2Vec("w2v-light-tencent-chinese")
compute_emb(w2v_model)