ls_bart_summary.py
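"""Compare LightSeq and Hugging Face inference speed for BART generation.

The script loads a BART checkpoint into LightSeq's inference engine (and,
optionally, into transformers), generates from a few sample sentences, and
prints each backend's outputs along with wall-clock generation time.
"""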
import time
import argparse

import torch
import lightseq.inference as lsi
from transformers import BartTokenizer, BartForConditionalGeneration
def ls_bart(model, inputs):
    """Run LightSeq inference and return (generated ids, elapsed seconds)."""
    torch.cuda.synchronize()
    start_time = time.perf_counter()
    # Other sampling configurations that were tried:
    #   model.infer(inputs)
    #   model.infer(inputs, sampling_method="topk_greedy", topk=8, topp=0.9, length_penalty=1.3)
    generated_ids = model.infer(
        inputs, sampling_method="topk", topk=8, length_penalty=0.9, multiple_output=True
    )
    torch.cuda.synchronize()
    end_time = time.perf_counter()
    return generated_ids, end_time - start_time
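# Note (an assumption inferred from the indexing in ls_generate below): with
# multiple_output=True, infer returns a tuple whose first element holds the
# generated ids, indexed roughly as [batch][output][token]; ls_generate keeps
# only the first output for each input.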
def hf_bart(model, inputs):
    """Run Hugging Face generation and return (generated ids, elapsed seconds)."""
    torch.cuda.synchronize()
    start_time = time.perf_counter()
    generated_ids = model.generate(
        inputs.to("cuda:0"), max_length=128, do_sample=True,
        top_k=8, repetition_penalty=1.3, num_return_sequences=10,
    )
    torch.cuda.synchronize()
    end_time = time.perf_counter()
    return generated_ids, end_time - start_time
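# Caveat: the two backends are not configured identically. LightSeq samples
# with length_penalty=0.9 and one kept output per input, while Hugging Face
# uses repetition_penalty=1.3 and num_return_sequences=10; the timings are
# still indicative, but the outputs are not expected to match.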
def ls_generate(model, tokenizer, inputs_id):
    print("=========lightseq=========")
    print("lightseq generating...")
    ls_res_ids, ls_time = ls_bart(model, inputs_id)
    # Keep only the first returned sequence for each input.
    ls_res_ids = [ids[0] for ids in ls_res_ids[0]]
    ls_res = tokenizer.batch_decode(ls_res_ids, skip_special_tokens=True)
    print(f"lightseq time: {ls_time}s")
    print("lightseq results:")
    for sent in ls_res:
        print(sent)
def hf_generate(model, tokenizer, inputs_id):
    print("=========huggingface=========")
    print("huggingface generating...")
    hf_res_ids, hf_time = hf_bart(model, inputs_id)
    hf_res = tokenizer.batch_decode(hf_res_ids, skip_special_tokens=True)
    print(f"huggingface time: {hf_time}s")
    print("huggingface results:")
    for sent in hf_res:
        print(sent)
def warmup(tokenizer, ls_model, hf_model, sentences):
    inputs = tokenizer(sentences, return_tensors="pt", padding=True)
    inputs_id = inputs["input_ids"]
    ls_generate(ls_model, tokenizer, inputs_id)
    hf_generate(hf_model, tokenizer, inputs_id)
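# The first CUDA calls absorb one-off costs (context creation, kernel loading,
# memory-pool growth), so running a warmup pass keeps those costs out of the
# timings printed by the loop in main().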
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--user_input", action="store_true")
    args = parser.parse_args()

    print("initializing bart tokenizer...")
    # Other checkpoints that were tried (the Chinese one needs BertTokenizer):
    #   "uer/bart-base-chinese-cluecorpussmall"
    #   "/workspace/lightseq/examples/inference/python/test/checkpoint-341000"
    model_name = "facebook/bart-large-cnn"
    tokenizer = BartTokenizer.from_pretrained(model_name)

    print("creating lightseq model...")
    # The second argument is the maximum batch size; change the weight file to
    # "lightseq_bart_large.hdf5" for the large model.
    ls_model = lsi.Transformer("lightseq_bart_base.hdf5", 16)

    print("creating huggingface model...")
    # The Hugging Face baseline is currently disabled; uncomment to compare:
    # hf_model = BartForConditionalGeneration.from_pretrained(
    #     "/workspace/lightseq/examples/inference/python/test/checkpoint-341000"
    # )
    # hf_model.to("cuda:0")

    # Mask-filling probes (overridden by the summarization input below):
    sentences = [
        "I love that girl, but <mask> does not <mask> me.",
        "She is so <mask> that I can not help glance at <mask>.",
        "Nothing's gonna <mask> my love for you.",
        "Drop everything now. Meet me in the pouring <mask>. Kiss me on the sidewalk.",
    ]
    # Chinese test inputs for the uer checkpoint (translated): "Patient: the
    # middle of the right side of my back, the area opposite the chest, has
    # been aching for a week with no relief at all; it hurts a lot." and
    # "Patient: my eyes hurt."
    sentences = ["My friends are cool but they eat too many carbs."] * 3

    print("====================START warmup====================")
    # warmup(tokenizer, ls_model, hf_model, sentences)
    print("====================END warmup====================")

    while True:
        if args.user_input:
            sentences = [input("input the masked sentence:\n")]
        print("tokenizing the sentences...")
        inputs = tokenizer(sentences, return_tensors="pt", padding=True)
        inputs_id = inputs["input_ids"]
        for _ in range(3):
            ls_generate(ls_model, tokenizer, inputs_id)
        # hf_generate(hf_model, tokenizer, inputs_id)
        if not args.user_input:
            break


if __name__ == "__main__":
    main()
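# Usage (a sketch; the export-script path is an assumption about the LightSeq
# checkout). The weight file "lightseq_bart_base.hdf5" must be produced first,
# e.g. with LightSeq's hf_bart_export.py under
# examples/inference/python/export/huggingface/:
#
#   python ls_bart_summary.py               # time LightSeq on the built-in inputs
#   python ls_bart_summary.py --user_input  # type input sentences interactively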