#!/usr/bin/env python
# coding: utf-8
# In[2]:
# One-time environment setup: install the libraries the notebook needs.
get_ipython().system('python -m pip install --upgrade pip')
get_ipython().system('pip install transformers')
get_ipython().system('pip install llama_index')
get_ipython().system('pip install accelerate')
get_ipython().system('pip install sentence_transformers')
get_ipython().system('pip install langchain')
# In[1]:
import logging
import sys, os

# Send llama_index's INFO-level logs (retrieval steps, LLM calls) to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

from llama_index import VectorStoreIndex, ServiceContext
from llama_index.llms import HuggingFaceLLM

# Keep HuggingFace model and dataset downloads on the large workspace volume.
os.environ['TRANSFORMERS_CACHE'] = '/workspace/cache/'
os.environ['HF_DATASETS_CACHE'] = '/workspace/cache/'
# In[2]:
from llama_index.prompts.prompts import SimpleInputPrompt

# NOTE: the <|SYSTEM|>/<|USER|>/<|ASSISTANT|> markers below follow the
# StableLM-style template used in the llama_index examples; Vicuna-derived
# models like Wizard-Vicuna typically expect a plain "USER: ... ASSISTANT:"
# format, so adjust these if responses look off.
system_prompt = """<|SYSTEM|># A chat between a curious user and an artificial intelligence assistant.
The assistant gives helpful, detailed, and polite answers to the user's questions.
"""

# This wraps the default prompts that are internal to llama-index.
query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")
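
# Optional sanity check (a sketch, not part of the original flow): render the
# wrapper to see the exact string the model receives. Assumes this llama_index
# version exposes Prompt.format; the query is a made-up example.
print(query_wrapper_prompt.format(query_str="What is retrieval-augmented generation?"))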
# In[3]:
import torch

llm = HuggingFaceLLM(
    # NOTE: LLaMA-1-based models such as Wizard-Vicuna-13B usually have a
    # 2048-token context; verify 4096 against the model config before relying on it.
    context_window=4096,
    max_new_tokens=256,
    # do_sample=False means greedy decoding, so temperature has no effect here
    # (recent transformers versions warn about this combination).
    generate_kwargs={"temperature": 0.1, "do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="ehartford/Wizard-Vicuna-13B-Uncensored",
    model_name="ehartford/Wizard-Vicuna-13B-Uncensored",
    device_map="auto",
    # NOTE: these stopping ids appear to come from the StableLM example in the
    # llama_index docs; a LLaMA tokenizer uses different special-token ids
    # (eos is 2), so check them against this model's tokenizer.
    stopping_ids=[50278, 50279, 50277, 1, 0],
    tokenizer_kwargs={"max_length": 4096},
    model_kwargs={"torch_dtype": torch.float16},
)

# Default service context; rebuilt below once the local embedding model is set up.
service_context = ServiceContext.from_defaults(chunk_size=1024, llm=llm)
# In[4]:
# Baseline: ask the bare LLM (no retrieval) so the RAG answers below can be
# compared against the model's parametric knowledge.
resp = llm.complete("Summarize the short story 'Gray Denim' by Harl Vincent.")
print(resp)
# In[5]:
from pathlib import Path
from llama_index import download_loader, LangchainEmbedding
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

# Use a local sentence-embedding model for retrieval instead of the default
# OpenAI embeddings, so the whole pipeline runs on local hardware.
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(
        model_name="intfloat/e5-large-v2",
        model_kwargs={"device": "cuda"},
    )
)

# Rebuild the service context so indexing and querying use the local embeddings.
service_context = ServiceContext.from_defaults(chunk_size=1024, llm=llm, embed_model=embed_model)
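
# Optional check (a sketch): embed a test string to confirm the model loaded on
# the GPU. e5 models are trained with "query: "/"passage: " prefixes, so
# retrieval quality is better when they are present; e5-large-v2 vectors are
# 1024-dimensional.
vec = embed_model.get_text_embedding("query: test sentence")
print(len(vec))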
# In[7]:
# Fetch the community EPUB loader from LlamaHub and load the magazine issue.
EpubReader = download_loader("EpubReader")
loader = EpubReader()
documents = loader.load_data(file=Path("Super-Science-December-1930.epub"))
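
# Quick look at what the loader returned (a sketch; assumes the legacy
# llama_index Document class, where each EPUB section becomes a Document
# exposing a .text attribute).
print(len(documents))
print(documents[0].text[:200])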
# In[8]:
# Build the vector index (embeds every 1024-token chunk) and persist it to disk
# so the book does not have to be re-embedded on every run.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
index.storage_context.persist(persist_dir="./sfbook")
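
# Sketch of reloading the persisted index in a later session with the standard
# llama_index storage helpers (kept commented out since the index is already
# in memory here):
# from llama_index import StorageContext, load_index_from_storage
# storage_context = StorageContext.from_defaults(persist_dir="./sfbook")
# index = load_index_from_storage(storage_context, service_context=service_context)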
# In[9]:
# Query the index: the top chunks are retrieved by embedding similarity and
# inserted into the wrapped prompt for the LLM to answer over.
query_engine = index.as_query_engine()
response = query_engine.query("Summarize the short story 'Gray Denim' by Harl Vincent.")
# In[10]:
print(response)
# In[11]:
response = query_engine.query("Who are the Red Police in the short story 'Gray Denim' by Harl Vincent.")
print(response)
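
# To see which chunks the retriever actually used, inspect the response's
# source nodes (NodeWithScore objects in recent legacy llama_index versions);
# a short sketch:
for source_node in response.source_nodes:
    print(round(source_node.score or 0.0, 3), source_node.node.get_text()[:120])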
# In[ ]: