Pine_Process.py
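"""MedSathi retrieval pipeline.

Embeds medicine text with HuggingFace sentence embeddings, stores and queries
the chunks in a Pinecone index (filtered on the `med_intial` metadata key),
and answers user questions with Google's Gemini model.
"""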
import os
import random
import string

from dotenv import load_dotenv
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pinecone import Pinecone
import google.generativeai as genai

load_dotenv()

CHUNK_SIZE = 3000

# Shared clients: sentence-transformer embeddings, the Pinecone index, and Gemini.
HF_EMBEDDINGS = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
pine = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
index = pine.Index(os.environ.get("PINECONE_INDEX"))
genai.configure(api_key=os.environ.get("GENAI_API_KEY"))
def get_context_new(input_query, med_name, k=2):
    '''Embed the query and fetch the top-k matching chunks from Pinecone,
    filtered on the `med_intial` metadata key written by upsert_data.

    @param input_query = the user's question about a medicine
    @param med_name    = medicine initial/name used as the metadata filter
    @param k           = number of chunks to retrieve
    @returns the concatenated chunk text, or a fallback message if nothing matched
    '''
    input_embed = HF_EMBEDDINGS.embed_query(input_query)
    pinecone_resp = index.query(
        vector=input_embed,
        top_k=k,
        include_metadata=True,
        filter={"med_intial": med_name},
    )
    if not pinecone_resp['matches']:
        return "No matches found, check the metadata filter"

    context = ""
    for match in pinecone_resp['matches']:
        print("Score:", match["score"])
        context += match['metadata']['text']
    if context == "":
        context = "No context found, answer it yourself"
    return context
def get_llm_response(context: str, query: str) -> str:
    '''Ask Gemini to answer the query using only the retrieved context.

    @param context = text chunks returned by get_context_new
    @param query   = the user's question
    @returns the model's answer as plain text
    '''
    prompt = f"""You are MedSathi, a helpful pharmacist. Use the given context as your knowledge of the medicine and provide the requested information about it. Answer only from the context; do not add any other information on top of it.
    context: {context}
    This is the query you have to answer:
    query: {query}
    If the user does not provide a medicine name, or only makes small talk (e.g. "hi", "hello"), chat with them and briefly introduce yourself. You may only give information that is FDA approved.
    """
    model = genai.GenerativeModel('gemini-pro')
    response = model.generate_content(prompt)
    return response.text
def upsert_data(medname, text):
    '''Function to upsert data into Pinecone with the medicine name and text as metadata.

    @param medname = name/initial of the medicine the text belongs to
    @param text    = text extracted from the medicine's PDF files
    @returns None
    '''
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=20)
    text_chunks = text_splitter.split_text(text)
    print("Number of text chunks:", len(text_chunks))
    for chunk in text_chunks:
        metadata = {"med_intial": medname, "text": chunk}
        # Embed the chunk text
        vector = HF_EMBEDDINGS.embed_query(chunk)
        # Generate a random id for the vector
        _id = ''.join(random.choices(string.ascii_letters + string.digits, k=10))
        # Upsert the vector into the Pinecone index
        index.upsert(vectors=[{"id": _id, "values": vector, "metadata": metadata}])
        print("Vector upserted successfully")
if __name__ == "__main__":
    prompt = "Why do we use Tetanus, Diphtheria?"
    context = get_context_new(prompt, "A")
    # response = get_llm_response(context, prompt)
    print(context)
    # print(response)
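    # Hypothetical end-to-end sketch (the leaflet file name and the medicine
    # initial "A" are illustrative placeholders, not part of this repo):
    # with open("medicine_leaflet.txt") as f:
    #     upsert_data("A", f.read())
    # answer = get_llm_response(get_context_new(prompt, "A"), prompt)
    # print(answer)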