Skip to content

Commit

Permalink
refactored to langchain
Browse files Browse the repository at this point in the history
  • Loading branch information
Ashad001 committed Jul 27, 2024
1 parent 3dea55b commit f4d7f2b
Show file tree
Hide file tree
Showing 2 changed files with 181 additions and 0 deletions.
120 changes: 120 additions & 0 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
aiohttp==3.9.5
aiosignal==1.3.1
annotated-types==0.7.0
anyio==4.4.0
asgiref==3.8.1
attrs==23.2.0
backoff==2.2.1
bcrypt==4.2.0
beautifulsoup4==4.12.3
bs4==0.0.2
build==1.2.1
cachetools==5.4.0
certifi==2024.7.4
charset-normalizer==3.3.2
chroma-hnswlib==0.7.6
chromadb==0.5.5
click==8.1.7
colorama==0.4.6
coloredlogs==15.0.1
dataclasses-json==0.6.7
Deprecated==1.2.14
distro==1.9.0
dnspython==2.6.1
email_validator==2.2.0
fastapi==0.111.1
fastapi-cli==0.0.4
filelock==3.15.4
flatbuffers==24.3.25
frozenlist==1.4.1
fsspec==2024.6.1
google-auth==2.32.0
googleapis-common-protos==1.63.2
greenlet==3.0.3
groq==0.9.0
grpcio==1.65.1
h11==0.14.0
httpcore==1.0.5
httptools==0.6.1
httpx==0.27.0
huggingface-hub==0.24.2
humanfriendly==10.0
idna==3.7
importlib_metadata==8.0.0
importlib_resources==6.4.0
Jinja2==3.1.4
jsonpatch==1.33
jsonpointer==3.0.0
kubernetes==30.1.0
langchain==0.2.11
langchain-chroma==0.1.2
langchain-community==0.2.10
langchain-core==0.2.24
langchain-groq==0.1.6
langchain-text-splitters==0.2.2
langsmith==0.1.93
markdown-it-py==3.0.0
MarkupSafe==2.1.5
marshmallow==3.21.3
mdurl==0.1.2
mmh3==4.1.0
monotonic==1.6
mpmath==1.3.0
multidict==6.0.5
mypy-extensions==1.0.0
numpy==1.26.4
oauthlib==3.2.2
onnxruntime==1.18.1
opentelemetry-api==1.26.0
opentelemetry-exporter-otlp-proto-common==1.26.0
opentelemetry-exporter-otlp-proto-grpc==1.26.0
opentelemetry-instrumentation==0.47b0
opentelemetry-instrumentation-asgi==0.47b0
opentelemetry-instrumentation-fastapi==0.47b0
opentelemetry-proto==1.26.0
opentelemetry-sdk==1.26.0
opentelemetry-semantic-conventions==0.47b0
opentelemetry-util-http==0.47b0
orjson==3.10.6
overrides==7.7.0
packaging==24.1
posthog==3.5.0
protobuf==4.25.4
pyasn1==0.6.0
pyasn1_modules==0.4.0
pydantic==2.8.2
pydantic_core==2.20.1
Pygments==2.18.0
PyPika==0.48.9
pyproject_hooks==1.1.0
pyreadline3==3.4.1
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
python-multipart==0.0.9
PyYAML==6.0.1
requests==2.32.3
requests-oauthlib==2.0.0
rich==13.7.1
rsa==4.9
setuptools==71.1.0
shellingham==1.5.4
six==1.16.0
sniffio==1.3.1
soupsieve==2.5
SQLAlchemy==2.0.31
starlette==0.37.2
sympy==1.13.1
tenacity==8.5.0
tokenizers==0.19.1
tqdm==4.66.4
typer==0.12.3
typing-inspect==0.9.0
typing_extensions==4.12.2
urllib3==2.2.2
uvicorn==0.30.3
watchfiles==0.22.0
websocket-client==1.8.0
websockets==12.0
wrapt==1.16.0
yarl==1.9.4
zipp==3.19.2
61 changes: 61 additions & 0 deletions backend/src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import os
import bs4
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import DirectoryLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
# from langchain_openai import OpenAIEmbeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}

embeddings = HuggingFaceEmbeddings()

load_dotenv()
os.environ["LANGCHAIN_TRACING_V2"] = "true"


llm = ChatGroq(model="llama3-8b-8192")

docs = DirectoryLoader(
path = "./data",
silent_errors=True,
show_progress=True,
use_multithreading=True,
).load_documents()

if os.path.exists("./chroma_db"):
vectorstore = Chroma.from_directory("./chroma_db")
else:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(
documents=splits,
embedding=embeddings,
persist_directory="./chroma_db",
)

retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")


def format_docs(docs):
return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
{"context": retriever | format_docs, "question": RunnablePassthrough()}
| prompt
| llm
| StrOutputParser()
)

response = rag_chain.invoke("What is ultimate RAG?")

print(response)

0 comments on commit f4d7f2b

Please sign in to comment.