The-OpenROAD-Project · luarss · Dec 18, 2024 · Dec 8, 2024 · Dec 8, 2024 · Dec 8, 2024
diff --git a/backend/src/agents/retriever_graph.py b/backend/src/agents/retriever_graph.py
@@ -73,7 +73,7 @@ def __init__(
         llm_model: Union[ChatGoogleGenerativeAI, ChatVertexAI, ChatOllama],
         embeddings_config: dict[str, str],
         reranking_model_name: str,
-        inbuit_tool_calling: bool,
+        inbuilt_tool_calling: bool,
         use_cuda: bool = False,
     ):
         self.llm = llm_model
@@ -100,7 +100,7 @@ def __init__(
             "retrieve_errinfo",
             "retrieve_yosys_rtdocs",
         ]
-        self.inbuit_tool_calling = inbuit_tool_calling
+        self.inbuilt_tool_calling = inbuilt_tool_calling
 
         self.tool_descriptions = ""
         for tool in self.tools:
@@ -120,7 +120,7 @@ def agent(self, state: AgentState) -> dict[str, list[str]]:
         if self.llm is None:
             return {"tools": []}
 
-        if self.inbuit_tool_calling:
+        if self.inbuilt_tool_calling:
             model = self.llm.bind_tools(self.tools, tool_choice="any")
 
             tool_choice_chain = (
@@ -195,7 +195,7 @@ def route(self, state: AgentState) -> list[str]:
         if tools == []:
             return ["retrieve_general"]
 
-        if self.inbuit_tool_calling:
+        if self.inbuilt_tool_calling:
             tool_names = [tool["name"] for tool in tools if "name" in tool]  # type: ignore
             return tool_names
         else:

diff --git a/backend/src/api/routers/graphs.py b/backend/src/api/routers/graphs.py
@@ -8,6 +8,9 @@
 from langchain_google_vertexai import ChatVertexAI
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_ollama import ChatOllama
+from langchain_core.messages import AIMessageChunk
+
+from starlette.responses import StreamingResponse
 
 from ...agents.retriever_graph import RetrieverGraph
 from ..models.response_model import ChatResponse, UserInput
@@ -76,7 +79,7 @@
     embeddings_config=embeddings_config,
     reranking_model_name=hf_reranker,
     use_cuda=use_cuda,
-    inbuit_tool_calling=False,
+    inbuilt_tool_calling=False,
 )
 rg.initialize()
 
@@ -142,3 +145,47 @@ async def get_agent_response(user_input: UserInput) -> ChatResponse:
         response = {"response": llm_response, "tool": tools}
 
     return ChatResponse(**response)
+
+
+async def get_response_stream(user_input: UserInput):
+    """Stream the second LLM call's text chunks, then a "Sources:" line."""
+    inputs = {
+        "messages": [
+            ("user", user_input.query),
+        ],
+        "chat_history": get_history_str(user_input.chat_history),
+    }
+
+    urls: list[str] = []
+    # Number of the chat-model call in progress; bumped on each model end.
+    current_llm_call_count = 1
+
+    if rg.graph is not None:
+        async for event in rg.graph.astream_events(inputs, version="v2"):
+            chunk = event["event"]
+
+            if chunk == "on_chat_model_end":
+                current_llm_call_count += 1
+
+            # Collect the URL of every document in the retriever event's output.
+            if chunk == "on_retriever_start" or chunk == "on_retriever_end":
+                for document in event.get("data", {}).get("output", {}):
+                    urls.append(document.metadata["url"])
+
+            # Only chunks produced during the second model call are forwarded.
+            if chunk == "on_chat_model_stream" and current_llm_call_count == 2:
+                message_content = event.get("data", {}).get("chunk", {})
+                # Skip non-AIMessageChunk payloads rather than emit "None".
+                if isinstance(message_content, AIMessageChunk):
+                    yield str(message_content.content) + "\n\n"
+
+    # dict.fromkeys drops duplicates but keeps first-seen order, unlike set().
+    urls = list(dict.fromkeys(urls))
+    yield f"Sources: {', '.join(urls)}\n\n"
+
+
+@router.post("/agent-retriever/stream", response_class=StreamingResponse)
+async def get_agent_response_streaming(user_input: UserInput):
+    # NOTE(review): chunks are not "data:"-framed SSE events; confirm clients.
+    stream = get_response_stream(user_input)
+    return StreamingResponse(stream, media_type="text/event-stream")