From 9a27b1d6c79cfb524c492a3b90be51601b9bc69c Mon Sep 17 00:00:00 2001 From: Lyu Songlin Date: Mon, 20 Jan 2025 16:08:16 +0800 Subject: [PATCH] solve some comments --- .../transformer/agentic_intent_translator.py | 10 +++++++++ .../transformer/awel_intent_interpreter.py | 10 --------- .../rag/transformer/awel_intent_translator.py | 10 +++++++++ .../rag/transformer/mas_intent_interpreter.py | 10 --------- ...rpreter.py => simple_intent_translator.py} | 8 +++---- .../knowledge_graph/community_summary.py | 10 +++------ .../document_graph_retriever.py | 8 +++---- ...retriever_router.py => graph_retriever.py} | 22 +++++++++---------- .../text_based_graph_retriever.py | 4 ++-- 9 files changed, 43 insertions(+), 49 deletions(-) create mode 100644 dbgpt/rag/transformer/agentic_intent_translator.py delete mode 100644 dbgpt/rag/transformer/awel_intent_interpreter.py create mode 100644 dbgpt/rag/transformer/awel_intent_translator.py delete mode 100644 dbgpt/rag/transformer/mas_intent_interpreter.py rename dbgpt/rag/transformer/{intent_interpreter.py => simple_intent_translator.py} (96%) rename dbgpt/storage/knowledge_graph/graph_retriever/{graph_retriever_router.py => graph_retriever.py} (91%) diff --git a/dbgpt/rag/transformer/agentic_intent_translator.py b/dbgpt/rag/transformer/agentic_intent_translator.py new file mode 100644 index 000000000..4961754c1 --- /dev/null +++ b/dbgpt/rag/transformer/agentic_intent_translator.py @@ -0,0 +1,10 @@ +"""AgenticIntentTranslator class.""" +import logging + +from dbgpt.rag.transformer.base import TranslatorBase + +logger = logging.getLogger(__name__) + + +class AgenticIntentTranslator(TranslatorBase): + """AgenticIntentTranslator class.""" diff --git a/dbgpt/rag/transformer/awel_intent_interpreter.py b/dbgpt/rag/transformer/awel_intent_interpreter.py deleted file mode 100644 index ec40bc9c7..000000000 --- a/dbgpt/rag/transformer/awel_intent_interpreter.py +++ /dev/null @@ -1,10 +0,0 @@ -"""AWELIntentInterpreter class.""" -import logging 
- -from dbgpt.rag.transformer.base import TranslatorBase - -logger = logging.getLogger(__name__) - - -class AWELIntentInterpreter(TranslatorBase): - """AWELIntentInterpreter class.""" diff --git a/dbgpt/rag/transformer/awel_intent_translator.py b/dbgpt/rag/transformer/awel_intent_translator.py new file mode 100644 index 000000000..291b69a68 --- /dev/null +++ b/dbgpt/rag/transformer/awel_intent_translator.py @@ -0,0 +1,10 @@ +"""AwelIntentTranslator class.""" +import logging + +from dbgpt.rag.transformer.base import TranslatorBase + +logger = logging.getLogger(__name__) + + +class AwelIntentTranslator(TranslatorBase): + """AwelIntentTranslator class.""" diff --git a/dbgpt/rag/transformer/mas_intent_interpreter.py b/dbgpt/rag/transformer/mas_intent_interpreter.py deleted file mode 100644 index 04b00af9a..000000000 --- a/dbgpt/rag/transformer/mas_intent_interpreter.py +++ /dev/null @@ -1,10 +0,0 @@ -"""MASIntentInterpreter class.""" -import logging - -from dbgpt.rag.transformer.base import TranslatorBase - -logger = logging.getLogger(__name__) - - -class MASIntentInterpreter(TranslatorBase): - """MASIntentInterpreter class.""" diff --git a/dbgpt/rag/transformer/intent_interpreter.py b/dbgpt/rag/transformer/simple_intent_translator.py similarity index 96% rename from dbgpt/rag/transformer/intent_interpreter.py rename to dbgpt/rag/transformer/simple_intent_translator.py index 8e532e0d0..b4976d7ff 100644 --- a/dbgpt/rag/transformer/intent_interpreter.py +++ b/dbgpt/rag/transformer/simple_intent_translator.py @@ -1,4 +1,4 @@ -"""IntentInterpreter class.""" +"""SimpleIntentTranslator class.""" import json import logging import re @@ -42,11 +42,11 @@ logger = logging.getLogger(__name__) -class IntentInterpreter(LLMTranslator): - """IntentInterpreter class.""" +class SimpleIntentTranslator(LLMTranslator): + """SimpleIntentTranslator class.""" def __init__(self, llm_client: LLMClient, model_name: str): - """Initialize the IntentInterpreter.""" + """Initialize the 
SimpleIntentTranslator.""" super().__init__(llm_client, model_name, INTENT_INTERPRET_PT) def _format_messages(self, text: str, history: str = None) -> List[BaseMessage]: diff --git a/dbgpt/storage/knowledge_graph/community_summary.py b/dbgpt/storage/knowledge_graph/community_summary.py index 017d2b50f..d5e65223c 100644 --- a/dbgpt/storage/knowledge_graph/community_summary.py +++ b/dbgpt/storage/knowledge_graph/community_summary.py @@ -12,12 +12,11 @@ from dbgpt.rag.transformer.community_summarizer import CommunitySummarizer from dbgpt.rag.transformer.graph_embedder import GraphEmbedder from dbgpt.rag.transformer.graph_extractor import GraphExtractor -from dbgpt.rag.transformer.intent_interpreter import IntentInterpreter from dbgpt.rag.transformer.text2gql import Text2GQL from dbgpt.rag.transformer.text_embedder import TextEmbedder from dbgpt.storage.knowledge_graph.base import ParagraphChunk from dbgpt.storage.knowledge_graph.community.community_store import CommunityStore -from dbgpt.storage.knowledge_graph.graph_retriever.graph_retriever_router import GraphRetrieverRouter +from dbgpt.storage.knowledge_graph.graph_retriever.graph_retriever import GraphRetriever from dbgpt.storage.knowledge_graph.knowledge_graph import ( GRAPH_PARAMETERS, BuiltinKnowledgeGraph, @@ -362,12 +361,9 @@ def community_store_configure(name: str, cfg: VectorStoreConfig): ), ) - self._intent_interpreter = IntentInterpreter(self._llm_client, self._model_name) - self._text2gql = Text2GQL(self._llm_client, self._model_name) - self._knowledge_graph_triplet_search_top_size = 5 self._knowledge_graph_document_search_top_size = 5 - self._graph_retriever_router = GraphRetrieverRouter( + self._graph_retriever = GraphRetriever( config, self._graph_store_apdater, ) @@ -548,7 +544,7 @@ async def asimilar_search_with_scores( ] context = "\n".join(summaries) if summaries else "" - subgraph, subgraph_for_doc, text2gql_query = await self._graph_retriever_router.retrieve(text) + subgraph, (subgraph_for_doc, 
text2gql_query) = await self._graph_retriever.retrieve(text) knowledge_graph_str = subgraph.format() if subgraph else "" knowledge_graph_for_doc_str = ( diff --git a/dbgpt/storage/knowledge_graph/graph_retriever/document_graph_retriever.py b/dbgpt/storage/knowledge_graph/graph_retriever/document_graph_retriever.py index 3b389a606..c3b916900 100644 --- a/dbgpt/storage/knowledge_graph/graph_retriever/document_graph_retriever.py +++ b/dbgpt/storage/knowledge_graph/graph_retriever/document_graph_retriever.py @@ -27,15 +27,15 @@ def __init__( self._similarity_search_score_threshold = similarity_search_score_threshold async def retrieve( - self, subs: Optional[Union[List[str], List[List[float]]]], triplet_graph: Optional[Graph] + self, input: Union[Graph, List[str], List[List[float]]] ) -> Tuple[Graph, None]: """Retrieve from document graph.""" - if triplet_graph: + if isinstance(input, Graph): # If retrieve subgraph from triplet graph successfully # Using the vids to search chunks and doc keywords_for_document_graph = [] - for vertex in triplet_graph.vertices(): + for vertex in input.vertices(): keywords_for_document_graph.append(vertex.name) # Using the vids to search chunks and doc # entities -> chunks -> doc @@ -49,7 +49,7 @@ async def retrieve( # Using subs to search chunks # subs -> chunks -> doc subgraph_for_doc = self._graph_store_apdater.explore_docgraph_without_entities( - subs=subs, + subs=input, topk=self._similarity_search_topk, score_threshold=self._similarity_search_score_threshold, limit=self._document_topk, diff --git a/dbgpt/storage/knowledge_graph/graph_retriever/graph_retriever_router.py b/dbgpt/storage/knowledge_graph/graph_retriever/graph_retriever.py similarity index 91% rename from dbgpt/storage/knowledge_graph/graph_retriever/graph_retriever_router.py rename to dbgpt/storage/knowledge_graph/graph_retriever/graph_retriever.py index a5344dffc..48dc21e0a 100644 --- a/dbgpt/storage/knowledge_graph/graph_retriever/graph_retriever_router.py +++ 
b/dbgpt/storage/knowledge_graph/graph_retriever/graph_retriever.py @@ -37,8 +37,8 @@ logger = logging.getLogger(__name__) -class GraphRetrieverRouter: - """Graph Retriever Router class.""" +class GraphRetriever(GraphRetrieverBase): + """Graph Retriever class.""" def __init__( self, @@ -113,7 +113,7 @@ def __init__( similarity_search_score_threshold, ) - async def retrieve(self, text: str) -> Tuple[Graph, Graph, str]: + async def retrieve(self, text: str) -> Tuple[Graph, Tuple[Graph, str]]: """Retrieve subgraph from triplet graph and document graph.""" subgraph = MemoryGraph() @@ -124,6 +124,9 @@ async def retrieve(self, text: str) -> Tuple[Graph, Graph, str]: if self._enable_text_search: # Retrieve from knowledge graph with text. subgraph, text2gql_query = await self._text_based_graph_retriever.retrieve(text) + + # Extract keywords from original question + keywords: List[str] = await self._keyword_extractor.extract(text) if subgraph.vertex_count == 0 and subgraph.edge_count == 0: # if not enable text search or text search failed to retrieve subgraph @@ -135,7 +138,7 @@ async def retrieve(self, text: str) -> Tuple[Graph, Graph, str]: vector = await self._text_embedder.embed(text) # Embedding the keywords vectors = await self._text_embedder.batch_embed( - keywords, batch_size=self._triplet_embedding_batch_size + keywords, batch_size=self._embedding_batch_size ) # Using the embeddings of keywords and question vectors.append(vector) @@ -145,8 +148,6 @@ async def retrieve(self, text: str) -> Tuple[Graph, Graph, str]: f"embedding vector:\n[KEYWORDS]:{keywords}\n[QUESTION]:{text}" ) else: - # Extract keywords from original question - keywords: List[str] = await self._keyword_extractor.extract(text) subs = keywords logger.info( "Search subgraph with the following keywords:\n" @@ -166,13 +167,10 @@ async def retrieve(self, text: str) -> Tuple[Graph, Graph, str]: if subgraph.vertex_count == 0 and subgraph.edge_count == 0: # If not enable triplet graph or failed to retrieve 
subgraph # Using subs to retrieve from document graph - subgraph_for_doc = await self._document_graph_retriever.retrieve(subs=subs) + subgraph_for_doc = await self._document_graph_retriever.retrieve(subs) else: # If retrieve subgraph from triplet graph successfully # Using entities in subgraph to search chunks and doc - subgraph_for_doc = await self._document_graph_retriever.retrieve( - subs=subs, - triplet_graph=subgraph - ) + subgraph_for_doc = await self._document_graph_retriever.retrieve(triplet_graph=subgraph) - return subgraph, subgraph_for_doc, text2gql_query + return subgraph, (subgraph_for_doc, text2gql_query) diff --git a/dbgpt/storage/knowledge_graph/graph_retriever/text_based_graph_retriever.py b/dbgpt/storage/knowledge_graph/graph_retriever/text_based_graph_retriever.py index b44a9a160..a77621efc 100644 --- a/dbgpt/storage/knowledge_graph/graph_retriever/text_based_graph_retriever.py +++ b/dbgpt/storage/knowledge_graph/graph_retriever/text_based_graph_retriever.py @@ -4,7 +4,7 @@ import logging from typing import Dict, List, Union, Tuple -from dbgpt.rag.transformer.intent_interpreter import IntentInterpreter +from dbgpt.rag.transformer.simple_intent_translator import SimpleIntentTranslator from dbgpt.rag.transformer.text2gql import Text2GQL from dbgpt.storage.graph_store.graph import MemoryGraph, Graph from dbgpt.storage.knowledge_graph.graph_retriever.base import GraphRetrieverBase @@ -20,7 +20,7 @@ def __init__(self, graph_store_apdater, triplet_topk, llm_client, model_name): self._graph_store_apdater = graph_store_apdater self._triplet_topk = triplet_topk - self._intent_interpreter = IntentInterpreter(llm_client, model_name) + self._intent_interpreter = SimpleIntentTranslator(llm_client, model_name) self._text2gql = Text2GQL(llm_client, model_name) async def retrieve(self, text: str) -> Tuple[Graph, str]: