diff --git a/libs/ktem/ktem/index/file/graph/light_graph_index.py b/libs/ktem/ktem/index/file/graph/light_graph_index.py index 297844889..aae864ed4 100644 --- a/libs/ktem/ktem/index/file/graph/light_graph_index.py +++ b/libs/ktem/ktem/index/file/graph/light_graph_index.py @@ -1,8 +1,9 @@ -from typing import Any +from typing import Any, Optional from uuid import uuid4 from ktem.db.engine import engine from sqlalchemy.orm import Session + from ..base import BaseFileIndexIndexing, BaseFileIndexRetriever from .graph_index import GraphRAGIndex from .lightrag_pipelines import LightRAGIndexingPipeline, LightRAGRetrieverPipeline @@ -11,7 +12,7 @@ class LightRAGIndex(GraphRAGIndex): def __init__(self, app, id: int, name: str, config: dict): super().__init__(app, id, name, config) - self._collection_graph_id = None + self._collection_graph_id: Optional[str] = None def _setup_indexing_cls(self): self._indexing_pipeline_cls = LightRAGIndexingPipeline @@ -20,21 +21,24 @@ def _setup_retriever_cls(self): self._retriever_pipeline_cls = [LightRAGRetrieverPipeline] def _get_or_create_collection_graph_id(self): - if not self._collection_graph_id: - # Try to find existing graph ID for this collection - with Session(engine) as session: - result = ( - session.query(self._resources["Index"].target_id) - .filter(self._resources["Index"].relation_type == "graph") - .first() + if self._collection_graph_id: + return self._collection_graph_id + + # Try to find existing graph ID for this collection + with Session(engine) as session: + result = ( + session.query(self._resources["Index"].target_id) # type: ignore + .filter( + self._resources["Index"].relation_type == "graph" # type: ignore ) - if result: - self._collection_graph_id = result[0] - else: - self._collection_graph_id = str(uuid4()) + .first() + ) + if result: + self._collection_graph_id = result[0] + else: + self._collection_graph_id = str(uuid4()) return self._collection_graph_id - def get_indexing_pipeline(self, settings, user_id) -> BaseFileIndexIndexing: pipeline = super().get_indexing_pipeline(settings, user_id) # indexing settings diff --git a/libs/ktem/ktem/index/file/graph/lightrag_pipelines.py b/libs/ktem/ktem/index/file/graph/lightrag_pipelines.py index b4deaf299..a1631c0d7 100644 --- a/libs/ktem/ktem/index/file/graph/lightrag_pipelines.py +++ b/libs/ktem/ktem/index/file/graph/lightrag_pipelines.py @@ -247,7 +247,7 @@ class LightRAGIndexingPipeline(GraphRAGIndexingPipeline): def store_file_id_with_graph_id(self, file_ids: list[str | None]): # Use the collection-wide graph ID for LightRAG graph_id = self.collection_graph_id - + # Record all files under this graph_id with Session(engine) as session: for file_id in file_ids: @@ -349,7 +349,10 @@ def call_graphrag_index(self, graph_id: str, docs: list[Document]): process_doc_count = 0 yield Document( channel="debug", - text=f"[GraphRAG] {'Updating' if is_incremental else 'Creating'} index: {process_doc_count} / {total_docs} documents.", + text=( + f"[GraphRAG] {'Updating' if is_incremental else 'Creating'} index: " + f"{process_doc_count} / {total_docs} documents." + ), ) for doc_id in range(0, len(all_docs), INDEX_BATCHSIZE):