Skip to content

Commit

Permalink
fix linter issues
Browse files Browse the repository at this point in the history
  • Loading branch information
varunsharma27 committed Jan 13, 2025
1 parent 25f2f31 commit fabd0f1
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 16 deletions.
32 changes: 18 additions & 14 deletions libs/ktem/ktem/index/file/graph/light_graph_index.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from typing import Any
from typing import Any, Optional
from uuid import uuid4

from ktem.db.engine import engine
from sqlalchemy.orm import Session

from ..base import BaseFileIndexIndexing, BaseFileIndexRetriever
from .graph_index import GraphRAGIndex
from .lightrag_pipelines import LightRAGIndexingPipeline, LightRAGRetrieverPipeline
Expand All @@ -11,7 +12,7 @@
class LightRAGIndex(GraphRAGIndex):
def __init__(self, app, id: int, name: str, config: dict):
super().__init__(app, id, name, config)
self._collection_graph_id = None
self._collection_graph_id: Optional[str] = None

def _setup_indexing_cls(self):
self._indexing_pipeline_cls = LightRAGIndexingPipeline
Expand All @@ -20,21 +21,24 @@ def _setup_retriever_cls(self):
self._retriever_pipeline_cls = [LightRAGRetrieverPipeline]

def _get_or_create_collection_graph_id(self):
if not self._collection_graph_id:
# Try to find existing graph ID for this collection
with Session(engine) as session:
result = (
session.query(self._resources["Index"].target_id)
.filter(self._resources["Index"].relation_type == "graph")
.first()
if self._collection_graph_id:
return self._collection_graph_id

# Try to find existing graph ID for this collection
with Session(engine) as session:
result = (
session.query(self._resources["Index"].target_id) # type: ignore
.filter(
self._resources["Index"].relation_type == "graph" # type: ignore
)
if result:
self._collection_graph_id = result[0]
else:
self._collection_graph_id = str(uuid4())
.first()
)
if result:
self._collection_graph_id = result[0]
else:
self._collection_graph_id = str(uuid4())
return self._collection_graph_id


def get_indexing_pipeline(self, settings, user_id) -> BaseFileIndexIndexing:
pipeline = super().get_indexing_pipeline(settings, user_id)
# indexing settings
Expand Down
7 changes: 5 additions & 2 deletions libs/ktem/ktem/index/file/graph/lightrag_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ class LightRAGIndexingPipeline(GraphRAGIndexingPipeline):
def store_file_id_with_graph_id(self, file_ids: list[str | None]):
# Use the collection-wide graph ID for LightRAG
graph_id = self.collection_graph_id

# Record all files under this graph_id
with Session(engine) as session:
for file_id in file_ids:
Expand Down Expand Up @@ -349,7 +349,10 @@ def call_graphrag_index(self, graph_id: str, docs: list[Document]):
process_doc_count = 0
yield Document(
channel="debug",
text=f"[GraphRAG] {'Updating' if is_incremental else 'Creating'} index: {process_doc_count} / {total_docs} documents.",
text=(
f"[GraphRAG] {'Updating' if is_incremental else 'Creating'} index: "
f"{process_doc_count} / {total_docs} documents."
),
)

for doc_id in range(0, len(all_docs), INDEX_BATCHSIZE):
Expand Down

0 comments on commit fabd0f1

Please sign in to comment.