Cinnamon · taprosoft · Nov 8, 2024 · Nov 8, 2024
diff --git a/Dockerfile b/Dockerfile
@@ -46,7 +46,7 @@ RUN --mount=type=ssh  \
 
 RUN --mount=type=ssh  \
     --mount=type=cache,target=/root/.cache/pip  \
-    if [ "$TARGETARCH" = "amd64" ]; then pip install graphrag future; fi
+    if [ "$TARGETARCH" = "amd64" ]; then pip install "graphrag<=0.3.6" future; fi
 
 # Clean up
 RUN apt-get autoremove \

diff --git a/README.md b/README.md
@@ -204,7 +204,7 @@ documents and developers who want to build their own RAG pipeline.
 - **Non-Docker Installation**: If you are not using Docker, install GraphRAG with the following command:
 
   ```shell
-  pip install graphrag future
+  pip install "graphrag<=0.3.6" future
   ```
 
 - **Setting Up API KEY**: To use the GraphRAG retriever feature, ensure you set the `GRAPHRAG_API_KEY` environment variable. You can do this directly in your environment or by adding it to a `.env` file.

diff --git a/libs/ktem/ktem/index/file/graph/lightrag_pipelines.py b/libs/ktem/ktem/index/file/graph/lightrag_pipelines.py
@@ -150,16 +150,26 @@ async def lightrag_build_local_query_context(
         for k, n, d in zip(results, node_datas, node_degrees)
         if n is not None
     ]
-    use_text_units = await _find_most_related_text_unit_from_entities(
-        node_datas, query_param, text_chunks_db, knowledge_graph_inst
-    )
-    use_relations = await _find_most_related_edges_from_entities(
-        node_datas, query_param, knowledge_graph_inst
-    )
+
+    try:
+        use_text_units = await _find_most_related_text_unit_from_entities(
+            node_datas, query_param, text_chunks_db, knowledge_graph_inst
+        )
+    except Exception:
+        use_text_units = []
+
+    try:
+        use_relations = await _find_most_related_edges_from_entities(
+            node_datas, query_param, knowledge_graph_inst
+        )
+    except Exception:
+        use_relations = []
+
     logging.info(
         f"Local query uses {len(node_datas)} entities, "
         f"{len(use_relations)} relations, {len(use_text_units)} text units"
     )
+
     entites_section_list = [["id", "entity", "type", "description", "rank"]]
     for i, n in enumerate(node_datas):
         entites_section_list.append(
@@ -226,7 +236,9 @@ def call_graphrag_index(self, graph_id: str, docs: list[Document]):
         )
 
         all_docs = [
-            doc.text for doc in docs if doc.metadata.get("type", "text") == "text"
+            doc.text
+            for doc in docs
+            if doc.metadata.get("type", "text") == "text" and len(doc.text.strip()) > 0
         ]
 
         yield Document(

diff --git a/libs/ktem/ktem/index/file/graph/nano_pipelines.py b/libs/ktem/ktem/index/file/graph/nano_pipelines.py
@@ -232,7 +232,9 @@ def call_graphrag_index(self, graph_id: str, docs: list[Document]):
         )
 
         all_docs = [
-            doc.text for doc in docs if doc.metadata.get("type", "text") == "text"
+            doc.text
+            for doc in docs
+            if doc.metadata.get("type", "text") == "text" and len(doc.text.strip()) > 0
         ]
 
         yield Document(