diff --git a/README.md b/README.md
index 0d7016d2..26d41e22 100644
--- a/README.md
+++ b/README.md
@@ -106,8 +106,21 @@ print(rag.query("What are the top themes in this story?", param=QueryParam(mode=
 
 # Perform hybrid search
 print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
+
+# Perform mix search (Knowledge Graph + Vector Retrieval)
+# Mix mode combines knowledge graph and vector search:
+# - Uses both structured (KG) and unstructured (vector) information
+# - Provides comprehensive answers by analyzing relationships and context
+# - Supports image content through HTML img tags
+# - Allows control over retrieval depth via top_k parameter
+print(rag.query("What are the top themes in this story?", param=QueryParam(
+    mode="mix")))
+
 ```
+
+
+
 <details>
 <summary> Using Open AI-like APIs </summary>
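As a quick orientation (not part of the patch), here is a sketch of how `mode="mix"` composes with the other `QueryParam` fields this change touches. It assumes `rag` is an already-initialized `LightRAG` instance whose documents were ingested with `rag.insert(...)`, exactly as in the Quick Start snippet above.

```python
from lightrag import QueryParam

# Sketch only: `rag` is assumed to be an initialized LightRAG instance
# with documents already inserted via rag.insert(...).

# Mix search with an explicit retrieval budget.
print(rag.query(
    "What are the top themes in this story?",
    param=QueryParam(mode="mix", top_k=20),
))

# Inspect the two retrieval channels instead of generating an answer.
# In mix mode, only_need_context=True returns a dict with the keys
# "kg_context" and "vector_context".
contexts = rag.query(
    "What are the top themes in this story?",
    param=QueryParam(mode="mix", only_need_context=True),
)
print(contexts["kg_context"])
print(contexts["vector_context"])
```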
@@ -262,7 +275,7 @@ In order to run this experiment on low RAM GPU you should select small model and
 
 ```python
 class QueryParam:
-    mode: Literal["local", "global", "hybrid", "naive"] = "global"
+    mode: Literal["local", "global", "hybrid", "naive", "mix"] = "global"
     only_need_context: bool = False
     response_type: str = "Multiple Paragraphs"
     # Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode.
diff --git a/lightrag/base.py b/lightrag/base.py
index c3ba3e09..94a39cf3 100644
--- a/lightrag/base.py
+++ b/lightrag/base.py
@@ -16,7 +16,7 @@
 
 @dataclass
 class QueryParam:
-    mode: Literal["local", "global", "hybrid", "naive"] = "global"
+    mode: Literal["local", "global", "hybrid", "naive", "mix"] = "global"
     only_need_context: bool = False
     only_need_prompt: bool = False
     response_type: str = "Multiple Paragraphs"
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 992c43a4..9a7ebeb8 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -16,6 +16,7 @@
     # local_query,global_query,hybrid_query,
     kg_query,
     naive_query,
+    mix_kg_vector_query,
 )
 
 from .utils import (
@@ -630,6 +631,25 @@ async def aquery(self, query: str, param: QueryParam = QueryParam()):
                     embedding_func=None,
                 ),
             )
+        elif param.mode == "mix":
+            response = await mix_kg_vector_query(
+                query,
+                self.chunk_entity_relation_graph,
+                self.entities_vdb,
+                self.relationships_vdb,
+                self.chunks_vdb,
+                self.text_chunks,
+                param,
+                asdict(self),
+                hashing_kv=self.llm_response_cache
+                if self.llm_response_cache
+                and hasattr(self.llm_response_cache, "global_config")
+                else self.key_string_value_json_storage_cls(
+                    namespace="llm_response_cache",
+                    global_config=asdict(self),
+                    embedding_func=None,
+                ),
+            )
         else:
             raise ValueError(f"Unknown mode {param.mode}")
         await self._query_done()
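Before the new `operate.py` function, a brief aside (not part of the diff): these are the `QueryParam` fields the mix path actually consults. The numeric values below are illustrative, not guaranteed defaults; check `lightrag/base.py` for the real ones.

```python
from lightrag import QueryParam

# Illustrative values only; see lightrag/base.py for the actual defaults.
param = QueryParam(
    mode="mix",
    top_k=60,                      # KG side; the vector side is further capped at min(10, top_k)
    max_token_for_text_unit=4000,  # token budget when truncating retrieved text chunks
    response_type="Multiple Paragraphs",
    only_need_context=False,       # True -> return {"kg_context", "vector_context"} without calling the LLM
    only_need_prompt=False,        # True -> return the assembled mix_rag_response system prompt
)
```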
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 2a566ba0..b63e3754 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -1147,3 +1147,195 @@ async def naive_query(
     )
 
     return response
+
+
+async def mix_kg_vector_query(
+    query,
+    knowledge_graph_inst: BaseGraphStorage,
+    entities_vdb: BaseVectorStorage,
+    relationships_vdb: BaseVectorStorage,
+    chunks_vdb: BaseVectorStorage,
+    text_chunks_db: BaseKVStorage[TextChunkSchema],
+    query_param: QueryParam,
+    global_config: dict,
+    hashing_kv: BaseKVStorage = None,
+) -> str:
+    """
+    Hybrid retrieval implementation combining knowledge graph and vector search.
+
+    This function performs a hybrid search by:
+    1. Extracting semantic information from knowledge graph
+    2. Retrieving relevant text chunks through vector similarity
+    3. Combining both results for comprehensive answer generation
+    """
+    # 1. Cache handling
+    use_model_func = global_config["llm_model_func"]
+    args_hash = compute_args_hash("mix", query)
+    cached_response, quantized, min_val, max_val = await handle_cache(
+        hashing_kv, args_hash, query, "mix"
+    )
+    if cached_response is not None:
+        return cached_response
+
+    # 2. Execute knowledge graph and vector searches in parallel
+    async def get_kg_context():
+        try:
+            # Reuse keyword extraction logic from kg_query
+            example_number = global_config["addon_params"].get("example_number", None)
+            if example_number and example_number < len(
+                PROMPTS["keywords_extraction_examples"]
+            ):
+                examples = "\n".join(
+                    PROMPTS["keywords_extraction_examples"][: int(example_number)]
+                )
+            else:
+                examples = "\n".join(PROMPTS["keywords_extraction_examples"])
+
+            language = global_config["addon_params"].get(
+                "language", PROMPTS["DEFAULT_LANGUAGE"]
+            )
+
+            # Extract keywords using LLM
+            kw_prompt = PROMPTS["keywords_extraction"].format(
+                query=query, examples=examples, language=language
+            )
+            result = await use_model_func(kw_prompt, keyword_extraction=True)
+
+            match = re.search(r"\{.*\}", result, re.DOTALL)
+            if not match:
+                logger.warning(
+                    "No JSON-like structure found in keywords extraction result"
+                )
+                return None
+
+            result = match.group(0)
+            keywords_data = json.loads(result)
+            hl_keywords = keywords_data.get("high_level_keywords", [])
+            ll_keywords = keywords_data.get("low_level_keywords", [])
+
+            if not hl_keywords and not ll_keywords:
+                logger.warning("Both high-level and low-level keywords are empty")
+                return None
+
+            # Convert keyword lists to strings
+            ll_keywords_str = ", ".join(ll_keywords) if ll_keywords else ""
+            hl_keywords_str = ", ".join(hl_keywords) if hl_keywords else ""
+
+            # Set query mode based on available keywords
+            if not ll_keywords_str and not hl_keywords_str:
+                return None
+            elif not ll_keywords_str:
+                query_param.mode = "global"
+            elif not hl_keywords_str:
+                query_param.mode = "local"
+            else:
+                query_param.mode = "hybrid"
+
+            # Build knowledge graph context
+            context = await _build_query_context(
+                [ll_keywords_str, hl_keywords_str],
+                knowledge_graph_inst,
+                entities_vdb,
+                relationships_vdb,
+                text_chunks_db,
+                query_param,
+            )
+
+            return context
+
+        except Exception as e:
+            logger.error(f"Error in get_kg_context: {str(e)}")
+            return None
+
+    async def get_vector_context():
+        # Reuse vector search logic from naive_query
+        try:
+            # Reduce top_k for vector search in hybrid mode since we have structured information from KG
+            mix_topk = min(10, query_param.top_k)
+            results = await chunks_vdb.query(query, top_k=mix_topk)
+            if not results:
+                return None
+
+            chunks_ids = [r["id"] for r in results]
+            chunks = await text_chunks_db.get_by_ids(chunks_ids)
+
+            valid_chunks = [
+                chunk for chunk in chunks if chunk is not None and "content" in chunk
+            ]
+
+            if not valid_chunks:
+                return None
+
+            maybe_trun_chunks = truncate_list_by_token_size(
+                valid_chunks,
+                key=lambda x: x["content"],
+                max_token_size=query_param.max_token_for_text_unit,
+            )
+
+            if not maybe_trun_chunks:
+                return None
+
+            return "\n--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
+        except Exception as e:
+            logger.error(f"Error in get_vector_context: {e}")
+            return None
+
+    # 3. Execute both retrievals in parallel
+    kg_context, vector_context = await asyncio.gather(
+        get_kg_context(), get_vector_context()
+    )
+
+    # 4. Merge contexts
+    if kg_context is None and vector_context is None:
+        return PROMPTS["fail_response"]
+
+    if query_param.only_need_context:
+        return {"kg_context": kg_context, "vector_context": vector_context}
+
+    # 5. Construct hybrid prompt
+    sys_prompt = PROMPTS["mix_rag_response"].format(
+        kg_context=kg_context
+        if kg_context
+        else "No relevant knowledge graph information found",
+        vector_context=vector_context
+        if vector_context
+        else "No relevant text information found",
+        response_type=query_param.response_type,
+    )
+
+    if query_param.only_need_prompt:
+        return sys_prompt
+
+    # 6. Generate response
+    response = await use_model_func(
+        query,
+        system_prompt=sys_prompt,
+        stream=query_param.stream,
+    )
+
+    if isinstance(response, str) and len(response) > len(sys_prompt):
+        response = (
+            response.replace(sys_prompt, "")
+            .replace("user", "")
+            .replace("model", "")
+            .replace(query, "")
+            .replace("<system>", "")
+            .replace("</system>", "")
+            .strip()
+        )
+
+    # 7. Save cache
+    await save_to_cache(
+        hashing_kv,
+        CacheData(
+            args_hash=args_hash,
+            content=response,
+            prompt=query,
+            quantized=quantized,
+            min_val=min_val,
+            max_val=max_val,
+            mode="mix",
+        ),
+    )
+
+    return response
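The heart of `mix_kg_vector_query` is the fan-out/fan-in in steps 2–4. The standalone sketch below isolates that pattern with stand-in helpers (the names and return values are placeholders, not LightRAG APIs): both channels run concurrently, either one may come back as `None`, and the query only fails when both do.

```python
import asyncio
from typing import Optional, Union

async def get_kg_context() -> Optional[str]:
    # Stand-in for the keyword-extraction + graph-lookup branch.
    return "entity / relationship context from the knowledge graph"

async def get_vector_context() -> Optional[str]:
    # Stand-in for the chunk similarity-search branch.
    return "chunk 1\n--New Chunk--\nchunk 2"

async def retrieve() -> Union[dict, str]:
    # Run both retrievals concurrently and merge the results.
    kg_context, vector_context = await asyncio.gather(
        get_kg_context(), get_vector_context()
    )
    if kg_context is None and vector_context is None:
        # Stands in for PROMPTS["fail_response"].
        return "Sorry, no relevant information was found."
    return {"kg_context": kg_context, "vector_context": vector_context}

if __name__ == "__main__":
    print(asyncio.run(retrieve()))
```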
diff --git a/lightrag/prompt.py b/lightrag/prompt.py
index 8421c3c4..4f613042 100644
--- a/lightrag/prompt.py
+++ b/lightrag/prompt.py
@@ -284,3 +284,81 @@
 0.5: Partially related and answer needs modification to be used
 Return only a number between 0-1, without any additional content.
 """
+
+PROMPTS["mix_rag_response"] = """---Role Definition---
+You are a professional knowledge integration assistant, responsible for answering questions strictly based on provided knowledge graph and text information. You must follow these rules:
+1. Only use provided knowledge graph and text information
+2. Do not use your own knowledge or experience
+3. Do not make any assumptions or speculations
+4. Analyze the language used in the user message and respond in the same language
+5. Include relevant images from the source information using HTML img tags
+
+---Objective---
+Generate comprehensive and accurate answers based on knowledge graph and vector search information.
+First analyze the language of the user's question (Chinese/English/Others), then respond in the same language.
+In the following cases, respond politely with "I apologize, but I am unable to provide a complete answer to this question" in the user's language:
+1. No relevant information found in provided sources
+2. Question is beyond the scope of provided information
+3. Requires knowledge beyond provided information
+4. Requires speculation or assumptions
+
+---Information Sources---
+1. Knowledge Graph Analysis Results (Structured Information):
+{kg_context}
+
+2. Vector Search Results (Original Text):
+{vector_context}
+
+---Response Format---
+Target response format and length requirements: {response_type}
+Response language: Analyze user message language and respond in the same language
+Image inclusion: If source information contains relevant images in HTML img tags, include them in the response
+
+---Guidelines---
+1. Language Recognition and Usage:
+   - Carefully analyze the language used in user message
+   - If question is in Chinese, respond in Chinese (e.g., "非常抱歉,基于现有信息我无法完整回答这个问题")
+   - If question is in English, respond in English
+   - If question is in other languages, respond in the same language
+
+2. Information Usage Rules:
+   - Must reference both knowledge graph and vector search results
+   - Each statement must clearly indicate its source
+   - Forbidden to use information outside provided sources
+   - If information is insufficient, politely state inability to answer in user's language
+   - When relevant images are found in source information, include them using HTML img tags
+
+3. Response Standards:
+   - Strictly follow specified format and length requirements
+   - Use markdown format for organization
+   - Use quotation marks for direct quotes
+   - Clearly distinguish between factual statements and sources
+   - No speculation or assumptions allowed
+   - Preserve and include HTML img tags for relevant images
+   - Place images appropriately within the context of the answer
+
+4. Information Integration Requirements:
+   - Only integrate directly relevant information
+   - No excessive interpretation or reasoning
+   - Maintain objectivity, no personal views
+   - If information conflicts, note it and prioritize knowledge graph
+   - When information is incomplete, clearly state the gaps
+   - Include relevant images that support or illustrate the answer
+
+5. Quality Control:
+   - Every answer must be traceable to provided sources
+   - No vague or uncertain expressions
+   - No subjective judgments
+   - No filling in information gaps
+   - No supplementing with common sense or background knowledge
+   - Only include images that are directly relevant to the question
+   - Maintain original img tags without modification
+
+Processing Flow:
+1. First identify the language of user message
+2. Analyze provided knowledge graph and vector search information
+3. Identify relevant images in HTML img tags from the sources
+4. Organize and generate response in the same language as user, incorporating relevant images
+5. If unable to answer, express this politely in user's language with an explanation
+
+Remember: It's better to say "I apologize, but I am unable to provide a complete answer to this question" (in the user's language, maintaining politeness) than to use information outside provided sources or make speculations. When including images, only use those that are directly relevant and helpful to the answer."""
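Finally, for readers tracing how the new template is consumed: `mix_kg_vector_query` fills `mix_rag_response` once, substituting a fallback string for whichever channel came back empty. Below is a minimal sketch, assuming the package layout shown in this diff (`PROMPTS` lives in `lightrag/prompt.py`); the sample `vector_context` text is made up.

```python
from lightrag.prompt import PROMPTS

# Pretend the KG channel returned nothing and the vector channel found one chunk.
kg_context = None
vector_context = "The story follows Scrooge as three spirits visit him..."  # made-up sample text

sys_prompt = PROMPTS["mix_rag_response"].format(
    kg_context=kg_context or "No relevant knowledge graph information found",
    vector_context=vector_context or "No relevant text information found",
    response_type="Multiple Paragraphs",
)

# sys_prompt is then passed to the LLM as the system prompt,
# with the original user question sent as the user message.
print(sys_prompt[:200])
```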