From e3aeb01694c4a2f8b42b56fd78419d6ac283e288 Mon Sep 17 00:00:00 2001 From: Daniele Martinoli Date: Wed, 26 Feb 2025 10:00:37 +0100 Subject: [PATCH] qdrant inline provider Signed-off-by: Daniele Martinoli --- distributions/dependencies.json | 1 + docs/source/providers/vector_io/qdrant.md | 4 ++-- .../inline/vector_io/qdrant/__init__.py | 19 +++++++++++++++++ .../inline/vector_io/qdrant/config.py | 21 +++++++++++++++++++ llama_stack/providers/registry/vector_io.py | 8 +++++++ .../remote/vector_io/qdrant/config.py | 1 - .../remote/vector_io/qdrant/qdrant.py | 9 +++++--- llama_stack/templates/ollama/build.yaml | 1 + llama_stack/templates/ollama/ollama.py | 12 ++++++++--- .../templates/ollama/run-with-safety.yaml | 4 ++++ llama_stack/templates/ollama/run.yaml | 4 ++++ 11 files changed, 75 insertions(+), 9 deletions(-) create mode 100644 llama_stack/providers/inline/vector_io/qdrant/__init__.py create mode 100644 llama_stack/providers/inline/vector_io/qdrant/config.py diff --git a/distributions/dependencies.json b/distributions/dependencies.json index 622cf791b3..f0f8f34f83 100644 --- a/distributions/dependencies.json +++ b/distributions/dependencies.json @@ -443,6 +443,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "qdrant-client", "redis", "requests", "scikit-learn", diff --git a/docs/source/providers/vector_io/qdrant.md b/docs/source/providers/vector_io/qdrant.md index c374ade98d..fb22753916 100644 --- a/docs/source/providers/vector_io/qdrant.md +++ b/docs/source/providers/vector_io/qdrant.md @@ -3,7 +3,7 @@ orphan: true --- # Qdrant -[Qdrant](https://qdrant.tech/documentation/) is a remote vector database provider for Llama Stack. It +[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It allows you to store and query vectors directly in memory. That means you'll get fast and efficient vector retrieval. @@ -17,7 +17,7 @@ That means you'll get fast and efficient vector retrieval. 
To use Qdrant in your Llama Stack project, follow these steps: 1. Install the necessary dependencies. -2. Configure your Llama Stack project to use Faiss. +2. Configure your Llama Stack project to use Qdrant. 3. Start storing and querying vectors. ## Installation diff --git a/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/llama_stack/providers/inline/vector_io/qdrant/__init__.py new file mode 100644 index 0000000000..7e80ae16b2 --- /dev/null +++ b/llama_stack/providers/inline/vector_io/qdrant/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Dict + +from llama_stack.providers.datatypes import Api, ProviderSpec + +from .config import QdrantVectorIOConfig + + +async def get_provider_impl(config: QdrantVectorIOConfig, deps: Dict[Api, ProviderSpec]): + from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter + + impl = QdrantVectorIOAdapter(config, deps[Api.inference]) + await impl.initialize() + return impl diff --git a/llama_stack/providers/inline/vector_io/qdrant/config.py b/llama_stack/providers/inline/vector_io/qdrant/config.py new file mode 100644 index 0000000000..ff9d6ce58b --- /dev/null +++ b/llama_stack/providers/inline/vector_io/qdrant/config.py @@ -0,0 +1,21 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ + +from pydantic import BaseModel + +from llama_stack.schema_utils import json_schema_type + + +@json_schema_type +class QdrantVectorIOConfig(BaseModel): + path: str + + @classmethod + def sample_run_config(cls, __distro_dir__: str) -> dict[str, str]: + return { + "path": "${env.QDRANT_PATH:~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", + } diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py index ff4f9caf5f..afa2056caa 100644 --- a/llama_stack/providers/registry/vector_io.py +++ b/llama_stack/providers/registry/vector_io.py @@ -100,6 +100,14 @@ def available_providers() -> List[ProviderSpec]: ), api_dependencies=[], ), + InlineProviderSpec( + api=Api.vector_io, + provider_type="inline::qdrant", + pip_packages=["qdrant-client"], + module="llama_stack.providers.inline.vector_io.qdrant", + config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig", + api_dependencies=[Api.inference], + ), remote_provider_spec( Api.vector_io, AdapterSpec( diff --git a/llama_stack/providers/remote/vector_io/qdrant/config.py b/llama_stack/providers/remote/vector_io/qdrant/config.py index f212882d84..dca95a2296 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/config.py +++ b/llama_stack/providers/remote/vector_io/qdrant/config.py @@ -23,4 +23,3 @@ class QdrantVectorIOConfig(BaseModel): prefix: Optional[str] = None timeout: Optional[int] = None host: Optional[str] = None - path: Optional[str] = None diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 586b8ca954..f8e6fea1aa 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -6,7 +6,7 @@ import logging import uuid -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union from numpy.typing import NDArray from qdrant_client import AsyncQdrantClient, 
models @@ -16,12 +16,13 @@ from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate +from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig from llama_stack.providers.utils.memory.vector_store import ( EmbeddingIndex, VectorDBWithIndex, ) -from .config import QdrantVectorIOConfig +from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig log = logging.getLogger(__name__) CHUNK_ID_KEY = "_chunk_id" @@ -99,7 +100,9 @@ async def delete(self): class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): - def __init__(self, config: QdrantVectorIOConfig, inference_api: Api.inference) -> None: + def __init__( + self, config: Union[RemoteQdrantVectorIOConfig, InlineQdrantVectorIOConfig], inference_api: Api.inference + ) -> None: self.config = config self.client = AsyncQdrantClient(**self.config.model_dump(exclude_none=True)) self.cache = {} diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml index da33b8d53d..849fe49840 100644 --- a/llama_stack/templates/ollama/build.yaml +++ b/llama_stack/templates/ollama/build.yaml @@ -6,6 +6,7 @@ distribution_spec: - remote::ollama vector_io: - inline::sqlite-vec + - inline::qdrant - remote::chromadb - remote::pgvector safety: diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index ba3cfe684c..5616939703 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -13,6 +13,7 @@ ShieldInput, ToolGroupInput, ) +from llama_stack.providers.inline.vector_io.qdrant.config import QdrantVectorIOConfig from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig from llama_stack.providers.remote.inference.ollama import OllamaImplConfig from llama_stack.templates.template import 
DistributionTemplate, RunConfigSettings @@ -21,7 +22,7 @@ def get_distribution_template() -> DistributionTemplate: providers = { "inference": ["remote::ollama"], - "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"], + "vector_io": ["inline::sqlite-vec", "inline::qdrant", "remote::chromadb", "remote::pgvector"], "safety": ["inline::llama-guard"], "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], @@ -47,6 +48,11 @@ def get_distribution_template() -> DistributionTemplate: provider_type="inline::sqlite-vec", config=SQLiteVectorIOConfig.sample_run_config(f"distributions/{name}"), ) + vector_io_provider_qdrant = Provider( + provider_id="qdrant", + provider_type="inline::qdrant", + config=QdrantVectorIOConfig.sample_run_config(f"distributions/{name}"), + ) inference_model = ModelInput( model_id="${env.INFERENCE_MODEL}", @@ -92,7 +98,7 @@ def get_distribution_template() -> DistributionTemplate: "run.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], - "vector_io": [vector_io_provider_sqlite], + "vector_io": [vector_io_provider_sqlite, vector_io_provider_qdrant], }, default_models=[inference_model], default_tool_groups=default_tool_groups, @@ -100,7 +106,7 @@ def get_distribution_template() -> DistributionTemplate: "run-with-safety.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], - "vector_io": [vector_io_provider_sqlite], + "vector_io": [vector_io_provider_sqlite, vector_io_provider_qdrant], "safety": [ Provider( provider_id="llama-guard", diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index d5766dec1c..0c4f8bea51 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -21,6 +21,10 @@ providers: provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db + - 
provider_id: qdrant + provider_type: inline::qdrant + config: + path: ${env.QDRANT_PATH:~/.llama/distributions/ollama}/qdrant.db safety: - provider_id: llama-guard provider_type: inline::llama-guard diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index 0c82552c6b..7513704469 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -21,6 +21,10 @@ providers: provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db + - provider_id: qdrant + provider_type: inline::qdrant + config: + path: ${env.QDRANT_PATH:~/.llama/distributions/ollama}/qdrant.db safety: - provider_id: llama-guard provider_type: inline::llama-guard