From e3aeb01694c4a2f8b42b56fd78419d6ac283e288 Mon Sep 17 00:00:00 2001 From: Daniele Martinoli Date: Wed, 26 Feb 2025 10:00:37 +0100 Subject: [PATCH] qdrant inline provider Signed-off-by: Daniele Martinoli --- distributions/dependencies.json | 1 + docs/source/providers/vector_io/qdrant.md | 4 ++-- .../inline/vector_io/qdrant/__init__.py | 19 +++++++++++++++++ .../inline/vector_io/qdrant/config.py | 21 +++++++++++++++++++ llama_stack/providers/registry/vector_io.py | 8 +++++++ .../remote/vector_io/qdrant/config.py | 1 - .../remote/vector_io/qdrant/qdrant.py | 9 +++++--- llama_stack/templates/ollama/build.yaml | 1 + llama_stack/templates/ollama/ollama.py | 12 ++++++++--- .../templates/ollama/run-with-safety.yaml | 4 ++++ llama_stack/templates/ollama/run.yaml | 4 ++++ 11 files changed, 75 insertions(+), 9 deletions(-) create mode 100644 llama_stack/providers/inline/vector_io/qdrant/__init__.py create mode 100644 llama_stack/providers/inline/vector_io/qdrant/config.py diff --git a/distributions/dependencies.json b/distributions/dependencies.json index 622cf791b3..f0f8f34f83 100644 --- a/distributions/dependencies.json +++ b/distributions/dependencies.json @@ -443,6 +443,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "qdrant-client", "redis", "requests", "scikit-learn", diff --git a/docs/source/providers/vector_io/qdrant.md b/docs/source/providers/vector_io/qdrant.md index c374ade98d..fb22753916 100644 --- a/docs/source/providers/vector_io/qdrant.md +++ b/docs/source/providers/vector_io/qdrant.md @@ -3,7 +3,7 @@ orphan: true --- # Qdrant -[Qdrant](https://qdrant.tech/documentation/) is a remote vector database provider for Llama Stack. It +[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It allows you to store and query vectors directly in memory. That means you'll get fast and efficient vector retrieval. @@ -17,7 +17,7 @@ That means you'll get fast and efficient vector retrieval. 
To use Qdrant in your Llama Stack project, follow these steps: 1. Install the necessary dependencies. -2. Configure your Llama Stack project to use Faiss. +2. Configure your Llama Stack project to use Qdrant. 3. Start storing and querying vectors. ## Installation diff --git a/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/llama_stack/providers/inline/vector_io/qdrant/__init__.py new file mode 100644 index 0000000000..7e80ae16b2 --- /dev/null +++ b/llama_stack/providers/inline/vector_io/qdrant/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Dict + +from llama_stack.providers.datatypes import Api, ProviderSpec + +from .config import QdrantVectorIOConfig + + +async def get_provider_impl(config: QdrantVectorIOConfig, deps: Dict[Api, ProviderSpec]): + from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter + + impl = QdrantVectorIOAdapter(config, deps[Api.inference]) + await impl.initialize() + return impl diff --git a/llama_stack/providers/inline/vector_io/qdrant/config.py b/llama_stack/providers/inline/vector_io/qdrant/config.py new file mode 100644 index 0000000000..ff9d6ce58b --- /dev/null +++ b/llama_stack/providers/inline/vector_io/qdrant/config.py @@ -0,0 +1,21 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ + +from pydantic import BaseModel + +from llama_stack.schema_utils import json_schema_type + + +@json_schema_type +class QdrantVectorIOConfig(BaseModel): + path: str + + @classmethod + def sample_run_config(cls, __distro_dir__: str) -> dict[str, str]: + return { + "path": "${env.QDRANT_PATH:~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", + } diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py index ff4f9caf5f..afa2056caa 100644 --- a/llama_stack/providers/registry/vector_io.py +++ b/llama_stack/providers/registry/vector_io.py @@ -100,6 +100,14 @@ def available_providers() -> List[ProviderSpec]: ), api_dependencies=[], ), + InlineProviderSpec( + api=Api.vector_io, + provider_type="inline::qdrant", + pip_packages=["qdrant-client"], + module="llama_stack.providers.inline.vector_io.qdrant", + config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig", + api_dependencies=[Api.inference], + ), remote_provider_spec( Api.vector_io, AdapterSpec( diff --git a/llama_stack/providers/remote/vector_io/qdrant/config.py b/llama_stack/providers/remote/vector_io/qdrant/config.py index f212882d84..dca95a2296 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/config.py +++ b/llama_stack/providers/remote/vector_io/qdrant/config.py @@ -23,4 +23,3 @@ class QdrantVectorIOConfig(BaseModel): prefix: Optional[str] = None timeout: Optional[int] = None host: Optional[str] = None - path: Optional[str] = None diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 586b8ca954..f8e6fea1aa 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -6,7 +6,7 @@ import logging import uuid -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union from numpy.typing import NDArray from qdrant_client import AsyncQdrantClient, 
models @@ -16,12 +16,13 @@ from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate +from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig from llama_stack.providers.utils.memory.vector_store import ( EmbeddingIndex, VectorDBWithIndex, ) -from .config import QdrantVectorIOConfig +from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig log = logging.getLogger(__name__) CHUNK_ID_KEY = "_chunk_id" @@ -99,7 +100,9 @@ async def delete(self): class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): - def __init__(self, config: QdrantVectorIOConfig, inference_api: Api.inference) -> None: + def __init__( + self, config: Union[RemoteQdrantVectorIOConfig, InlineQdrantVectorIOConfig], inference_api: Api.inference + ) -> None: self.config = config self.client = AsyncQdrantClient(**self.config.model_dump(exclude_none=True)) self.cache = {} diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml index da33b8d53d..849fe49840 100644 --- a/llama_stack/templates/ollama/build.yaml +++ b/llama_stack/templates/ollama/build.yaml @@ -6,6 +6,7 @@ distribution_spec: - remote::ollama vector_io: - inline::sqlite-vec + - inline::qdrant - remote::chromadb - remote::pgvector safety: diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index ba3cfe684c..5616939703 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -13,6 +13,7 @@ ShieldInput, ToolGroupInput, ) +from llama_stack.providers.inline.vector_io.qdrant.config import QdrantVectorIOConfig from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig from llama_stack.providers.remote.inference.ollama import OllamaImplConfig from llama_stack.templates.template import 
DistributionTemplate, RunConfigSettings @@ -21,7 +22,7 @@ def get_distribution_template() -> DistributionTemplate: providers = { "inference": ["remote::ollama"], - "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"], + "vector_io": ["inline::sqlite-vec", "inline::qdrant", "remote::chromadb", "remote::pgvector"], "safety": ["inline::llama-guard"], "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], @@ -47,6 +48,11 @@ def get_distribution_template() -> DistributionTemplate: provider_type="inline::sqlite-vec", config=SQLiteVectorIOConfig.sample_run_config(f"distributions/{name}"), ) + vector_io_provider_qdrant = Provider( + provider_id="qdrant", + provider_type="inline::qdrant", + config=QdrantVectorIOConfig.sample_run_config(f"distributions/{name}"), + ) inference_model = ModelInput( model_id="${env.INFERENCE_MODEL}", @@ -92,7 +98,7 @@ def get_distribution_template() -> DistributionTemplate: "run.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], - "vector_io": [vector_io_provider_sqlite], + "vector_io": [vector_io_provider_sqlite, vector_io_provider_qdrant], }, default_models=[inference_model], default_tool_groups=default_tool_groups, @@ -100,7 +106,7 @@ def get_distribution_template() -> DistributionTemplate: "run-with-safety.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], - "vector_io": [vector_io_provider_sqlite], + "vector_io": [vector_io_provider_sqlite, vector_io_provider_qdrant], "safety": [ Provider( provider_id="llama-guard", diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index d5766dec1c..0c4f8bea51 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -21,6 +21,10 @@ providers: provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db + - 
provider_id: qdrant + provider_type: inline::qdrant + config: + path: ${env.QDRANT_PATH:~/.llama/distributions/ollama}/qdrant.db safety: - provider_id: llama-guard provider_type: inline::llama-guard diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index 0c82552c6b..7513704469 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -21,6 +21,10 @@ providers: provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db + - provider_id: qdrant + provider_type: inline::qdrant + config: + path: ${env.QDRANT_PATH:~/.llama/distributions/ollama}/qdrant.db safety: - provider_id: llama-guard provider_type: inline::llama-guard