From ed2bd60bd96bd9d8e51e684d75bd0f117d320b67 Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Tue, 25 Feb 2025 11:25:23 -0800 Subject: [PATCH 1/6] add ollama embedding config and fix sqlite_vec db --- .../providers/inline/vector_io/sqlite_vec/sqlite_vec.py | 2 +- llama_stack/templates/ollama/run.yaml | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index 17865c93e9..eb97572d58 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -162,7 +162,7 @@ def __init__(self, config, inference_api: Api.inference) -> None: async def initialize(self) -> None: # Open a connection to the SQLite database (the file is specified in the config). - self.connection = sqlite3.connect(self.config.db_path) + self.connection = sqlite3.connect(self.config.db_path,check_same_thread=False) self.connection.enable_load_extension(True) sqlite_vec.load(self.connection) self.connection.enable_load_extension(False) diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index d64e07347d..48e2434825 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -87,6 +87,12 @@ models: model_id: ${env.INFERENCE_MODEL} provider_id: ollama model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: ollama + provider_model_id: all-minilm:latest + model_type: embedding shields: [] vector_dbs: [] datasets: [] From 733b9c07b509e0bdef8e49591b7aa320cafbd4d6 Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Tue, 25 Feb 2025 13:42:02 -0800 Subject: [PATCH 2/6] pre-commit --- .../providers/inline/vector_io/sqlite_vec/sqlite_vec.py | 2 +- llama_stack/templates/ollama/run.yaml | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index eb97572d58..7e8230ff9d 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -162,7 +162,7 @@ def __init__(self, config, inference_api: Api.inference) -> None: async def initialize(self) -> None: # Open a connection to the SQLite database (the file is specified in the config). - self.connection = sqlite3.connect(self.config.db_path,check_same_thread=False) + self.connection = sqlite3.connect(self.config.db_path, check_same_thread=False) self.connection.enable_load_extension(True) sqlite_vec.load(self.connection) self.connection.enable_load_extension(False) diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index 48e2434825..d64e07347d 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -87,12 +87,6 @@ models: model_id: ${env.INFERENCE_MODEL} provider_id: ollama model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: ollama - provider_model_id: all-minilm:latest - model_type: embedding shields: [] vector_dbs: [] datasets: [] From 32e89191c2552b5a74526316eabde998384eec4c Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Tue, 25 Feb 2025 15:08:48 -0800 Subject: [PATCH 3/6] fix ollama.py bug --- llama_stack/templates/ollama/ollama.py | 2 +- llama_stack/templates/ollama/run.yaml | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index 4f644c270c..89233c5ae6 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -93,7 +93,7 @@ def get_distribution_template() -> DistributionTemplate: "inference": [inference_provider], "vector_io": [vector_io_provider_sqlite], }, - default_models=[inference_model], + default_models=[inference_model, embedding_model], default_tool_groups=default_tool_groups, ), "run-with-safety.yaml": RunConfigSettings( diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index d64e07347d..48e2434825 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -87,6 +87,12 @@ models: model_id: ${env.INFERENCE_MODEL} provider_id: ollama model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: ollama + provider_model_id: all-minilm:latest + model_type: embedding shields: [] vector_dbs: [] datasets: [] From 6ff7ea127feaf3ac30a6bf73a77fc0895b7f8e15 Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Wed, 26 Feb 2025 15:46:19 -0800 Subject: [PATCH 4/6] fix sqlite_vec by using local thread --- .../inline/vector_io/sqlite_vec/sqlite_vec.py | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index 7e8230ff9d..347d7ab2bf 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -8,6 +8,7 @@ import logging import sqlite3 import struct +import threading import uuid from typing import Any, Dict, List, Optional @@ -158,11 +159,21 @@ def __init__(self, config, inference_api: Api.inference) -> None: self.config = config self.inference_api = inference_api self.cache: Dict[str, VectorDBWithIndex] = {} - self.connection: Optional[sqlite3.Connection] = None + self._local = threading.local() + + def _get_connection(self): + """Get a thread-local database connection.""" + if not hasattr(self._local, "conn"): + try: + self._local.conn = sqlite3.connect(self.config.db_path) + except Exception as e: + print(f"Error connecting to SQLite database: {e}") + raise e + return self._local.conn async def initialize(self) -> None: # Open a connection to the SQLite database (the file is specified in the config). - self.connection = sqlite3.connect(self.config.db_path, check_same_thread=False) + self.connection = self._get_connection() self.connection.enable_load_extension(True) sqlite_vec.load(self.connection) self.connection.enable_load_extension(False) @@ -185,9 +196,14 @@ async def initialize(self) -> None: self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api) async def shutdown(self) -> None: - if self.connection: - self.connection.close() - self.connection = None + # We can't access other threads' connections, so we just close our own + if hasattr(self._local, "conn"): + try: + self._local.conn.close() + except Exception as e: + print(f"Error closing SQLite connection: {e}") + finally: + del self._local.conn async def register_vector_db(self, vector_db: VectorDB) -> None: if self.connection is None: From f037510f3a635c5395f8cd555d823bee9aff5e3c Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Mon, 3 Mar 2025 14:53:25 -0800 Subject: [PATCH 5/6] change self.connection to always get_connection --- .../inline/vector_io/sqlite_vec/sqlite_vec.py | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index 347d7ab2bf..102cad8e28 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -23,7 +23,6 @@ logger = logging.getLogger(__name__) - def serialize_vector(vector: List[float]) -> bytes: """Serialize a list of floats into a compact binary representation.""" return struct.pack(f"{len(vector)}f", *vector) @@ -148,6 +147,7 @@ async def query(self, embedding: NDArray, k: int, score_threshold: float) -> Que return QueryChunksResponse(chunks=chunks, scores=scores) + class SQLiteVecVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): """ A VectorIO implementation using SQLite + sqlite_vec. @@ -173,11 +173,11 @@ def _get_connection(self): async def initialize(self) -> None: # Open a connection to the SQLite database (the file is specified in the config). - self.connection = self._get_connection() - self.connection.enable_load_extension(True) - sqlite_vec.load(self.connection) - self.connection.enable_load_extension(False) - cur = self.connection.cursor() + connection = self._get_connection() + connection.enable_load_extension(True) + sqlite_vec.load(connection) + connection.enable_load_extension(False) + cur = connection.cursor() # Create a table to persist vector DB registrations. cur.execute(""" CREATE TABLE IF NOT EXISTS vector_dbs ( @@ -185,14 +185,14 @@ async def initialize(self) -> None: metadata TEXT ); """) - self.connection.commit() + connection.commit() # Load any existing vector DB registrations. cur.execute("SELECT metadata FROM vector_dbs") rows = cur.fetchall() for row in rows: vector_db_data = row[0] vector_db = VectorDB.model_validate_json(vector_db_data) - index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.connection, vector_db.identifier) + index = await SQLiteVecIndex.create(vector_db.embedding_dimension, connection, vector_db.identifier) self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api) async def shutdown(self) -> None: @@ -206,31 +206,29 @@ async def shutdown(self) -> None: del self._local.conn async def register_vector_db(self, vector_db: VectorDB) -> None: - if self.connection is None: - raise RuntimeError("SQLite connection not initialized") - cur = self.connection.cursor() + connection = self._get_connection() + cur = connection.cursor() cur.execute( "INSERT OR REPLACE INTO vector_dbs (id, metadata) VALUES (?, ?)", (vector_db.identifier, vector_db.model_dump_json()), ) - self.connection.commit() - index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.connection, vector_db.identifier) + connection.commit() + index = await SQLiteVecIndex.create(vector_db.embedding_dimension, connection, vector_db.identifier) self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api) async def list_vector_dbs(self) -> List[VectorDB]: return [v.vector_db for v in self.cache.values()] async def unregister_vector_db(self, vector_db_id: str) -> None: - if self.connection is None: - raise RuntimeError("SQLite connection not initialized") + connection = self._get_connection() if vector_db_id not in self.cache: logger.warning(f"Vector DB {vector_db_id} not found") return await self.cache[vector_db_id].index.delete() del self.cache[vector_db_id] - cur = self.connection.cursor() + cur = connection.cursor() cur.execute("DELETE FROM vector_dbs WHERE id = ?", (vector_db_id,)) - self.connection.commit() + connection.commit() async def insert_chunks(self, vector_db_id: str, chunks: List[Chunk], ttl_seconds: Optional[int] = None) -> None: if vector_db_id not in self.cache: From e48ae304e70f331fd590618a8f19e2060af6ed1d Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Mon, 3 Mar 2025 15:00:44 -0800 Subject: [PATCH 6/6] pre-commit --- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index 102cad8e28..e35a761d92 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -23,6 +23,7 @@ logger = logging.getLogger(__name__) + def serialize_vector(vector: List[float]) -> bytes: """Serialize a list of floats into a compact binary representation.""" return struct.pack(f"{len(vector)}f", *vector) @@ -147,7 +148,6 @@ async def query(self, embedding: NDArray, k: int, score_threshold: float) -> Que return QueryChunksResponse(chunks=chunks, scores=scores) - class SQLiteVecVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): """ A VectorIO implementation using SQLite + sqlite_vec.