From ed2bd60bd96bd9d8e51e684d75bd0f117d320b67 Mon Sep 17 00:00:00 2001
From: Kai Wu <kaiwu@meta.com>
Date: Tue, 25 Feb 2025 11:25:23 -0800
Subject: [PATCH 1/6] add ollama embedding config and fix sqlite_vec db

---
 .../providers/inline/vector_io/sqlite_vec/sqlite_vec.py     | 2 +-
 llama_stack/templates/ollama/run.yaml                       | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index 17865c93e9..eb97572d58 100644
--- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -162,7 +162,7 @@ def __init__(self, config, inference_api: Api.inference) -> None:
 
     async def initialize(self) -> None:
         # Open a connection to the SQLite database (the file is specified in the config).
-        self.connection = sqlite3.connect(self.config.db_path)
+        self.connection = sqlite3.connect(self.config.db_path,check_same_thread=False)
         self.connection.enable_load_extension(True)
         sqlite_vec.load(self.connection)
         self.connection.enable_load_extension(False)
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
index d64e07347d..48e2434825 100644
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -87,6 +87,12 @@ models:
   model_id: ${env.INFERENCE_MODEL}
   provider_id: ollama
   model_type: llm
+- metadata:
+    embedding_dimension: 384
+  model_id: all-MiniLM-L6-v2
+  provider_id: ollama
+  provider_model_id: all-minilm:latest
+  model_type: embedding
 shields: []
 vector_dbs: []
 datasets: []

From 733b9c07b509e0bdef8e49591b7aa320cafbd4d6 Mon Sep 17 00:00:00 2001
From: Kai Wu <kaiwu@meta.com>
Date: Tue, 25 Feb 2025 13:42:02 -0800
Subject: [PATCH 2/6] pre-commit: fix formatting in sqlite_vec.py and drop embedding model from run.yaml

---
 .../providers/inline/vector_io/sqlite_vec/sqlite_vec.py     | 2 +-
 llama_stack/templates/ollama/run.yaml                       | 6 ------
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index eb97572d58..7e8230ff9d 100644
--- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -162,7 +162,7 @@ def __init__(self, config, inference_api: Api.inference) -> None:
 
     async def initialize(self) -> None:
         # Open a connection to the SQLite database (the file is specified in the config).
-        self.connection = sqlite3.connect(self.config.db_path,check_same_thread=False)
+        self.connection = sqlite3.connect(self.config.db_path, check_same_thread=False)
         self.connection.enable_load_extension(True)
         sqlite_vec.load(self.connection)
         self.connection.enable_load_extension(False)
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
index 48e2434825..d64e07347d 100644
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -87,12 +87,6 @@ models:
   model_id: ${env.INFERENCE_MODEL}
   provider_id: ollama
   model_type: llm
-- metadata:
-    embedding_dimension: 384
-  model_id: all-MiniLM-L6-v2
-  provider_id: ollama
-  provider_model_id: all-minilm:latest
-  model_type: embedding
 shields: []
 vector_dbs: []
 datasets: []

From 32e89191c2552b5a74526316eabde998384eec4c Mon Sep 17 00:00:00 2001
From: Kai Wu <kaiwu@meta.com>
Date: Tue, 25 Feb 2025 15:08:48 -0800
Subject: [PATCH 3/6] fix ollama.py: add embedding_model to default_models

---
 llama_stack/templates/ollama/ollama.py | 2 +-
 llama_stack/templates/ollama/run.yaml  | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py
index 4f644c270c..89233c5ae6 100644
--- a/llama_stack/templates/ollama/ollama.py
+++ b/llama_stack/templates/ollama/ollama.py
@@ -93,7 +93,7 @@ def get_distribution_template() -> DistributionTemplate:
                     "inference": [inference_provider],
                     "vector_io": [vector_io_provider_sqlite],
                 },
-                default_models=[inference_model],
+                default_models=[inference_model, embedding_model],
                 default_tool_groups=default_tool_groups,
             ),
             "run-with-safety.yaml": RunConfigSettings(
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
index d64e07347d..48e2434825 100644
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -87,6 +87,12 @@ models:
   model_id: ${env.INFERENCE_MODEL}
   provider_id: ollama
   model_type: llm
+- metadata:
+    embedding_dimension: 384
+  model_id: all-MiniLM-L6-v2
+  provider_id: ollama
+  provider_model_id: all-minilm:latest
+  model_type: embedding
 shields: []
 vector_dbs: []
 datasets: []

From 6ff7ea127feaf3ac30a6bf73a77fc0895b7f8e15 Mon Sep 17 00:00:00 2001
From: Kai Wu <kaiwu@meta.com>
Date: Wed, 26 Feb 2025 15:46:19 -0800
Subject: [PATCH 4/6] fix sqlite_vec by using thread-local connections

---
 .../inline/vector_io/sqlite_vec/sqlite_vec.py | 26 +++++++++++++++----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index 7e8230ff9d..347d7ab2bf 100644
--- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -8,6 +8,7 @@
 import logging
 import sqlite3
 import struct
+import threading
 import uuid
 from typing import Any, Dict, List, Optional
 
@@ -158,11 +159,21 @@ def __init__(self, config, inference_api: Api.inference) -> None:
         self.config = config
         self.inference_api = inference_api
         self.cache: Dict[str, VectorDBWithIndex] = {}
-        self.connection: Optional[sqlite3.Connection] = None
+        self._local = threading.local()
+
+    def _get_connection(self):
+        """Get a thread-local database connection."""
+        if not hasattr(self._local, "conn"):
+            try:
+                self._local.conn = sqlite3.connect(self.config.db_path)
+            except Exception as e:
+                print(f"Error connecting to SQLite database: {e}")
+                raise e
+        return self._local.conn
 
     async def initialize(self) -> None:
         # Open a connection to the SQLite database (the file is specified in the config).
-        self.connection = sqlite3.connect(self.config.db_path, check_same_thread=False)
+        self.connection = self._get_connection()
         self.connection.enable_load_extension(True)
         sqlite_vec.load(self.connection)
         self.connection.enable_load_extension(False)
@@ -185,9 +196,14 @@ async def initialize(self) -> None:
             self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)
 
     async def shutdown(self) -> None:
-        if self.connection:
-            self.connection.close()
-            self.connection = None
+        # We can't access other threads' connections, so we just close our own
+        if hasattr(self._local, "conn"):
+            try:
+                self._local.conn.close()
+            except Exception as e:
+                print(f"Error closing SQLite connection: {e}")
+            finally:
+                del self._local.conn
 
     async def register_vector_db(self, vector_db: VectorDB) -> None:
         if self.connection is None:

From f037510f3a635c5395f8cd555d823bee9aff5e3c Mon Sep 17 00:00:00 2001
From: Kai Wu <kaiwu@meta.com>
Date: Mon, 3 Mar 2025 14:53:25 -0800
Subject: [PATCH 5/6] replace self.connection with calls to _get_connection()

---
 .../inline/vector_io/sqlite_vec/sqlite_vec.py | 32 +++++++++----------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index 347d7ab2bf..102cad8e28 100644
--- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -23,7 +23,6 @@
 
 logger = logging.getLogger(__name__)
 
-
 def serialize_vector(vector: List[float]) -> bytes:
     """Serialize a list of floats into a compact binary representation."""
     return struct.pack(f"{len(vector)}f", *vector)
@@ -148,6 +147,7 @@ async def query(self, embedding: NDArray, k: int, score_threshold: float) -> Que
         return QueryChunksResponse(chunks=chunks, scores=scores)
 
 
+
 class SQLiteVecVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
     """
     A VectorIO implementation using SQLite + sqlite_vec.
@@ -173,11 +173,11 @@ def _get_connection(self):
 
     async def initialize(self) -> None:
         # Open a connection to the SQLite database (the file is specified in the config).
-        self.connection = self._get_connection()
-        self.connection.enable_load_extension(True)
-        sqlite_vec.load(self.connection)
-        self.connection.enable_load_extension(False)
-        cur = self.connection.cursor()
+        connection = self._get_connection()
+        connection.enable_load_extension(True)
+        sqlite_vec.load(connection)
+        connection.enable_load_extension(False)
+        cur = connection.cursor()
         # Create a table to persist vector DB registrations.
         cur.execute("""
             CREATE TABLE IF NOT EXISTS vector_dbs (
@@ -185,14 +185,14 @@ async def initialize(self) -> None:
                 metadata TEXT
             );
         """)
-        self.connection.commit()
+        connection.commit()
         # Load any existing vector DB registrations.
         cur.execute("SELECT metadata FROM vector_dbs")
         rows = cur.fetchall()
         for row in rows:
             vector_db_data = row[0]
             vector_db = VectorDB.model_validate_json(vector_db_data)
-            index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.connection, vector_db.identifier)
+            index = await SQLiteVecIndex.create(vector_db.embedding_dimension, connection, vector_db.identifier)
             self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)
 
     async def shutdown(self) -> None:
@@ -206,31 +206,29 @@ async def shutdown(self) -> None:
                 del self._local.conn
 
     async def register_vector_db(self, vector_db: VectorDB) -> None:
-        if self.connection is None:
-            raise RuntimeError("SQLite connection not initialized")
-        cur = self.connection.cursor()
+        connection = self._get_connection()
+        cur = connection.cursor()
         cur.execute(
             "INSERT OR REPLACE INTO vector_dbs (id, metadata) VALUES (?, ?)",
             (vector_db.identifier, vector_db.model_dump_json()),
         )
-        self.connection.commit()
-        index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.connection, vector_db.identifier)
+        connection.commit()
+        index = await SQLiteVecIndex.create(vector_db.embedding_dimension, connection, vector_db.identifier)
         self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)
 
     async def list_vector_dbs(self) -> List[VectorDB]:
         return [v.vector_db for v in self.cache.values()]
 
     async def unregister_vector_db(self, vector_db_id: str) -> None:
-        if self.connection is None:
-            raise RuntimeError("SQLite connection not initialized")
+        connection = self._get_connection()
         if vector_db_id not in self.cache:
             logger.warning(f"Vector DB {vector_db_id} not found")
             return
         await self.cache[vector_db_id].index.delete()
         del self.cache[vector_db_id]
-        cur = self.connection.cursor()
+        cur = connection.cursor()
         cur.execute("DELETE FROM vector_dbs WHERE id = ?", (vector_db_id,))
-        self.connection.commit()
+        connection.commit()
 
     async def insert_chunks(self, vector_db_id: str, chunks: List[Chunk], ttl_seconds: Optional[int] = None) -> None:
         if vector_db_id not in self.cache:

From e48ae304e70f331fd590618a8f19e2060af6ed1d Mon Sep 17 00:00:00 2001
From: Kai Wu <kaiwu@meta.com>
Date: Mon, 3 Mar 2025 15:00:44 -0800
Subject: [PATCH 6/6] pre-commit: fix blank lines in sqlite_vec.py

---
 llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index 102cad8e28..e35a761d92 100644
--- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -23,6 +23,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 def serialize_vector(vector: List[float]) -> bytes:
     """Serialize a list of floats into a compact binary representation."""
     return struct.pack(f"{len(vector)}f", *vector)
@@ -147,7 +148,6 @@ async def query(self, embedding: NDArray, k: int, score_threshold: float) -> Que
         return QueryChunksResponse(chunks=chunks, scores=scores)
 
 
-
 class SQLiteVecVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
     """
     A VectorIO implementation using SQLite + sqlite_vec.