Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: add ollama embedding config and fix sqlite_vec db #1255

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 21 additions & 5 deletions llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import logging
import sqlite3
import struct
import threading
import uuid
from typing import Any, Dict, List, Optional

Expand Down Expand Up @@ -158,11 +159,21 @@ def __init__(self, config, inference_api: Api.inference) -> None:
self.config = config
self.inference_api = inference_api
self.cache: Dict[str, VectorDBWithIndex] = {}
self.connection: Optional[sqlite3.Connection] = None
self._local = threading.local()

def _get_connection(self):
    """Return this thread's SQLite connection, creating it on first use.

    sqlite3 connections must not be shared across threads, so each thread
    caches its own connection on the ``threading.local`` slot ``self._local``.

    Returns:
        sqlite3.Connection: the connection bound to the calling thread.

    Raises:
        Exception: re-raises whatever ``sqlite3.connect`` raised if the
            database at ``self.config.db_path`` cannot be opened.
    """
    if not hasattr(self._local, "conn"):
        try:
            self._local.conn = sqlite3.connect(self.config.db_path)
        except Exception:
            # Log (not print) so the failure lands in the service logs;
            # bare `raise` preserves the original traceback for the caller.
            logging.getLogger(__name__).exception(
                "Error connecting to SQLite database at %s", self.config.db_path
            )
            raise
    return self._local.conn

async def initialize(self) -> None:
# Open a connection to the SQLite database (the file is specified in the config).
self.connection = sqlite3.connect(self.config.db_path)
self.connection = self._get_connection()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this isn't quite right: a thread-local connection must not be stored in an instance variable, because other threads would then pick up a connection created on a different thread. Instead, call `conn = self._get_connection()` at every point where a connection is needed.

self.connection.enable_load_extension(True)
sqlite_vec.load(self.connection)
self.connection.enable_load_extension(False)
Expand All @@ -185,9 +196,14 @@ async def initialize(self) -> None:
self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)

async def shutdown(self) -> None:
    """Close the calling thread's SQLite connection, if one exists.

    Connections are stored per-thread on ``self._local`` (see
    ``_get_connection``), so only the current thread's connection is
    reachable here; other threads' connections are left to their owners.
    """
    # We can't access other threads' connections, so we just close our own.
    if hasattr(self._local, "conn"):
        try:
            self._local.conn.close()
        except Exception:
            # Closing is best-effort during shutdown; log and continue.
            logging.getLogger(__name__).exception("Error closing SQLite connection")
        finally:
            # Drop the slot so a later _get_connection() reconnects cleanly.
            del self._local.conn

async def register_vector_db(self, vector_db: VectorDB) -> None:
if self.connection is None:
Expand Down
2 changes: 1 addition & 1 deletion llama_stack/templates/ollama/ollama.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def get_distribution_template() -> DistributionTemplate:
"inference": [inference_provider],
"vector_io": [vector_io_provider_sqlite],
},
default_models=[inference_model],
default_models=[inference_model, embedding_model],
default_tool_groups=default_tool_groups,
),
"run-with-safety.yaml": RunConfigSettings(
Expand Down
6 changes: 6 additions & 0 deletions llama_stack/templates/ollama/run.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@ models:
model_id: ${env.INFERENCE_MODEL}
provider_id: ollama
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: ollama
provider_model_id: all-minilm:latest
model_type: embedding
shields: []
vector_dbs: []
datasets: []
Expand Down