From d9590cb0464f9775b7e411a1696601efa5a76b0e Mon Sep 17 00:00:00 2001
From: Peter Robicheaux <peter@roboflow.com>
Date: Thu, 23 Jan 2025 18:29:28 +0000
Subject: [PATCH 1/4] Use precomputed owl embeddings

---
 inference/models/owlv2/owlv2.py | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/inference/models/owlv2/owlv2.py b/inference/models/owlv2/owlv2.py
index a4346b9275..25eb7d2c2b 100644
--- a/inference/models/owlv2/owlv2.py
+++ b/inference/models/owlv2/owlv2.py
@@ -46,6 +46,8 @@
     load_image_rgb,
 )
 
+CPU_IMAGE_EMBED_CACHE_SIZE = 10000
+
 # TYPES
 Hash = NewType("Hash", str)
 PosNegKey = Literal["positive", "negative"]
@@ -319,7 +321,7 @@ def reset_cache(self):
         # each entry should be on the order of 300*4KB, so 1000 is 400MB of CUDA memory
         self.image_embed_cache = LimitedSizeDict(size_limit=OWLV2_IMAGE_CACHE_SIZE)
         # no need for limit here, as we're only storing on CPU
-        self.cpu_image_embed_cache = dict()
+        self.cpu_image_embed_cache = LimitedSizeDict(size_limit=CPU_IMAGE_EMBED_CACHE_SIZE)
         # each entry should be on the order of 10 bytes, so 1000 is 10KB
         self.image_size_cache = LimitedSizeDict(size_limit=OWLV2_IMAGE_CACHE_SIZE)
         # entry size will vary depending on the number of samples, but 10 should be safe
@@ -693,9 +695,26 @@ def serialize_training_data(
         hf_id: str = f"google/{OWLV2_VERSION_ID}",
         iou_threshold: float = 0.3,
         save_dir: str = os.path.join(MODEL_CACHE_DIR, "owl-v2-serialized-data"),
+        previous_embeddings_file: str = None,
     ):
         roboflow_id = hf_id.replace("google/", "owlv2/")
-        owlv2 = OwlV2(model_id=roboflow_id)
+        if previous_embeddings_file is not None:
+            if DEVICE == "cpu":
+                model_data = torch.load(
+                    previous_embeddings_file, map_location="cpu"
+                )
+            else:
+                model_data = torch.load(previous_embeddings_file)
+            class_names = model_data["class_names"]
+            train_data_dict = model_data["train_data_dict"]
+            huggingface_id = model_data["huggingface_id"]
+            roboflow_id = model_data["roboflow_id"]
+            # each model can have its own OwlV2 instance because we use a singleton
+            owlv2 = OwlV2(model_id=roboflow_id)
+            owlv2.cpu_image_embed_cache = model_data["image_embeds"]
+        else:
+            owlv2 = OwlV2(model_id=roboflow_id)
+        
         train_data_dict, image_embeds = owlv2.make_class_embeddings_dict(
             training_data, iou_threshold, return_image_embeds=True
         )
@@ -826,7 +845,7 @@ def draw_predictions(
     def save_small_model_without_image_embeds(
         self, save_dir: str = os.path.join(MODEL_CACHE_DIR, "owl-v2-serialized-data")
     ):
-        self.owlv2.cpu_image_embed_cache = dict()
+        self.owlv2.cpu_image_embed_cache = LimitedSizeDict(size_limit=CPU_IMAGE_EMBED_CACHE_SIZE)
         return self.save_model(
             self.huggingface_id,
             self.roboflow_id,

From 6e909fca6816fd61744b146dbe1b6b8b10a04d66 Mon Sep 17 00:00:00 2001
From: Grzegorz Klimaszewski
 <166530809+grzegorz-roboflow@users.noreply.github.com>
Date: Thu, 23 Jan 2025 22:03:21 +0100
Subject: [PATCH 2/4] formatting

---
 inference/models/owlv2/owlv2.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/inference/models/owlv2/owlv2.py b/inference/models/owlv2/owlv2.py
index 25eb7d2c2b..3a4eb1a713 100644
--- a/inference/models/owlv2/owlv2.py
+++ b/inference/models/owlv2/owlv2.py
@@ -321,7 +321,9 @@ def reset_cache(self):
         # each entry should be on the order of 300*4KB, so 1000 is 400MB of CUDA memory
         self.image_embed_cache = LimitedSizeDict(size_limit=OWLV2_IMAGE_CACHE_SIZE)
         # no need for limit here, as we're only storing on CPU
-        self.cpu_image_embed_cache = LimitedSizeDict(size_limit=CPU_IMAGE_EMBED_CACHE_SIZE)
+        self.cpu_image_embed_cache = LimitedSizeDict(
+            size_limit=CPU_IMAGE_EMBED_CACHE_SIZE
+        )
         # each entry should be on the order of 10 bytes, so 1000 is 10KB
         self.image_size_cache = LimitedSizeDict(size_limit=OWLV2_IMAGE_CACHE_SIZE)
         # entry size will vary depending on the number of samples, but 10 should be safe
@@ -700,9 +702,7 @@ def serialize_training_data(
         roboflow_id = hf_id.replace("google/", "owlv2/")
         if previous_embeddings_file is not None:
             if DEVICE == "cpu":
-                model_data = torch.load(
-                    previous_embeddings_file, map_location="cpu"
-                )
+                model_data = torch.load(previous_embeddings_file, map_location="cpu")
             else:
                 model_data = torch.load(previous_embeddings_file)
             class_names = model_data["class_names"]
@@ -714,7 +714,7 @@ def serialize_training_data(
             owlv2.cpu_image_embed_cache = model_data["image_embeds"]
         else:
             owlv2 = OwlV2(model_id=roboflow_id)
-        
+
         train_data_dict, image_embeds = owlv2.make_class_embeddings_dict(
             training_data, iou_threshold, return_image_embeds=True
         )
@@ -845,7 +845,9 @@ def draw_predictions(
     def save_small_model_without_image_embeds(
         self, save_dir: str = os.path.join(MODEL_CACHE_DIR, "owl-v2-serialized-data")
     ):
-        self.owlv2.cpu_image_embed_cache = LimitedSizeDict(size_limit=CPU_IMAGE_EMBED_CACHE_SIZE)
+        self.owlv2.cpu_image_embed_cache = LimitedSizeDict(
+            size_limit=CPU_IMAGE_EMBED_CACHE_SIZE
+        )
         return self.save_model(
             self.huggingface_id,
             self.roboflow_id,

From ac748c5f2d740679e116174a28e7a63aa588bc59 Mon Sep 17 00:00:00 2001
From: Peter Robicheaux <peter@roboflow.com>
Date: Fri, 24 Jan 2025 05:42:56 +0000
Subject: [PATCH 3/4] Bump tokenizers dep (needed for owlvit)

---
 requirements/_requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/_requirements.txt b/requirements/_requirements.txt
index c99c7a9f5e..f5e5b4ec5b 100644
--- a/requirements/_requirements.txt
+++ b/requirements/_requirements.txt
@@ -34,7 +34,7 @@ pandas>=2.0.0,<2.3.0
 paho-mqtt~=1.6.1
 pytest>=8.0.0,<9.0.0  # this is not a joke, sam2 requires this as the fork we are using is dependent on that, yet
 # do not mark the dependency: https://github.com/SauravMaheshkar/samv2/blob/main/sam2/utils/download.py
-tokenizers>=0.19.0,<=0.20.3
+tokenizers>=0.19.0,<=0.21.3
 slack-sdk~=3.33.4
 twilio~=9.3.7
 httpx>=0.25.1,<0.28.0  # must be pinned as bc in 0.28.0 is causing Anthropics to fail

From f710eabfe304b20941e33a395944950af3092d36 Mon Sep 17 00:00:00 2001
From: Peter Robicheaux <peter@roboflow.com>
Date: Fri, 24 Jan 2025 05:43:16 +0000
Subject: [PATCH 4/4] less precise dep

---
 requirements/_requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/_requirements.txt b/requirements/_requirements.txt
index f5e5b4ec5b..feab020bb0 100644
--- a/requirements/_requirements.txt
+++ b/requirements/_requirements.txt
@@ -34,7 +34,7 @@ pandas>=2.0.0,<2.3.0
 paho-mqtt~=1.6.1
 pytest>=8.0.0,<9.0.0  # this is not a joke, sam2 requires this as the fork we are using is dependent on that, yet
 # do not mark the dependency: https://github.com/SauravMaheshkar/samv2/blob/main/sam2/utils/download.py
-tokenizers>=0.19.0,<=0.21.3
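+tokenizers>=0.19.0,<=0.21  # NOTE(review): PEP 440 compares 0.21 as 0.21.0, so 0.21.1+ is excluded; use <0.22 to admit all 0.21.x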
 slack-sdk~=3.33.4
 twilio~=9.3.7
 httpx>=0.25.1,<0.28.0  # must be pinned as bc in 0.28.0 is causing Anthropics to fail