From 415ee8c494fed696428a74544218ff99ebca1d61 Mon Sep 17 00:00:00 2001
From: Jacob Sela <jacob.sela@voxel51.com>
Date: Wed, 15 Jan 2025 17:27:24 -0800
Subject: [PATCH 1/3] fix open_clip image preprocessing

---
 fiftyone/utils/open_clip.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fiftyone/utils/open_clip.py b/fiftyone/utils/open_clip.py
index d51c350144..40230cc6c6 100644
--- a/fiftyone/utils/open_clip.py
+++ b/fiftyone/utils/open_clip.py
@@ -88,7 +88,7 @@ def _load_model(self, config):
         (
             self._model,
             _,
-            self.preprocess,
+            self._preprocess,
         ) = open_clip.create_model_and_transforms(
             config.clip_model,
             pretrained=config.pretrained,
@@ -134,7 +134,7 @@ def _get_class_logits(self, text_features, image_features):
 
     def _predict_all(self, imgs):
         if self._preprocess:
-            imgs = [self._preprocess(img).unsqueeze(0) for img in imgs]
+            imgs = [self._preprocess(img) for img in imgs]
 
         if isinstance(imgs, (list, tuple)):
             imgs = torch.stack(imgs)

From 23d6a6d01532f053cba5ba9b0af23d6ad757b508 Mon Sep 17 00:00:00 2001
From: Jacob Sela <jacob.sela@voxel51.com>
Date: Sun, 19 Jan 2025 14:39:19 -0800
Subject: [PATCH 2/3] re-add preprocessor fix

---
 fiftyone/utils/open_clip.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fiftyone/utils/open_clip.py b/fiftyone/utils/open_clip.py
index 40230cc6c6..ed2527fddf 100644
--- a/fiftyone/utils/open_clip.py
+++ b/fiftyone/utils/open_clip.py
@@ -57,6 +57,7 @@ class TorchOpenClipModel(fout.TorchImageModel, fom.PromptMixin):
     def __init__(self, config):
         super().__init__(config)
         self._text_features = None
+        self.preprocess = self._preprocess_aux
 
     @property
     def can_embed_prompts(self):
@@ -88,7 +89,7 @@ def _load_model(self, config):
         (
             self._model,
             _,
-            self._preprocess,
+            self._preprocess_aux,
         ) = open_clip.create_model_and_transforms(
             config.clip_model,
             pretrained=config.pretrained,

From cb284818031ce2b080873652fafcdedff2776b0d Mon Sep 17 00:00:00 2001
From: brimoor <brimoor@umich.edu>
Date: Mon, 20 Jan 2025 16:22:48 -0500
Subject: [PATCH 3/3] only use autocast when using GPU

---
 fiftyone/utils/open_clip.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fiftyone/utils/open_clip.py b/fiftyone/utils/open_clip.py
index ed2527fddf..45f8897651 100644
--- a/fiftyone/utils/open_clip.py
+++ b/fiftyone/utils/open_clip.py
@@ -5,6 +5,7 @@
 | `voxel51.com <https://voxel51.com/>`_
 |
 """
+import contextlib
 import logging
 
 import fiftyone.core.models as fom
@@ -143,12 +144,15 @@ def _predict_all(self, imgs):
         height, width = imgs.size()[-2:]
         frame_size = (width, height)
 
-        if self._using_gpu:
-            imgs = imgs.to(self.device)
+        with torch.no_grad(), contextlib.ExitStack() as ctx:
+            if self._using_gpu:
+                imgs = imgs.to(self.device)
+
+                # https://github.com/voxel51/fiftyone/pull/5395#issuecomment-2601055784
+                ctx.enter_context(
+                    torch.amp.autocast(device_type=self.device.type)
+                )
 
-        with torch.no_grad(), torch.amp.autocast(
-            device_type=self.device.type if self._using_gpu else "cpu"
-        ):
             image_features = self._model.encode_image(imgs)
             text_features = self._get_text_features()