From edf0a255be5485d8e3602105ed22ebeaebcab61d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gen=C3=ADs=20L=C3=A1inez=20Moreno?=
 <57753185+elockquence@users.noreply.github.com>
Date: Fri, 3 May 2024 11:02:54 +0200
Subject: [PATCH] generate_image_prompts.py: remove unnecessary txt embeddings

Delete the unnecessary ' ' (text) embeddings, which cause a dimension error when stacked with the image embeddings.
---
 tools/generate_image_prompts.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/tools/generate_image_prompts.py b/tools/generate_image_prompts.py
index ba0d0236..b8b3ed84 100644
--- a/tools/generate_image_prompts.py
+++ b/tools/generate_image_prompts.py
@@ -26,16 +26,6 @@
     text_model = CLIPTextModelWithProjection.from_pretrained(args.model)
     processor = AutoProcessor.from_pretrained(args.model)
 
-    # padding prompts
-    device = 'cuda:0'
-    text_model.to(device)
-    texts = tokenizer(text=[' '], return_tensors='pt', padding=True)
-    texts = texts.to(device)
-    text_outputs = text_model(**texts)
-    txt_feats = text_outputs.text_embeds
-    txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)
-    txt_feats = txt_feats.reshape(-1, txt_feats.shape[-1]).cpu().data.numpy()
-
     images = os.listdir(args.image_dir)
     category_embeds = []
 
@@ -54,6 +44,5 @@ def _forward_vision_model(image_name):
 
     for image_ in tqdm.tqdm(images):
         _forward_vision_model(image_)
-    category_embeds.append(txt_feats)
     category_embeds = np.stack(category_embeds)
     np.save(osp.join(args.out_dir, args.out_file), category_embeds)