From edf0a255be5485d8e3602105ed22ebeaebcab61d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gen=C3=ADs=20L=C3=A1inez=20Moreno?= <57753185+elockquence@users.noreply.github.com> Date: Fri, 3 May 2024 11:02:54 +0200 Subject: [PATCH] generate_image_prompts.py: remove unnecessary txt embeddings Deleted the unnecessary ' ' (text) embeddings, which cause a dimension error when stacked with the image embeddings. --- tools/generate_image_prompts.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tools/generate_image_prompts.py b/tools/generate_image_prompts.py index ba0d0236..b8b3ed84 100644 --- a/tools/generate_image_prompts.py +++ b/tools/generate_image_prompts.py @@ -26,16 +26,6 @@ text_model = CLIPTextModelWithProjection.from_pretrained(args.model) processor = AutoProcessor.from_pretrained(args.model) - # padding prompts - device = 'cuda:0' - text_model.to(device) - texts = tokenizer(text=[' '], return_tensors='pt', padding=True) - texts = texts.to(device) - text_outputs = text_model(**texts) - txt_feats = text_outputs.text_embeds - txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True) - txt_feats = txt_feats.reshape(-1, txt_feats.shape[-1]).cpu().data.numpy() - images = os.listdir(args.image_dir) category_embeds = [] @@ -54,6 +44,5 @@ def _forward_vision_model(image_name): for image_ in tqdm.tqdm(images): _forward_vision_model(image_) - category_embeds.append(txt_feats) category_embeds = np.stack(category_embeds) np.save(osp.join(args.out_dir, args.out_file), category_embeds)