# Import the required libraries
import os
import shutil

import numpy as np
import torch
import torch.nn.functional as F
from tqdm import tqdm
def demo_retrieval(model, sentence, dataloader, device, k=5, train_siam=True):
    """Text-to-image retrieval demo: encode one tokenized caption, rank every
    image served by `dataloader` (which should use shuffle=False) by cosine
    similarity, and copy the top-k matches into the results/ folder."""
    model.to(device)
    model.eval()

    # Encode the query caption with the language branch.
    with torch.no_grad():
        if train_siam:
            text_features = model.dual_encoder.language_model(
                sentence['caption_input_ids'].to(device),
                attention_masks=sentence['caption_attention_masks'].to(device),
                token_type_ids=sentence['caption_token_type_ids'].to(device)
            )
        else:
            text_features = model.language_model(
                sentence['caption_input_ids'].to(device),
                attention_masks=sentence['caption_attention_masks'].to(device),
                token_type_ids=sentence['caption_token_type_ids'].to(device)
            )
    # L2-normalize so the dot products below are cosine similarities.
    text_features = F.normalize(text_features.cpu(), p=2, dim=1)

    similarities = []
    all_ids = []
    all_images = []
    for batch_in in tqdm(dataloader):
        # Encode each image batch with the vision branch.
        with torch.no_grad():
            if train_siam:
                image_features = model.dual_encoder.vision_model(batch_in['images'].to(device))
            else:
                image_features = model.vision_model(batch_in['images'].to(device))
        image_features = F.normalize(image_features.cpu(), p=2, dim=1)
        all_ids.append(batch_in['image_ids'])
        all_images.append(batch_in['image_path'])
        # Cosine similarity between every image in the batch and the caption(s).
        sim_scores = torch.matmul(image_features, text_features.T)
        similarities.append(sim_scores)

    # Stack to (num_captions, num_images) and rank the images for each caption,
    # most similar first.
    similarities = torch.cat(similarities, dim=0).T
    ranked_indexes = torch.argsort(similarities, dim=1, descending=True)
    all_ids = torch.cat(all_ids, dim=0)
    all_images = np.concatenate(all_images, axis=0)
    ranked_images = all_images[ranked_indexes.cpu().numpy()]

    # Copy the k best-matching images for the first (single) query caption.
    image_paths = ranked_images[0, :k]
    os.makedirs("results", exist_ok=True)  # make sure the output folder exists
    for i, image_path in enumerate(list(image_paths)):
        shutil.copy(image_path, f"results/{i}.jpg")

    model.train()
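

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original script). The dummy model
# and dataset below only mimic the interfaces demo_retrieval appears to expect:
# a dual_encoder with language_model/vision_model branches, a caption dict with
# caption_input_ids / caption_attention_masks / caption_token_type_ids, and
# batches carrying 'images', 'image_ids' and 'image_path'. In a real run,
# replace them with the project's trained model, tokenizer output and image
# dataloader.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import tempfile
    from torch.utils.data import DataLoader, Dataset

    class _DummyBranch(torch.nn.Module):
        """Stand-in encoder: maps any input batch to random fixed-size embeddings."""
        def forward(self, x, **kwargs):
            return torch.randn(x.shape[0], 16)

    class _DummySiamese(torch.nn.Module):
        """Mimics the train_siam=True layout: model.dual_encoder.{language,vision}_model."""
        def __init__(self):
            super().__init__()
            self.dual_encoder = torch.nn.Module()
            self.dual_encoder.language_model = _DummyBranch()
            self.dual_encoder.vision_model = _DummyBranch()

    class _DummyImageDataset(Dataset):
        """Writes placeholder image files so shutil.copy has something to copy."""
        def __init__(self, n=8):
            self.paths = []
            folder = tempfile.mkdtemp()
            for i in range(n):
                path = os.path.join(folder, f"{i}.jpg")
                with open(path, "wb") as f:
                    f.write(b"placeholder")
                self.paths.append(path)

        def __len__(self):
            return len(self.paths)

        def __getitem__(self, idx):
            return {
                'images': torch.randn(3, 224, 224),
                'image_ids': torch.tensor(idx),
                'image_path': self.paths[idx],
            }

    # One "tokenized caption" with the key names demo_retrieval indexes.
    sentence = {
        'caption_input_ids': torch.randint(0, 100, (1, 12)),
        'caption_attention_masks': torch.ones(1, 12, dtype=torch.long),
        'caption_token_type_ids': torch.zeros(1, 12, dtype=torch.long),
    }

    loader = DataLoader(_DummyImageDataset(), batch_size=4, shuffle=False)
    demo_retrieval(_DummySiamese(), sentence, loader, device="cpu", k=3)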