# data_loader.py (forked from 232525/PureT)
import os
import torch
from torchvision import transforms
from lib.config import cfg
from datasets.coco_dataset import CocoDataset
import samplers.distributed
import numpy as np


def sample_collate(batch):
    indices, input_seq, target_seq, gv_feat, att_feats = zip(*batch)

    indices = np.stack(indices, axis=0).reshape(-1)
    input_seq = torch.cat([torch.from_numpy(b) for b in input_seq], 0)
    target_seq = torch.cat([torch.from_numpy(b) for b in target_seq], 0)
    gv_feat = torch.cat([torch.from_numpy(b) for b in gv_feat], 0)

    """
    # When loading pre-extracted image features of shape [L, D], the length L
    # can differ per image (e.g. region/object features), so the features must
    # be padded to a common length and an attention mask att_mask generated.
    atts_num = [x.shape[0] for x in att_feats]
    max_att_num = np.max(atts_num)

    feat_arr = []
    mask_arr = []
    for i, num in enumerate(atts_num):
        tmp_feat = np.zeros((1, max_att_num, att_feats[i].shape[1]), dtype=np.float32)
        tmp_feat[:, 0:att_feats[i].shape[0], :] = att_feats[i]
        feat_arr.append(torch.from_numpy(tmp_feat))

        tmp_mask = np.zeros((1, max_att_num), dtype=np.float32)
        tmp_mask[:, 0:num] = 1
        mask_arr.append(torch.from_numpy(tmp_mask))

    att_feats = torch.cat(feat_arr, 0)
    att_mask = torch.cat(mask_arr, 0)
    """
    # Raw image tensors all share the same shape, so unlike pre-extracted
    # features they can be stacked directly without any length check.
    # att_mask matches the final grid-feature size; grid features would in
    # fact also work without an att_mask.
    att_feats = torch.stack(att_feats, 0)  # [B, 3, 384, 384]
    att_mask = torch.ones(att_feats.size()[0], 12*12)
    # att_mask = torch.ones(att_feats.size()[0], 576)

    return indices, input_seq, target_seq, gv_feat, att_feats, att_mask


def sample_collate_val(batch):
    indices, gv_feat, att_feats = zip(*batch)

    indices = np.stack(indices, axis=0).reshape(-1)
    gv_feat = torch.cat([torch.from_numpy(b) for b in gv_feat], 0)

    """
    # When loading pre-extracted image features of shape [L, D], the length L
    # can differ per image (e.g. region/object features), so the features must
    # be padded to a common length and an attention mask att_mask generated.
    atts_num = [x.shape[0] for x in att_feats]
    max_att_num = np.max(atts_num)

    feat_arr = []
    mask_arr = []
    for i, num in enumerate(atts_num):
        tmp_feat = np.zeros((1, max_att_num, att_feats[i].shape[1]), dtype=np.float32)
        tmp_feat[:, 0:att_feats[i].shape[0], :] = att_feats[i]
        feat_arr.append(torch.from_numpy(tmp_feat))

        tmp_mask = np.zeros((1, max_att_num), dtype=np.float32)
        tmp_mask[:, 0:num] = 1
        mask_arr.append(torch.from_numpy(tmp_mask))

    att_feats = torch.cat(feat_arr, 0)
    att_mask = torch.cat(mask_arr, 0)
    """
    # Raw image tensors all share the same shape, so unlike pre-extracted
    # features they can be stacked directly without any length check.
    # att_mask matches the final grid-feature size; grid features would in
    # fact also work without an att_mask.
    att_feats = torch.stack(att_feats, 0)  # [B, 3, 384, 384]
    att_mask = torch.ones(att_feats.size()[0], 12*12)
    # att_mask = torch.ones(att_feats.size()[0], 576)

    return indices, gv_feat, att_feats, att_mask


def load_train(distributed, epoch, coco_set):
    sampler = samplers.distributed.DistributedSampler(coco_set, epoch=epoch) \
        if distributed else None
    shuffle = cfg.DATA_LOADER.SHUFFLE if sampler is None else False

    loader = torch.utils.data.DataLoader(
        coco_set,
        batch_size = cfg.TRAIN.BATCH_SIZE,
        shuffle = shuffle,
        num_workers = cfg.DATA_LOADER.NUM_WORKERS,
        drop_last = cfg.DATA_LOADER.DROP_LAST,
        pin_memory = cfg.DATA_LOADER.PIN_MEMORY,
        sampler = sampler,
        collate_fn = sample_collate
    )
    return loader


def load_val(image_ids_path, gv_feat_path, att_feats_folder):
    coco_set = CocoDataset(
        image_ids_path = image_ids_path,
        input_seq = None,
        target_seq = None,
        gv_feat_path = gv_feat_path,
        att_feats_folder = att_feats_folder,
        seq_per_img = 1,
        max_feat_num = cfg.DATA_LOADER.MAX_FEAT
    )

    loader = torch.utils.data.DataLoader(
        coco_set,
        batch_size = cfg.TEST.BATCH_SIZE,
        shuffle = False,
        num_workers = cfg.DATA_LOADER.NUM_WORKERS,
        drop_last = False,
        pin_memory = cfg.DATA_LOADER.PIN_MEMORY,
        collate_fn = sample_collate_val
    )
    return loader
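

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): how a training/evaluation script might
# call the two loaders above. The dataset construction, file paths, and cfg
# fields shown here are hypothetical assumptions, not part of this module;
# only the CocoDataset keyword arguments mirror those used in load_val above.
#
#   train_set = CocoDataset(
#       image_ids_path = 'data/train_ids.txt',          # hypothetical path
#       input_seq = 'data/train_input_seq.pkl',          # hypothetical path
#       target_seq = 'data/train_target_seq.pkl',        # hypothetical path
#       gv_feat_path = '',
#       att_feats_folder = 'data/train_images',          # hypothetical path
#       seq_per_img = 5,                                 # assumed value
#       max_feat_num = cfg.DATA_LOADER.MAX_FEAT
#   )
#   train_loader = load_train(distributed=False, epoch=0, coco_set=train_set)
#   for indices, input_seq, target_seq, gv_feat, att_feats, att_mask in train_loader:
#       pass  # forward/backward pass here
#
#   val_loader = load_val('data/val_ids.txt', None, 'data/val_images')  # hypothetical paths
#   for indices, gv_feat, att_feats, att_mask in val_loader:
#       pass  # caption generation / evaluation here
# ---------------------------------------------------------------------------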