Hello,
it appears that the current code results in the following error:
RuntimeError:
Some tensors share memory, this will lead to duplicate memory on disk and potential differences when loading them again: [{'seq_pos_enc.pe', 'embed_timestep.seq_pos_enc.pe'}].
A potential way to correctly save your model is to use `save_model`.
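For context, I hit this when serializing the model with safetensors; something along the lines of the call below reproduces it (illustrative only: the import path, the categories_num value, and the output directory are placeholders, not taken from the repo's scripts):

from models.dlt import DLT  # assumed module path

model = DLT(categories_num=5)            # placeholder arguments
model.save_pretrained("dlt_checkpoint")  # diffusers' ModelMixin uses safetensors by default in recent versions -> raises the RuntimeError above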
I took a look at dlt.py, and I think the problem is that self.seq_pos_enc is assigned directly to embed_timestep.seq_pos_enc, so the same PositionalEncoding buffer `pe` ends up registered under two keys in the state dict.
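For reference, this is roughly the pattern I believe triggers it (a sketch of what I assume TimestepEmbedder in models/utils.py does, not the exact code): the wrapper stores the PositionalEncoding module it is given, so the same `pe` buffer is saved under both seq_pos_enc.pe and embed_timestep.seq_pos_enc.pe, and safetensors refuses to write tensors that share storage.

import torch.nn as nn

class TimestepEmbedder(nn.Module):  # hypothetical sketch, for illustration only
    def __init__(self, latent_dim, seq_pos_enc):
        super().__init__()
        # storing the passed-in module registers it as a submodule, so its `pe`
        # buffer shows up twice in the parent model's state dict
        self.seq_pos_enc = seq_pos_enc
        self.time_embed = nn.Sequential(
            nn.Linear(latent_dim, latent_dim),
            nn.SiLU(),
            nn.Linear(latent_dim, latent_dim),
        )

    def forward(self, timesteps):
        # same computation the change below inlines into DLT.forward
        return self.time_embed(self.seq_pos_enc.pe[timesteps]).permute(1, 0, 2)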
I made a small change to dlt.py as follows:
import torch
import torch.nn as nn
from diffusers import ModelMixin, ConfigMixin
from diffusers.configuration_utils import register_to_config
from einops import rearrange

from models.utils import PositionalEncoding, TimestepEmbedder


class DLT(ModelMixin, ConfigMixin):
    @register_to_config
    def __init__(self, categories_num, latent_dim=256, num_layers=4, num_heads=4, dropout_r=0., activation="gelu",
                 cond_emb_size=224, cat_emb_size=64):
        super().__init__()
        self.latent_dim = latent_dim
        self.dropout_r = dropout_r
        self.categories_num = categories_num

        self.seq_pos_enc = PositionalEncoding(self.latent_dim, self.dropout_r)
        # learnable embedding for each category.
        self.cat_emb = nn.Parameter(torch.randn(self.categories_num, cat_emb_size))
        # condition embedding
        self.cond_mask_box_emb = nn.Parameter(torch.randn(2, cond_emb_size))
        self.cond_mask_cat_emb = nn.Parameter(torch.randn(2, cat_emb_size))

        seqTransEncoderLayer = nn.TransformerEncoderLayer(d_model=self.latent_dim,
                                                          nhead=num_heads,
                                                          dim_feedforward=self.latent_dim * 2,
                                                          dropout=dropout_r,
                                                          activation=activation)
        self.seqTransEncoder = nn.TransformerEncoder(seqTransEncoderLayer,
                                                     num_layers=num_layers)

        # self.embed_timestep = TimestepEmbedder(self.latent_dim, self.seq_pos_enc)
        self.time_embed = nn.Sequential(
            nn.Linear(self.latent_dim, self.latent_dim),
            nn.SiLU(),
            nn.Linear(self.latent_dim, self.latent_dim),
        )

        self.output_process = nn.Sequential(
            nn.Linear(self.latent_dim, 4))
        self.output_cls = nn.Sequential(
            nn.Linear(self.latent_dim, categories_num))
        self.size_emb = nn.Sequential(
            nn.Linear(2, cond_emb_size),
        )
        self.loc_emb = nn.Sequential(
            nn.Linear(2, cond_emb_size),
        )
    def forward(self, sample, noisy_sample, timesteps):
        # apply noise to the element categories we want to predict
        cat_input = noisy_sample['cat'] * sample['mask_cat'] + (1 - sample['mask_cat']) * sample['cat']
        cat_input_flat = rearrange(cat_input, 'b c -> (b c)')
        # apply noise to the element boxes we want to predict
        sample_tensor = sample['mask_box'] * noisy_sample['box'] + (1 - sample['mask_box']) * sample['box_cond']
        xy = sample_tensor[:, :, :2]
        wh = sample_tensor[:, :, 2:]

        elem_cat_emb = self.cat_emb[cat_input_flat, :]
        elem_cat_emb = rearrange(elem_cat_emb, '(b c) d -> b c d', b=noisy_sample['box'].shape[0])

        mask_wh = sample['mask_box'][:, :, 2]
        mask_xy = sample['mask_box'][:, :, 0]

        def mask_to_emb(mask, cond_mask_emb):
            mask_flat = rearrange(mask, 'b c -> (b c)').type(torch.LongTensor)
            mask_all_emb = cond_mask_emb[mask_flat, :]
            mask_all_emb = rearrange(mask_all_emb, '(b c) d -> b c d', b=mask.shape[0])
            return mask_all_emb

        emb_mask_wh = mask_to_emb(mask_wh, self.cond_mask_box_emb)
        emb_mask_xy = mask_to_emb(mask_xy, self.cond_mask_box_emb)
        emb_mask_cl = mask_to_emb(sample['mask_cat'], self.cond_mask_cat_emb)

        # t_emb = self.embed_timestep(timesteps)
        t_emb = self.time_embed(self.seq_pos_enc.pe[timesteps]).permute(1, 0, 2)
        size_emb = self.size_emb(wh) + emb_mask_wh
        loc_emb = self.loc_emb(xy) + emb_mask_xy
        elem_cat_emb = elem_cat_emb + emb_mask_cl
        tokens_emb = torch.cat([size_emb, loc_emb, elem_cat_emb], dim=-1)
        tokens_emb = rearrange(tokens_emb, 'b c d -> c b d')

        # prepend the timestep embedding to the token sequence
        xseq = torch.cat((t_emb, tokens_emb), dim=0)
        xseq = self.seq_pos_enc(xseq)
        output = self.seqTransEncoder(xseq)[1:]
        output = rearrange(output, 'c b d -> b c d')

        output_box = self.output_process(output)
        output_cls = self.output_cls(output)
        return output_box, output_cls
With this change the wrapper class TimestepEmbedder is no longer required: the time embedding is built directly in the DLT class and uses seq_pos_enc without reassigning it, which avoids the shared-memory error.
Can you please confirm whether this change is correct, or whether the intention was to clone seq_pos_enc?
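If cloning is what you had in mind, I guess the wrapper could also be kept and simply given its own copy, along the lines of the one-line change inside DLT.__init__ below (this assumes the TimestepEmbedder signature from the commented-out line above, and it duplicates the buffer on disk):

import copy

# hypothetical alternative: keep TimestepEmbedder but hand it a deep copy, so
# its `pe` buffer no longer shares storage with self.seq_pos_enc
self.embed_timestep = TimestepEmbedder(self.latent_dim, copy.deepcopy(self.seq_pos_enc))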
Thank you