[New Models] Add attention layer with MLP layer, but training time is increasing #3055

Open
nsa05605 opened this issue Nov 15, 2024 · 0 comments

nsa05605 commented Nov 15, 2024

Model/Dataset/Scheduler description

Hi, I'm trying to add an attention layer to my own detector, but I've run into two problems.

First, the training time per epoch keeps increasing; I suspect a memory leak, but I can't find the cause. Second, I can't tell whether the MLP weights are actually being optimized.
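
To test the second point, I can snapshot one MLP weight before a training step and compare it afterwards (a minimal sketch; `model` stands for the built detector instance):

before = model.MLPNet.linear[0].weight.detach().clone()
# ... one forward pass / loss.backward() / optimizer.step() ...
after = model.MLPNet.linear[0].weight.detach()
print('MLP weight updated:', not torch.equal(before, after))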

My source code is below.

# Copyright (c) OpenMMLab. All rights reserved.

import torch
from mmcv.runner import force_fp32, BaseModule
from torch.nn import functional as F

from ..builder import DETECTORS
from .mvx_two_stage import MVXTwoStageDetector

from mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d

from mmdet3d.models.backbones.voxel_fusion_layer import build_voxel_fusion_layer
from mmdet3d.models.backbones.middle_fusion_layer import build_middle_fusion_layer

from mmdet3d.models.builder import build_middle_encoder
from mmdet3d.models.builder import build_voxel_encoder
from mmdet3d.models.builder import build_backbone
from mmdet3d.models.builder import build_neck

from mmcv.ops import Voxelization
import torch.nn as nn
import torch.nn.init as init
import torch.profiler as profiler

import time
from mmcv.utils import Registry
MLP_LAYER = Registry('mlp_layer')
VELOCITY_OFFSET = Registry('velocity_offset')

@MLP_LAYER.register_module()
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.linear = nn.Sequential(
            nn.Linear(10, 32),
            nn.LayerNorm(32),
            nn.ReLU(),
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
        self.init_weights()
        print("MLP Initialization")

    def init_weights(self):
        for layer in self.linear:
            if isinstance(layer, nn.Linear):
                init.kaiming_normal_(layer.weight, mode='fan_in', nonlinearity='relu')
                if layer.bias is not None:
                    init.constant_(layer.bias, 0)

    def forward(self, x):
        out = self.linear(x)
        # NOTE: `out` is a non-leaf tensor, so `out.grad` is always None here.
        print(f'out.grad : {out.grad}')
        return out

@VELOCITY_OFFSET.register_module()
class VelocityAttention(nn.Module):
    def __init__(self, max_pairs=50):
        super(VelocityAttention, self).__init__()
        print("VelocityAttention Initialization")
        self.max_pairs = max_pairs

    def forward(self, gt_bboxes_3d, rad_points, bbox_list, MLPNet):
        velo_det_attention = torch.zeros((len(bbox_list), self.max_pairs, 2), device='cuda')
        velo_gt = torch.zeros((len(gt_bboxes_3d), self.max_pairs, 2), device='cuda')

        num_of_matching_pairs = 0
        for batch in range(len(bbox_list)):
            det = bbox_list[batch][0]  # Bounding boxes
            if len(det) == 0:
                continue

            gt_bat = gt_bboxes_3d[batch]
            det_tensor = det.tensor.cuda()
            gt_bat_tensor = gt_bat.tensor.cuda()

            # matching helper (defined elsewhere in my detector)
            matching_pairs = self.find_matching_boxes(gt_bat_tensor, det_tensor)
            if len(matching_pairs) == 0:  # skip if there are no matches
                continue
            num_of_matching_pairs += len(matching_pairs)

            vel_det_att = torch.zeros((len(matching_pairs), 2), device='cuda')
            vel_gt = torch.zeros((len(matching_pairs), 2), device='cuda')
            radars = rad_points[batch]  # Radar points
            num_radar = 20  # max number of radar points per object

            asso_mat = torch.zeros((len(matching_pairs), num_radar, 10), dtype=torch.float, device='cuda')
            scores = torch.zeros((len(matching_pairs), num_radar + 1), dtype=torch.float, device='cuda')
            scores[:, -1] = 1

            for i, pair in enumerate(matching_pairs):
                idx = pair[0]
                idx_gt = pair[1]

                vel_gt[i, 0] = gt_bat_tensor[idx_gt, 7]
                vel_gt[i, 1] = gt_bat_tensor[idx_gt, 8]

                sd_dis = torch.sqrt(1e-5 + torch.pow(radars[:, 0] - det_tensor[idx, 0], 2) +
                                    torch.pow(radars[:, 1] - det_tensor[idx, 1], 2))
                sd_ids, index_sd = torch.sort(sd_dis, descending=False)
                index_sd = index_sd[:num_radar]

                asso_mat[i, :, 0] = det_tensor[idx, 3]  # width
                asso_mat[i, :, 1] = det_tensor[idx, 4]  # length

                velo = torch.sqrt(torch.pow(det_tensor[idx, 7], 2) + torch.pow(det_tensor[idx, 8], 2)).cuda()
                velo_x = det_tensor[idx, 7] / (velo + 1e-5)
                velo_y = det_tensor[idx, 8] / (velo + 1e-5)
                asso_mat[i, :, 2] = velo    # ||v||
                asso_mat[i, :, 3] = velo_x  # vx / ||v||
                asso_mat[i, :, 4] = velo_y  # vy / ||v||

                gamma = torch.cos(torch.atan((det_tensor[idx, 0] / (-det_tensor[idx, 1] + 1e-5)) +
                                             torch.atan(det_tensor[idx, 7] / (det_tensor[idx, 8] + 1e-5)))).cuda()
                asso_mat[i, :, 5] = gamma  # angle between D's motion & D's radial
                asso_mat[i, :, 6] = det_tensor[idx, 0] - radars[index_sd, 0]  # dx
                asso_mat[i, :, 7] = det_tensor[idx, 1] - radars[index_sd, 1]  # dy
                asso_mat[i, :, 8] = radars[index_sd, 6]  # timestamp

                beta = torch.atan((-det_tensor[idx, 1]) / (det_tensor[idx, 0] + 1e-5)) - \
                       torch.atan((-radars[index_sd, 1]) / (radars[index_sd, 0] + 1e-5)).cuda()  # angle between D's motion & Q's radial
                radar_velo = torch.sqrt(torch.pow(radars[index_sd, 4], 2) + torch.pow(radars[index_sd, 5], 2)).cuda()
                velo_bp = radar_velo / (torch.cos(torch.acos(gamma) + beta) + 1e-5)
                asso_mat[i, :, 9] = velo_bp.cuda()

            for i in range(len(matching_pairs)):
                scores[i, :-1] = MLPNet(asso_mat[i, :, :]).squeeze(-1)
                scores_norm = F.softmax(scores[i, :], dim=0).cuda()

                velo_cand = torch.cat((asso_mat[i, :, 9], asso_mat[i, 0, 2].view(1)), 0).t().cuda()
                mag_refined = torch.sum(scores_norm * velo_cand).cuda()
                velo_refined = mag_refined * torch.cat((asso_mat[i, 0, 3].view(1), asso_mat[i, 0, 4].view(1)), 0).cuda()
                vel_det_att[i, :] = velo_refined

            velo_det_attention[batch, :len(matching_pairs), :] = vel_det_att
            velo_gt[batch, :len(matching_pairs), :] = vel_gt

            del vel_gt, vel_det_att, scores_norm, asso_mat

        return velo_det_attention, velo_gt
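
By the way, the print(f'out.grad : {out.grad}') in MLP.forward() always shows None: out is a non-leaf tensor, and .grad is only populated on leaf tensors after backward(). If I want to observe the gradient actually flowing through the MLP output, a tensor hook works instead (a minimal sketch of the forward method):

def forward(self, x):
    out = self.linear(x)
    # Prints the gradient norm flowing into `out` during backward().
    out.register_hook(lambda g: print('grad into MLP output:', g.norm().item()))
    return out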

In the main detector model, I instantiate my attention modules like this:

self.MLPNet = MLP().cuda()
self.VelocityAttention = VelocityAttention(max_pairs=max_pairs).to('cuda')
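
Since self.MLPNet is assigned as an nn.Module attribute, its parameters should be registered on the detector and visible to the optimizer. A quick check I can run (illustrative snippet; `detector` stands for the built model instance):

for name, p in detector.named_parameters():
    if 'MLPNet' in name:
        print(name, p.shape, p.requires_grad)  # expect requires_grad=True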

And in forward(), I call the layer like this:

velo_det_attention, velo_gt = self.VelocityAttention(gt_bboxes_3d, rad_points, bbox_list, self.MLPNet)
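
For the MLP weights to receive gradients at all, velo_det_attention has to contribute to the loss that gets backpropagated, along these lines (an illustrative sketch only; the smooth-L1 choice and the dict key are placeholders, not my exact code):

# Sketch: regress refined velocities toward the matched GT velocities.
loss_velocity = F.smooth_l1_loss(velo_det_attention, velo_gt)
losses['loss_velocity'] = loss_velocity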

Open source status

  • The model implementation is available
  • The model weights are available.

Provide useful links for the implementation

No response
