# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
from mmcv.transforms import BaseTransform
from PIL import Image

from mmdet3d.registry import TRANSFORMS
from mmdet3d.structures.bbox_3d import LiDARInstance3DBoxes


@TRANSFORMS.register_module()
class ResizeCropFlipImage(BaseTransform):
    """Randomly resize, crop, flip and rotate the multi-view images.

    Args:
        data_aug_conf (dict, optional): Config of the augmentation, with
            keys ``H``, ``W``, ``final_dim``, ``resize_lim``,
            ``bot_pct_lim``, ``rot_lim`` and ``rand_flip``.
            Defaults to None.
        training (bool): Whether in training mode, where the augmentation
            parameters are randomly sampled. Defaults to True.
    """

def __init__(self, data_aug_conf=None, training=True):
self.data_aug_conf = data_aug_conf
self.training = training

    def transform(self, results):
        """Resize, crop, flip and rotate each image and fold the
        augmentation into the camera intrinsics.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Updated result dict with augmented ``img`` and
            ``cam2img``.
        """
imgs = results['img']
N = len(imgs)
new_imgs = []
resize, resize_dims, crop, flip, rotate = self._sample_augmentation()
        # Stack the per-view lidar2cam matrices into one array.
        results['lidar2cam'] = np.array(results['lidar2cam'])
        for i in range(N):
            # Pad the 3x3 intrinsics to a 4x4 homogeneous matrix so they
            # can be chained with the 4x4 extrinsics.
            intrinsic = np.array(results['cam2img'][i])
            viewpad = np.eye(4)
            viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic
            results['cam2img'][i] = viewpad
img = Image.fromarray(np.uint8(imgs[i]))
            # augmentation (resize, crop, horizontal flip, rotate);
            # the same parameters are shared by all views of this sample
img, ida_mat = self._img_transform(
img,
resize=resize,
resize_dims=resize_dims,
crop=crop,
flip=flip,
rotate=rotate,
)
            new_imgs.append(np.array(img).astype(np.float32))
            # Fold the image augmentation matrix into the intrinsics so
            # that ``cam2img`` still projects onto the augmented pixels.
            results['cam2img'][
                i][:3, :3] = ida_mat.numpy() @ results['cam2img'][i][:3, :3]

        results['img'] = new_imgs

        return results
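
    # A minimal sketch (illustrative) of why ``cam2img`` is updated above:
    # after augmentation a camera-frame point projects through the original
    # intrinsics K and then through the 2D augmentation matrix, so the
    # composition is folded into the stored intrinsics:
    #
    #   x_pix_aug ~ ida_mat @ K[:3, :3] @ x_cam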

    def _get_rot(self, h):
        """Return the 2x2 rotation matrix for angle ``h`` (radians)."""
        return torch.Tensor([
            [np.cos(h), np.sin(h)],
            [-np.sin(h), np.cos(h)],
        ])

    def _img_transform(self, img, resize, resize_dims, crop, flip, rotate):
        """Apply the augmentation to one image and accumulate the matching
        2D affine transform (``ida``) on pixel coordinates.
        """
        ida_rot = torch.eye(2)
        ida_tran = torch.zeros(2)
        # adjust image
img = img.resize(resize_dims)
img = img.crop(crop)
if flip:
img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
img = img.rotate(rotate)
        # post-homography transformation: replay the same ops on pixel
        # coordinates
        ida_rot *= resize
        ida_tran -= torch.Tensor(crop[:2])
        if flip:
            # mirror x within the crop width
            A = torch.Tensor([[-1, 0], [0, 1]])
            b = torch.Tensor([crop[2] - crop[0], 0])
            ida_rot = A.matmul(ida_rot)
            ida_tran = A.matmul(ida_tran) + b
        # rotate about the crop center
        A = self._get_rot(rotate / 180 * np.pi)
        b = torch.Tensor([crop[2] - crop[0], crop[3] - crop[1]]) / 2
        b = A.matmul(-b) + b
        ida_rot = A.matmul(ida_rot)
        ida_tran = A.matmul(ida_tran) + b
ida_mat = torch.eye(3)
ida_mat[:2, :2] = ida_rot
ida_mat[:2, 2] = ida_tran
return img, ida_mat
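
    # A minimal sanity sketch (not part of the transform, assumptions: a
    # 2x resize with a full, un-flipped, un-rotated crop) of how ``ida_mat``
    # tracks pixel coordinates: a point (u, v) in the original image should
    # land at (2u, 2v) in the augmented one.
    #
    #   t = ResizeCropFlipImage()
    #   img = Image.new('RGB', (8, 8))
    #   _, ida = t._img_transform(
    #       img, resize=2.0, resize_dims=(16, 16),
    #       crop=(0, 0, 16, 16), flip=False, rotate=0)
    #   assert torch.allclose(ida[:2, :2], 2 * torch.eye(2))
    #   assert torch.allclose(ida[:2, 2], torch.zeros(2))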

    def _sample_augmentation(self):
        """Sample resize, crop, flip and rotation parameters; shared by
        all views of the current sample."""
H, W = self.data_aug_conf['H'], self.data_aug_conf['W']
fH, fW = self.data_aug_conf['final_dim']
if self.training:
resize = np.random.uniform(*self.data_aug_conf['resize_lim'])
resize_dims = (int(W * resize), int(H * resize))
newW, newH = resize_dims
crop_h = int(
(1 - np.random.uniform(*self.data_aug_conf['bot_pct_lim'])) *
newH) - fH
crop_w = int(np.random.uniform(0, max(0, newW - fW)))
crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
flip = False
if self.data_aug_conf['rand_flip'] and np.random.choice([0, 1]):
flip = True
rotate = np.random.uniform(*self.data_aug_conf['rot_lim'])
else:
resize = max(fH / H, fW / W)
resize_dims = (int(W * resize), int(H * resize))
newW, newH = resize_dims
crop_h = int(
(1 - np.mean(self.data_aug_conf['bot_pct_lim'])) * newH) - fH
crop_w = int(max(0, newW - fW) / 2)
crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
flip = False
rotate = 0
return resize, resize_dims, crop, flip, rotate
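
    # A minimal sketch of the ``data_aug_conf`` format this class expects;
    # the concrete numbers below are illustrative:
    #
    #   data_aug_conf = dict(
    #       H=900, W=1600,          # source image size
    #       final_dim=(320, 800),   # (fH, fW) after resize and crop
    #       resize_lim=(0.47, 0.625),
    #       bot_pct_lim=(0.0, 0.0),
    #       rot_lim=(0.0, 0.0),
    #       rand_flip=True,
    #   )
    #   transform = ResizeCropFlipImage(data_aug_conf, training=True)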


@TRANSFORMS.register_module()
class GlobalRotScaleTransImage(BaseTransform):
    """Randomly rotate and scale the 3D scene, updating the ground-truth
    boxes and the lidar2cam extrinsics consistently.

    Args:
        rot_range (list[float]): Range of the random rotation angle
            (radians) around the z-axis. Defaults to [-0.3925, 0.3925].
        scale_ratio_range (list[float]): Range of the random scale ratio.
            Defaults to [0.95, 1.05].
        translation_std (list[float]): Std of the random translation;
            translation is not applied yet (see the TODO in ``transform``).
            Defaults to [0, 0, 0].
        reverse_angle (bool): Whether to rotate the boxes by the negated
            angle. Defaults to False.
        training (bool): Whether in training mode. Defaults to True.
    """

def __init__(
self,
rot_range=[-0.3925, 0.3925],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0],
reverse_angle=False,
training=True,
):
self.rot_range = rot_range
self.scale_ratio_range = scale_ratio_range
self.translation_std = translation_std
self.reverse_angle = reverse_angle
self.training = training

    def transform(self, results):
        """Apply random global rotation and scaling to the ground-truth
        boxes and the lidar2cam matrices.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Updated result dict.
        """
# random rotate
rot_angle = np.random.uniform(*self.rot_range)
self.rotate_bev_along_z(results, rot_angle)
if self.reverse_angle:
rot_angle *= -1
results['gt_bboxes_3d'].rotate(np.array(rot_angle))
# random scale
scale_ratio = np.random.uniform(*self.scale_ratio_range)
self.scale_xyz(results, scale_ratio)
results['gt_bboxes_3d'].scale(scale_ratio)

        # TODO: support translation
        if not self.reverse_angle:
            # undo twice the rotation angle on the yaw to match the box
            # convention used when ``reverse_angle`` is False
            gt_bboxes_3d = results['gt_bboxes_3d'].numpy()
            gt_bboxes_3d[:, 6] -= 2 * rot_angle
            results['gt_bboxes_3d'] = LiDARInstance3DBoxes(
                gt_bboxes_3d, box_dim=9)
return results
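
    # A small numeric sketch (illustrative, assuming ``rotate`` adds the
    # angle to the yaw stored at column 6): with rot_angle = 0.1 and
    # reverse_angle = False, a yaw of 0.5 becomes 0.6 after ``rotate`` and
    # 0.4 after the ``-= 2 * rot_angle`` compensation, a net change of
    # -rot_angle.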

    def rotate_bev_along_z(self, results, angle):
        """Rotate the lidar2cam extrinsics by ``angle`` around the
        z-axis."""
        rot_cos = torch.cos(torch.tensor(angle)).item()
        rot_sin = torch.sin(torch.tensor(angle)).item()
        rot_mat = torch.tensor([[rot_cos, -rot_sin, 0, 0],
                                [rot_sin, rot_cos, 0, 0], [0, 0, 1, 0],
                                [0, 0, 0, 1]])
        rot_mat_inv = torch.inverse(rot_mat)

        num_view = len(results['lidar2cam'])
        for view in range(num_view):
            # equivalent to lidar2cam = rot_mat_inv.T @ lidar2cam
            results['lidar2cam'][view] = (
                torch.tensor(np.array(results['lidar2cam'][view]).T).float()
                @ rot_mat_inv).T.numpy()

        return

    def scale_xyz(self, results, scale_ratio):
        """Scale the lidar2cam extrinsics by ``scale_ratio`` along x, y
        and z."""
        scale_mat = torch.tensor([
            [scale_ratio, 0, 0, 0],
            [0, scale_ratio, 0, 0],
            [0, 0, scale_ratio, 0],
            [0, 0, 0, 1],
        ])
        scale_mat_inv = torch.inverse(scale_mat)

        num_view = len(results['lidar2cam'])
        for view in range(num_view):
            # equivalent to lidar2cam = lidar2cam @ scale_mat_inv; convert
            # to a tensor first to avoid mixing torch and numpy operands
            lidar2cam = torch.tensor(
                np.array(results['lidar2cam'][view])).float()
            results['lidar2cam'][view] = (
                scale_mat_inv.T @ lidar2cam.T).T.numpy()

        return