|
|
|
import numpy as np |
|
import torch |
|
from mmcv.transforms import BaseTransform |
|
from PIL import Image |
|
|
|
from mmdet3d.registry import TRANSFORMS |
|
from mmdet3d.structures.bbox_3d import LiDARInstance3DBoxes |
|
|
|
|
|
@TRANSFORMS.register_module()
class ResizeCropFlipImage(BaseTransform):
    """Randomly resize, crop, flip and rotate the multi-view images.

    One set of augmentation parameters is sampled per call and applied to
    every camera view. Each view's ``cam2img`` intrinsic matrix is updated
    with the matching 2D image-domain transform (``ida_mat``) so that the
    3D-to-2D projection stays consistent with the augmented pixels.

    Args:
        data_aug_conf (dict, optional): Augmentation configuration with keys
            ``H``, ``W`` (source image size), ``final_dim`` ((fH, fW) output
            size), ``resize_lim``, ``bot_pct_lim``, ``rot_lim`` (sampling
            ranges) and ``rand_flip`` (bool). Defaults to None.
        training (bool): If True, parameters are sampled randomly; otherwise
            deterministic (center/mean) parameters are used.
            Defaults to True.
    """

    def __init__(self, data_aug_conf=None, training=True):
        self.data_aug_conf = data_aug_conf
        self.training = training

    def transform(self, results):
        """Augment all views and update their intrinsics accordingly.

        Args:
            results (dict): Result dict from the loading pipeline. Must
                contain ``img`` (list of per-view images), ``cam2img``
                (list of per-view intrinsics) and ``lidar2cam``.
        Returns:
            dict: Updated result dict with augmented ``img`` and 4x4
            ``cam2img`` matrices.
        """

        imgs = results['img']
        N = len(imgs)
        new_imgs = []
        # One parameter set shared by all N views.
        resize, resize_dims, crop, flip, rotate = self._sample_augmentation()
        results['lidar2cam'] = np.array(results['lidar2cam'])
        for i in range(N):
            # Pad the (possibly 3x3) intrinsic matrix into a 4x4 matrix.
            intrinsic = np.array(results['cam2img'][i])
            viewpad = np.eye(4)
            viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic
            results['cam2img'][i] = viewpad
            img = Image.fromarray(np.uint8(imgs[i]))

            img, ida_mat = self._img_transform(
                img,
                resize=resize,
                resize_dims=resize_dims,
                crop=crop,
                flip=flip,
                rotate=rotate,
            )
            new_imgs.append(np.array(img).astype(np.float32))
            # Compose the 2D augmentation transform into the intrinsics so
            # projection into the augmented image remains valid.
            results['cam2img'][
                i][:3, :3] = ida_mat @ results['cam2img'][i][:3, :3]

        results['img'] = new_imgs

        return results

    def _get_rot(self, h):
        # 2x2 rotation matrix for angle ``h`` in radians. The sign layout
        # (sin above -sin) matches the convention used by ``_img_transform``
        # to mirror PIL's counter-clockwise ``Image.rotate`` in pixel coords.

        return torch.Tensor([
            [np.cos(h), np.sin(h)],
            [-np.sin(h), np.cos(h)],
        ])

    def _img_transform(self, img, resize, resize_dims, crop, flip, rotate):
        """Augment a single PIL image and accumulate the equivalent 3x3
        affine matrix ``ida_mat`` mapping original to augmented pixels.

        Args:
            img (PIL.Image.Image): Input image.
            resize (float): Isotropic resize factor.
            resize_dims (tuple): (W, H) size after resize.
            crop (tuple): (left, top, right, bottom) crop box.
            flip (bool): Whether to flip horizontally.
            rotate (float): Rotation angle in degrees.
        Returns:
            tuple: ``(img, ida_mat)`` — augmented image and 3x3 transform.
        """
        ida_rot = torch.eye(2)
        ida_tran = torch.zeros(2)

        # Pixel-domain ops, applied in order: resize -> crop -> flip -> rotate.
        img = img.resize(resize_dims)
        img = img.crop(crop)
        if flip:
            img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
        img = img.rotate(rotate)

        # Mirror the same ops as an affine transform: x' = ida_rot @ x + ida_tran.
        ida_rot *= resize
        ida_tran -= torch.Tensor(crop[:2])
        if flip:
            # Horizontal mirror about the crop's vertical center line.
            A = torch.Tensor([[-1, 0], [0, 1]])
            b = torch.Tensor([crop[2] - crop[0], 0])
            ida_rot = A.matmul(ida_rot)
            ida_tran = A.matmul(ida_tran) + b
        # Rotate about the center of the cropped image: b = (I - A) @ center.
        A = self._get_rot(rotate / 180 * np.pi)
        b = torch.Tensor([crop[2] - crop[0], crop[3] - crop[1]]) / 2
        b = A.matmul(-b) + b
        ida_rot = A.matmul(ida_rot)
        ida_tran = A.matmul(ida_tran) + b
        # Pack the 2x2 rotation and 2-vector translation into homogeneous form.
        ida_mat = torch.eye(3)
        ida_mat[:2, :2] = ida_rot
        ida_mat[:2, 2] = ida_tran
        return img, ida_mat

    def _sample_augmentation(self):
        """Sample (training) or derive deterministic (eval) parameters.

        Returns:
            tuple: ``(resize, resize_dims, crop, flip, rotate)``.
        """
        H, W = self.data_aug_conf['H'], self.data_aug_conf['W']
        fH, fW = self.data_aug_conf['final_dim']
        if self.training:
            resize = np.random.uniform(*self.data_aug_conf['resize_lim'])
            resize_dims = (int(W * resize), int(H * resize))
            newW, newH = resize_dims
            # Vertical crop keeps the bottom of the image minus a random
            # bottom margin; horizontal crop offset is uniform in range.
            crop_h = int(
                (1 - np.random.uniform(*self.data_aug_conf['bot_pct_lim'])) *
                newH) - fH
            crop_w = int(np.random.uniform(0, max(0, newW - fW)))
            crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
            flip = False
            if self.data_aug_conf['rand_flip'] and np.random.choice([0, 1]):
                flip = True
            rotate = np.random.uniform(*self.data_aug_conf['rot_lim'])
        else:
            # Deterministic eval-time parameters: smallest resize covering
            # the target size, horizontally centered crop, no flip/rotation.
            resize = max(fH / H, fW / W)
            resize_dims = (int(W * resize), int(H * resize))
            newW, newH = resize_dims
            crop_h = int(
                (1 - np.mean(self.data_aug_conf['bot_pct_lim'])) * newH) - fH
            crop_w = int(max(0, newW - fW) / 2)
            crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
            flip = False
            rotate = 0
        return resize, resize_dims, crop, flip, rotate
|
|
|
|
|
@TRANSFORMS.register_module()
class GlobalRotScaleTransImage(BaseTransform):
    """Randomly rotate (about z) and scale the whole 3D scene.

    Ground-truth 3D boxes and the per-view ``lidar2cam`` extrinsics are
    updated consistently. NOTE(review): despite the class name, no
    translation is applied — ``translation_std`` is stored but never used,
    and ``training`` is likewise unused.

    Args:
        rot_range (list[float]): Range (radians) of the random rotation
            angle about the z axis. Defaults to [-0.3925, 0.3925].
        scale_ratio_range (list[float]): Range of the random uniform scale
            ratio. Defaults to [0.95, 1.05].
        translation_std (list[float]): Currently unused (no translation is
            performed). Defaults to [0, 0, 0].
        reverse_angle (bool): If True, boxes are rotated by the negated
            angle; otherwise the box yaw is post-corrected by
            ``-2 * rot_angle`` (see ``transform``). Defaults to False.
        training (bool): Currently unused. Defaults to True.
    """

    def __init__(
        self,
        rot_range=[-0.3925, 0.3925],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0],
        reverse_angle=False,
        training=True,
    ):

        self.rot_range = rot_range
        self.scale_ratio_range = scale_ratio_range
        self.translation_std = translation_std

        self.reverse_angle = reverse_angle
        self.training = training

    def transform(self, results):
        """Apply a random global z-rotation and uniform scaling.

        Args:
            results (dict): Result dict from the loading pipeline. Must
                contain ``lidar2cam`` and ``gt_bboxes_3d``.
        Returns:
            dict: Updated result dict (also modified in place).
        """

        # Random rotation about the z (up) axis, applied to extrinsics
        # first and then to the ground-truth boxes.
        rot_angle = np.random.uniform(*self.rot_range)

        self.rotate_bev_along_z(results, rot_angle)
        if self.reverse_angle:
            rot_angle *= -1
        results['gt_bboxes_3d'].rotate(np.array(rot_angle))

        # Random uniform scaling of the whole scene.
        scale_ratio = np.random.uniform(*self.scale_ratio_range)
        self.scale_xyz(results, scale_ratio)
        results['gt_bboxes_3d'].scale(scale_ratio)

        if not self.reverse_angle:
            # NOTE(review): appears to compensate for the yaw convention of
            # ``LiDARInstance3DBoxes.rotate`` by shifting the yaw channel
            # (index 6) by -2 * rot_angle; boxes are rebuilt assuming 9-dim
            # (box + velocity) layout — confirm against the dataset config.
            gt_bboxes_3d = results['gt_bboxes_3d'].numpy()
            gt_bboxes_3d[:, 6] -= 2 * rot_angle
            results['gt_bboxes_3d'] = LiDARInstance3DBoxes(
                gt_bboxes_3d, box_dim=9)

        return results

    def rotate_bev_along_z(self, results, angle):
        """Fold a z-axis rotation by ``angle`` (radians) into every
        ``lidar2cam`` extrinsic. Modifies ``results`` in place."""
        rot_cos = torch.cos(torch.tensor(angle))
        rot_sin = torch.sin(torch.tensor(angle))

        # Homogeneous 4x4 rotation matrix about the z axis.
        rot_mat = torch.tensor([[rot_cos, -rot_sin, 0, 0],
                                [rot_sin, rot_cos, 0, 0], [0, 0, 1, 0],
                                [0, 0, 0, 1]])
        rot_mat_inv = torch.inverse(rot_mat)
        num_view = len(results['lidar2cam'])
        for view in range(num_view):
            # Computes (M.T @ R_inv).T, i.e. R_inv.T @ M. NOTE(review): the
            # double transpose relies on the project's lidar2cam storage
            # convention — verify against how these matrices are consumed.
            results['lidar2cam'][view] = (
                torch.tensor(np.array(results['lidar2cam'][view]).T).float()
                @ rot_mat_inv).T.numpy()

        return

    def scale_xyz(self, results, scale_ratio):
        """Fold a uniform xyz scale by ``scale_ratio`` into every
        ``lidar2cam`` extrinsic. Modifies ``results`` in place."""
        # Homogeneous 4x4 uniform scale matrix (name ``rot_mat`` kept for
        # symmetry with ``rotate_bev_along_z``; it is a scale, not a rotation).
        rot_mat = torch.tensor([
            [scale_ratio, 0, 0, 0],
            [0, scale_ratio, 0, 0],
            [0, 0, scale_ratio, 0],
            [0, 0, 0, 1],
        ])

        rot_mat_inv = torch.inverse(rot_mat)

        num_view = len(results['lidar2cam'])
        for view in range(num_view):
            # NOTE(review): assumes entries already expose ``.T`` (numpy
            # arrays); ``torch.tensor`` on a tensor-valued expression copies
            # and warns — ``.clone().detach()`` is the recommended form.
            results['lidar2cam'][view] = (torch.tensor(
                rot_mat_inv.T
                @ results['lidar2cam'][view].T).float()).T.numpy()

        return
|
|