# Source revision: f9561b9
from typing import Tuple
import torch
from torchvision import transforms
import numpy as np
import pandas as pd
def to_numpy(tensor: torch.Tensor):
    """Return ``tensor`` as a numpy array.

    Torch tensors are detached and moved to the CPU before conversion;
    numpy arrays are passed through unchanged; any other input raises.

    Raises:
        ValueError: if the input is neither a torch tensor nor a numpy array.
    """
    if torch.is_tensor(tensor):
        return tensor.cpu().detach().numpy()
    if type(tensor).__module__ == "numpy":
        return tensor
    raise ValueError("Cannot convert {} to numpy array".format(type(tensor)))
def to_torch(ndarray: np.ndarray):
    """Return ``ndarray`` as a torch tensor.

    Torch tensors are passed through unchanged; numpy arrays are wrapped
    (zero-copy) via ``torch.from_numpy``; any other input raises.

    Raises:
        ValueError: if the input is neither a numpy array nor a torch tensor.
    """
    if torch.is_tensor(ndarray):
        return ndarray
    if type(ndarray).__module__ == "numpy":
        return torch.from_numpy(ndarray)
    raise ValueError("Cannot convert {} to torch tensor".format(type(ndarray)))
def get_head_box_channel(
    x_min, y_min, x_max, y_max, width, height, resolution, coordconv=False
):
    """Encode a head bounding box as a single-channel spatial map.

    The box (x_min, y_min, x_max, y_max), given in pixel coordinates of a
    (width x height) image, is rescaled onto a (resolution x resolution)
    grid. The returned torch tensor has shape (resolution, resolution):
    with ``coordconv=False`` it is a binary mask that is 1 inside the box
    and 0 elsewhere; with ``coordconv=True`` it is a normalized coordinate
    ramp (row index + column index, scaled to [0, 1]) that is zeroed
    inside the box.
    """
    # Normalize the box to [0, 1] and scale it onto the output grid
    # (still fractional at this point).
    head_box = (
        np.array([x_min / width, y_min / height, x_max / width, y_max / height])
        * resolution
    )
    # Truncate to integer cell indices and clamp into the valid grid range.
    int_head_box = head_box.astype(int)
    int_head_box = np.clip(int_head_box, 0, resolution - 1)
    # Degenerate box (zero width after truncation): widen it to one cell.
    # At a grid border the box is pushed inward; otherwise the side whose
    # fractional coordinate was truncated the most is expanded.
    if int_head_box[0] == int_head_box[2]:
        if int_head_box[0] == 0:
            int_head_box[2] = 1
        elif int_head_box[2] == resolution - 1:
            int_head_box[0] = resolution - 2
        elif abs(head_box[2] - int_head_box[2]) > abs(head_box[0] - int_head_box[0]):
            int_head_box[2] += 1
        else:
            int_head_box[0] -= 1
    # Same treatment for zero height.
    if int_head_box[1] == int_head_box[3]:
        if int_head_box[1] == 0:
            int_head_box[3] = 1
        elif int_head_box[3] == resolution - 1:
            int_head_box[1] = resolution - 2
        elif abs(head_box[3] - int_head_box[3]) > abs(head_box[1] - int_head_box[1]):
            int_head_box[3] += 1
        else:
            int_head_box[1] -= 1
    head_box = int_head_box
    if coordconv:
        # Build a (resolution, resolution) ramp where cell (i, j) = i + j,
        # normalized by its global maximum, then zero out the head region.
        # np.squeeze collapses the extra axis introduced by the [unit + i]
        # wrapping below.
        unit = np.array(range(0, resolution), dtype=np.float32)
        head_channel = []
        for i in unit:
            head_channel.append([unit + i])
        head_channel = np.squeeze(np.array(head_channel)) / float(np.max(head_channel))
        head_channel[head_box[1] : head_box[3], head_box[0] : head_box[2]] = 0
    else:
        # Binary mask: rows index y (box[1]:box[3]), columns index x.
        head_channel = np.zeros((resolution, resolution), dtype=np.float32)
        head_channel[head_box[1] : head_box[3], head_box[0] : head_box[2]] = 1
    head_channel = torch.from_numpy(head_channel)
    return head_channel
def draw_labelmap(img, pt, sigma, type="Gaussian"):
    """Draw a 2D Gaussian (or Cauchy) bump centered at ``pt`` onto ``img``.

    Adopted from
    https://github.com/anewell/pose-hg-train/blob/master/src/pypose/draw.py

    Args:
        img: 2D heatmap (torch tensor or numpy array), indexed as [y, x].
        pt: (x, y) center of the bump, in pixel coordinates of ``img``.
        sigma: spread of the kernel; the support is 3*sigma on each side.
        type: "Gaussian" or "Cauchy".

    Returns:
        torch.Tensor: the heatmap with the bump added. The kernel is NOT
        normalized — its center value is 1.

    Raises:
        ValueError: if ``type`` is not "Gaussian" or "Cauchy".
    """
    img = to_numpy(img)

    # Kernel window: upper-left (inclusive) and bottom-right (exclusive).
    size = int(6 * sigma + 1)
    ul = [int(pt[0] - 3 * sigma), int(pt[1] - 3 * sigma)]
    br = [ul[0] + size, ul[1] + size]
    # If no part of the kernel overlaps the image, return it unchanged.
    if ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or br[0] < 0 or br[1] < 0:
        return to_torch(img)

    # Generate the kernel on a size x size grid centered at (x0, y0).
    x = np.arange(0, size, 1, float)
    y = x[:, np.newaxis]
    x0 = y0 = size // 2
    if type == "Gaussian":
        g = np.exp(-((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma**2))
    elif type == "Cauchy":
        g = sigma / (((x - x0) ** 2 + (y - y0) ** 2 + sigma**2) ** 1.5)
    else:
        # Previously an unknown type fell through both branches and raised
        # a confusing NameError on `g`; fail fast with a clear message.
        raise ValueError("Unknown labelmap type: {}".format(type))

    # Clip the kernel window to the image: source range within the kernel...
    g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
    g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
    # ...and destination range within the image.
    img_x = max(0, ul[0]), min(br[0], img.shape[1])
    img_y = max(0, ul[1]), min(br[1], img.shape[0])
    img[img_y[0] : img_y[1], img_x[0] : img_x[1]] += g[g_y[0] : g_y[1], g_x[0] : g_x[1]]
    return to_torch(img)
def draw_labelmap_no_quant(img, pt, sigma, type="Gaussian"):
    """Draw a 2D Gaussian (or Cauchy) bump at ``pt`` without quantizing.

    Unlike ``draw_labelmap``, the kernel is evaluated over the whole grid
    against the un-truncated (float) center, then truncated to zero where
    the squared distance exceeds ``10 * sigma**2``.

    Args:
        img: 2D heatmap (torch tensor or numpy array), indexed as [y, x].
            NOTE(review): `img += g` assumes a float dtype — confirm callers.
        pt: (x, y) center of the bump, in pixel coordinates of ``img``.
        sigma: spread of the kernel.
        type: "Gaussian" or "Cauchy".

    Returns:
        torch.Tensor: the heatmap with the bump added in place.

    Raises:
        ValueError: if ``type`` is not "Gaussian" or "Cauchy".
    """
    img = to_numpy(img)
    shape = img.shape
    x = np.arange(shape[0])
    y = np.arange(shape[1])
    # With indexing="ij", xx varies along rows and yy along columns, so
    # rows are compared against pt[1] (y) and columns against pt[0] (x).
    xx, yy = np.meshgrid(x, y, indexing="ij")
    dist_matrix = (yy - float(pt[0])) ** 2 + (xx - float(pt[1])) ** 2
    if type == "Gaussian":
        g = np.exp(-dist_matrix / (2 * sigma**2))
    elif type == "Cauchy":
        g = sigma / ((dist_matrix + sigma**2) ** 1.5)
    else:
        # Previously an unknown type left `g` undefined and raised a
        # confusing NameError below; fail fast with a clear message.
        raise ValueError("Unknown labelmap type: {}".format(type))
    # Truncate negligible far-field values to keep the map sparse.
    g[dist_matrix > 10 * sigma**2] = 0
    img += g
    return to_torch(img)
def multi_hot_targets(gaze_pts, out_res):
    """Rasterize normalized gaze points into a binary (h, w) target map.

    Args:
        gaze_pts: iterable of (x, y) points normalized to [0, 1]; a point
            with a negative x coordinate is treated as padding and skipped.
        out_res: (w, h) output resolution.

    Returns:
        np.ndarray: (h, w) array with 1 at each valid gaze cell, else 0.
    """
    w, h = out_res
    target_map = np.zeros((h, w))
    for pt in gaze_pts:
        if pt[0] < 0:
            continue  # padded / invalid point
        col = min(int(pt[0] * float(w)), w - 1)
        row = min(int(pt[1] * float(h)), h - 1)
        target_map[row, col] = 1
    return target_map
def get_cone(tgt, src, wh, theta=150):
    """Rasterize a gaze cone from ``src`` toward ``tgt`` over a pixel grid.

    Args:
        tgt: gaze target point, normalized coordinates (scaled by ``wh``).
        src: eye position, normalized coordinates (scaled by ``wh``).
        wh: (width, height) of the output grid in pixels.
        theta: full cone aperture in degrees.

    Returns:
        torch.Tensor: (1, H, W) float map of cosine similarity between each
        pixel direction and the gaze direction, zeroed outside the cone.
    """
    eye_px = src * wh
    gaze_px = tgt * wh
    # Grid of (x, y) pixel coordinates, shape (H, W, 2).
    coords = np.stack(
        np.meshgrid(np.arange(wh[0]), np.arange(wh[1])),
        -1,
    )
    gaze_dir = gaze_px - eye_px
    pixel_dirs = coords - eye_px
    # Cosine similarity between every pixel direction and the gaze ray;
    # the eye pixel itself yields 0/0 = nan, mapped to 1 (fully inside).
    numer = np.sum(pixel_dirs * gaze_dir, axis=-1)
    denom = np.sqrt(np.sum(gaze_dir**2)) * np.sqrt(np.sum(pixel_dirs**2, axis=-1))
    cones = np.nan_to_num(numer / denom, nan=1)
    # Keep only pixels whose angle to the ray is below half the aperture.
    half_angle = (theta * (np.pi / 180)) / 2
    inside = np.arccos(cones) < half_angle
    cones[~inside] = 0
    cones = np.clip(cones, 0, None)
    return torch.from_numpy(cones).unsqueeze(0).float()
def get_transform(
    input_resolution: int, mean: Tuple[int, int, int], std: Tuple[int, int, int]
):
    """Build the standard preprocessing pipeline: resize -> tensor -> normalize.

    Args:
        input_resolution: side length of the square resize target.
        mean: per-channel normalization means.
        std: per-channel normalization standard deviations.

    Returns:
        A ``transforms.Compose`` applying the three steps in order.
    """
    pipeline = [
        transforms.Resize((input_resolution, input_resolution)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
    return transforms.Compose(pipeline)
def smooth_by_conv(window_size, df, col):
    """Smooth column ``col`` of ``df`` with a ``window_size`` moving average.

    The signal is padded at both ends with its edge values so that (for an
    odd ``window_size``) the output has the same length as the input.

    Args:
        window_size: moving-average window length in samples (odd expected).
        df: pandas DataFrame holding the signal.
        col: name of the column to smooth.

    Returns:
        np.ndarray: the smoothed signal.
    """
    pad = window_size // 2
    series = df[col]
    # Bug fix: the old code concatenated pad DataFrames (column name 0) with
    # the Series (column name ``col``); pandas outer-joined the mismatched
    # columns into a 2-column NaN-padded frame that .squeeze() could not
    # reduce and np.convolve rejected. Padding with same-shaped Series keeps
    # the concatenation one-dimensional.
    head = pd.Series([series.iloc[0]] * pad)
    tail = pd.Series([series.iloc[-1]] * pad)
    padded_track = pd.concat([head, series, tail], ignore_index=True)
    kernel = np.ones(window_size) / window_size
    return np.convolve(padded_track.to_numpy(), kernel, mode="valid")