ViTGaze / tools /utils.py
yhsong's picture
initial commit
f9561b9 verified
raw
history blame
5.46 kB
from typing import Union, Iterable, Tuple
import numpy as np
import torch
import cv2
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
def auc(heatmap, onehot_im, is_im=True):
    """Compute the ROC AUC of ``heatmap`` scores against ``onehot_im`` labels.

    Args:
        heatmap: Predicted scores.
        onehot_im: Ground-truth labels.
        is_im: When true, both inputs are flattened to 1-D (using their
            ``.size``) before scoring; otherwise they are passed to
            ``roc_auc_score`` unchanged.

    Returns:
        The value returned by ``sklearn.metrics.roc_auc_score``.
    """
    if not is_im:
        return roc_auc_score(onehot_im, heatmap)

    flat_labels = np.reshape(onehot_im, onehot_im.size)
    flat_scores = np.reshape(heatmap, heatmap.size)
    return roc_auc_score(flat_labels, flat_scores)
def ap(label, pred):
    """Return the average precision of ``pred`` scores against ``label``.

    Thin wrapper around ``sklearn.metrics.average_precision_score``.
    """
    score = average_precision_score(label, pred)
    return score
def argmax_pts(heatmap):
    """Locate the maximum of a 2-D heatmap.

    Returns:
        ``(x, y)`` as Python floats, where ``x`` is the column index and
        ``y`` the row index of the first maximal element.
    """
    flat_index = heatmap.argmax()
    row, col = np.unravel_index(flat_index, heatmap.shape)
    return float(col), float(row)
def L2_dist(p1, p2):
    """Return the Euclidean distance between the first two coordinates of ``p1`` and ``p2``."""
    delta_x = p1[0] - p2[0]
    delta_y = p1[1] - p2[1]
    return np.sqrt(delta_x ** 2 + delta_y ** 2)
def multi_hot_targets(gaze_pts, out_res):
    """Build a binary map with a 1 at every valid gaze point.

    Args:
        gaze_pts: Iterable of points; ``p[0]`` and ``p[1]`` are scaled by the
            map width and height respectively (presumably normalised
            coordinates -- confirm against the caller). Points whose first
            coordinate is negative are skipped.
        out_res: ``(width, height)`` of the output map. Entries may be Python
            ints, NumPy integers or 0-d torch tensors.

    Returns:
        ``np.ndarray`` of shape ``(height, width)`` holding zeros and ones.
    """
    # int() works for plain ints as well as 0-d tensors; the previous
    # ``w.float()`` call only worked when out_res held torch tensors.
    w, h = (int(side) for side in out_res)
    target_map = np.zeros((h, w))
    for p in gaze_pts:
        if p[0] >= 0:
            # Clamp so that a coordinate of exactly 1.0 lands on the last cell.
            x = min(int(p[0] * w), w - 1)
            y = min(int(p[1] * h), h - 1)
            target_map[y, x] = 1
    return target_map
def inverse_transform(tensor: torch.Tensor) -> np.ndarray:
    """Undo the per-channel normalisation and return the first image as BGR.

    Args:
        tensor: 4-D tensor with the channel axis at dim 1; it is moved to the
            last axis before the hard-coded ``std`` / ``mean`` are applied.

    Returns:
        The first element of the batch as a ``uint8`` array, converted from
        RGB to BGR channel order for OpenCV.
    """
    channels_last = tensor.detach().cpu().permute(0, 2, 3, 1)
    std = torch.tensor([0.229, 0.224, 0.225])
    mean = torch.tensor([0.485, 0.456, 0.406])
    denormalized = channels_last * std + mean
    rgb_batch = (denormalized.numpy() * 255).astype(np.uint8)
    return cv2.cvtColor(rgb_batch[0], cv2.COLOR_RGB2BGR)
def draw(data, heatmap, out_path, on_img=True):
    """Write a JET-coloured rendering of ``heatmap`` to ``out_path``.

    Args:
        data: Mapping providing ``"images"`` (passed to ``inverse_transform``)
            and ``"head_channels"``.
        heatmap: Array that is scaled by 255 and cast to ``uint8`` before
            colouring (presumably values in [0, 1] -- confirm with caller).
        out_path: Destination file handed to ``cv2.imwrite``.
        on_img: When true the coloured heatmap is blended onto the image with
            weight 0.5; otherwise the coloured heatmap alone is written.
    """
    img = inverse_transform(data["images"])

    # Coloured head mask; the overlay that would consume it is currently
    # disabled, so the result is not used further.
    head_mask = data["head_channels"].squeeze().detach().cpu().numpy()
    head_channel = cv2.applyColorMap(
        (head_mask * 255).astype(np.uint8), cv2.COLORMAP_BONE
    )

    img_height, img_width = img.shape[0], img.shape[1]
    colored = cv2.applyColorMap((heatmap * 255).astype(np.uint8), cv2.COLORMAP_JET)
    colored = cv2.resize(colored, (img_width, img_height))

    canvas = cv2.addWeighted(img, 1, colored, 0.5, 1) if on_img else colored
    cv2.imwrite(out_path, canvas)
def draw_origin_img(data, out_path):
    """Write the input image overlaid with its heatmap and head-channel map.

    Args:
        data: Mapping providing ``"images"`` (passed to ``inverse_transform``),
            ``"heatmaps"`` and ``"head_channels"`` tensors.
        out_path: Destination file handed to ``cv2.imwrite``.
    """
    img = inverse_transform(data["images"])
    img_size = (img.shape[1], img.shape[0])  # cv2.resize expects (width, height)

    heat = data["heatmaps"].squeeze().detach().cpu().numpy()
    hm = cv2.applyColorMap((heat * 255).astype(np.uint8), cv2.COLORMAP_JET)
    # The colour map paints zeros too; black them out so that empty regions
    # do not tint the image.
    hm[heat == 0] = 0
    hm = cv2.resize(hm, img_size)

    head = data["head_channels"].squeeze().detach().cpu().numpy()
    head_channel = cv2.applyColorMap((head * 255).astype(np.uint8), cv2.COLORMAP_BONE)
    head_channel[head < 0.1] = 0
    # addWeighted needs equally sized inputs, so bring the head map to the
    # image size as well (previously the heatmap was resized a second time
    # and the head map not at all).
    head_channel = cv2.resize(head_channel, img_size)

    ori = cv2.addWeighted(img, 1, hm, 0.5, 1)
    ori = cv2.addWeighted(ori, 1, head_channel, 0.1, 1)
    cv2.imwrite(out_path, ori)
class __Image2MP4:
    """Callable that encodes a sequence of frames into a video with the ``mp4v`` codec."""

    def __init__(self):
        self.Fourcc = cv2.VideoWriter_fourcc(*"mp4v")

    def __call__(
        self,
        frames: Union[Iterable[np.ndarray], str],
        path: str,
        fps: float = 30.0,
        isize: Union[Tuple[int, int], None] = None,
    ):
        """Write ``frames`` to the video file ``path``.

        Args:
            frames: Either an iterable of images (any iterable, including a
                generator) or the path of a directory whose files are read in
                sorted name order.
            path: Output video file.
            fps: Frame rate passed to ``cv2.VideoWriter``.
            isize: ``(width, height)`` of the video. When ``None`` it is taken
                from the first frame. Every frame is resized to this size.

        Raises:
            IndexError: If ``isize`` is ``None`` and there is no frame to
                infer the size from.
        """
        from itertools import chain

        if isinstance(frames, str):  # directory of img files
            from os import listdir, path as osp

            frame_dir = frames
            loaded = (
                cv2.imread(osp.join(frame_dir, name), cv2.IMREAD_COLOR)
                for name in sorted(listdir(frame_dir))
            )
            # cv2.imread returns None for files it cannot decode; skip those
            # instead of failing later inside cv2.resize.
            frames = (frame for frame in loaded if frame is not None)

        frame_iter = iter(frames)
        if isize is None:
            first = next(frame_iter, None)
            if first is None:
                raise IndexError(
                    "cannot infer the video size: no frames were provided"
                )
            isize = (first.shape[1], first.shape[0])
            # Put the peeked frame back in front of the remaining ones.
            frame_iter = chain([first], frame_iter)

        output_video = cv2.VideoWriter(path, self.Fourcc, fps, isize)
        try:
            for frame in frame_iter:
                output_video.write(cv2.resize(frame, isize))
        finally:
            # Release the writer even if a frame fails to resize or encode.
            output_video.release()


# Module-level instance so callers can use ``img2mp4(frames, path)`` directly.
img2mp4 = __Image2MP4()
def dark_inference(heatmap: np.ndarray, gaussian_kernel: int = 39):
    """Refine the heatmap argmax to sub-pixel precision (DARK decoding).

    The heatmap is Gaussian-blurred on a zero-padded canvas, rescaled to its
    original maximum, converted to log space, and a second-order Taylor
    expansion around the integer argmax yields the offset to add.

    Args:
        heatmap: 2-D array of scores.
        gaussian_kernel: Side length of the Gaussian kernel; must be a
            positive odd number as required by ``cv2.GaussianBlur``.

    Returns:
        ``(pred_x, pred_y)``. These are the integer argmax coordinates when
        the peak lies within two pixels of the border or the Hessian is
        singular, and the refined floating-point coordinates otherwise.
    """
    pred_x, pred_y = argmax_pts(heatmap)
    pred_x, pred_y = int(pred_x), int(pred_y)
    height, width = heatmap.shape[-2:]

    # Gaussian blur
    orig_max = heatmap.max()
    border = (gaussian_kernel - 1) // 2
    # Explicit end indices keep the slices valid when border == 0
    # (``[0:-0]`` would be empty).
    rows = slice(border, border + height)
    cols = slice(border, border + width)
    padded = np.zeros((height + 2 * border, width + 2 * border))
    padded[rows, cols] = heatmap
    padded = cv2.GaussianBlur(padded, (gaussian_kernel, gaussian_kernel), 0)
    heatmap = padded[rows, cols].copy()
    blurred_max = np.max(heatmap)
    if blurred_max != 0:  # avoid 0/0 on an all-zero map
        heatmap *= orig_max / blurred_max

    # Log-likelihood
    heatmap = np.log(np.maximum(heatmap, 1e-10))

    # DARK: the finite differences below reach two pixels in every direction,
    # so the peak must be at least two pixels away from each border.
    if 1 < pred_x < width - 2 and 1 < pred_y < height - 2:
        dx = 0.5 * (heatmap[pred_y, pred_x + 1] - heatmap[pred_y, pred_x - 1])
        dy = 0.5 * (heatmap[pred_y + 1, pred_x] - heatmap[pred_y - 1, pred_x])
        dxx = 0.25 * (
            heatmap[pred_y, pred_x + 2]
            - 2 * heatmap[pred_y, pred_x]
            + heatmap[pred_y, pred_x - 2]
        )
        dxy = 0.25 * (
            heatmap[pred_y + 1, pred_x + 1]
            - heatmap[pred_y - 1, pred_x + 1]
            - heatmap[pred_y + 1, pred_x - 1]
            + heatmap[pred_y - 1, pred_x - 1]
        )
        dyy = 0.25 * (
            heatmap[pred_y + 2, pred_x]
            - 2 * heatmap[pred_y, pred_x]
            + heatmap[pred_y - 2, pred_x]
        )
        det = dxx * dyy - dxy ** 2
        if det != 0:
            # offset = -H^{-1} @ [dx, dy] with the 2x2 symmetric Hessian
            # H = [[dxx, dxy], [dxy, dyy]] inverted in closed form.
            offset_x = -(dyy * dx - dxy * dy) / det
            offset_y = -(dxx * dy - dxy * dx) / det
            pred_x += offset_x
            pred_y += offset_y
    return pred_x, pred_y