ViTGaze / tools /utils.py

initial commit

f9561b9 verified 5 months ago

5.46 kB

	from typing import Union, Iterable, Tuple
	import numpy as np
	import torch
	import cv2
	from sklearn.metrics import roc_auc_score
	from sklearn.metrics import average_precision_score


	def auc(heatmap, onehot_im, is_im=True):
	    """Return the ROC AUC of ``heatmap`` scored against ``onehot_im``.

	    Args:
	        heatmap: Predicted scores.
	        onehot_im: Ground-truth labels matching ``heatmap``.
	        is_im: When true, both inputs are flattened to 1-D (using their
	            ``.size``) before scoring; when false they are handed to
	            ``roc_auc_score`` unchanged.

	    Returns:
	        The value returned by ``sklearn.metrics.roc_auc_score``.
	    """
	    if not is_im:
	        return roc_auc_score(onehot_im, heatmap)

	    # Image-shaped inputs: score every pixel as an independent sample.
	    flat_labels = np.reshape(onehot_im, onehot_im.size)
	    flat_scores = np.reshape(heatmap, heatmap.size)
	    return roc_auc_score(flat_labels, flat_scores)


	def ap(label, pred):
	    """Return the average precision of ``pred`` scored against ``label``.

	    Thin wrapper around ``sklearn.metrics.average_precision_score``; both
	    arguments are forwarded unchanged.
	    """
	    score = average_precision_score(label, pred)
	    return score


	def argmax_pts(heatmap):
	    """Locate the maximum of a 2-D heatmap.

	    Args:
	        heatmap: Array exposing ``argmax()`` and ``shape``; it must be 2-D
	            because the unravelled index is unpacked into exactly two values.

	    Returns:
	        ``(x, y)`` as Python floats, i.e. (column, row) of the first maximum.
	    """
	    flat_index = heatmap.argmax()
	    row, col = np.unravel_index(flat_index, heatmap.shape)
	    # Callers expect image-style (x, y) ordering, so swap (row, col).
	    return float(col), float(row)


	def L2_dist(p1, p2):
	    """Return the Euclidean (L2) distance between two 2-D points.

	    Args:
	        p1: Indexable whose first two items are the point's coordinates.
	        p2: Second point, same layout as ``p1``.

	    Returns:
	        ``sqrt((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2)`` as computed by
	        ``np.sqrt``.
	    """
	    delta_0 = p1[0] - p2[0]
	    delta_1 = p1[1] - p2[1]
	    # The exponent operators were missing in the original expression.
	    return np.sqrt(delta_0 ** 2 + delta_1 ** 2)


	def multi_hot_targets(gaze_pts, out_res):
	    """Build a binary map with a 1 at every valid gaze point.

	    Args:
	        gaze_pts: Iterable of points; ``p[0]`` and ``p[1]`` are scaled by the
	            output width and height respectively (presumably normalised
	            coordinates in [0, 1] -- confirm against the caller). Points whose
	            ``p[0]`` is negative are skipped.
	        out_res: ``(width, height)`` of the output map. Plain Python numbers
	            and 0-d tensors/arrays are both accepted.

	    Returns:
	        ``np.ndarray`` of shape ``(height, width)`` filled with 0/1.
	    """
	    # Normalise the resolution once so that plain ints work as well as
	    # tensors (the original relied on the tensor-only ``.float()`` method).
	    w, h = (int(side) for side in out_res)
	    target_map = np.zeros((h, w))
	    for p in gaze_pts:
	        if p[0] < 0:
	            continue
	        # Truncate to a pixel index and clamp so a coordinate of exactly 1.0
	        # lands on the last row/column instead of falling off the map.
	        x = min(int(p[0] * float(w)), w - 1)
	        y = min(int(p[1] * float(h)), h - 1)
	        target_map[y, x] = 1
	    return target_map


	def inverse_transform(tensor: torch.Tensor) -> np.ndarray:
	    """Undo mean/std normalisation and return the first image as BGR uint8.

	    Args:
	        tensor: Batch of normalised RGB images laid out as (N, 3, H, W).

	    Returns:
	        ``np.ndarray`` of shape (H, W, 3), dtype ``uint8``, in BGR channel
	        order (ready for ``cv2.imwrite``). Only sample 0 of the batch is
	        converted.
	    """
	    # Per-channel statistics used for normalisation (these are the commonly
	    # used ImageNet values).
	    mean = torch.tensor([0.485, 0.456, 0.406])
	    std = torch.tensor([0.229, 0.224, 0.225])

	    # Only the first sample is returned, so only that one is moved to the CPU
	    # and de-normalised. (C, H, W) -> (H, W, C) lets mean/std broadcast over
	    # the trailing channel axis.
	    first = tensor.detach()[0].cpu().permute(1, 2, 0)
	    first = first * std + mean

	    # Clip before the uint8 cast: out-of-range floats would otherwise wrap
	    # around (or be platform-dependent) instead of saturating.
	    pixels = np.clip(first.numpy() * 255, 0, 255).astype(np.uint8)
	    return cv2.cvtColor(np.ascontiguousarray(pixels), cv2.COLOR_RGB2BGR)


	def draw(data, heatmap, out_path, on_img=True):
	    """Render a heatmap (optionally overlaid on the input image) to a file.

	    Args:
	        data: Mapping whose ``"images"`` entry is passed to
	            ``inverse_transform`` to recover the displayable image.
	        heatmap: NumPy array scaled by 255 and cast to ``uint8`` before
	            colour-mapping (so values are presumably in [0, 1] -- confirm
	            against the caller).
	        out_path: Destination path handed to ``cv2.imwrite``.
	        on_img: If true, blend the coloured heatmap onto the image; otherwise
	            write the coloured heatmap alone.
	    """
	    img = inverse_transform(data["images"])

	    colored = cv2.applyColorMap((heatmap * 255).astype(np.uint8), cv2.COLORMAP_JET)
	    # cv2.resize takes (width, height); match the image so they can be blended.
	    colored = cv2.resize(colored, (img.shape[1], img.shape[0]))

	    if on_img:
	        # Weights: image 1.0, heatmap 0.5; the trailing 1 is addWeighted's
	        # gamma, a scalar added to every blended value.
	        img = cv2.addWeighted(img, 1, colored, 0.5, 1)
	    else:
	        img = colored
	    cv2.imwrite(out_path, img)


	def draw_origin_img(data, out_path):
	    """Write the input image overlaid with its heatmap and head mask.

	    Args:
	        data: Mapping with ``"images"`` (passed to ``inverse_transform``),
	            ``"heatmaps"`` and ``"head_channels"`` tensors. The latter two are
	            squeezed and scaled by 255 before colour-mapping (so presumably
	            single 2-D maps with values in [0, 1] -- confirm against the
	            caller).
	        out_path: Destination path handed to ``cv2.imwrite``.
	    """
	    img = inverse_transform(data["images"])
	    size = (img.shape[1], img.shape[0])  # cv2.resize expects (width, height)

	    heatmap = data["heatmaps"].squeeze().detach().cpu().numpy()
	    hm = cv2.applyColorMap((heatmap * 255).astype(np.uint8), cv2.COLORMAP_JET)
	    # The colour map paints zero as a solid colour; blank those pixels so
	    # empty regions do not tint the image.
	    hm[heatmap == 0] = 0
	    hm = cv2.resize(hm, size)

	    head = data["head_channels"].squeeze().detach().cpu().numpy()
	    head_channel = cv2.applyColorMap(
	        (head * 255).astype(np.uint8),
	        cv2.COLORMAP_BONE,
	    )
	    head_channel[head < 0.1] = 0
	    # The original resized ``hm`` a second time here and left the head mask
	    # at its native resolution; addWeighted needs both inputs the same size.
	    head_channel = cv2.resize(head_channel, size)

	    ori = cv2.addWeighted(img, 1, hm, 0.5, 1)
	    ori = cv2.addWeighted(ori, 1, head_channel, 0.1, 1)
	    cv2.imwrite(out_path, ori)


	class __Image2MP4:
	    """Callable that writes a sequence of frames to an ``mp4v``-encoded video."""

	    def __init__(self):
	        # FourCC code selecting the "mp4v" codec for every video written.
	        self.Fourcc = cv2.VideoWriter_fourcc(*"mp4v")

	    def __call__(
	        self,
	        frames: Union[Iterable[np.ndarray], str],
	        path: str,
	        fps: float = 30.0,
	        isize: Union[Tuple[int, int], None] = None,
	    ):
	        """Encode ``frames`` into the video file at ``path``.

	        Args:
	            frames: Either an iterable of images (any iterable, including a
	                generator) or the path of a directory whose files are read
	                in sorted-name order.
	            path: Output video path.
	            fps: Frame rate of the output video.
	            isize: ``(width, height)`` of the output. Defaults to the size of
	                the first frame. Every frame is resized to this size.

	        Raises:
	            ValueError: If ``isize`` is omitted and there is no frame to
	                infer it from.
	        """
	        from itertools import chain, islice

	        if isinstance(frames, str):  # directory of img files
	            from os import listdir, path as osp

	            directory = frames
	            loaded = (
	                cv2.imread(osp.join(directory, name), cv2.IMREAD_COLOR)
	                for name in sorted(listdir(directory))
	            )
	            # cv2.imread returns None for files it cannot decode; skip them
	            # rather than crashing later in cv2.resize.
	            frames = (img for img in loaded if img is not None)

	        # Peek at the first frame without requiring ``frames`` to be indexable.
	        frame_iter = iter(frames)
	        head = list(islice(frame_iter, 1))

	        if isize is None:
	            if not head:
	                raise ValueError(
	                    "cannot infer the video size: no frames were provided"
	                )
	            isize = (head[0].shape[1], head[0].shape[0])

	        output_video = cv2.VideoWriter(path, self.Fourcc, fps, isize)
	        try:
	            for frame in chain(head, frame_iter):
	                output_video.write(cv2.resize(frame, isize))
	        finally:
	            # Always finalise the file, even if a frame fails to process.
	            output_video.release()


	# Module-level instance so callers can simply call ``img2mp4(frames, path)``.
	img2mp4 = __Image2MP4()


	def dark_inference(heatmap: np.ndarray, gaussian_kernel: int = 39):
	    """Decode a sub-pixel peak location from a heatmap (DARK refinement).

	    The heatmap is smoothed with a Gaussian, converted to log space, and the
	    integer argmax is shifted by ``-H^{-1} g``, where ``g`` and ``H`` are
	    finite-difference estimates of the gradient and Hessian at the argmax.

	    Args:
	        heatmap: 2-D array of scores.
	        gaussian_kernel: Side length of the Gaussian blur kernel, passed to
	            ``cv2.GaussianBlur``.

	    Returns:
	        ``(x, y)`` of the peak. The values are the plain integer argmax when
	        no refinement is applied (peak within two pixels of the border, or a
	        singular Hessian) and floats otherwise.
	    """
	    pred_x, pred_y = argmax_pts(heatmap)
	    pred_x, pred_y = int(pred_x), int(pred_y)
	    height, width = heatmap.shape[-2:]

	    # Gaussian blur on a zero-padded copy, then crop back to the input size.
	    # Explicit slices are used because ``border:-border`` is empty when
	    # border == 0 (gaussian_kernel == 1).
	    orig_max = heatmap.max()
	    border = (gaussian_kernel - 1) // 2
	    rows = slice(border, border + height)
	    cols = slice(border, border + width)
	    padded = np.zeros((height + 2 * border, width + 2 * border))
	    padded[rows, cols] = heatmap
	    padded = cv2.GaussianBlur(padded, (gaussian_kernel, gaussian_kernel), 0)
	    heatmap = padded[rows, cols].copy()

	    # Rescale so the blurred map keeps the original peak value; skip when the
	    # blurred maximum is zero to avoid a 0/0 division producing NaNs.
	    blurred_max = np.max(heatmap)
	    if blurred_max != 0:
	        heatmap *= orig_max / blurred_max

	    # Log-likelihood (floored so the logarithm stays finite).
	    heatmap = np.log(np.maximum(heatmap, 1e-10))

	    # DARK: the central differences below reach two pixels in each direction,
	    # so the argmax must be at least two pixels away from every border.
	    if 1 < pred_x < width - 2 and 1 < pred_y < height - 2:
	        y, x = pred_y, pred_x
	        dx = 0.5 * (heatmap[y][x + 1] - heatmap[y][x - 1])
	        dy = 0.5 * (heatmap[y + 1][x] - heatmap[y - 1][x])
	        dxx = 0.25 * (heatmap[y][x + 2] - 2 * heatmap[y][x] + heatmap[y][x - 2])
	        dxy = 0.25 * (
	            heatmap[y + 1][x + 1]
	            - heatmap[y - 1][x + 1]
	            - heatmap[y + 1][x - 1]
	            + heatmap[y - 1][x - 1]
	        )
	        dyy = 0.25 * (heatmap[y + 2][x] - 2 * heatmap[y][x] + heatmap[y - 2][x])

	        # Only refine when the Hessian is invertible.
	        if dxx * dyy - dxy ** 2 != 0:
	            # Plain ndarrays instead of np.matrix, which NumPy no longer
	            # recommends; np.linalg.inv is what ``matrix.I`` uses for square
	            # input.
	            derivative = np.array([dx, dy])
	            hessian = np.array([[dxx, dxy], [dxy, dyy]])
	            offset_x, offset_y = -np.linalg.inv(hessian) @ derivative
	            pred_x += offset_x
	            pred_y += offset_y
	    return pred_x, pred_y