# Copyright (c) OpenMMLab. All rights reserved. from typing import Dict, List, Tuple import numpy as np from mmengine.logging import MMLogger, print_log PQReturnsType = Tuple[np.double, np.double, np.ndarray, np.ndarray, np.ndarray] class EvalPanoptic: r"""Evaluate panoptic results for Semantickitti and NuScenes. Please refer to the `semantic kitti api `_ for more details Args: classes (list): Classes used in the dataset. thing_classes (list): Thing classes used in the dataset. stuff_classes (list): Stuff classes used in the dataset. min_num_points (int): Minimum number of points of an object to be counted as ground truth in evaluation. id_offset (int): Offset for instance ids to concat with semantic labels. label2cat (dict[str]): Mapping from label to category. ignore_index (list[int]): Indices of ignored classes in evaluation. logger (logging.Logger | str, optional): Logger used for printing. Defaults to None. """ def __init__(self, classes: List[str], thing_classes: List[str], stuff_classes: List[str], min_num_points: int, id_offset: int, label2cat: Dict[str, str], ignore_index: List[str], logger: MMLogger = None): self.classes = classes self.thing_classes = thing_classes self.stuff_classes = stuff_classes self.ignore_index = np.array(ignore_index, dtype=int) self.num_classes = len(classes) self.label2cat = label2cat self.logger = logger self.include = np.array( [n for n in range(self.num_classes) if n not in self.ignore_index], dtype=int) self.id_offset = id_offset self.eps = 1e-15 self.min_num_points = min_num_points self.reset() def reset(self): """Reset class variables.""" # general things # iou stuff self.confusion_matrix = np.zeros((self.num_classes, self.num_classes), dtype=int) # panoptic stuff self.pan_tp = np.zeros(self.num_classes, dtype=int) self.pan_iou = np.zeros(self.num_classes, dtype=np.double) self.pan_fp = np.zeros(self.num_classes, dtype=int) self.pan_fn = np.zeros(self.num_classes, dtype=int) self.evaluated_fnames = [] def evaluate(self, gt_labels: List[Dict[str, np.ndarray]], seg_preds: List[Dict[str, np.ndarray]]) -> Dict[str, float]: """Evaluate the predictions. Args: gt_labels (list[dict[np.ndarray]]): Ground Truth. seg_preds (list[dict[np.ndarray]]): Predictions. Returns: dict[float]: The computed metrics. The keys are the names of the metrics, and the values are corresponding results. """ assert len(seg_preds) == len(gt_labels) for f in range(len(seg_preds)): gt_semantic_seg = gt_labels[f]['pts_semantic_mask'].astype(int) gt_instance_seg = gt_labels[f]['pts_instance_mask'].astype(int) pred_semantic_seg = seg_preds[f]['pts_semantic_mask'].astype(int) pred_instance_seg = seg_preds[f]['pts_instance_mask'].astype(int) self.add_semantic_sample(pred_semantic_seg, gt_semantic_seg) self.add_panoptic_sample(pred_semantic_seg, gt_semantic_seg, pred_instance_seg, gt_instance_seg) result_dicts = self.print_results() return result_dicts def print_results(self) -> Dict[str, float]: """Print results. Returns: dict[float]: The computed metrics. The keys are the names of the metrics, and the values are corresponding results. """ pq, sq, rq, all_pq, all_sq, all_rq = self.get_pq() miou, iou = self.get_iou() # now make a nice dictionary output_dict = {} # make python variables pq = pq.item() sq = sq.item() rq = rq.item() all_pq = all_pq.flatten().tolist() all_sq = all_sq.flatten().tolist() all_rq = all_rq.flatten().tolist() miou = miou.item() iou = iou.flatten().tolist() output_dict['all'] = {} output_dict['all']['pq'] = pq output_dict['all']['sq'] = sq output_dict['all']['rq'] = rq output_dict['all']['miou'] = miou for idx, (_pq, _sq, _rq, _iou) in enumerate(zip(all_pq, all_sq, all_rq, iou)): class_str = self.classes[idx] output_dict[class_str] = {} output_dict[class_str]['pq'] = _pq output_dict[class_str]['sq'] = _sq output_dict[class_str]['rq'] = _rq output_dict[class_str]['miou'] = _iou pq_dagger = np.mean( [float(output_dict[c]['pq']) for c in self.thing_classes] + [float(output_dict[c]['miou']) for c in self.stuff_classes]) pq_things = np.mean( [float(output_dict[c]['pq']) for c in self.thing_classes]) rq_things = np.mean( [float(output_dict[c]['rq']) for c in self.thing_classes]) sq_things = np.mean( [float(output_dict[c]['sq']) for c in self.thing_classes]) pq_stuff = np.mean( [float(output_dict[c]['pq']) for c in self.stuff_classes]) rq_stuff = np.mean( [float(output_dict[c]['rq']) for c in self.stuff_classes]) sq_stuff = np.mean( [float(output_dict[c]['sq']) for c in self.stuff_classes]) result_dicts = {} result_dicts['pq'] = float(pq) result_dicts['pq_dagger'] = float(pq_dagger) result_dicts['sq_mean'] = float(sq) result_dicts['rq_mean'] = float(rq) result_dicts['miou'] = float(miou) result_dicts['pq_stuff'] = float(pq_stuff) result_dicts['rq_stuff'] = float(rq_stuff) result_dicts['sq_stuff'] = float(sq_stuff) result_dicts['pq_things'] = float(pq_things) result_dicts['rq_things'] = float(rq_things) result_dicts['sq_things'] = float(sq_things) if self.logger is not None: print_log('| | IoU | PQ | RQ | SQ |', self.logger) for k, v in output_dict.items(): print_log( '|{}| {:.4f} | {:.4f} | {:.4f} | {:.4f} |'.format( k.ljust(8)[-8:], v['miou'], v['pq'], v['rq'], v['sq']), self.logger) print_log('True Positive: ', self.logger) print_log('\t|\t'.join([str(x) for x in self.pan_tp]), self.logger) print_log('False Positive: ') print_log('\t|\t'.join([str(x) for x in self.pan_fp]), self.logger) print_log('False Negative: ') print_log('\t|\t'.join([str(x) for x in self.pan_fn]), self.logger) else: print('| | IoU | PQ | RQ | SQ |') for k, v in output_dict.items(): print('|{}| {:.4f} | {:.4f} | {:.4f} | {:.4f} |'.format( k.ljust(8)[-8:], v['miou'], v['pq'], v['rq'], v['sq'])) print('True Positive: ') print('\t|\t'.join([str(x) for x in self.pan_tp])) print('False Positive: ') print('\t|\t'.join([str(x) for x in self.pan_fp])) print('False Negative: ') print('\t|\t'.join([str(x) for x in self.pan_fn])) return result_dicts def get_pq(self) -> PQReturnsType: """Get results of PQ metric. Returns: tuple(np.ndarray): PQ, SQ, RQ of each class and all class. """ # get PQ and first calculate for all classes sq_all = self.pan_iou.astype(np.double) / np.maximum( self.pan_tp.astype(np.double), self.eps) rq_all = self.pan_tp.astype(np.double) / np.maximum( self.pan_tp.astype(np.double) + 0.5 * self.pan_fp.astype(np.double) + 0.5 * self.pan_fn.astype(np.double), self.eps) pq_all = sq_all * rq_all # then do the REAL mean (no ignored classes) sq = sq_all[self.include].mean() rq = rq_all[self.include].mean() pq = pq_all[self.include].mean() return (pq, sq, rq, pq_all, sq_all, rq_all) def get_iou(self) -> Tuple[np.double, np.ndarray]: """Get results of IOU metric. Returns: tuple(np.ndarray): iou of all class and each class. """ tp, fp, fn = self.get_iou_stats() intersection = tp union = tp + fp + fn union = np.maximum(union, self.eps) iou = intersection.astype(np.double) / union.astype(np.double) iou_mean = (intersection[self.include].astype(np.double) / union[self.include].astype(np.double)).mean() return iou_mean, iou def get_iou_stats(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: """Get IOU statistics of TP, FP and FN. Returns: tuple(np.ndarray): TP, FP, FN of all class. """ # copy to avoid modifying the real deal conf = self.confusion_matrix.copy().astype(np.double) # remove fp from confusion on the ignore classes predictions # points that were predicted of another class, but were ignore # (corresponds to zeroing the cols of those classes, # since the predictions go on the rows) conf[:, self.ignore_index] = 0 # get the clean stats tp = conf.diagonal() fp = conf.sum(axis=1) - tp fn = conf.sum(axis=0) - tp return tp, fp, fn def add_semantic_sample(self, semantic_preds: np.ndarray, gt_semantics: np.ndarray): """Add one batch of semantic predictions and ground truths. Args: semantic_preds (np.ndarray): Semantic predictions. gt_semantics (np.ndarray): Semantic ground truths. """ idxs = np.stack([semantic_preds, gt_semantics], axis=0) # make confusion matrix (cols = gt, rows = pred) np.add.at(self.confusion_matrix, tuple(idxs), 1) def add_panoptic_sample(self, semantic_preds: np.ndarray, gt_semantics: np.ndarray, instance_preds: np.ndarray, gt_instances: np.ndarray): """Add one sample of panoptic predictions and ground truths for evaluation. Args: semantic_preds (np.ndarray): Semantic predictions. gt_semantics (np.ndarray): Semantic ground truths. instance_preds (np.ndarray): Instance predictions. gt_instances (np.ndarray): Instance ground truths. """ # avoid zero (ignored label) instance_preds = instance_preds + 1 gt_instances = gt_instances + 1 # only interested in points that are # outside the void area (not in excluded classes) for cl in self.ignore_index: # make a mask for this class gt_not_in_excl_mask = gt_semantics != cl # remove all other points semantic_preds = semantic_preds[gt_not_in_excl_mask] gt_semantics = gt_semantics[gt_not_in_excl_mask] instance_preds = instance_preds[gt_not_in_excl_mask] gt_instances = gt_instances[gt_not_in_excl_mask] # first step is to count intersections > 0.5 IoU # for each class (except the ignored ones) for cl in self.include: # get a class mask pred_inst_in_cl_mask = semantic_preds == cl gt_inst_in_cl_mask = gt_semantics == cl # get instance points in class (makes outside stuff 0) pred_inst_in_cl = instance_preds * pred_inst_in_cl_mask.astype(int) gt_inst_in_cl = gt_instances * gt_inst_in_cl_mask.astype(int) # generate the areas for each unique instance prediction unique_pred, counts_pred = np.unique( pred_inst_in_cl[pred_inst_in_cl > 0], return_counts=True) id2idx_pred = {id: idx for idx, id in enumerate(unique_pred)} matched_pred = np.array([False] * unique_pred.shape[0]) # generate the areas for each unique instance gt_np unique_gt, counts_gt = np.unique( gt_inst_in_cl[gt_inst_in_cl > 0], return_counts=True) id2idx_gt = {id: idx for idx, id in enumerate(unique_gt)} matched_gt = np.array([False] * unique_gt.shape[0]) # generate intersection using offset valid_combos = np.logical_and(pred_inst_in_cl > 0, gt_inst_in_cl > 0) id_offset_combo = pred_inst_in_cl[ valid_combos] + self.id_offset * gt_inst_in_cl[valid_combos] unique_combo, counts_combo = np.unique( id_offset_combo, return_counts=True) # generate an intersection map # count the intersections with over 0.5 IoU as TP gt_labels = unique_combo // self.id_offset pred_labels = unique_combo % self.id_offset gt_areas = np.array([counts_gt[id2idx_gt[id]] for id in gt_labels]) pred_areas = np.array( [counts_pred[id2idx_pred[id]] for id in pred_labels]) intersections = counts_combo unions = gt_areas + pred_areas - intersections ious = intersections.astype(float) / unions.astype(float) tp_indexes = ious > 0.5 self.pan_tp[cl] += np.sum(tp_indexes) self.pan_iou[cl] += np.sum(ious[tp_indexes]) matched_gt[[id2idx_gt[id] for id in gt_labels[tp_indexes]]] = True matched_pred[[id2idx_pred[id] for id in pred_labels[tp_indexes]]] = True # count the FN if len(counts_gt) > 0: self.pan_fn[cl] += np.sum( np.logical_and(counts_gt >= self.min_num_points, ~matched_gt)) # count the FP if len(matched_pred) > 0: self.pan_fp[cl] += np.sum( np.logical_and(counts_pred >= self.min_num_points, ~matched_pred)) def panoptic_seg_eval(gt_labels: List[np.ndarray], seg_preds: List[np.ndarray], classes: List[str], thing_classes: List[str], stuff_classes: List[str], min_num_points: int, id_offset: int, label2cat: Dict[str, str], ignore_index: List[int], logger: MMLogger = None) -> Dict[str, float]: """Panoptic Segmentation Evaluation. Evaluate the result of the panoptic segmentation. Args: gt_labels (list[dict[np.ndarray]]): Ground Truth. seg_preds (list[dict[np.ndarray]]): Predictions. classes (list[str]): Classes used in the dataset. thing_classes (list[str]): Thing classes used in the dataset. stuff_classes (list[str]): Stuff classes used in the dataset. min_num_points (int): Minimum point number of object to be counted as ground truth in evaluation. id_offset (int): Offset for instance ids to concat with semantic labels. label2cat (dict[str]): Mapping from label to category. ignore_index (list[int]): Indices of ignored classes in evaluation. logger (logging.Logger | str, optional): Logger used for printing. Defaults to None. Returns: dict[float]: Dict of results. """ panoptic_seg_eval = EvalPanoptic(classes, thing_classes, stuff_classes, min_num_points, id_offset, label2cat, ignore_index, logger) ret_dict = panoptic_seg_eval.evaluate(gt_labels, seg_preds) return ret_dict