# Copyright (c) OpenMMLab. All rights reserved.
# adapted from https://github.com/ScanNet/ScanNet/blob/master/BenchmarkScripts/3d_evaluation/evaluate_semantic_instance.py # noqa
from copy import deepcopy

import numpy as np

from . import util_3d


def evaluate_matches(matches, class_labels, options):
    """Evaluate instance segmentation from matched gt and predicted instances
    for all scenes.

    Args:
        matches (dict): Contains gt2pred and pred2gt infos for every scene.
        class_labels (tuple[str]): Class names.
        options (dict): ScanNet evaluator options. See get_options.

    Returns:
        np.array: Average precision scores for all thresholds and categories.
    """
    overlaps = options['overlaps']
    min_region_sizes = [options['min_region_sizes'][0]]
    dist_threshes = [options['distance_threshes'][0]]
    dist_confs = [options['distance_confs'][0]]

    # results: class x overlap
    ap = np.zeros((len(dist_threshes), len(class_labels), len(overlaps)))
    for di, (min_region_size, distance_thresh, distance_conf) in enumerate(
            zip(min_region_sizes, dist_threshes, dist_confs)):
        for oi, overlap_th in enumerate(overlaps):
            pred_visited = {}
            for m in matches:
                for label_name in class_labels:
                    for p in matches[m]['pred'][label_name]:
                        if 'filename' in p:
                            pred_visited[p['filename']] = False
            for li, label_name in enumerate(class_labels):
                y_true = np.empty(0)
                y_score = np.empty(0)
                hard_false_negatives = 0
                has_gt = False
                has_pred = False
                for m in matches:
                    pred_instances = matches[m]['pred'][label_name]
                    gt_instances = matches[m]['gt'][label_name]
                    # filter groups in ground truth
                    gt_instances = [
                        gt for gt in gt_instances
                        if gt['instance_id'] >= 1000 and gt['vert_count'] >=
                        min_region_size and gt['med_dist'] <= distance_thresh
                        and gt['dist_conf'] >= distance_conf
                    ]
                    if gt_instances:
                        has_gt = True
                    if pred_instances:
                        has_pred = True

                    cur_true = np.ones(len(gt_instances))
                    cur_score = np.ones(len(gt_instances)) * (-float('inf'))
                    cur_match = np.zeros(len(gt_instances), dtype=bool)
                    # collect matches
                    for (gti, gt) in enumerate(gt_instances):
                        found_match = False
                        for pred in gt['matched_pred']:
                            # greedy assignments
                            if pred_visited[pred['filename']]:
                                continue
                            overlap = float(pred['intersection']) / (
                                gt['vert_count'] + pred['vert_count'] -
                                pred['intersection'])
                            if overlap > overlap_th:
                                confidence = pred['confidence']
                                # if already have a prediction for this gt,
                                # the prediction with the lower score is automatically a false positive # noqa
                                if cur_match[gti]:
                                    max_score = max(cur_score[gti], confidence)
                                    min_score = min(cur_score[gti], confidence)
                                    cur_score[gti] = max_score
                                    # append false positive
                                    cur_true = np.append(cur_true, 0)
                                    cur_score = np.append(cur_score, min_score)
                                    cur_match = np.append(cur_match, True)
                                # otherwise set score
                                else:
                                    found_match = True
                                    cur_match[gti] = True
                                    cur_score[gti] = confidence
                                    pred_visited[pred['filename']] = True
                        if not found_match:
                            hard_false_negatives += 1
                    # remove non-matched ground truth instances
                    cur_true = cur_true[cur_match]
                    cur_score = cur_score[cur_match]

                    # collect non-matched predictions as false positive
                    for pred in pred_instances:
                        found_gt = False
                        for gt in pred['matched_gt']:
                            overlap = float(gt['intersection']) / (
                                gt['vert_count'] + pred['vert_count'] -
                                gt['intersection'])
                            if overlap > overlap_th:
                                found_gt = True
                                break
                        if not found_gt:
                            num_ignore = pred['void_intersection']
                            for gt in pred['matched_gt']:
                                # group?
                                if gt['instance_id'] < 1000:
                                    num_ignore += gt['intersection']
                                # small ground truth instances
                                if gt['vert_count'] < min_region_size or gt[
                                        'med_dist'] > distance_thresh or gt[
                                            'dist_conf'] < distance_conf:
                                    num_ignore += gt['intersection']
                            proportion_ignore = float(
                                num_ignore) / pred['vert_count']
                            # if not ignored append false positive
                            if proportion_ignore <= overlap_th:
                                cur_true = np.append(cur_true, 0)
                                confidence = pred['confidence']
                                cur_score = np.append(cur_score, confidence)

                    # append to overall results
                    y_true = np.append(y_true, cur_true)
                    y_score = np.append(y_score, cur_score)

                # compute average precision
                if has_gt and has_pred:
                    # compute precision recall curve first

                    # sorting and cumsum
                    score_arg_sort = np.argsort(y_score)
                    y_score_sorted = y_score[score_arg_sort]
                    y_true_sorted = y_true[score_arg_sort]
                    y_true_sorted_cumsum = np.cumsum(y_true_sorted)

                    # unique thresholds
                    (thresholds, unique_indices) = np.unique(
                        y_score_sorted, return_index=True)
                    num_prec_recall = len(unique_indices) + 1

                    # prepare precision recall
                    num_examples = len(y_score_sorted)
                    # follow https://github.com/ScanNet/ScanNet/pull/26 ? # noqa
                    num_true_examples = y_true_sorted_cumsum[-1] if len(
                        y_true_sorted_cumsum) > 0 else 0
                    precision = np.zeros(num_prec_recall)
                    recall = np.zeros(num_prec_recall)

                    # deal with the first point
                    y_true_sorted_cumsum = np.append(y_true_sorted_cumsum, 0)
                    # deal with remaining
                    for idx_res, idx_scores in enumerate(unique_indices):
                        cumsum = y_true_sorted_cumsum[idx_scores - 1]
                        tp = num_true_examples - cumsum
                        fp = num_examples - idx_scores - tp
                        fn = cumsum + hard_false_negatives
                        p = float(tp) / (tp + fp)
                        r = float(tp) / (tp + fn)
                        precision[idx_res] = p
                        recall[idx_res] = r

                    # first point in curve is artificial
                    precision[-1] = 1.
                    recall[-1] = 0.

                    # compute average of precision-recall curve
                    recall_for_conv = np.copy(recall)
                    recall_for_conv = np.append(recall_for_conv[0],
                                                recall_for_conv)
                    recall_for_conv = np.append(recall_for_conv, 0.)

                    stepWidths = np.convolve(recall_for_conv, [-0.5, 0, 0.5],
                                             'valid')
                    # integrate is now simply a dot product
                    ap_current = np.dot(precision, stepWidths)

                elif has_gt:
                    ap_current = 0.0
                else:
                    ap_current = float('nan')
                ap[di, li, oi] = ap_current
    return ap


def compute_averages(aps, options, class_labels):
    """Averages AP scores for all categories.

    Args:
        aps (np.array): AP scores for all thresholds and categories.
        options (dict): ScanNet evaluator options. See get_options.
        class_labels (tuple[str]): Class names.

    Returns:
        dict: Overall and per-category AP scores.
    """
    d_inf = 0
    o50 = np.where(np.isclose(options['overlaps'], 0.5))
    o25 = np.where(np.isclose(options['overlaps'], 0.25))
    o_all_but25 = np.where(
        np.logical_not(np.isclose(options['overlaps'], 0.25)))
    avg_dict = {}
    avg_dict['all_ap'] = np.nanmean(aps[d_inf, :, o_all_but25])
    avg_dict['all_ap_50%'] = np.nanmean(aps[d_inf, :, o50])
    avg_dict['all_ap_25%'] = np.nanmean(aps[d_inf, :, o25])
    avg_dict['classes'] = {}
    for (li, label_name) in enumerate(class_labels):
        avg_dict['classes'][label_name] = {}
        avg_dict['classes'][label_name]['ap'] = np.average(aps[d_inf, li,
                                                               o_all_but25])
        avg_dict['classes'][label_name]['ap50%'] = np.average(aps[d_inf, li,
                                                                  o50])
        avg_dict['classes'][label_name]['ap25%'] = np.average(aps[d_inf, li,
                                                                  o25])
    return avg_dict


def assign_instances_for_scan(pred_info, gt_ids, options, valid_class_ids,
                              class_labels, id_to_label):
    """Assign gt and predicted instances for a single scene.

    Args:
        pred_info (dict): Predicted masks, labels and scores.
        gt_ids (np.array): Ground truth instance masks.
        options (dict): ScanNet evaluator options. See get_options.
        valid_class_ids (tuple[int]): Ids of valid categories.
        class_labels (tuple[str]): Class names.
        id_to_label (dict[int, str]): Mapping of valid class id to class label.

    Returns:
        dict: Per class assigned gt to predicted instances.
        dict: Per class assigned predicted to gt instances.
    """
    # get gt instances
    gt_instances = util_3d.get_instances(gt_ids, valid_class_ids, class_labels,
                                         id_to_label)
    # associate
    gt2pred = deepcopy(gt_instances)
    for label in gt2pred:
        for gt in gt2pred[label]:
            gt['matched_pred'] = []
    pred2gt = {}
    for label in class_labels:
        pred2gt[label] = []
    num_pred_instances = 0
    # mask of void labels in the ground truth
    bool_void = np.logical_not(np.in1d(gt_ids // 1000, valid_class_ids))
    # go through all prediction masks
    for pred_mask_file in pred_info:
        label_id = int(pred_info[pred_mask_file]['label_id'])
        conf = pred_info[pred_mask_file]['conf']
        if not label_id in id_to_label:  # noqa E713
            continue
        label_name = id_to_label[label_id]
        # read the mask
        pred_mask = pred_info[pred_mask_file]['mask']
        if len(pred_mask) != len(gt_ids):
            raise ValueError('len(pred_mask) != len(gt_ids)')
        # convert to binary
        pred_mask = np.not_equal(pred_mask, 0)
        num = np.count_nonzero(pred_mask)
        if num < options['min_region_sizes'][0]:
            continue  # skip if empty

        pred_instance = {}
        pred_instance['filename'] = pred_mask_file
        pred_instance['pred_id'] = num_pred_instances
        pred_instance['label_id'] = label_id
        pred_instance['vert_count'] = num
        pred_instance['confidence'] = conf
        pred_instance['void_intersection'] = np.count_nonzero(
            np.logical_and(bool_void, pred_mask))

        # matched gt instances
        matched_gt = []
        # go through all gt instances with matching label
        for (gt_num, gt_inst) in enumerate(gt2pred[label_name]):
            intersection = np.count_nonzero(
                np.logical_and(gt_ids == gt_inst['instance_id'], pred_mask))
            if intersection > 0:
                gt_copy = gt_inst.copy()
                pred_copy = pred_instance.copy()
                gt_copy['intersection'] = intersection
                pred_copy['intersection'] = intersection
                matched_gt.append(gt_copy)
                gt2pred[label_name][gt_num]['matched_pred'].append(pred_copy)
        pred_instance['matched_gt'] = matched_gt
        num_pred_instances += 1
        pred2gt[label_name].append(pred_instance)

    return gt2pred, pred2gt


def scannet_eval(preds, gts, options, valid_class_ids, class_labels,
                 id_to_label):
    """Evaluate instance segmentation in ScanNet protocol.

    Args:
        preds (list[dict]): Per scene predictions of mask, label and
            confidence.
        gts (list[np.array]): Per scene ground truth instance masks.
        options (dict): ScanNet evaluator options. See get_options.
        valid_class_ids (tuple[int]): Ids of valid categories.
        class_labels (tuple[str]): Class names.
        id_to_label (dict[int, str]): Mapping of valid class id to class label.

    Returns:
        dict: Overall and per-category AP scores.
    """
    options = get_options(options)
    matches = {}
    for i, (pred, gt) in enumerate(zip(preds, gts)):
        matches_key = i
        # assign gt to predictions
        gt2pred, pred2gt = assign_instances_for_scan(pred, gt, options,
                                                     valid_class_ids,
                                                     class_labels, id_to_label)
        matches[matches_key] = {}
        matches[matches_key]['gt'] = gt2pred
        matches[matches_key]['pred'] = pred2gt

    ap_scores = evaluate_matches(matches, class_labels, options)
    avgs = compute_averages(ap_scores, options, class_labels)
    return avgs


def get_options(options=None):
    """Set ScanNet evaluator options.

    Args:
        options (dict, optional): Not default options. Default: None.

    Returns:
        dict: Updated options with all 4 keys.
    """
    assert options is None or isinstance(options, dict)
    _options = dict(
        overlaps=np.append(np.arange(0.5, 0.95, 0.05), 0.25),
        min_region_sizes=np.array([100]),
        distance_threshes=np.array([float('inf')]),
        distance_confs=np.array([-float('inf')]))
    if options is not None:
        _options.update(options)
    return _options