Source code for kwcoco.metrics.functional

import numpy as np
import warnings
from scipy.sparse import coo_matrix


def fast_confusion_matrix(y_true, y_pred, n_labels, sample_weight=None):
    """
    Faster version of the sklearn confusion matrix that avoids the expensive
    checks and label rectification.

    Args:
        y_true (ndarray[int]): ground truth class label for each sample
        y_pred (ndarray[int]): predicted class label for each sample
        n_labels (int): number of labels
        sample_weight (ndarray[int | float]): weight of each sample

    Returns:
        ndarray[int64 | float64, dim=2]:
            matrix where rows represent real and cols represent pred and the
            value at each cell is the total amount of weight

    Example:
        >>> y_true = np.array([0, 0, 0, 0, 1, 1, 1, 0, 0, 1])
        >>> y_pred = np.array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1])
        >>> fast_confusion_matrix(y_true, y_pred, 2)
        array([[4, 2],
               [3, 1]])
        >>> fast_confusion_matrix(y_true, y_pred, 2).ravel()
        array([4, 2, 3, 1])
    """
    if sample_weight is None:
        sample_weight = np.ones(len(y_true), dtype=np.uint8)
    # The accumulation dtype needs to have 64 bits to avoid overflow
    dtype = np.float64 if sample_weight.dtype.kind == 'f' else np.int64
    matrix = coo_matrix((sample_weight, (y_true, y_pred)),
                        shape=(n_labels, n_labels),
                        dtype=dtype).toarray()
    return matrix
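
The accumulation dtype follows the sample weights: integer (or omitted) weights
accumulate into an int64 matrix, while float weights produce a float64 matrix.
A minimal weighted-usage sketch (the values below are illustrative, not taken
from the docstring above):

    y_true = np.array([0, 0, 1, 1])
    y_pred = np.array([0, 1, 1, 1])
    weight = np.array([1.0, 0.5, 1.0, 1.0])
    cm = fast_confusion_matrix(y_true, y_pred, 2, sample_weight=weight)
    # cm == [[1.0, 0.5],
    #        [0.0, 2.0]]  (float64, because the weights are floats)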
def _truncated_roc(y_df, bg_idx=-1, fp_cutoff=None):
    """
    Computes truncated ROC info
    """
    import sklearn
    try:
        from sklearn.metrics._ranking import _binary_clf_curve
    except ImportError:
        from sklearn.metrics.ranking import _binary_clf_curve
    y_true = (y_df['true'] == y_df['pred'])
    y_score = y_df['score']
    sample_weight = y_df['weight']

    # y_true[y_true == -1] = 0

    # < TRUNCATED PART >
    # GET ROC CURVES AT A PARTICULAR FALSE POSITIVE COUNT CUTOFF
    # This will let different runs be more comparable
    realpos_total = sample_weight[(y_df['txs'] >= 0)].sum()

    fp_count, tp_count, count_thresholds = _binary_clf_curve(
        y_true, y_score, pos_label=1, sample_weight=sample_weight)

    if len(count_thresholds) > 0 and count_thresholds[-1] == 0:
        # Chop off the last entry where it will jump
        count_thresholds = count_thresholds[:-1]
        tp_count = tp_count[:-1]
        fp_count = fp_count[:-1]

    # Cut off the curves at a comparable point
    if fp_cutoff is None:
        fp_cutoff = np.inf
    idxs = np.where(fp_count > fp_cutoff)[0]
    if len(idxs) == 0:
        idx = len(fp_count)
    else:
        idx = idxs[0]
    trunc_fp_count = fp_count[:idx]
    trunc_tp_count = tp_count[:idx]
    trunc_thresholds = count_thresholds[:idx]

    # If the cutoff was not reached, horizontally extend the curve.
    # This will hurt the scores (i.e. we may be biased against small
    # scenes), but it ensures that big scenes are comparable
    if len(fp_count) == 0:
        trunc_fp_count = np.array([fp_cutoff])
        trunc_tp_count = np.array([0])
        trunc_thresholds = np.array([0])
        # THIS WILL CAUSE AUC TO RAISE AN ERROR IF IT GETS HIT
    elif fp_count[-1] < fp_cutoff and np.isfinite(fp_cutoff):
        trunc_fp_count = np.hstack([trunc_fp_count, [fp_cutoff]])
        trunc_tp_count = np.hstack([trunc_tp_count, [trunc_tp_count[-1]]])
        trunc_thresholds = np.hstack([trunc_thresholds, [0]])

    falsepos_total = trunc_fp_count[-1]  # is this right?

    trunc_tpr = trunc_tp_count / realpos_total
    trunc_fpr = trunc_fp_count / falsepos_total
    trunc_auc = sklearn.metrics.auc(trunc_fpr, trunc_tpr)
    # < /TRUNCATED PART >
    roc_info = {
        'fp_cutoff': fp_cutoff,
        'realpos_total': realpos_total,
        'tpr': trunc_tpr,
        'fpr': trunc_fpr,
        'fp_count': trunc_fp_count,
        'tp_count': trunc_tp_count,
        'thresholds': trunc_thresholds,
        'auc': trunc_auc,
    }
    return roc_info
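
As an internal helper, _truncated_roc expects y_df to provide the columns
'true', 'pred', 'score', 'weight', and 'txs'; the code only uses the fact that
a negative 'txs' value marks a row with no assigned true box. A minimal sketch,
assuming a pandas DataFrame input with made-up values:

    import pandas as pd
    y_df = pd.DataFrame({
        'true':   [0, 0, -1, 0, -1],
        'pred':   [0, 0, 0, 0, 0],
        'score':  [0.9, 0.8, 0.7, 0.6, 0.5],
        'weight': [1.0, 1.0, 1.0, 1.0, 1.0],
        'txs':    [0, 1, -1, 2, -1],  # negative => no assigned true box
    })
    roc_info = _truncated_roc(y_df, fp_cutoff=2)
    # roc_info['auc'] is the area under the ROC curve truncated at
    # 2 false positives; roc_info['fpr'] / roc_info['tpr'] trace the curve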
def _pr_curves(y):
    """
    Compute a PR curve from a method

    Args:
        y (pd.DataFrame | DataFrameArray): output of detection_confusions

    Returns:
        Tuple[float, ndarray, ndarray]

    Example:
        >>> # xdoctest: +REQUIRES(module:sklearn)
        >>> import pandas as pd
        >>> y1 = pd.DataFrame.from_records([
        >>>     {'pred': 0, 'score': 10.00, 'true': -1, 'weight': 1.00},
        >>>     {'pred': 0, 'score':  1.65, 'true':  0, 'weight': 1.00},
        >>>     {'pred': 0, 'score':  8.64, 'true': -1, 'weight': 1.00},
        >>>     {'pred': 0, 'score':  3.97, 'true':  0, 'weight': 1.00},
        >>>     {'pred': 0, 'score':  1.68, 'true':  0, 'weight': 1.00},
        >>>     {'pred': 0, 'score':  5.06, 'true':  0, 'weight': 1.00},
        >>>     {'pred': 0, 'score':  0.25, 'true':  0, 'weight': 1.00},
        >>>     {'pred': 0, 'score':  1.75, 'true':  0, 'weight': 1.00},
        >>>     {'pred': 0, 'score':  8.52, 'true':  0, 'weight': 1.00},
        >>>     {'pred': 0, 'score':  5.20, 'true':  0, 'weight': 1.00},
        >>> ])
        >>> import kwarray
        >>> y2 = kwarray.DataFrameArray(y1)
        >>> _pr_curves(y2)
        >>> _pr_curves(y1)
    """
    import sklearn
    # compute metrics on a per class basis
    if y is None:
        return np.nan, [], []

    # References [Manning2008] and [Everingham2010] present alternative
    # variants of AP that interpolate the precision-recall curve. Currently,
    # average_precision_score does not implement any interpolated variant
    # http://scikit-learn.org/stable/modules/model_evaluation.html

    # In the future, we should simply use the sklearn version
    # which gives nice, easy-to-reproduce results.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', message='invalid .* true_divide')
        is_correct = (y['true'] == y['pred']).astype(int)
        ap = sklearn.metrics.average_precision_score(
            y_true=is_correct, y_score=y['score'],
            sample_weight=y['weight'],
        )
        prec, rec, thresholds = sklearn.metrics.precision_recall_curve(
            is_correct, y['score'], sample_weight=y['weight'],
        )
    return ap, prec, rec
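
The return value unpacks as (ap, prec, rec); when y is None the function
short-circuits to (np.nan, [], []). Continuing the docstring example above:

    ap, prec, rec = _pr_curves(y1)
    # ap is the sklearn average precision of the binarized 'is_correct'
    # labels; prec and rec trace the precision-recall curve per threshold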
def _average_precision(tpr, ppv):
    """
    Compute average precision of a binary PR curve. This is simply the area
    under the curve.

    Args:
        tpr (ndarray): true positive rate - aka recall
        ppv (ndarray): positive predictive value - aka precision
    """
    # The average precision is simply the area under the PR curve.
    xdata = tpr
    ydata = ppv
    if xdata[0] > xdata[-1]:
        xdata = xdata[::-1]
        ydata = ydata[::-1]
    # Note: we could simply use sklearn.metrics.auc, which has more robust
    # checks.
    ap = np.trapz(y=ydata, x=xdata)
    return ap
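
Since this is just the trapezoidal area under the (recall, precision) curve,
small cases can be checked by hand. An illustrative example with made-up
values:

    tpr = np.array([0.0, 0.5, 1.0])   # recall
    ppv = np.array([1.0, 0.8, 0.6])   # precision
    ap = _average_precision(tpr, ppv)
    # 0.5 * (1.0 + 0.8) / 2 + 0.5 * (0.8 + 0.6) / 2 = 0.8
    # Passing the arrays in descending recall order gives the same result,
    # because the function flips them into ascending order before np.trapz.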