Source code for kwcoco.metrics.sklearn_alts

"""
Faster pure-python versions of sklearn functions that avoid expensive checks
and label rectifications. It is assumed that all labels are consecutive
non-negative integers.
"""
from scipy.sparse import coo_matrix
import numpy as np


[docs]def confusion_matrix(y_true, y_pred, n_labels=None, labels=None,
                     sample_weight=None):
    """
    faster version of sklearn confusion matrix that avoids the
    expensive checks and label rectification

    Runs in about 0.7ms

    Returns:
        ndarray: matrix where rows represent real and cols represent pred

    Example:
        >>> y_true = np.array([0, 0, 0, 0, 1, 1, 1, 0,  0, 1])
        >>> y_pred = np.array([0, 0, 0, 0, 0, 0, 0, 1,  1, 1])
        >>> confusion_matrix(y_true, y_pred, 2)
        array([[4, 2],
               [3, 1]])
        >>> confusion_matrix(y_true, y_pred, 2).ravel()
        array([4, 2, 3, 1])

    Benchmarks:
        import ubelt as ub
        y_true = np.random.randint(0, 2, 10000)
        y_pred = np.random.randint(0, 2, 10000)

        n = 1000
        for timer in ub.Timerit(n, bestof=10, label='py-time'):
            sample_weight = [1] * len(y_true)
            confusion_matrix(y_true, y_pred, 2, sample_weight=sample_weight)

        for timer in ub.Timerit(n, bestof=10, label='np-time'):
            sample_weight = np.ones(len(y_true), dtype=np.int)
            confusion_matrix(y_true, y_pred, 2, sample_weight=sample_weight)
    """
    if sample_weight is None:
        sample_weight = np.ones(len(y_true), dtype=np.int)
    if n_labels is None:
        n_labels = len(labels)
    CM = coo_matrix((sample_weight, (y_true, y_pred)),
                    shape=(n_labels, n_labels),
                    dtype=np.int64).toarray()
    return CM


[docs]def global_accuracy_from_confusion(cfsn):
    # real is rows, pred is columns
    n_ii = np.diag(cfsn)
    # sum over pred = columns = axis1
    t_i = cfsn.sum(axis=1)
    global_acc = n_ii.sum() / t_i.sum()
    return global_acc


[docs]def class_accuracy_from_confusion(cfsn):
    # real is rows, pred is columns
    n_ii = np.diag(cfsn)
    # sum over pred = columns = axis1
    t_i = cfsn.sum(axis=1)
    per_class_acc = (n_ii / t_i).mean()
    class_acc = np.nan_to_num(per_class_acc).mean()
    return class_acc