Source code for kwcoco.metrics.sklearn_alts

"""
Faster pure-python versions of sklearn functions that avoid expensive checks
and label rectifications. It is assumed that all labels are consecutive
non-negative integers.
"""
from scipy.sparse import coo_matrix
import numpy as np


def confusion_matrix(y_true, y_pred, n_labels=None, labels=None,
                     sample_weight=None):
    """
    Faster version of the sklearn confusion matrix that avoids the
    expensive checks and label rectification.

    Runs in about 0.7ms.

    Returns:
        ndarray: matrix where rows represent real and cols represent pred

    Example:
        >>> y_true = np.array([0, 0, 0, 0, 1, 1, 1, 0, 0, 1])
        >>> y_pred = np.array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1])
        >>> confusion_matrix(y_true, y_pred, 2)
        array([[4, 2],
               [3, 1]])
        >>> confusion_matrix(y_true, y_pred, 2).ravel()
        array([4, 2, 3, 1])

    Benchmarks:
        import ubelt as ub
        y_true = np.random.randint(0, 2, 10000)
        y_pred = np.random.randint(0, 2, 10000)

        n = 1000
        for timer in ub.Timerit(n, bestof=10, label='py-time'):
            sample_weight = [1] * len(y_true)
            confusion_matrix(y_true, y_pred, 2, sample_weight=sample_weight)

        for timer in ub.Timerit(n, bestof=10, label='np-time'):
            sample_weight = np.ones(len(y_true), dtype=np.int64)
            confusion_matrix(y_true, y_pred, 2, sample_weight=sample_weight)
    """
    if sample_weight is None:
        # Default to unit weights so each sample counts once
        sample_weight = np.ones(len(y_true), dtype=np.int64)
    if n_labels is None:
        n_labels = len(labels)
    # Accumulate weights into an (n_labels, n_labels) grid indexed by
    # (true, pred); duplicate coordinates are summed by coo_matrix.
    CM = coo_matrix((sample_weight, (y_true, y_pred)),
                    shape=(n_labels, n_labels),
                    dtype=np.int64).toarray()
    return CM
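
As a sanity check, the output should match sklearn.metrics.confusion_matrix whenever the labels are already consecutive integers starting at 0. A minimal sketch, assuming scikit-learn is installed (sk_confusion_matrix is just a local alias for this comparison):

import numpy as np
from sklearn.metrics import confusion_matrix as sk_confusion_matrix
from kwcoco.metrics.sklearn_alts import confusion_matrix

y_true = np.array([0, 0, 0, 0, 1, 1, 1, 0, 0, 1])
y_pred = np.array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1])

fast = confusion_matrix(y_true, y_pred, n_labels=2)
slow = sk_confusion_matrix(y_true, y_pred, labels=[0, 1])
assert np.array_equal(fast, slow)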
def global_accuracy_from_confusion(cfsn):
    # real is rows, pred is columns
    n_ii = np.diag(cfsn)
    # sum over pred = columns = axis1
    t_i = cfsn.sum(axis=1)
    global_acc = n_ii.sum() / t_i.sum()
    return global_acc
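
For example, on the 2x2 matrix from the doctest above, the global accuracy is the trace of the confusion matrix divided by the total count. A small illustrative check, not part of the module:

import numpy as np
from kwcoco.metrics.sklearn_alts import global_accuracy_from_confusion

cfsn = np.array([[4, 2],
                 [3, 1]])
# trace / total = (4 + 1) / (4 + 2 + 3 + 1) = 0.5
assert global_accuracy_from_confusion(cfsn) == 0.5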
def class_accuracy_from_confusion(cfsn):
    # real is rows, pred is columns
    n_ii = np.diag(cfsn)
    # sum over pred = columns = axis1
    t_i = cfsn.sum(axis=1)
    # Per-class recall; classes with no true samples contribute 0 instead
    # of propagating NaN into the mean.
    per_class_acc = np.nan_to_num(n_ii / t_i)
    class_acc = per_class_acc.mean()
    return class_acc
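
Putting the pieces together, a minimal end-to-end sketch; the values noted in the comments assume the doctest data above:

import numpy as np
from kwcoco.metrics.sklearn_alts import (
    confusion_matrix, global_accuracy_from_confusion,
    class_accuracy_from_confusion)

y_true = np.array([0, 0, 0, 0, 1, 1, 1, 0, 0, 1])
y_pred = np.array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1])

cfsn = confusion_matrix(y_true, y_pred, n_labels=2)
print(global_accuracy_from_confusion(cfsn))  # 0.5
print(class_accuracy_from_confusion(cfsn))   # mean of 4/6 and 1/4, ~0.458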