kwcoco.metrics package

Module contents

mkinit kwcoco.metrics -w --relative

class kwcoco.metrics.BinaryConfusionVectors(data, cx=None, classes=None)[source]

Bases: ubelt.util_mixins.NiceRepr

Stores information about a binary classification problem. This is always with respect to a specific class, which is given by cx and classes.

The data DataFrameArray must contain:
  • is_true - if the row is an instance of class classes[cx]
  • pred_score - the predicted probability of class classes[cx]
  • weight - sample weight of the example


>>> self = BinaryConfusionVectors.demo(n=10)
>>> print('self = {!r}'.format(self))
>>> print('pr = {}'.format(ub.repr2(self.measures())))
>>> print('roc = {}'.format(ub.repr2(self.roc())))
>>> self = BinaryConfusionVectors.demo(n=0)
>>> print('pr = {}'.format(ub.repr2(self.measures())))
>>> print('roc = {}'.format(ub.repr2(self.roc())))
>>> self = BinaryConfusionVectors.demo(n=1)
>>> print('pr = {}'.format(ub.repr2(self.measures())))
>>> print('roc = {}'.format(ub.repr2(self.roc())))
>>> self = BinaryConfusionVectors.demo(n=2)
>>> print('self = {!r}'.format(self))
>>> print('pr = {}'.format(ub.repr2(self.measures())))
>>> print('roc = {}'.format(ub.repr2(self.roc())))
classmethod demo(n=10, p_true=0.5, p_error=0.2, rng=None)[source]

Create random data for tests


>>> from kwcoco.metrics.confusion_vectors import *  # NOQA
>>> cfsn = BinaryConfusionVectors.demo(n=1000, p_error=0.1)
>>> measures = cfsn.measures()
>>> print('measures = {}'.format(ub.repr2(measures, nl=1)))
>>> # xdoctest: +REQUIRES(--show)
>>> import kwplot
>>> kwplot.autompl()
>>> kwplot.figure(fnum=1, pnum=(1, 2, 1))
>>> measures.draw('pr')
>>> kwplot.figure(fnum=1, pnum=(1, 2, 2))
>>> measures.draw('roc')
precision_recall(stabalize_thresh=7, stabalize_pad=7, method='sklearn')[source]

Deprecated, all information lives in measures now

roc(fp_cutoff=None, stabalize_thresh=7, stabalize_pad=7)[source]

Deprecated, all information lives in measures now


memoization decorator for a method that respects args and kwargs



>>> import ubelt as ub
>>> closure = {'a': 'b', 'c': 'd'}
>>> incr = [0]
>>> class Foo(object):
>>>     @memoize_method
>>>     def foo_memo(self, key):
>>>         value = closure[key]
>>>         incr[0] += 1
>>>         return value
>>>     def foo(self, key):
>>>         value = closure[key]
>>>         incr[0] += 1
>>>         return value
>>> self = Foo()
>>> assert self.foo('a') == 'b' and self.foo('c') == 'd'
>>> assert incr[0] == 2
>>> print('Call memoized version')
>>> assert self.foo_memo('a') == 'b' and self.foo_memo('c') == 'd'
>>> assert incr[0] == 4
>>> assert self.foo_memo('a') == 'b' and self.foo_memo('c') == 'd'
>>> print('Counter should no longer increase')
>>> assert incr[0] == 4
>>> print('Closure changes result without memoization')
>>> closure = {'a': 0, 'c': 1}
>>> assert self.foo('a') == 0 and self.foo('c') == 1
>>> assert incr[0] == 6
>>> assert self.foo_memo('a') == 'b' and self.foo_memo('c') == 'd'
>>> print('Constructing a new object should get a new cache')
>>> self2 = Foo()
>>> self2.foo_memo('a')
>>> assert incr[0] == 7
>>> self2.foo_memo('a')
>>> assert incr[0] == 7
class kwcoco.metrics.ConfusionVectors(data, classes, probs=None)[source]

Bases: ubelt.util_mixins.NiceRepr

Stores information used to construct a confusion matrix. This includes corresponding vectors of predicted labels, true labels, sample weights, etc…

  • data (DataFrameArray) – should at least have keys true, pred, weight
  • classes (Sequence | CategoryTree) – list of category names or category graph
  • probs (ndarray, optional) – probabilities for each class


>>> # xdoctest: IGNORE_WANT
>>> from kwcoco.metrics import DetectionMetrics
>>> dmet = DetectionMetrics.demo(
>>>     nimgs=10, nboxes=(0, 10), n_fp=(0, 1), nclasses=3)
>>> cfsn_vecs = dmet.confusion_vectors()
>>> print(cfsn_vecs.data._pandas())
     pred  true   score  weight     iou  txs  pxs  gid
0       2     2 10.0000  1.0000  1.0000    0    4    0
1       2     2  7.5025  1.0000  1.0000    1    3    0
2       1     1  5.0050  1.0000  1.0000    2    2    0
3       3    -1  2.5075  1.0000 -1.0000   -1    1    0
4       2    -1  0.0100  1.0000 -1.0000   -1    0    0
5      -1     2  0.0000  1.0000 -1.0000    3   -1    0
6      -1     2  0.0000  1.0000 -1.0000    4   -1    0
7       2     2 10.0000  1.0000  1.0000    0    5    1
8       2     2  8.0020  1.0000  1.0000    1    4    1
9       1     1  6.0040  1.0000  1.0000    2    3    1
..    ...   ...     ...     ...     ...  ...  ...  ...
62     -1     2  0.0000  1.0000 -1.0000    7   -1    7
63     -1     3  0.0000  1.0000 -1.0000    8   -1    7
64     -1     1  0.0000  1.0000 -1.0000    9   -1    7
65      1    -1 10.0000  1.0000 -1.0000   -1    0    8
66      1     1  0.0100  1.0000  1.0000    0    1    8
67      3    -1 10.0000  1.0000 -1.0000   -1    3    9
68      2     2  6.6700  1.0000  1.0000    0    2    9
69      2     2  3.3400  1.0000  1.0000    1    1    9
70      3    -1  0.0100  1.0000 -1.0000   -1    0    9
71     -1     2  0.0000  1.0000 -1.0000    2   -1    9
>>> # xdoctest: +REQUIRES(--show)
>>> import kwplot
>>> kwplot.autompl()
>>> from kwcoco.metrics.confusion_vectors import ConfusionVectors
>>> cfsn_vecs = ConfusionVectors.demo(
>>>     nimgs=128, nboxes=(0, 10), n_fp=(0, 3), n_fn=(0, 3), nclasses=3)
>>> cx_to_binvecs = cfsn_vecs.binarize_ovr()
>>> measures = cx_to_binvecs.measures()['perclass']
>>> print('measures = {!r}'.format(measures))
measures = <PerClass_Measures({
    'cat_1': <Measures({'ap': 0.7501, 'auc': 0.7170, 'catname': cat_1, 'max_f1': f1=0.77@0.41, 'max_mcc': mcc=0.71@0.44, 'nsupport': 787.0000, 'realneg_total': 594.0000, 'realpos_total': 193.0000})>,
    'cat_2': <Measures({'ap': 0.8288, 'auc': 0.8137, 'catname': cat_2, 'max_f1': f1=0.83@0.40, 'max_mcc': mcc=0.78@0.40, 'nsupport': 787.0000, 'realneg_total': 589.0000, 'realpos_total': 198.0000})>,
    'cat_3': <Measures({'ap': 0.7536, 'auc': 0.7150, 'catname': cat_3, 'max_f1': f1=0.77@0.40, 'max_mcc': mcc=0.71@0.42, 'nsupport': 787.0000, 'realneg_total': 578.0000, 'realpos_total': 209.0000})>,
}) at 0x7f1b9b0d6130>
>>> kwplot.figure(fnum=1, doclf=True)
>>> measures.draw(key='pr', fnum=1, pnum=(1, 3, 1))
>>> measures.draw(key='roc', fnum=1, pnum=(1, 3, 2))
>>> measures.draw(key='mcc', fnum=1, pnum=(1, 3, 3))
classmethod from_json(state)[source]
classmethod demo(**kw)[source]


>>> cfsn_vecs = ConfusionVectors.demo()
>>> print('cfsn_vecs = {!r}'.format(cfsn_vecs))
>>> cx_to_binvecs = cfsn_vecs.binarize_ovr()
>>> print('cx_to_binvecs = {!r}'.format(cx_to_binvecs))
classmethod from_arrays(true, pred=None, score=None, weight=None, probs=None, classes=None)[source]

Construct confusion vector data structure from component arrays


>>> import kwarray
>>> classes = ['person', 'vehicle', 'object']
>>> rng = kwarray.ensure_rng(0)
>>> true = (rng.rand(10) * len(classes)).astype(int)
>>> probs = rng.rand(len(true), len(classes))
>>> cfsn_vecs = ConfusionVectors.from_arrays(true=true, probs=probs, classes=classes)
>>> cfsn_vecs.confusion_matrix()
pred     person  vehicle  object
person        0        0       0
vehicle       2        4       1
object        2        1       0
confusion_matrix(raw=False, compress=False)[source]

Builds a confusion matrix from the confusion vectors.

Parameters:raw (bool) – if True uses ‘pred_raw’, otherwise uses ‘pred’
cm : the labeled confusion matrix
(Note: we should write an efficient replacement for
this use case. #remove_pandas)
Return type:pd.DataFrame
xdoctest -m ~/code/kwcoco/kwcoco/metrics/confusion_vectors.py ConfusionVectors.confusion_matrix


>>> from kwcoco.metrics import DetectionMetrics
>>> dmet = DetectionMetrics.demo(
>>>     nimgs=10, nboxes=(0, 10), n_fp=(0, 1), n_fn=(0, 1), nclasses=3, cls_noise=.2)
>>> cfsn_vecs = dmet.confusion_vectors()
>>> cm = cfsn_vecs.confusion_matrix()
>>> print(cm.to_string(float_format=lambda x: '%.2f' % x))
pred        background  cat_1  cat_2  cat_3
background        0.00   1.00   1.00   1.00
cat_1             2.00  12.00   0.00   1.00
cat_2             2.00   0.00  14.00   1.00
cat_3             1.00   0.00   1.00  17.00

Creates a coarsened set of vectors


Creates a binary representation useful for measuring the performance of detectors. It is assumed that scores of “positive” classes should be high and scores of “negative” classes should be low.

Parameters:negative_classes (List[str | int]) – list of negative class names or idxs, by default chooses any class with a true class index of -1. These classes should ideally have low scores.


>>> from kwcoco.metrics import DetectionMetrics
>>> dmet = DetectionMetrics.demo(
>>>     nimgs=10, nboxes=(0, 10), n_fp=(0, 1), nclasses=3)
>>> cfsn_vecs = dmet.confusion_vectors()
>>> class_idxs = list(dmet.classes.node_to_idx.values())
>>> binvecs = cfsn_vecs.binarize_peritem()
binarize_ovr(mode=1, keyby='name', ignore_classes={'ignore'})[source]

Transforms cfsn_vecs into one-vs-rest BinaryConfusionVectors for each category.

  • mode (int, default=1) – 0 for hierarchy aware or 1 for voc like. MODE 0 IS PROBABLY BROKEN
  • keyby (int | str) – can be cx or name
  • ignore_classes (Set[str]) – category names to ignore

Returns: a OneVsRestConfusionVectors object, which behaves like

Dict[int, BinaryConfusionVectors]: cx_to_binvecs

Return type: OneVsRestConfusionVectors



>>> cfsn_vecs = ConfusionVectors.demo()
>>> print('cfsn_vecs = {!r}'.format(cfsn_vecs))
>>> catname_to_binvecs = cfsn_vecs.binarize_ovr(keyby='name')
>>> print('catname_to_binvecs = {!r}'.format(catname_to_binvecs))


Consider we want to measure how well we can classify beagles.

Given a multiclass confusion vector, we need to carefully select a subset. We ignore any truth that is coarser than our current label. We also ignore any background predictions on irrelevant classes

dog | dog <- ignore coarser truths
dog | cat <- ignore coarser truths
dog | beagle <- ignore coarser truths
cat | dog
cat | cat
cat | background <- ignore failures to predict unrelated classes
cat | maine-coon
beagle | beagle
beagle | dog
beagle | background
beagle | cat
Snoopy | beagle
Snoopy | cat
maine-coon | background <- ignore failures to predict unrelated classes
maine-coon | beagle
maine-coon | cat

Anything not marked as ignore is counted. We count anything marked as beagle or a finer grained class (e.g. Snoopy) as a positive case. All other cases are negative. The scores come from the predicted probability of beagle, which must be remembered outside the dataframe.


Build a classification report with various metrics.


>>> from kwcoco.metrics.confusion_vectors import *  # NOQA
>>> cfsn_vecs = ConfusionVectors.demo()
>>> report = cfsn_vecs.classification_report(verbose=1)
class kwcoco.metrics.DetectionMetrics(classes=None)[source]

Bases: ubelt.util_mixins.NiceRepr

  • gid_to_true_dets (Dict) – maps image ids to truth
  • gid_to_pred_dets (Dict) – maps image ids to predictions
  • classes (CategoryTree) – category coder


>>> dmet = DetectionMetrics.demo(
>>>     nimgs=100, nboxes=(0, 3), n_fp=(0, 1), nclasses=8, score_noise=0.9, hacked=False)
>>> print(dmet.score_kwcoco(bias=0, compat='mutex', prioritize='iou')['mAP'])
>>> print(dmet.score_voc(bias=0)['mAP'])
>>> #print(dmet.score_coco()['mAP'])
classmethod from_coco(true_coco, pred_coco, gids=None, verbose=0)[source]

Create detection metrics from two coco files representing the truth and predictions.

  • true_coco (kwcoco.CocoDataset)
  • pred_coco (kwcoco.CocoDataset)


>>> import kwcoco
>>> true_coco = kwcoco.CocoDataset.demo('shapes')
>>> pred_coco = true_coco
>>> self = DetectionMetrics.from_coco(true_coco, pred_coco)
>>> self.score_voc()
add_predictions(pred_dets, imgname=None, gid=None)[source]

Register/Add predicted detections for an image

  • pred_dets (Detections) – predicted detections
  • imgname (str) – a unique string to identify the image
  • gid (int, optional) – the integer image id if known
add_truth(true_dets, imgname=None, gid=None)[source]

Register/Add groundtruth detections for an image

  • true_dets (Detections) – groundtruth
  • imgname (str) – a unique string to identify the image
  • gid (int, optional) – the integer image id if known

gets Detections representation for groundtruth in an image


gets Detections representation for predictions in an image

confusion_vectors(ovthresh=0.5, bias=0, gids=None, compat='all', prioritize='iou', ignore_classes='ignore', background_class=NoParam, verbose='auto', workers=0, track_probs='try')[source]

Assigns predicted boxes to the true boxes so we can transform the detection problem into a classification problem for scoring.

  • ovthresh (float, default=0.5) – bounding box overlap iou threshold required for assignment
  • bias (float, default=0.0) – for computing bounding box overlap, either 1 or 0
  • gids (List[int], default=None) – which subset of images ids to compute confusion metrics on. If not specified all images are used.
  • compat (str, default=’all’) – can be (‘ancestors’ | ‘mutex’ | ‘all’). determines which pred boxes are allowed to match which true boxes. If ‘mutex’, then pred boxes can only match true boxes of the same class. If ‘ancestors’, then pred boxes can match true boxes that match or have a coarser label. If ‘all’, then any pred can match any true, regardless of its category label.
  • prioritize (str, default=’iou’) – can be (‘iou’ | ‘class’ | ‘correct’) determines which box to assign to if multiple true boxes overlap a predicted box. If prioritize is iou, then the true box with maximum iou (above ovthresh) will be chosen. If prioritize is class, then it will prefer matching a compatible class above a higher iou. If prioritize is correct, then ancestors of the true class are preferred over descendants of the true class, over unrelated classes.
  • ignore_classes (set, default={‘ignore’}) – class names indicating ignore regions
  • background_class (str, default=ub.NoParam) – Name of the background class. If unspecified we try to determine it with heuristics. A value of None means there is no background class.
  • verbose (int, default=’auto’) – verbosity flag. In auto mode, verbose=1 if len(gids) > 1000.
  • workers (int, default=0) – number of parallel assignment processes
  • track_probs (str, default=’try’) – can be ‘try’, ‘force’, or False. if truthy, we assume probabilities for multiple classes are available.

Scores the detections using kwant

score_kwcoco(ovthresh=0.5, bias=0, gids=None, compat='all', prioritize='iou')[source]

our scoring method

score_voc(ovthresh=0.5, bias=1, method='voc2012', gids=None, ignore_classes='ignore')[source]

score using voc method


>>> dmet = DetectionMetrics.demo(
>>>     nimgs=100, nboxes=(0, 3), n_fp=(0, 1), nclasses=8,
>>>     score_noise=.5)
>>> print(dmet.score_voc()['mAP'])

score using ms-coco method


>>> # xdoctest: +REQUIRES(--pycocotools)
>>> dmet = DetectionMetrics.demo(
>>>     nimgs=100, nboxes=(0, 3), n_fp=(0, 1), nclasses=8)
>>> print(dmet.score_coco()['mAP'])
classmethod demo(**kwargs)[source]

Creates random true boxes and predicted boxes that have some noisy offset from the truth.


nclasses (int, default=1): number of foreground classes.
nimgs (int, default=1): number of images in the coco datasets.
nboxes (int, default=1): boxes per image.
n_fp (int, default=0): number of false positives.
n_fn (int, default=0): number of false negatives.
box_noise (float, default=0): std of a normal distribution used to perturb both box location and box size.
cls_noise (float, default=0): probability that a class label will change. Must be within 0 and 1.
anchors (ndarray, default=None): used to create random boxes.
null_pred (bool, default=0): if True, predicted classes are returned as null, which means only localization scoring is suitable.
with_probs (bool, default=1): if True, includes per-class probabilities with predictions.


>>> kwargs = {}
>>> # Seed the RNG
>>> kwargs['rng'] = 0
>>> # Size parameters determine how big the data is
>>> kwargs['nimgs'] = 5
>>> kwargs['nboxes'] = 7
>>> kwargs['nclasses'] = 11
>>> # Noise parameters perturb predictions further from the truth
>>> kwargs['n_fp'] = 3
>>> kwargs['box_noise'] = 0.1
>>> kwargs['cls_noise'] = 0.5
>>> dmet = DetectionMetrics.demo(**kwargs)
>>> print('dmet.classes = {}'.format(dmet.classes))
dmet.classes = <CategoryTree(nNodes=12, maxDepth=3, maxBreadth=4...)>
>>> # Can grab kwimage.Detection object for any image
>>> print(dmet.true_detections(gid=0))
>>> print(dmet.pred_detections(gid=0))


>>> # Test case with null predicted categories
>>> dmet = DetectionMetrics.demo(nimgs=30, null_pred=1, nclasses=3,
>>>                              nboxes=10, n_fp=10, box_noise=0.3,
>>>                              with_probs=False)
>>> dmet.gid_to_pred_dets[0].data
>>> dmet.gid_to_true_dets[0].data
>>> cfsn_vecs = dmet.confusion_vectors()
>>> binvecs_ovr = cfsn_vecs.binarize_ovr()
>>> binvecs_per = cfsn_vecs.binarize_peritem()
>>> measures_per = binvecs_per.measures()
>>> measures_ovr = binvecs_ovr.measures()
>>> print('measures_per = {!r}'.format(measures_per))
>>> print('measures_ovr = {!r}'.format(measures_ovr))
>>> # xdoctest: +REQUIRES(--show)
>>> import kwplot
>>> kwplot.autompl()
>>> measures_per.draw(fnum=1)
>>> measures_ovr['perclass'].draw(key='pr', fnum=2)
summarize(out_dpath=None, plot=False, title='')[source]


>>> from kwcoco.metrics.confusion_vectors import *  # NOQA
>>> from kwcoco.metrics.detect_metrics import DetectionMetrics
>>> dmet = DetectionMetrics.demo(
>>>     n_fp=(0, 128), n_fn=(0, 4), nimgs=512, nboxes=(0, 32),
>>>     nclasses=3, rng=0)
>>> # xdoctest: +REQUIRES(--show)
>>> import kwplot
>>> kwplot.autompl()
>>> dmet.summarize(plot=True, title='DetectionMetrics summary demo')
>>> kwplot.show_if_requested()
class kwcoco.metrics.Measures(roc_info)[source]

Bases: ubelt.util_mixins.NiceRepr, kwcoco.metrics.util.DictProxy


>>> from kwcoco.metrics.confusion_vectors import *  # NOQA
>>> binvecs = BinaryConfusionVectors.demo(n=100, p_error=0.5)
>>> self = binvecs.measures()
>>> print('self = {!r}'.format(self))
>>> # xdoctest: +REQUIRES(--show)
>>> import kwplot
>>> kwplot.autompl()
>>> self.draw(doclf=True)
>>> self.draw(key='pr',  pnum=(1, 2, 1))
>>> self.draw(key='roc', pnum=(1, 2, 2))
>>> kwplot.show_if_requested()
draw(key=None, prefix='', **kw)[source]


>>> cfsn_vecs = ConfusionVectors.demo()
>>> ovr_cfsn = cfsn_vecs.binarize_ovr(keyby='name')
>>> self = ovr_cfsn.measures()['perclass']
>>> self.draw('mcc', doclf=True, fnum=1)
>>> self.draw('pr', doclf=1, fnum=2)
>>> self.draw('roc', doclf=1, fnum=3)
summary_plot(fnum=1, title='')[source]


>>> from kwcoco.metrics.confusion_vectors import *  # NOQA
>>> cfsn_vecs = ConfusionVectors.demo(n=100, p_error=0.5)
>>> binvecs = cfsn_vecs.binarize_peritem()
>>> self = binvecs.measures()
>>> # xdoctest: +REQUIRES(--show)
>>> import kwplot
>>> kwplot.autompl()
>>> self.summary_plot()
>>> kwplot.show_if_requested()
class kwcoco.metrics.OneVsRestConfusionVectors(cx_to_binvecs, classes)[source]

Bases: ubelt.util_mixins.NiceRepr

Container for multiple one-vs-rest binary confusion vectors

  • cx_to_binvecs
  • classes


>>> from kwcoco.metrics import DetectionMetrics
>>> dmet = DetectionMetrics.demo(
>>>     nimgs=10, nboxes=(0, 10), n_fp=(0, 1), nclasses=3)
>>> cfsn_vecs = dmet.confusion_vectors()
>>> self = cfsn_vecs.binarize_ovr(keyby='name')
>>> print('self = {!r}'.format(self))
classmethod demo()[source]


>>> self = OneVsRestConfusionVectors.demo()
>>> thresh_result = self.measures()['perclass']
class kwcoco.metrics.PerClass_Measures(cx_to_info)[source]

Bases: ubelt.util_mixins.NiceRepr, kwcoco.metrics.util.DictProxy

draw(key='mcc', prefix='', **kw)[source]


>>> cfsn_vecs = ConfusionVectors.demo()
>>> ovr_cfsn = cfsn_vecs.binarize_ovr(keyby='name')
>>> self = ovr_cfsn.measures()['perclass']
>>> self.draw('mcc', doclf=True, fnum=1)
>>> self.draw('pr', doclf=1, fnum=2)
>>> self.draw('roc', doclf=1, fnum=3)
draw_roc(prefix='', **kw)[source]
draw_pr(prefix='', **kw)[source]
summary_plot(fnum=1, title='')[source]
python ~/code/kwcoco/kwcoco/metrics/ PerClass_Measures.summary_plot --show


>>> from kwcoco.metrics.confusion_vectors import *  # NOQA
>>> from kwcoco.metrics.detect_metrics import DetectionMetrics
>>> dmet = DetectionMetrics.demo(
>>>     n_fp=(0, 5), n_fn=(0, 5), nimgs=128, nboxes=(0, 10),
>>>     nclasses=3)
>>> cfsn_vecs = dmet.confusion_vectors()
>>> ovr_cfsn = cfsn_vecs.binarize_ovr(keyby='name')
>>> self = ovr_cfsn.measures()['perclass']
>>> # xdoctest: +REQUIRES(--show)
>>> import kwplot
>>> kwplot.autompl()
>>> self.summary_plot(title='demo summary_plot ovr')
>>> kwplot.show_if_requested()
xdoctest -m ~/code/kwcoco/kwcoco/metrics/ eval_detections_cli