Source code for kwcoco.cli.coco_stats

#!/usr/bin/env python
import ubelt as ub
import scriptconfig as scfg


[docs] class CocoStatsCLI: name = 'stats'
[docs] class CLIConfig(scfg.DataConfig): """ Compute summary statistics about a COCO dataset """ __command__ = 'stats' src = scfg.Value(['special:shapes8'], position=1, help='path to dataset', nargs='+') basic = scfg.Value(True, isflag=True, help='show basic stats') extended = scfg.Value(True, isflag=True, help='show extended stats') catfreq = scfg.Value(True, isflag=True, help='show category frequency stats') boxes = scfg.Value(False, isflag=True, help=ub.paragraph( ''' show bounding box stats in width-height format. ''')) image_size = scfg.Value(False, isflag=True, help='show image size stats') annot_attrs = scfg.Value(False, isflag=True, help='show annotation attribute information') image_attrs = scfg.Value(False, isflag=True, help='show image attribute information') video_attrs = scfg.Value(False, isflag=True, help='show video attribute information') io_workers = scfg.Value(0, help=ub.paragraph( ''' number of workers when reading multiple kwcoco files ''')) embed = scfg.Value(False, isflag=True, help='embed into interactive shell') __epilog__ = """ Example Usage: kwcoco stats --src=special:shapes8 kwcoco stats --src=special:shapes8 --boxes=True """
[docs] @classmethod def main(cls, cmdline=True, **kw): """ Example: >>> kw = {'src': 'special:shapes8'} >>> cmdline = False >>> cls = CocoStatsCLI >>> cls.main(cmdline, **kw) """ import kwcoco import numpy as np config = cls.CLIConfig.cli(data=kw, cmdline=cmdline) try: from rich import print as rich_print except ImportError: rich_print = print rich_print('config = {}'.format(ub.urepr(config, nl=1))) if config['src'] is None: raise Exception('must specify source: {}'.format(config['src'])) if isinstance(config['src'], str): fpaths = [config['src']] else: fpaths = config['src'] datasets = list(kwcoco.CocoDataset.coerce_multiple(fpaths, workers=config.io_workers)) print('Finished reading datasets') # hack dataset tags dset_tags = [dset.tag for dset in datasets] if len(set(dset_tags)) < len(dset_tags): from os.path import commonprefix dset_fpaths = [dset.fpath for dset in datasets] toremove = commonprefix(dset_fpaths) for dset in datasets: dset.tag = dset.fpath.replace(toremove, '') try: import networkx as nx for dset in datasets: print('dset = {!r}'.format(dset)) print('Category Heirarchy: ') print(nx.write_network_text(dset.object_categories().graph)) except Exception: pass import pandas as pd pd.set_option('max_colwidth', 256) if config['basic']: tag_to_stats = {} for dset in datasets: tag_to_stats[dset.tag] = dset.basic_stats() df = pd.DataFrame.from_dict(tag_to_stats) rich_print(df.T.to_string(float_format=lambda x: '%0.3f' % x)) if config['extended']: tag_to_ext_stats = {} for dset in datasets: tag_to_ext_stats[dset.tag] = dset.extended_stats() # allkeys = ['annots_per_img', 'annots_per_cat'] allkeys = sorted(set(ub.flatten(s.keys() for s in tag_to_ext_stats.values()))) # print('allkeys = {!r}'.format(allkeys)) for key in allkeys: print('\n--{!r}'.format(key)) df = pd.DataFrame.from_dict( {k: v[key] for k, v in tag_to_ext_stats.items()}) rich_print(df.T.to_string(float_format=lambda x: '%0.3f' % x)) if config['catfreq']: tag_to_freq = {} for dset in datasets: tag_to_freq[dset.tag] = dset.category_annotation_frequency() df = pd.DataFrame.from_dict(tag_to_freq) rich_print(df.to_string(float_format=lambda x: '%0.3f' % x)) if config['video_attrs']: print('Video Attrs') for dset in datasets: attrs = dset.videos().attribute_frequency() print('video_attrs = {}'.format(ub.urepr(attrs, nl=1))) if config['image_attrs']: print('Image Attrs') for dset in datasets: print('dset.tag = {!r}'.format(dset.tag)) attrs = dset.images().attribute_frequency() print('image_attrs = {}'.format(ub.urepr(attrs, nl=1))) if config['annot_attrs']: print('Annot Attrs') for dset in datasets: print('dset.tag = {!r}'.format(dset.tag)) attrs = dset.annots().attribute_frequency() print('annot_attrs = {}'.format(ub.urepr(attrs, nl=1))) if config['boxes']: print('Box stats') for dset in datasets: print('dset.tag = {!r}'.format(dset.tag)) print(ub.urepr(dset.boxsize_stats(), nl=-1, precision=2)) if config['image_size']: print('Image size stats') for dset in datasets: print('dset.tag = {!r}'.format(dset.tag)) images = dset.images() heights = np.array(images.lookup('height', np.nan)) widths = np.array(images.lookup('width', np.nan)) rt_areas = np.sqrt(heights * widths) imgsize_df = pd.DataFrame({ 'height': heights, 'widths': widths, 'rt_areas': rt_areas, }) size_stats = imgsize_df.describe() print(size_stats) idx = np.argmax(rt_areas) try: biggest_image = images.take([idx]).coco_images[0] max_area_h = biggest_image.img['height'] max_area_w = biggest_image.img['width'] print('Max image: {} x {}'.format(max_area_w, max_area_h)) pixels = max_area_w * max_area_h total_disk_bytes = 0 for fpath in list(biggest_image.iter_image_filepaths()): fpath = ub.Path(fpath) num_bytes = fpath.stat().st_size total_disk_bytes += num_bytes total_disk_gb = total_disk_bytes / 2 ** 30 pixel_gb_per_bit = (pixels / 8) / 2 ** 30 print('total_disk_gb = {!r}'.format(total_disk_gb)) print('pixel_gb_per_bit = {!r}'.format(pixel_gb_per_bit)) except Exception: print('error getting max size') # print('dset.tag = {!r}'.format(dset.tag)) # print(ub.urepr(dset.boxsize_stats(), nl=-1, precision=2)) if config['embed']: # Hidden hack import xdev xdev.embed()
# for dset in datasets: # # dset = datasets[0] # # kwcoco.CocoDataset.coerce(config['src']) # print('dset.fpath = {!r}'.format(dset.fpath)) # if config['basic']: # basic = dset.basic_stats() # print('basic = {}'.format(ub.urepr(basic, nl=1))) # if config['extended']: # extended = dset.extended_stats() # print('extended = {}'.format(ub.urepr(extended, nl=1, precision=2))) # if config['catfreq']: # print('Category frequency') # freq = dset.category_annotation_frequency() # import pandas as pd # df = pd.DataFrame.from_dict({str(dset.tag): freq}) # pd.set_option('max_colwidth', 256) # print(df.to_string(float_format=lambda x: '%0.3f' % x)) # if config['boxes']: # print('Box stats') # print(ub.urepr(dset.boxsize_stats(), nl=-1, precision=2)) _CLI = CocoStatsCLI main = _CLI.main if __name__ == '__main__': """ CommandLine: python -m kwcoco.cli.coco_stats --src=special:shapes8 """ _CLI.main()