Source code for kwcoco.cli.coco_validate

#!/usr/bin/env python
import ubelt as ub
import scriptconfig as scfg


# Code to help generate the CLI from keyword arguments to CocoDataset.validate
#
# This is a developer snippet stored as a raw string; nothing in the visible
# part of this module executes it. When run manually it:
#   1. splits the Google-style docstring of ``kwcoco.CocoDataset.validate``
#      and takes the first "Args" block,
#   2. finds the first argument whose name starts with ``**`` and re-parses
#      its description as a nested argument block,
#   3. prints one ``'<name>': scfg.Value(<default>, help=ub.paragraph(...))``
#      entry per nested argument, taking ``<default>`` from the text after
#      ``=`` in the documented type.
# The printed entries are presumably meant to be pasted into
# ``CLIConfig.default`` -- TODO confirm.
#
# NOTE(review): the snippet uses ``ub`` without importing it, so it relies on
# ``ubelt`` already being imported as ``ub`` where it is run. It also needs
# ``xdoctest`` and ``vimtk``, which this module does not import.
__autogen_cli_args__ = r"""
import kwcoco
from xdoctest.docstr import docscrape_google
blocks = docscrape_google.split_google_docblocks(kwcoco.CocoDataset.validate.__doc__)
argblock = dict(blocks)['Args'][0]
found = None
for arg in list(docscrape_google.parse_google_argblock(argblock, clean_desc=False)):
    if arg['name'].startswith('**'):
        found = arg
        break
subdesc = ub.codeblock(found['desc'])
# Sub parsing of kwargs
sub_parts = list(docscrape_google.parse_google_argblock(subdesc))
from vimtk._dirty import format_multiple_paragraph_sentences
for part in sub_parts:
    default = part['type'].split('=')[1]
    line1 = f"'{part['name']}': scfg.Value({default}, help=ub.paragraph("
    kwargs = {}
    wrapped_desc = format_multiple_paragraph_sentences(part['desc'], **kwargs)
    sq = chr(39)
    tsq = sq * 3
    line2 = f'    {tsq}'
    line3 = ub.indent(wrapped_desc)
    line4 = f'    {tsq})),'
    lines = [line1, line2, line3, line4]
    print('\n'.join(lines))
    print('')
"""


class CocoValidateCLI:
    """
    Command line entry point for validating kwcoco datasets.

    ``CLIConfig`` declares the accepted options and ``main`` runs
    ``CocoDataset.validate`` on each requested dataset.
    """
    name = 'validate'

    class CLIConfig(scfg.Config):
        """
        Validate that a coco file conforms to the json schema, that assets
        exist, and potentially fix corrupted assets by removing them.
        """
        # Every key except 'src', 'dst' and 'fix' is forwarded verbatim as a
        # keyword argument to ``CocoDataset.validate`` by ``main``.
        default = {
            'src': scfg.Value(['special:shapes8'], nargs='+', help='path to datasets', position=1),

            'schema': scfg.Value(True, help=ub.paragraph(
                '''
                if True, validate the json-schema
                ''')),

            'unique': scfg.Value(True, help=ub.paragraph(
                '''
                if True, validate unique secondary keys
                ''')),

            'missing': scfg.Value(True, help=ub.paragraph(
                '''
                if True, validate registered files exist
                ''')),

            'corrupted': scfg.Value(False, isflag=True, help=ub.paragraph(
                '''
                if True, validate data in registered files
                ''')),

            'channels': scfg.Value(True, help=ub.paragraph(
                '''
                if True, validate that channels in auxiliary/asset items
                are all unique.
                ''')),

            'require_relative': scfg.Value(False, isflag=True, help=ub.paragraph(
                '''
                if True, causes validation to fail if paths are
                non-portable, i.e. all paths must be relative to the
                bundle directory. if>0, paths must be relative to bundle
                root. if>1, paths must be inside bundle root.
                ''')),

            'img_attrs': scfg.Value('warn', help=ub.paragraph(
                '''
                if truthy, check that image attributes contain width and
                height entries. If 'warn', then warn if they do not exist.
                If 'error', then fail.
                ''')),

            'verbose': scfg.Value(1, help=ub.paragraph(
                '''
                verbosity flag
                ''')),

            'fastfail': scfg.Value(False, isflag=True, help=ub.paragraph(
                '''
                if True raise errors immediately
                ''')),

            # TODO: Move these to a different tool. This should only validate,
            # not fix anything.
            'fix': scfg.Value(None, help=ub.paragraph(
                '''
                Code indicating strategy to attempt to fix the dataset. If
                None, do nothing. If remove, removes missing / corrupted
                images. Other strategies may be added in the future. This
                is a heuristic and does not always work. dst must be
                specified. And only one src dataset can be given.
                ''')),

            'dst': scfg.Value(None, help=ub.paragraph(
                '''
                Location to write a "fixed" coco file if a fix strategy is
                given.
                '''))
        }

        epilog = """
        Example Usage:
            kwcoco toydata --dst foo.json --key=special:shapes8
            kwcoco validate --src=foo.json --corrupted=True
        """

    @classmethod
    def main(cls, cmdline=True, **kw):
        """
        Validate each requested dataset and optionally write a fixed copy.

        Args:
            cmdline: forwarded to ``CLIConfig`` as its ``cmdline`` argument.
            **kw: initial values for the options declared in ``CLIConfig``.

        Raises:
            Exception: if ``src`` is None, if ``dst`` is given together with
                more than one source dataset, or if any dataset reports
                validation errors (the message is the per-file error
                report).

        Example:
            >>> from kwcoco.cli.coco_validate import *  # NOQA
            >>> kw = {'src': 'special:shapes8'}
            >>> cmdline = False
            >>> cls = CocoValidateCLI
            >>> cls.main(cmdline, **kw)
        """
        import kwcoco
        config = cls.CLIConfig(kw, cmdline=cmdline)
        print('config = {}'.format(ub.repr2(dict(config), nl=1)))

        if config['src'] is None:
            raise Exception('must specify source: {}'.format(config['src']))

        # Accept either a single path or a list of paths.
        if isinstance(config['src'], str):
            fpaths = [config['src']]
        else:
            fpaths = config['src']

        # Every processed dataset is dumped to the same ``dst`` path, so more
        # than one source would overwrite the output. Checked once, up front.
        if config['dst']:
            if len(fpaths) != 1:
                raise Exception('can only specify 1 dataset in fix mode')

        # Strategies are given as a '+' separated, case-insensitive code.
        fix_strat = set()
        if config['fix'] is not None:
            fix_strat = {c.lower() for c in config['fix'].split('+')}

        # The validation options do not depend on the dataset, so strip the
        # CLI-only keys once instead of once per file.
        validate_kw = ub.dict_diff(config, {'src', 'dst', 'fix'})

        fpath_to_errors = {}
        for fpath in ub.ProgIter(fpaths, desc='reading datasets', verbose=1):
            print('reading fpath = {!r}'.format(fpath))
            dset = kwcoco.CocoDataset.coerce(fpath)
            result = dset.validate(**validate_kw)

            if 'remove' in fix_strat:
                # Missing images are removed before corrupted ones. The
                # third item of each reported tuple is used as the image id.
                for problem in ('missing', 'corrupted'):
                    if problem in result:
                        bad_gids = [t[2] for t in result[problem]]
                        status = dset.remove_images(bad_gids, verbose=1)
                        print('status = {}'.format(ub.repr2(status, nl=1)))

            if config['dst']:
                dset.dump(config['dst'], newlines=True)

            fpath_to_errors[fpath] = result['errors']

        has_errors = any(ub.flatten(fpath_to_errors.values()))
        if has_errors:
            errmsg = ub.repr2(fpath_to_errors, nl=1)
            print('fpath_to_errors = {}'.format(errmsg))
            raise Exception(errmsg)
# Module-level alias for this module's CLI class.
# NOTE(review): presumably looked up by the kwcoco CLI dispatcher -- confirm.
_CLI = CocoValidateCLI

if __name__ == '__main__':
    """
    CommandLine:
        python -m kwcoco.cli.coco_validate --src=special:shapes8
    """
    _CLI.main()