Source code for kwcoco.cli.coco_validate

#!/usr/bin/env python
import ubelt as ub
import scriptconfig as scfg


[docs]class CocoValidateCLI:
[docs]    name = 'validate'

[docs]    class CLIConfig(scfg.Config):
        """
        Validate that a coco file conforms to the json schema, that assets
        exist, and potentially fix corrupted assets by removing them.
        """
[docs]        default = {
            'src': scfg.Value(['special:shapes8'], nargs='+', help='path to datasets', position=1),
            'schema': scfg.Value(True, help='If True check the json schema'),

            'missing': scfg.Value(True, help='If True check if all assets (e.g. images) exist'),
            'corrupted': scfg.Value(False, help='If True check the assets can be read'),

            'fix': scfg.Value(None, help=ub.paragraph(
                '''
                Code indicating strategy to attempt to fix the dataset.
                If None, do nothing.
                If remove, removes missing / corrupted images.
                Other strategies may be added in the future.

                This is a hueristic and does not always work. dst must be
                specified. And only one src dataset can be given.
                ''')),

            'dst': scfg.Value(None, help=ub.paragraph(
                '''
                Location to write a "fixed" coco file if a fix strategy is
                given.
                '''))

        }
[docs]        epilog = """
        Example Usage:
            kwcoco toydata --dst foo.json --key=special:shapes8
            kwcoco validate --src=foo.json --corrupted=True
        """

    @classmethod
[docs]    def main(cls, cmdline=True, **kw):
        """
        Example:
            >>> from kwcoco.cli.coco_validate import *  # NOQA
            >>> kw = {'src': 'special:shapes8'}
            >>> cmdline = False
            >>> cls = CocoValidateCLI
            >>> cls.main(cmdline, **kw)
        """
        import kwcoco
        config = cls.CLIConfig(kw, cmdline=cmdline)
        print('config = {}'.format(ub.repr2(dict(config), nl=1)))

        if config['src'] is None:
            raise Exception('must specify source: {}'.format(config['src']))

        if isinstance(config['src'], str):
            fpaths = [config['src']]
        else:
            fpaths = config['src']

        if config['dst']:
            if len(fpaths) != 1:
                raise Exception('can only specify 1 dataset in fix mode')

        fix_strat = set()
        if config['fix'] is not None:
            fix_strat = {c.lower() for c in config['fix'].split('+')}

        for fpath in ub.ProgIter(fpaths, desc='reading datasets', verbose=1):
            print('reading fpath = {!r}'.format(fpath))
            dset = kwcoco.CocoDataset.coerce(fpath)

            config_ = ub.dict_diff(config, {'src', 'dst', 'fix'})
            result = dset.validate(**config_)

            if 'missing' in result:
                if 'remove' in fix_strat:
                    missing = result['missing']
                    bad_gids = [t[2] for t in missing]
                    status = dset.remove_images(bad_gids, verbose=1)
                    print('status = {}'.format(ub.repr2(status, nl=1)))

            if 'corrupted' in result:
                if 'remove' in fix_strat:
                    corrupted = result['corrupted']
                    bad_gids = [t[2] for t in corrupted]
                    status = dset.remove_images(bad_gids, verbose=1)
                    print('status = {}'.format(ub.repr2(status, nl=1)))

            if config['dst']:
                if len(fpaths) != 1:
                    raise Exception('can only specify 1 dataset in fix mode')
                dset.dump(config['dst'], newlines=True)

            errors = result['errors']
            if errors:
                print('result = {}'.format(ub.repr2(result, nl=-1)))
                raise Exception('\n'.join(errors))


[docs]_CLI = CocoValidateCLI

if __name__ == '__main__':
    """
    CommandLine:
        python -m kwcoco.cli.coco_stats --src=special:shapes8
    """
    _CLI.main()