# Source code for kwcoco.cli.coco_reroot

#!/usr/bin/env python
import ubelt as ub
import scriptconfig as scfg


class CocoRerootCLI:
    """
    Command line entry point for the ``reroot`` subcommand.

    ``main`` loads the dataset given by ``src``, calls ``dset.reroot`` with
    the new root set to the absolute directory that contains ``dst``, and
    dumps the result to ``dst``.
    """
    name = 'reroot'

    class CocoRerootConfig(scfg.DataConfig):
        """
        Reroot image paths onto a new image root.

        Modify the root of a coco dataset such to either make paths relative to a
        new root or make paths absolute.

        TODO:
            - [ ] Evaluate that all tests cases work
        """
        __epilog__ = """
        Example Usage:
            kwcoco reroot --help
            kwcoco reroot --src=special:shapes8 --dst rerooted.json
            kwcoco reroot --src=special:shapes8 --new_prefix=foo --check=True --dst rerooted.json
        """
        # NOTE: src and dst are the two positional arguments; keep their
        # ``position`` values in sync with the usage examples above.
        src = scfg.Value(None, help=(
            'Input coco dataset path'), position=1)

        dst = scfg.Value(None, help=(
            'Output coco dataset path'), position=2)

        new_prefix = scfg.Value(None, help=(
            'New prefix to insert before every image file name.'))

        old_prefix = scfg.Value(None, help=(
            'Old prefix to remove from the start of every image file name.'))

        absolute = scfg.Value(True, help=(
            'If False, the output file uses relative paths'))

        check = scfg.Value(True, help=(
            'If True, checks that all data exists'))

        autofix = scfg.Value(False, isflag=True, help=(
            ub.paragraph(
                '''
                If True, attempts an automatic fix. This assumes that paths are
                prefixed with an absolute path belonging to a different
                machine, and it attempts to strip off a minimal prefix to find
                relative paths that do exist.
                ''')))

        compress = scfg.Value('auto', help='if True writes results with compression. DEPRECATED. Just use a .zip suffix.')

        inplace = scfg.Value(False, isflag=True, help=(
            'if True and dst is unspecified then the output will overwrite the input'))

    CLIConfig = CocoRerootConfig

    @classmethod
    def main(cls, cmdline=True, **kw):
        r"""
        Reroot the dataset at ``src`` and write the result to ``dst``.

        Args:
            cmdline (bool): forwarded unchanged to ``CLIConfig.cli`` as its
                ``cmdline`` argument.

            **kw: configuration overrides, forwarded to ``CLIConfig.cli`` as
                its ``data`` argument.

        Raises:
            ValueError: if ``src`` is unspecified, or if ``dst`` is
                unspecified and ``inplace`` is not set.

        Example:
            >>> # xdoctest: +SKIP
            >>> kw = {'src': 'special:shapes8'}
            >>> cmdline = False
            >>> cls = CocoRerootCLI
            >>> cls.main(cmdline, **kw)

        Ignore:
            python ~/code/kwcoco/kwcoco/cli/coco_reroot.py  \
                --src=$HOME/code/bioharn/fast_test/deep_training/training_truth.json \
                --dst=$HOME/code/bioharn/fast_test/deep_training/training_truth2.json \
                --new_prefix=/home/joncrall/code/bioharn/fast_test/training_data \
                --old_prefix=/run/media/matt/Storage/TEST/training_data

            python ~/code/kwcoco/kwcoco/cli/coco_reroot.py  \
                --src=$HOME/code/bioharn/fast_test/deep_training/validation_truth.json \
                --dst=$HOME/code/bioharn/fast_test/deep_training/validation_truth2.json \
                --new_prefix=/home/joncrall/code/bioharn/fast_test/training_data \
                --old_prefix=/run/media/matt/Storage/TEST/training_data

            cmdline = '''
                --src=$HOME/code/bioharn/fast_test/deep_training/training_truth.json
                --dst=$HOME/code/bioharn/fast_test/deep_training/training_truth2.json
                --new_prefix=/home/joncrall/code/bioharn/fast_test/training_data
                --old_prefix=/run/media/matt/Storage/TEST/training_data
            '''
        """
        import kwcoco
        from os.path import dirname, abspath
        config = cls.CLIConfig.cli(data=kw, cmdline=cmdline)

        # rich is optional: fall back to the builtin print when unavailable.
        try:
            import rich
        except ImportError:
            rich_print = print
        else:
            rich_print = rich.print
        rich_print('config = {}'.format(ub.urepr(config, nl=1)))

        if config['src'] is None:
            # ValueError (a subclass of Exception) to agree with the dst check.
            raise ValueError('must specify source: {}'.format(config['src']))

        if config['dst'] is None:
            if not config['inplace']:
                raise ValueError('must specify dst: {}'.format(config['dst']))
            # inplace: overwrite the input file.
            config['dst'] = config['src']

        dset = kwcoco.CocoDataset.coerce(config['src'])

        # The new root is always the absolute directory holding the output
        # file; whether the written paths are absolute is decided by the
        # ``absolute`` argument passed to reroot below.
        new_root = abspath(dirname(config['dst']))

        if config['autofix']:
            autofixer = find_reroot_autofix(dset)
            if autofixer is not None:
                # An autofix overrides any user supplied prefixes.
                print('Found autfixer = {}'.format(ub.urepr(autofixer, nl=1)))
                config['new_prefix'] = autofixer['new_prefix']
                config['old_prefix'] = autofixer['old_prefix']

        dset.reroot(
            new_root=new_root,
            new_prefix=config['new_prefix'],
            old_prefix=config['old_prefix'],
            absolute=config['absolute'],
            check=config['check']
        )

        dset.fpath = config['dst']
        print('dump dset.fpath = {!r}'.format(dset.fpath))
        dumpkw = {
            'newlines': True,
            'compress': config['compress'],
        }
        dset.dump(dset.fpath, **dumpkw)
def find_reroot_autofix(dset):
    """
    Search for a prefix that can be stripped so all missing images resolve.

    The first missing image path is used as a template: each suffix of its
    parts is joined onto the dataset bundle directory, and every suffix that
    exists on disk yields a candidate whose ``old_prefix`` is the leading
    parts that were dropped (``new_prefix`` is always the empty string).
    The first candidate that makes every missing path exist is returned.

    Args:
        dset: object providing ``missing_images()`` (an iterable of tuples
            whose item at index 1 is the image path) and ``bundle_dpath``.

    Returns:
        dict | None: a dictionary with ``old_prefix`` and ``new_prefix``
            keys, or None when no images are missing.

    Raises:
        RuntimeError: if no suffix of the first missing path exists under the
            bundle directory, or if no candidate fixes every missing path.
    """
    import os
    # Given a set of missing images, is there a way we can autofix them?
    missing_tups = dset.missing_images()
    missing_gpaths = [t[1] for t in missing_tups]
    if not missing_gpaths:
        print('All paths look like they exist')
        return None

    print(f'Found {len(missing_tups)} missing images')
    bundle_dpath = ub.Path(dset.bundle_dpath)
    print('bundle_dpath = {}'.format(ub.urepr(bundle_dpath, nl=1)))
    first = ub.Path(missing_gpaths[0])
    print('first = {}'.format(ub.urepr(first, nl=1)))
    first_parts = first.parts
    print('first_parts = {}'.format(ub.urepr(first_parts, nl=1)))

    candidates = []
    for i in range(len(first_parts)):
        cand_path = bundle_dpath / ub.Path(*first_parts[i:])
        if cand_path.exists():
            old_prefix = os.fspath(ub.Path(*first_parts[:i]))
            # The filesystem root already ends with a separator; appending
            # another would give '//' which never matches a real path.
            if not old_prefix.endswith('/'):
                old_prefix += '/'
            candidates.append({
                'old_prefix': old_prefix,
                'new_prefix': '',
            })

    if not candidates:
        raise RuntimeError('Could not determine a valid autofix')

    # Check that the fix fixes everything or dont do it.
    chosen = None
    for candidate in candidates:
        try:
            _check_candidates(
                candidate, bundle_dpath, missing_gpaths, fastfail=True)
        except FileNotFoundError:
            continue
        chosen = candidate
        break

    if chosen is None:
        # rich is optional; without this fallback an ImportError would hide
        # the RuntimeError raised below.
        try:
            import rich
        except ImportError:
            rich_print = print
        else:
            rich_print = rich.print
        rich_print('[red]ERROR: No candidate fixed all paths')
        # Give the user more info
        for candidate in candidates:
            errors = _check_candidates(candidate, bundle_dpath,
                                       missing_gpaths, fastfail=False)
            rich_print('candidate prefix replacement = {}'.format(ub.urepr(candidate, nl=1)))
            # Every candidate failed the fastfail pass above, so errors is
            # expected to be non-empty here.
            error1 = errors[0]
            print('error1 = {}'.format(ub.urepr(error1, nl=1)))
            rich_print(f'[red] Had {len(errors)} / {len(missing_gpaths)} errors')
        raise RuntimeError('No candidate fixed all paths')

    return chosen
def _check_candidates(candidate, bundle_dpath, missing_gpaths, fastfail=True):
    """
    Check whether a prefix replacement makes the given image paths exist.

    For each path, a leading ``old_prefix`` (if present) is swapped for
    ``new_prefix``, the result is joined onto ``bundle_dpath``, and the
    joined path is tested for existence. Only the leading occurrence of the
    prefix is rewritten; paths that do not start with it are left unchanged.

    Args:
        candidate (dict): mapping with ``old_prefix`` and ``new_prefix``
            items (str or os.PathLike).

        bundle_dpath (str | os.PathLike): directory the rewritten paths are
            joined onto.

        missing_gpaths (Iterable[str | os.PathLike]): image paths to test.

        fastfail (bool): if True, raise on the first path that still does
            not exist instead of collecting all failures.

    Returns:
        list: the items of ``missing_gpaths`` that still do not exist after
            the replacement.

    Raises:
        FileNotFoundError: if ``fastfail`` is True and a rewritten path does
            not exist.
    """
    import os
    old_pref = os.fspath(candidate['old_prefix'])
    new_pref = os.fspath(candidate['new_prefix'])
    errors = []
    for gpath in missing_gpaths:
        path_text = os.fspath(gpath)
        # Swap only a leading prefix; str.replace would also rewrite matching
        # text in the middle of the path.
        if path_text.startswith(old_pref):
            path_text = new_pref + path_text[len(old_pref):]
        new_gpath = os.path.join(bundle_dpath, path_text)
        if not os.path.exists(new_gpath):
            errors.append(gpath)
            if fastfail:
                raise FileNotFoundError(new_gpath)
    return errors
# Module-level alias for this module's CLI class.
# NOTE(review): presumably looked up by the kwcoco CLI dispatcher — confirm.
_CLI = CocoRerootCLI

# Run the reroot command when this file is executed as a script.
if __name__ == '__main__':
    _CLI.main()