Source code for kwcoco.data.grab_domainnet

"""
References:
    http://ai.bu.edu/M3SDA/#dataset
"""


def grab_domain_net():
    """
    Download DomainNet and build one kwcoco manifest per train/test split.

    Every archive and split listing in the table below is fetched with
    ``ub.grabdata`` into the ``kwcoco/domain_net`` application cache
    directory, the ``*_images`` archives are extracted there, and each
    ``*_train`` / ``*_test`` listing is converted into a
    ``<key>.kwcoco.json`` file in the same directory. The download /
    extract / convert phase is guarded by a ``ub.CacheStamp`` and is skipped
    when the stamp is not expired.

    Each non-empty line of a split listing is parsed as
    ``<domain>/<category>/<image name> <category id>``. One image and one
    annotation whose box covers the entire image are added per line.

    Returns:
        List[kwcoco.CocoDataset]: one dataset, loaded from disk, for every
            entry of the table whose key ends in ``_train`` or ``_test``,
            in table order.

    Raises:
        Exception: if one or more downloads fail. The message lists the
            ``repr`` of each underlying error.

    TODO:
        - [ ] Allow the user to specify the download directory, generalize
              this pattern across the data grab scripts.
    """
    import zipfile
    import ubelt as ub
    import kwcoco
    import kwimage
    import os

    # NOTE(review): there is a ``painting_images`` archive but no
    # ``painting_train`` / ``painting_test`` listing in this table, so no
    # painting manifest is produced -- confirm whether that is intended.
    infos = {
        'clipart_images': {
            'url': 'http://csr.bu.edu/ftp/visda/2019/multi-source/groundtruth/clipart.zip',
            'sha512': '3bcfb9ec1b4061e8d5b0b887d4ebd4a708732787fc563c6dfc2d'
        },
        'infograph_images': {
            'url': 'http://csr.bu.edu/ftp/visda/2019/multi-source/infograph.zip',
            'sha512': '47841f0d1b8606e4b02d508b250484d54f5cf04ef6c4875c6c5a39c',
        },
        'painting_images': {
            'url': 'http://csr.bu.edu/ftp/visda/2019/multi-source/groundtruth/painting.zip',
            'sha512': '4c98b02563075948668a298c488660fda1d1a7ed85fd69caa7985fba',
        },
        'quickdraw_images': {
            'url': 'http://csr.bu.edu/ftp/visda/2019/multi-source/quickdraw.zip',
            'sha512': '127cce1dd57fc99992f8614de77d03ae8ed6af242973f7a013ec0a',
        },
        'real_images': {
            'url': 'http://csr.bu.edu/ftp/visda/2019/multi-source/real.zip',
            'sha512': '751713d4592d1278b50bf69787988cfae280cabdac80ee34ec4016e',
        },
        'sketch_images': {
            'url': 'http://csr.bu.edu/ftp/visda/2019/multi-source/sketch.zip',
            'sha512': '369b8b6d78ac61bfcc85f660d878ba5fc701524b82bbc2eb65ca9a',
        },

        'clipart_train': {
            'url': 'http://csr.bu.edu/ftp/visda/2019/multi-source/domainnet/txt/clipart_train.txt',
            'sha512': '400985ce2a0878df2d8e54f1996c4fc253e577c1f91136304e553b'
        },
        'clipart_test': {
            'url': 'http://csr.bu.edu/ftp/visda/2019/multi-source/domainnet/txt/clipart_test.txt',
            'sha512': '4d34a4a540d8b139499581875e2d18bbedd37c347233523d66beaf',
        },
        'infograph_train': {
            'url': 'http://csr.bu.edu/ftp/visda/2019/multi-source/domainnet/txt/infograph_train.txt',
            'sha512': 'd9ed4d86e2ea20b44315699964b6b7dc09442f0539300444d9329d'
        },
        'infograph_test': {
            'url': 'http://csr.bu.edu/ftp/visda/2019/multi-source/domainnet/txt/infograph_test.txt',
            'sha512': '700f2b5deaad3923b0b8b85aeb694b73f24b13255cf48db3e770eaf',
        },
        'quickdraw_train': {
            'url': 'http://csr.bu.edu/ftp/visda/2019/multi-source/domainnet/txt/quickdraw_train.txt',
            'sha512': '7410e8a1debd769e0412725a0fcc646d83055d297b5f44a92f50308'
        },
        'quickdraw_test': {
            'url': 'http://csr.bu.edu/ftp/visda/2019/multi-source/domainnet/txt/quickdraw_test.txt',
            'sha512': '07a97e84d723f58dea67683ae5ddf013ebc5d33b76deca9a5d34f72',
        },
        'real_train': {
            'url': 'http://csr.bu.edu/ftp/visda/2019/multi-source/domainnet/txt/real_train.txt',
            'sha512': 'a5a510fec614018546510d55c378bca008fcfd51062f6414583639e'
        },
        'real_test': {
            'url': 'http://csr.bu.edu/ftp/visda/2019/multi-source/domainnet/txt/real_test.txt',
            'sha512': '9586235e335340f065de720f99332bc906aa0ab147006a1a7ea4925',
        },
        'sketch_train': {
            'url': 'http://csr.bu.edu/ftp/visda/2019/multi-source/domainnet/txt/sketch_train.txt',
            'sha512': 'ee92102f2b98f11765e6a97afbab15c60f2bac6e30fa7e1bc5a7db'
        },
        'sketch_test': {
            'url': 'http://csr.bu.edu/ftp/visda/2019/multi-source/domainnet/txt/sketch_test.txt',
            'sha512': '7384fc06670979911d7e1e9a34ba8a6cd20e9e77d6bc3536c739ff4',
        },
    }
    dpath = ub.ensure_app_cache_dir('kwcoco', 'domain_net')

    # Assign a coco filepath to each dataset split
    for key, info in infos.items():
        if key.endswith(('_train', '_test')):
            info['coco_fpath'] = os.path.join(dpath, key + '.kwcoco.json')

    stamp = ub.CacheStamp('domain_stamp', dpath=dpath, depends=['v001'])
    if stamp.expired():
        errors = []
        # TODO: Multi-file download manager with parallel jobs
        # TODO: Don't redownload if the data was already extracted
        for key, info in infos.items():
            # Deliberately broad: attempt every download and collect the
            # failures so that they can all be reported together below.
            try:
                zip_fpath = ub.grabdata(
                    info['url'], dpath=dpath,
                    hash_prefix=info.get('sha512', 'x' * 64))
                info['fpath'] = zip_fpath
            except Exception as ex:
                print('ex = {!r}'.format(ex))
                errors.append(repr(ex))

        if errors:
            raise Exception('download errors: {}'.format(errors))

        # Extract images from archive files
        for key, info in infos.items():
            if key.endswith('_images'):
                print('extract {} images'.format(key))
                # The context manager closes the archive (and its
                # underlying file handle) even if extraction fails.
                with zipfile.ZipFile(info['fpath']) as zfile:
                    zfile.extractall(path=dpath)

        # Construct the kwcoco manifests
        for key, info in infos.items():
            if key.endswith(('_train', '_test')):
                coco_dset = kwcoco.CocoDataset()
                coco_dset.fpath = info['coco_fpath']

                with open(info['fpath'], 'r') as file:
                    lines = file.read().split('\n')

                for line in ub.ProgIter(lines, desc='parse ' + key):
                    if line:
                        print('line = {!r}'.format(line))
                        # Split on the last space only so that a path which
                        # itself contains a space still unpacks correctly.
                        path, num = line.rsplit(' ', 1)
                        gpath = os.path.join(dpath, path)
                        shape = kwimage.load_image_shape(gpath)
                        h, w = shape[0:2]
                        domain, catname, image_name = path.split('/')
                        gid = coco_dset.add_image(
                            file_name=path, height=h, width=w,
                            name=image_name)
                        cid = int(num)
                        cid = coco_dset.ensure_category(name=catname, id=cid)
                        coco_dset.add_annotation(
                            image_id=gid, category_id=cid, bbox=[0, 0, w, h])
                        # Mark the domain in a non-standard field of the
                        # image that was just added. The index is keyed by
                        # image id, so it must be indexed with ``gid``.
                        coco_dset.index.imgs[gid]['domain'] = domain

                coco_dset.validate()
                coco_dset.dump(coco_dset.fpath, newlines=True)
        stamp.renew()

    # Read and return each domain-net dataset
    dsets = []
    for key, info in infos.items():
        if 'coco_fpath' in info:
            dset = kwcoco.CocoDataset(info['coco_fpath'])
            dsets.append(dset)
    return dsets
if __name__ == '__main__':
    """
    CommandLine:
        python ~/code/kwcoco/kwcoco/data/grab_domainnet.py
    """
    # Script entry point: run the grab; the returned datasets are discarded.
    grab_domain_net()