Source code for kwcoco.util.util_special_json

"""
Special non-general json functions
"""
# import ubelt as ub
from packaging.version import parse as Version
import os
import json as pjson
from io import StringIO
from types import ModuleType
# The ujson library is faster than Python's json, but the API has some
# limitations and requires a minimum version. Currently we only use it to read,
# we have to wait for https://github.com/ultrajson/ultrajson/pull/518 to land
# before we use it to write.
try:
    import ujson
except ImportError:
    ujson = None

KWCOCO_USE_UJSON = bool(os.environ.get('KWCOCO_USE_UJSON'))

if ujson is not None and Version(ujson.__version__) >= Version('5.2.0') and KWCOCO_USE_UJSON:
    json_r: ModuleType = ujson
    json_w: ModuleType = pjson
else:
    json_r: ModuleType = pjson
    json_w: ModuleType = pjson


[docs] def _json_dumps(data, indent=None): try: text = json_w.dumps(data, indent=indent, ensure_ascii=False) except Exception: if indent is not None: if isinstance(indent, str): assert indent.count(' ') == len(indent), 'must be all spaces, got {!r}'.format(indent) indent = len(indent) if indent is None: indent = 0 fp = StringIO() json_w.dump(data, fp, indent=indent, ensure_ascii=False) fp.seek(0) text = fp.read() return text
[docs] def _json_lines_dumps(key, value, indent): value_lines = [_json_dumps(v) for v in value] if value_lines: value_body = (',\n' + indent).join(value_lines) value_repr = '[\n' + indent + value_body + '\n]' else: value_repr = '[]' item_repr = '{}: {}'.format(_json_dumps(key), value_repr) return item_repr
[docs] def _special_kwcoco_pretty_dumps_orig(data, indent=None): """ The old way of doing "pretty" dumping, except it isn't that pretty. See Also: Tried to do a "principled" lark version, but this this way is faster ~/code/kwcoco/dev/devcheck/json_dumps_experiments.py Ignore: import kwcoco dset = kwcoco.CocoDataset.demo('vidshapes8-msi-multisensor') dset.clear_annotations() data = dset.dataset print(_special_kwcoco_pretty_dumps_orig(data, indent=' ')) """ SPEC_KEYS = [ 'info', 'licenses', 'categories', 'keypoint_categories', # support only partially implemented 'videos', 'images', 'annotations', ] if indent is None: indent = '' if isinstance(indent, int): indent = ' ' * indent dict_lines = [] main_keys = SPEC_KEYS other_keys = sorted(set(data.keys()) - set(main_keys)) # TODO: optimize efficiency # TODO: general "flexible json" package that can read to/from # zipfiles, support ujson or pjson backends, has pretty newline # properties. This would abstrat much of the logic away from this # module and be generally useful when dealing with other larger # json files. for key in main_keys: if key not in data: continue # We know each main entry is a list, so make it such that # Each entry gets its own line value = data[key] if key == 'images': # Except image, where every auxiliary item also gets a line value_lines = [] for img in value: asset_key = None if 'auxiliary' in img: asset_key = 'auxiliary' elif 'assets' in img: asset_key = 'assets' if asset_key is not None: topimg = img.copy() aux_items = topimg.pop(asset_key) aux_items_repr = _json_lines_dumps(asset_key, aux_items, indent + indent) topimg_repr = _json_dumps(topimg) if len(topimg) == 0: v2 = '{' + aux_items_repr + '}' else: v2 = topimg_repr[:-1] + ', ' + aux_items_repr + '}' else: v2 = _json_dumps(img) value_lines.append(v2) else: value_lines = [_json_dumps(v) for v in value] if value_lines: value_body = (',\n' + indent).join(value_lines) value_repr = '[\n' + indent + value_body + '\n]' else: value_repr = '[]' item_repr = '{}: {}'.format(_json_dumps(key), value_repr) dict_lines.append(item_repr) for key in other_keys: # Dont assume anything about other data value = data.get(key, []) value_repr = _json_dumps(value) item_repr = '{}: {}'.format(_json_dumps(key), value_repr) dict_lines.append(item_repr) text = ''.join(['{\n', ',\n'.join(dict_lines), '\n}']) return text