Source code for kwcoco.util.util_special_json

"""
Special non-general json functions
"""
# import ubelt as ub
from packaging.version import parse as Version
import os
import json as pjson
from io import StringIO
from types import ModuleType
# The ujson library is faster than Python's json, but its API has some
# limitations and it requires a minimum version. Currently we only use it to
# read; we have to wait for https://github.com/ultrajson/ultrajson/pull/518 to
# land before we can use it to write.
try:
    import ujson
except ImportError:
    ujson = None

# Opt-in switch for the ujson reader. The environment variable is parsed as a
# flag: an unset / empty value or one of the common "off" spellings disables
# it. Testing only the truthiness of the raw string would treat
# KWCOCO_USE_UJSON=0 as enabled, because any non-empty string is truthy.
KWCOCO_USE_UJSON = (
    os.environ.get('KWCOCO_USE_UJSON', '').strip().lower()
    not in {'', '0', 'false', 'no', 'off'}
)

# json_r is the module used for reading and json_w the module used for
# writing. Writing always goes through the standard library.
if KWCOCO_USE_UJSON and ujson is not None and Version(ujson.__version__) >= Version('5.2.0'):
    json_r: ModuleType = ujson
    json_w: ModuleType = pjson
else:
    json_r: ModuleType = pjson
    json_w: ModuleType = pjson



[docs]
def _json_dumps(data, indent=None):
    try:
        text = json_w.dumps(data, indent=indent, ensure_ascii=False)
    except Exception:
        if indent is not None:
            if isinstance(indent, str):
                assert indent.count(' ') == len(indent), 'must be all spaces, got {!r}'.format(indent)
                indent = len(indent)
        if indent is None:
            indent = 0
        fp = StringIO()
        json_w.dump(data, fp, indent=indent, ensure_ascii=False)
        fp.seek(0)
        text = fp.read()
    return text




[docs]
def _json_lines_dumps(key, value, indent):
    """
    Format a ``key: [...]`` json fragment with one list item per line.

    Args:
        key (object): the key; it is serialized with ``_json_dumps``.

        value (Iterable): the items; each one is serialized with
            ``_json_dumps`` and placed on its own line.

        indent (str): text placed in front of every item line.

    Returns:
        str: the fragment, e.g. ``"key": [<newline>item,<newline>item<newline>]``,
            or ``"key": []`` when there are no items. The closing bracket
            is not indented.
    """
    encoded_items = list(map(_json_dumps, value))
    if not encoded_items:
        list_text = '[]'
    else:
        separator = ',\n' + indent
        list_text = '[\n' + indent + separator.join(encoded_items) + '\n]'
    return '{}: {}'.format(_json_dumps(key), list_text)




[docs]
def _special_kwcoco_pretty_dumps_orig(data, indent=None):
    """
    The old way of doing "pretty" dumping, except it isn't that pretty.

    Each known top-level table (see ``SPEC_KEYS``) that holds a list is
    written with one item per line. Images that carry an ``auxiliary`` or
    ``assets`` list additionally get one line per asset. Everything else,
    i.e. unknown keys and known keys whose value is not a list, is
    serialized with a plain ``_json_dumps`` call.

    Args:
        data (dict): the dataset dictionary to serialize.

        indent (str | int | None): text placed before each list item. An
            integer is interpreted as that many spaces and None as no
            indentation.

    Returns:
        str: the json text

    See Also:
        Tried to do a "principled" lark version, but this way is faster
        ~/code/kwcoco/dev/devcheck/json_dumps_experiments.py

    Ignore:
        import kwcoco
        dset = kwcoco.CocoDataset.demo('vidshapes8-msi-multisensor')
        dset.clear_annotations()
        data = dset.dataset
        print(_special_kwcoco_pretty_dumps_orig(data, indent='    '))
    """
    SPEC_KEYS = [
        'info',
        'licenses',
        'categories',
        'keypoint_categories',  # support only partially implemented
        'videos',
        'images',
        'annotations',
    ]
    if indent is None:
        indent = ''
    if isinstance(indent, int):
        indent = ' ' * indent
    dict_lines = []
    # TODO: optimize efficiency
    # TODO: general "flexible json" package that can read to/from
    # zipfiles, support ujson or pjson backends, has pretty newline
    # properties. This would abstract much of the logic away from this
    # module and be generally useful when dealing with other larger
    # json files.
    for key in SPEC_KEYS:
        if key not in data:
            continue
        value = data[key]
        if not isinstance(value, (list, tuple)):
            # Main entries are expected to be lists, but do not rely on it:
            # iterating a dict (e.g. an MS-COCO style "info" object) would
            # silently write only its keys, and None / scalars would crash.
            item_repr = '{}: {}'.format(_json_dumps(key), _json_dumps(value))
        elif key == 'images':
            # Every image gets its own line, and every auxiliary / asset
            # item of an image also gets a line.
            value_lines = [_image_line_repr(img, indent) for img in value]
            if value_lines:
                value_body = (',\n' + indent).join(value_lines)
                value_repr = '[\n' + indent + value_body + '\n]'
            else:
                value_repr = '[]'
            item_repr = '{}: {}'.format(_json_dumps(key), value_repr)
        else:
            # Each entry of the list gets its own line
            item_repr = _json_lines_dumps(key, value, indent)
        dict_lines.append(item_repr)

    for key in sorted(set(data) - set(SPEC_KEYS)):
        # Dont assume anything about other data
        item_repr = '{}: {}'.format(_json_dumps(key), _json_dumps(data[key]))
        dict_lines.append(item_repr)
    text = ''.join(['{\n', ',\n'.join(dict_lines), '\n}'])
    return text


def _image_line_repr(img, indent):
    """
    Serialize one image dictionary, giving each asset item its own line.

    Args:
        img (dict): the image dictionary.

        indent (str): the indent used for top-level list items; asset lines
            are prefixed with twice this indent.

    Returns:
        str: json text for the image. When the image has no ``auxiliary`` /
            ``assets`` key, or that value is not a list, this is simply
            ``_json_dumps(img)``.
    """
    # "auxiliary" takes precedence over "assets" when both are present.
    for asset_key in ('auxiliary', 'assets'):
        if asset_key in img:
            break
    else:
        return _json_dumps(img)
    if not isinstance(img[asset_key], (list, tuple)):
        # Nothing to spread over multiple lines (e.g. the value is None).
        return _json_dumps(img)
    # Work on a shallow copy so the caller's image is not modified.
    topimg = img.copy()
    aux_items = topimg.pop(asset_key)
    aux_items_repr = _json_lines_dumps(asset_key, aux_items, indent + indent)
    if not topimg:
        return '{' + aux_items_repr + '}'
    # Drop the closing brace of the remaining fields and splice the asset
    # list in as the final member of the object.
    return _json_dumps(topimg)[:-1] + ', ' + aux_items_repr + '}'