"""Source code for kwcoco.util.util_json"""

import copy
import numpy as np
import ubelt as ub
import json
from collections import OrderedDict
from collections import deque


def ensure_json_serializable(dict_, normalize_containers=False, verbose=0):
    """
    Attempt to convert common types (e.g. numpy) into something json compliant

    Converts numpy arrays and scalars, and tuples, into plain lists / builtin
    scalars. Objects exposing a ``__json__()`` method are replaced by its
    return value. Operates on a deep copy; the input is never mutated.

    Args:
        dict_ (dict | list | tuple): container of data to convert.
        normalize_containers (bool, default=False): if True, normalizes dict
            containers (e.g. ``defaultdict``) to be standard python
            structures.
        verbose (int, default=0): verbosity of the progress iterator used
            while walking the structure.

    Returns:
        dict | list: deep copy of ``dict_`` with json-incompatible values
            replaced.

    Example:
        >>> data = ub.ddict(lambda: int)
        >>> data['foo'] = ub.ddict(lambda: int)
        >>> data['bar'] = np.array([1, 2, 3])
        >>> data['foo']['a'] = 1
        >>> result = ensure_json_serializable(data, normalize_containers=True)
        >>> assert type(result) is dict
    """
    # Work on a copy so conversions never leak back into the caller's data
    dict_ = copy.deepcopy(dict_)

    def _norm_container(c):
        # Cast dict subclasses back to standard python containers. An
        # OrderedDict subclass becomes a plain OrderedDict; any other dict
        # subclass (e.g. defaultdict) becomes a plain dict.
        if isinstance(c, dict):
            if isinstance(c, OrderedDict):
                if type(c) is not OrderedDict:
                    c = OrderedDict(c)
            else:
                if type(c) is not dict:
                    c = dict(c)
        return c

    def _walk_json(data, prefix=None):
        # Depth-first generator of (path, {key: value}) pairs for every
        # container in the structure. The whole level is yielded before
        # descending so tuples can be converted to (mutable) lists on the
        # fly without breaking the traversal.
        prefix = [] if prefix is None else prefix
        if isinstance(data, (list, tuple)):
            items = enumerate(data)
        elif isinstance(data, dict):
            items = data.items()
        else:
            raise TypeError(type(data))
        level = dict(items)
        # yield a dict so the user can choose to not walk down a path
        yield prefix, level
        for key, value in level.items():
            if isinstance(value, (dict, list, tuple)):
                yield from _walk_json(value, prefix=prefix + [key])

    def _convert(dict_, root, key, new_value):
        # Replace the value addressed by the path ``root + [key]`` in-place
        d = dict_
        for k in root:
            d = d[k]
        d[key] = new_value

    def _flatmap(func, data):
        # Apply ``func`` to every leaf of a (possibly nested) list
        if isinstance(data, list):
            return [_flatmap(func, item) for item in data]
        else:
            return func(data)

    to_convert = []
    walker = _walk_json(dict_)
    if verbose:
        # Only build the progress iterator when requested; at verbose=0 a
        # ProgIter is a silent passthrough anyway.
        walker = ub.ProgIter(walker, desc='walk json', verbose=verbose)
    for root, level in walker:
        for key, value in level.items():
            if isinstance(value, tuple):
                # Convert tuples on the fly so they become mutable
                _convert(dict_, root, key, list(value))
            elif isinstance(value, np.ndarray):
                new_value = value.tolist()
                # tolist normally yields builtin scalars already; coerce the
                # leaves explicitly based on dtype kind to be safe.
                if value.dtype.kind in {'i', 'u'}:
                    new_value = _flatmap(int, new_value)
                elif value.dtype.kind in {'f'}:
                    new_value = _flatmap(float, new_value)
                elif value.dtype.kind in {'c'}:
                    new_value = _flatmap(complex, new_value)
                to_convert.append((root, key, new_value))
            elif isinstance(value, np.integer):
                # Abstract base covers all widths (int8..int64, uint8..uint64)
                # instead of an explicit type list that missed 8-bit ints.
                to_convert.append((root, key, int(value)))
            elif isinstance(value, np.floating):
                to_convert.append((root, key, float(value)))
            elif isinstance(value, np.complexfloating):
                to_convert.append((root, key, complex(value)))
            elif hasattr(value, '__json__'):
                # Let objects define their own json representation
                to_convert.append((root, key, value.__json__()))
            elif normalize_containers:
                if isinstance(value, dict):
                    to_convert.append((root, key, _norm_container(value)))

    for root, key, new_value in to_convert:
        _convert(dict_, root, key, new_value)

    if normalize_containers:
        # normalize the outer layer
        dict_ = _norm_container(dict_)
    return dict_
def find_json_unserializable(data, quickcheck=False):
    """
    Recurse through a json-like datastructure and locate every component that
    would cause a serialization error, recording where each one lives.

    Args:
        data (object): data that should be json serializable
        quickcheck (bool): if True, first attempt to serialize the entire
            structure in one shot and skip the recursive scan when it
            succeeds.

    Yields:
        Dict: a "bad part" dictionary with two items:
            'data' - the value that caused the serialization error
            'loc' - a deque of keys/indexes to traverse to reach that value.
            If an entry of 'loc' is itself a list, it marks the rare case
            where a dictionary *key* is the unserializable object; only the
            level of the bad key is knowable, not a full address.

    Example:
        >>> bad_val = {'a_set_is_not_json'}
        >>> data = [1, 2, {'nest1': [2, {'bar': bad_val}]}]
        >>> parts = list(find_json_unserializable(data))
        >>> assert len(parts) == 1
        >>> assert list(parts[0]['loc']) == [2, 'nest1', 1, 'bar']
        >>> assert parts[0]['data'] is bad_val
    """
    scan_needed = True
    serializable = None
    if quickcheck:
        # One dumps() call over everything is cheaper than recursing when the
        # data is actually fine; only fall through to the scan on failure.
        try:
            json.dumps(data)
        except Exception:
            serializable = False
        else:
            serializable = True
            scan_needed = False

    if not scan_needed:
        return

    if isinstance(data, list):
        for index, element in enumerate(data):
            for bad in find_json_unserializable(element, quickcheck=False):
                # Prepend our position so 'loc' reads outermost-first
                bad['loc'].appendleft(index)
                yield bad
    elif isinstance(data, dict):
        for key, value in data.items():
            for bad in find_json_unserializable(key, quickcheck=False):
                # Special case where a dict key is the error value.
                # Deliberately use a non-hashable list so this marker cannot
                # be mistaken for a real address; all we can say is that the
                # bad key lives at this level.
                bad['loc'].appendleft(['.keys', key])
                yield bad
            for bad in find_json_unserializable(value, quickcheck=False):
                bad['loc'].appendleft(key)
                yield bad
    else:
        # Leaf node: test it directly unless the quickcheck already told us
        # the answer for this exact object.
        if serializable is None:
            try:
                json.dumps(data)
            except Exception:
                serializable = False
            else:
                serializable = True
        if serializable is False:
            yield {'loc': deque(), 'data': data}