Source code for kwcoco.coco_schema

"""
The place where the formal KWCOCO schema is defined.

CommandLine:
    python -m kwcoco.coco_schema
    xdoctest -m kwcoco.coco_schema __doc__

TODO:
    - [ ] Perhaps use `voluptuous <https://pypi.org/project/voluptuous/>`_ instead?

Example:
    >>> import kwcoco
    >>> from kwcoco.coco_schema import COCO_SCHEMA
    >>> import jsonschema
    >>> dset = kwcoco.CocoDataset.demo('shapes1')
    >>> # print('dset.dataset = {}'.format(ub.urepr(dset.dataset, nl=2)))
    >>> COCO_SCHEMA.validate(dset.dataset)

    >>> try:
    >>>     jsonschema.validate(dset.dataset, schema=COCO_SCHEMA)
    >>> except jsonschema.exceptions.ValidationError as ex:
    >>>     vali_ex = ex
    >>>     print('ex = {!r}'.format(ex))
    >>>     raise
    >>> except jsonschema.exceptions.SchemaError as ex:
    >>>     print('ex = {!r}'.format(ex))
    >>>     schema_ex = ex
    >>>     print('schema_ex.instance = {}'.format(ub.urepr(schema_ex.instance, nl=-1)))
    >>>     raise

    >>> # Test the multispectral image definition
    >>> import copy
    >>> dataset = dset.copy().dataset
    >>> img = dataset['images'][0]
    >>> img.pop('file_name')
    >>> import pytest
    >>> with pytest.raises(jsonschema.ValidationError):
    >>>     COCO_SCHEMA.validate(dataset)
    >>> import pytest
    >>> img['auxiliary'] = [{'file_name': 'foobar'}]
    >>> with pytest.raises(jsonschema.ValidationError):
    >>>     COCO_SCHEMA.validate(dataset)
    >>> img['name'] = 'asset-only images must have a name'
    >>> COCO_SCHEMA.validate(dataset)
"""

from kwcoco.util.jsonschema_elements import SchemaElements
from collections import OrderedDict
import ubelt as ub



[docs]
def deprecated(*args):
    """
    Build the placeholder schema used for deprecated fields.

    Args:
        *args: accepted so the legacy sub-schema can be written at the call
            site for reference; the values are not used.

    Returns:
        The result of ``ANY(description='deprecated')``.
    """
    placeholder = ANY(description='deprecated')
    return placeholder




[docs]
def TUPLE(*args, **kw):
    """
    Build a fixed-length array schema with one slot per positional argument.

    Args:
        *args: the element schema for each slot.
        **kw: extra keywords forwarded unchanged to ``ARRAY``.

    Returns:
        ``ARRAY(TYPE=..., numItems=len(args), **kw)``. TYPE is the first
        argument when ``ub.allsame`` reports that all arguments are the same,
        and ``ANY`` when they differ or when no arguments are given.
    """
    # An empty call short-circuits to the ANY branch, as do mixed element types.
    is_uniform = bool(args) and ub.allsame(args)
    item_type = args[0] if is_uniform else ANY
    return ARRAY(TYPE=item_type, numItems=len(args), **kw)


# One SchemaElements instance backs the whole module. The upper-case names
# below are shorthand for its attributes so the schema definitions that follow
# can be written without the ``elem.`` prefix.
elem = SchemaElements()
ALLOF = elem.ALLOF
ANY = elem.ANY
ANYOF = elem.ANYOF
ARRAY = elem.ARRAY
BOOLEAN = elem.BOOLEAN
INTEGER = elem.INTEGER
NOT = elem.NOT
NULL = elem.NULL
NUMBER = elem.NUMBER
OBJECT = elem.OBJECT
ONEOF = elem.ONEOF
STRING = elem.STRING


# Semantic aliases only: both names are the very same element as STRING, so
# they document intent without adding any extra validation.
UUID = STRING
PATH = STRING

# Schema for a single keypoint in the object-based ("new-style") layout:
# a 2-slot numeric ``xy`` position plus optional visibility / category hints.
KWCOCO_KEYPOINT = OBJECT(
    PROPERTIES={
        'xy': TUPLE(NUMBER, NUMBER, description='<x1, y1> in pixels'),
        'visible': INTEGER(description='choice(0, 1, 2)'),
        'keypoint_category_id': INTEGER,
        'keypoint_category': STRING(description='only to be used as a hint')
    },
    title='KWCOCO_KEYPOINT',
    # Spelled correctly so the text lands under the standard `description`
    # key like every other element in this module (it used to be passed as
    # the misspelled keyword `descripton`).
    description='A new-style point',
)

# Schema for a polygon that may contain holes. Every point is declared as
# ``ARRAY(NUMBER, numItems=2)``; ``exterior`` is one ring of such points and
# ``interiors`` is an array of rings (one ring per hole).
KWCOCO_POLYGON = OBJECT(
    PROPERTIES={
        'exterior': ARRAY(
            ARRAY(NUMBER, numItems=2),
            description='counter-clockwise xy exterior points'),
        'interiors': ARRAY(
            ARRAY(
                ARRAY(NUMBER, numItems=2),
                description='clockwise xy hole'),
        )
    },
    title='KWCOCO_POLYGON',
    description='A new-style polygon format that supports holes',
)


# Legacy keypoint encoding: one flat array of integers.
# Note the schema title is 'MSCOCO_KEYPOINTS' even though the Python name is
# ORIG_COCO_KEYPOINTS.
ORIG_COCO_KEYPOINTS = ARRAY(
    INTEGER,
    description='An old-style set of keypoints (x1,y1,v1,...,xk,yk,vk)',
    title='MSCOCO_KEYPOINTS'
)
# Object-based encoding: an array of KWCOCO_KEYPOINT objects.
KWCOCO_KEYPOINTS = ARRAY(KWCOCO_KEYPOINT)
# A keypoints field may use either encoding.
KEYPOINTS = ANYOF(ORIG_COCO_KEYPOINTS, KWCOCO_KEYPOINTS)


MSCOCO_POLYGON = ARRAY(
    TYPE=NUMBER,
    description='an old-style polygon [x1,y1,v1,...,xk,yk,vk]',
    title='MSCOCO_POLYGON',
)
MSCOCO_MULTIPOLYGON = ARRAY(MSCOCO_POLYGON)

POLYGON = ANYOF(
    KWCOCO_POLYGON,
    ARRAY(KWCOCO_POLYGON),
    MSCOCO_POLYGON,
    MSCOCO_MULTIPOLYGON,
)

# Mask alternative to polygons; the schema only checks that it is a string.
RUN_LENGTH_ENCODING = STRING(description='A run-length-encoding mask format read by pycocotools')

# Bounding box declared as a 4-item numeric array in xywh order (see the
# description string for the meaning of each slot).
BBOX = ARRAY(
    TYPE=NUMBER,
    numItems=4,
    description='[top-left x, top-left-y, width, height] in image-space pixels',
    title='BBOX',
)

### ------------------------


# A segmentation is either one of the polygon layouts or an RLE string.
SEGMENTATION = ANYOF(POLYGON, RUN_LENGTH_ENCODING)

# Names cannot contain certain special characters
# NOTE(review): presumably this maps to the JSON Schema `pattern` keyword,
# which is not implicitly anchored; if so, '[^/]+' only demands at least one
# non-slash character and a name such as 'a/b' would still validate. Confirm
# the intent before tightening it to '^[^/]+$', since that could reject
# existing datasets.
NAME = STRING(pattern='[^/]+')


# Record schema for one entry of the `categories` table. Only `id` and `name`
# are required; the remaining properties are optional.
CATEGORY = OBJECT({
    'id': INTEGER(description='A unique internal category id'),
    'name': NAME(description='A unique external category name or identifier'),

    'alias': ARRAY(NAME, description='A list of alternate names that should be resolved to this category'),

    'supercategory': ANYOF(NAME(description='A coarser category name'), NULL),
    'parents': ARRAY(NAME, description='Used for multiple inheritance'),

    # Legacy
    # `deprecated` ignores its argument and returns ANY, so these two fields
    # accept any value; the inner schemas only record the historical shape.
    'keypoints': deprecated(ARRAY(STRING)),
    'skeleton': deprecated(ARRAY(TUPLE(INTEGER, INTEGER))),
},
    required=['id', 'name'],
    description='High level information about an annotation category',
    title='CATEGORY')

# Record schema for one entry of the `keypoint_categories` table. Only `id`
# and `name` are required.
KEYPOINT_CATEGORY = OBJECT(
    PROPERTIES={
        'name': NAME(description='The name of the keypoint category'),
        'id': INTEGER,
        'supercategory': ANYOF(NAME, NULL),
        # TODO: should have this name changed to reflect the fact it is horizontal.
        # TODO: should add a variant of this for vertical or other transforms.
        'reflection_id': ANYOF(INTEGER, NULL)(
            description='The keypoint category this should change to if the image is horizontally flipped'),
    },
    required=['id', 'name'],
    # This object describes keypoint categories, so the description says so
    # instead of repeating the wording used for annotation categories.
    description='High level information about a keypoint category',
    title='KEYPOINT_CATEGORY',
)

# Extension
# Record schema for one entry of the `videos` table. Only `id` and `name` are
# required; `resolution` may be a number, a string, or null.
VIDEO = OBJECT(
    PROPERTIES={
        'id': INTEGER(description='An internal video identifier'),
        'name': NAME(description='A unique name for this video'),
        'caption': STRING(description='A video level text caption'),
        'resolution': (NUMBER | STRING | NULL)(description='a unit representing the size of a pixel in video space'),
        },
    required=['id', 'name'],
    description='High level information about a group of temporally ordered images',
    title='VIDEO',
)

# Channel specification string. The pattern is only a shallow sanity check.
# NOTE(review): '[^/]*' can match the empty string, so if the pattern is
# applied unanchored (as JSON Schema `pattern` is) it accepts every string --
# confirm whether any real restriction is intended here.
CHANNELS = STRING(
    pattern='[^/]*',  # a simple check, full pattern is a context free grammar
    description=(
        'A human readable channel name. '
        'Must be compatible with kwcoco.ChannelSpec'
    ),
    title='CHANNEL_SPEC',
)


# Schema for one file that belongs to an image (used by the image-level
# `auxiliary` and `assets` lists). Only `file_name` is required; per the
# descriptions, `id` and `image_id` are optional for now.
ASSET = OBJECT(
    PROPERTIES={
        'file_name': PATH,
        'channels': CHANNELS,
        'id': INTEGER(description='The id of the asset (option for now, but will be required in the future when assets are moved to their own table)'),
        'image_id': INTEGER(description='The image id this asset is associated with (option for now, but will be required in the future)'),
        'width': INTEGER(description='The width in asset-space pixels'),
        'height': INTEGER(description='The height in asset-space pixels'),
    },
    required=['file_name'],
    description='Information about a single file belonging to an image',
    title='ASSET',
)

# Record schema for one entry of the `images` table. Properties are given as
# an OrderedDict so their declaration order is kept. Instead of a flat
# `required` list, the `anyOf` below allows three ways to identify an image:
# by `file_name`, or by `name` together with `auxiliary` or with `assets`.
IMAGE = OBJECT(OrderedDict((
    ('id', INTEGER(description='a unique internal image identifier')),
    # `file_name` and `name` may each be explicitly null.
    ('file_name', PATH(description=ub.paragraph(
        '''
        A relative or absolute path to the main image file. If this file_name
        is unspecified, then a name and auxiliary items or assets must be
        specified. Likewise this should be null if assets are used.
        ''')) | NULL),

    ('name', NAME(
        description=ub.paragraph(
            '''
            A unique name for the image.
            If unspecified the file_name should be used as the default value
            for the name property. Required if assets / auxiliary are
            specified.
            ''')) | NULL),

    ('width', INTEGER(description='The width of the image in image space pixels')),
    ('height', INTEGER(description='The height of the image in image space pixels')),

    # Extension
    ('video_id', INTEGER(description='The video this image belongs to')),

    # Either an ISO-8601 string or a numeric UNIX timestamp.
    ('timestamp', STRING(description='An ISO-8601 timestamp') | NUMBER(description='A UNIX timestamp')),

    ('frame_index', INTEGER(description='Used to temporally order the images in a video')),

    ('channels', CHANNELS | NULL),

    ('resolution', (NUMBER | STRING | NULL)(description='a unit representing the size of a pixel in image space')),

    # `auxiliary` and `assets` share the ASSET item schema.
    ('auxiliary', ARRAY(TYPE=ASSET, description='This will be deprecated for assets in the future')),

    ('assets', ARRAY(TYPE=ASSET, description='A list of assets belonging to this image, used when image channels are split across multiple files')),

)),
    # Former flat requirement, superseded by the anyOf alternatives below:
    # required=['id', 'file_name']
    anyOf=[
        {'required': ['id', 'file_name']},
        {'required': ['id', 'name', 'auxiliary']},
        {'required': ['id', 'name', 'assets']},
    ],
    description=(
        'High level information about a image file or a collection of '
        'image files corresponding to a single point in (or small interval of) '
        'time'
    ),
    title='IMAGE',
)

# Record schema for one entry of the `tracks` table: an integer id and a name.
# No required list, description, or title is attached to this object.
TRACK = OBJECT(
    PROPERTIES=OrderedDict([
        ('id', INTEGER(description='A unique internal id for this track')),
        ('name', NAME(description='A unique external name or identifier')),
    ]),
)

# Record schema for one entry of the `annotations` table. Only `id` and
# `image_id` are required; geometry, category, and scoring fields are all
# optional.
ANNOTATION = OBJECT(OrderedDict((
    ('id', INTEGER(description='A unique internal id for this annotation')),
    ('image_id', INTEGER(description='The image id this annotation belongs to')),

    ('bbox', BBOX),

    ('category_id', INTEGER(description='The category id of this annotation')),
    # UUID is an alias of STRING in this module, so this union effectively
    # accepts an integer or a string.
    ('track_id', ANYOF(INTEGER, STRING, UUID)(
        description='An identifier used to group annotations belonging to the same object over multiple frames in a video')),

    ('segmentation', SEGMENTATION(description='A polygon or mask specifying the pixels in this annotation in image-space')),
    ('keypoints', KEYPOINTS(description='A set of categorized points belonging to this annotation in image space')),

    ('prob', ARRAY(NUMBER, description=ub.paragraph(
        '''
        This needs to be in the same order as categories.
        The probability order currently needs to be known a-priori,
        typically in *order* of the classes, but its hard to always
        keep that consistent.
        This SPEC is subject to change in the future.
        '''))),

    ('score', NUMBER(description='Typically assigned to predicted annotations')),
    ('weight', NUMBER(description='Typically given to truth annotations to indicate quality.')),

    # Accepts either an integer or a boolean flag.
    ('iscrowd', ANYOF(INTEGER, BOOLEAN)(description=(
        'A legacy mscoco field used to indicate if an annotation contains multiple objects'))),
    ('caption', STRING(description='An annotation-level text caption')),

    ('role', (STRING | NULL)(
        description=ub.paragraph(
            '''
            A optional application specific key used to differentiate between
            annotations used for different purposes: e.g. truth / prediction /
            confusion.
            '''))),
)),
    required=['id', 'image_id'],
    description='Metadata about some semantic attribute of an image.',
    title='ANNOTATION',
)


# Root schema for a whole kwcoco dataset dictionary. No top-level key is
# required (`required=[]`).
COCO_SCHEMA = OBJECT(
    PROPERTIES=OrderedDict((
        # Free-form sections: any value is accepted.
        ('info', ANY),
        ('licenses', ANY),
        # Tables: each one is an array of the corresponding record schema.
        ('categories', ARRAY(CATEGORY)),
        ('keypoint_categories', ARRAY(KEYPOINT_CATEGORY)),
        ('videos', ARRAY(VIDEO)),
        ('tracks', ARRAY(TRACK)),
        ('images', ARRAY(IMAGE)),
        ('annotations', ARRAY(ANNOTATION)),
    )),
    required=[],
    description='The formal kwcoco schema',
    title='KWCOCO_SCHEMA',
)


# Opt-in check that runs at import time when --debug or --validate is given.
# NOTE(review): validate() is called without an instance; presumably this
# checks that the schema itself is well formed rather than validating a
# dataset -- confirm against the SchemaElements implementation.
if ub.argflag('--debug') or ub.argflag('--validate'):
    COCO_SCHEMA.validate()


if __name__ == '__main__':
    """
    CommandLine:
        KWCOCO_MODPATH=$(xdev modpath kwcoco)
        python $KWCOCO_MODPATH/coco_schema.py --validate
        python $KWCOCO_MODPATH/coco_schema.py > ~/code/kwcoco/kwcoco/coco_schema.json
        jq .properties.images $KWCOCO_MODPATH/coco_schema.json
        jq .properties.categories $KWCOCO_MODPATH/coco_schema.json
        jq . $KWCOCO_MODPATH/coco_schema.json
    """
    # Render the schema as a fully expanded repr in declaration order, then
    # swap single quotes for double quotes so the text printed to stdout can
    # be redirected into coco_schema.json (see CommandLine above).
    schema_text = ub.urepr(COCO_SCHEMA, nl=-1, trailsep=False, sort=False)
    print(schema_text.replace("'", '"'))
    # Alternatives kept for reference:
    #   import json; print(json.dumps(COCO_SCHEMA, indent='    '))
    #   print('COCO_SCHEMA = {}'.format(ub.urepr(COCO_SCHEMA, nl=-1)))