Source code for kwcoco.coco_schema

"""
The place where the formal KWCOCO schema is defined.

CommandLine:
    python -m kwcoco.coco_schema
    xdoctest -m kwcoco.coco_schema __doc__

TODO:
    - [ ] Perhaps use `voluptuous <https://pypi.org/project/voluptuous/>`_ instead?

Example:
    >>> import kwcoco
    >>> from kwcoco.coco_schema import COCO_SCHEMA
    >>> import jsonschema
    >>> dset = kwcoco.CocoDataset.demo('shapes1')
    >>> # print('dset.dataset = {}'.format(ub.urepr(dset.dataset, nl=2)))
    >>> COCO_SCHEMA.validate(dset.dataset)

    >>> try:
    >>>     jsonschema.validate(dset.dataset, schema=COCO_SCHEMA)
    >>> except jsonschema.exceptions.ValidationError as ex:
    >>>     vali_ex = ex
    >>>     print('ex = {!r}'.format(ex))
    >>>     raise
    >>> except jsonschema.exceptions.SchemaError as ex:
    >>>     print('ex = {!r}'.format(ex))
    >>>     schema_ex = ex
    >>>     print('schema_ex.instance = {}'.format(ub.urepr(schema_ex.instance, nl=-1)))
    >>>     raise

    >>> # Test the multispectral image definition
    >>> import copy
    >>> dataset = dset.copy().dataset
    >>> img = dataset['images'][0]
    >>> img.pop('file_name')
    >>> import pytest
    >>> with pytest.raises(jsonschema.ValidationError):
    >>>     COCO_SCHEMA.validate(dataset)
    >>> import pytest
    >>> img['auxiliary'] = [{'file_name': 'foobar'}]
    >>> with pytest.raises(jsonschema.ValidationError):
    >>>     COCO_SCHEMA.validate(dataset)
    >>> img['name'] = 'asset-only images must have a name'
    >>> COCO_SCHEMA.validate(dataset)
"""

from kwcoco.util.jsonschema_elements import SchemaElements
from collections import OrderedDict
import ubelt as ub


def deprecated(*args):
    """
    Build the placeholder schema element used for legacy fields.

    Args:
        *args: accepted so the legacy element can still be written out at the
            call site; the values are not used.

    Returns:
        The result of ``ANY(description='deprecated')``.
    """
    legacy_marker = ANY(description='deprecated')
    return legacy_marker
def TUPLE(*args, **kw):
    """
    Build a fixed-length array element from positional item types.

    Args:
        *args: one item type per tuple position.
        **kw: extra keyword arguments forwarded to ``ARRAY``.

    Returns:
        ``ARRAY(...)`` with ``numItems=len(args)``. The item ``TYPE`` is the
        shared item type when ``args`` is non-empty and ``ub.allsame(args)``
        holds, otherwise ``ANY``.
    """
    is_homogeneous = bool(args) and ub.allsame(args)
    # ub.peek is only evaluated when there is at least one item to look at.
    item_type = ub.peek(args) if is_homogeneous else ANY
    return ARRAY(TYPE=item_type, numItems=len(args), **kw)
# Bind the element constructors to short module-level names so the schema
# definitions below read declaratively.
elem = SchemaElements()
ALLOF = elem.ALLOF
ANY = elem.ANY
ANYOF = elem.ANYOF
ARRAY = elem.ARRAY
BOOLEAN = elem.BOOLEAN
INTEGER = elem.INTEGER
NOT = elem.NOT
NULL = elem.NULL
NUMBER = elem.NUMBER
OBJECT = elem.OBJECT
ONEOF = elem.ONEOF
STRING = elem.STRING

# UUIDs and paths are aliases of the plain string element.
UUID = STRING
PATH = STRING


### ------------------------
# Geometry primitives: keypoints, polygons, masks and boxes

KWCOCO_KEYPOINT = OBJECT(
    PROPERTIES={
        'xy': TUPLE(NUMBER, NUMBER, description='<x1, y1> in pixels'),
        'visible': INTEGER(description='choice(0, 1, 2)'),
        'keypoint_category_id': INTEGER,
        'keypoint_category': STRING(description='only to be used as a hint')
    },
    title='KWCOCO_KEYPOINT',
    description='A new-style point',
)

KWCOCO_POLYGON = OBJECT(
    PROPERTIES={
        'exterior': ARRAY(
            ARRAY(NUMBER, numItems=2),
            description='counter-clockwise xy exterior points'),
        'interiors': ARRAY(
            ARRAY(
                ARRAY(NUMBER, numItems=2),
                description='clockwise xy hole'),
        )
    },
    title='KWCOCO_POLYGON',
    description='A new-style polygon format that supports holes',
)

ORIG_COCO_KEYPOINTS = ARRAY(
    INTEGER,
    description='An old-style set of keypoints (x1,y1,v1,...,xk,yk,vk)',
    title='MSCOCO_KEYPOINTS'
)

KWCOCO_KEYPOINTS = ARRAY(KWCOCO_KEYPOINT)

# Either keypoint representation is accepted.
KEYPOINTS = ANYOF(ORIG_COCO_KEYPOINTS, KWCOCO_KEYPOINTS)

MSCOCO_POLYGON = ARRAY(
    TYPE=NUMBER,
    # Polygons are flat xy pairs; only keypoints carry a visibility value.
    description='an old-style polygon [x1,y1,...,xk,yk]',
    title='MSCOCO_POLYGON',
)

MSCOCO_MULTIPOLYGON = ARRAY(MSCOCO_POLYGON)

# A polygon may be given in any of the new-style or old-style forms, either
# as a single polygon or as a list of them.
POLYGON = ANYOF(
    KWCOCO_POLYGON,
    ARRAY(KWCOCO_POLYGON),
    MSCOCO_POLYGON,
    MSCOCO_MULTIPOLYGON,
)

RUN_LENGTH_ENCODING = STRING(description='A run-length-encoding mask format read by pycocotools')

BBOX = ARRAY(
    TYPE=NUMBER,
    numItems=4,
    description='[top-left x, top-left-y, width, height] in image-space pixels',
    title='BBOX',
)

### ------------------------

SEGMENTATION = ANYOF(POLYGON, RUN_LENGTH_ENCODING)

# Names cannot contain certain special characters
# NOTE(review): JSON Schema ``pattern`` keywords are not implicitly anchored,
# so if this pattern reaches the validator unchanged it only requires one
# non-slash character somewhere in the name. Confirm whether an anchored
# pattern ('^[^/]+$') is intended before tightening it, since that would
# reject names that currently validate.
NAME = STRING(pattern='[^/]+')


### ------------------------
# Table row schemas

CATEGORY = OBJECT({
    'id': INTEGER(description='A unique internal category id'),
    'name': NAME(description='A unique external category name or identifier'),
    'alias': ARRAY(NAME, description='A list of alternate names that should be resolved to this category'),
    'supercategory': ANYOF(NAME(description='A coarser category name'), NULL),
    'parents': ARRAY(NAME, description='Used for multiple inheritance'),

    # Legacy
    'keypoints': deprecated(ARRAY(STRING)),
    'skeleton': deprecated(ARRAY(TUPLE(INTEGER, INTEGER))),
},
    required=['id', 'name'],
    description='High level information about an annotation category',
    title='CATEGORY')

KEYPOINT_CATEGORY = OBJECT(
    PROPERTIES={
        'name': NAME(description='The name of the keypoint category'),
        'id': INTEGER,
        'supercategory': ANYOF(NAME, NULL),
        # TODO: should have this name changed to reflect the fact it is horizontal.
        # TODO: should add a variant of this for vertical or other transforms.
        'reflection_id': ANYOF(INTEGER, NULL)(
            description='The keypoint category this should change to if the image is horizontally flipped'),
    },
    required=['id', 'name'],
    description='High level information about a keypoint category',
    title='KEYPOINT_CATEGORY',
)

# Extension
VIDEO = OBJECT(
    PROPERTIES={
        'id': INTEGER(description='An internal video identifier'),
        'name': NAME(description='A unique name for this video'),
        'caption': STRING(description='A video level text caption'),
        'resolution': (NUMBER | STRING | NULL)(description='a unit representing the size of a pixel in video space'),
    },
    required=['id', 'name'],
    description='High level information about a group of temporally ordered images',
    title='VIDEO',
)

CHANNELS = STRING(
    pattern='[^/]*',  # a simple check, full pattern is a context free grammar
    description=(
        'A human readable channel name. '
        'Must be compatible with kwcoco.ChannelSpec'
    ),
    title='CHANNEL_SPEC',
)

ASSET = OBJECT(
    PROPERTIES={
        'file_name': PATH,
        'channels': CHANNELS,
        'id': INTEGER(description='The id of the asset (option for now, but will be required in the future when assets are moved to their own table)'),
        'image_id': INTEGER(description='The image id this asset is associated with (option for now, but will be required in the future)'),
        'width': INTEGER(description='The width in asset-space pixels'),
        'height': INTEGER(description='The height in asset-space pixels'),
    },
    required=['file_name'],
    description='Information about a single file belonging to an image',
    title='ASSET',
)

IMAGE = OBJECT(OrderedDict((
    ('id', INTEGER(description='a unique internal image identifier')),

    ('file_name', PATH(description=ub.paragraph(
        '''
        A relative or absolute path to the main image file. If this
        file_name is unspecified, then a name and auxiliary items or assets
        must be specified. Likewise this should be null if assets are used.
        ''')) | NULL),

    ('name', NAME(
        description=ub.paragraph(
            '''
            A unique name for the image.
            If unspecified the file_name should be used as the default value
            for the name property. Required if assets / auxiliary are
            specified.
            ''')) | NULL),

    ('width', INTEGER(description='The width of the image in image space pixels')),
    ('height', INTEGER(description='The height of the image in image space pixels')),

    # Extension
    ('video_id', INTEGER(description='The video this image belongs to')),
    ('timestamp', STRING(description='An ISO-8601 timestamp') | NUMBER(description='A UNIX timestamp')),
    ('frame_index', INTEGER(description='Used to temporally order the images in a video')),

    ('channels', CHANNELS | NULL),
    ('resolution', (NUMBER | STRING | NULL)(description='a unit representing the size of a pixel in image space')),

    ('auxiliary', ARRAY(TYPE=ASSET, description='This will be deprecated for assets in the future')),
    ('assets', ARRAY(TYPE=ASSET, description='A list of assets belonging to this image, used when image channels are split across multiple files')),
)),
    # required=['id', 'file_name']
    # An image needs an id plus either a file_name, or a name together with
    # auxiliary / assets entries.
    anyOf=[
        {'required': ['id', 'file_name']},
        {'required': ['id', 'name', 'auxiliary']},
        {'required': ['id', 'name', 'assets']},
    ],
    description=(
        'High level information about a image file or a collection of '
        'image files corresponding to a single point in (or small interval of) '
        'time'
    ),
    title='IMAGE',
)

TRACK = OBJECT(OrderedDict((
    ('id', INTEGER(description='A unique internal id for this track')),
    ('name', NAME(description='A unique external name or identifier')),
)))

ANNOTATION = OBJECT(OrderedDict((
    ('id', INTEGER(description='A unique internal id for this annotation')),
    ('image_id', INTEGER(description='The image id this annotation belongs to')),

    ('bbox', BBOX),

    ('category_id', INTEGER(description='The category id of this annotation')),
    ('track_id', ANYOF(INTEGER, STRING, UUID)(
        description='An identifier used to group annotations belonging to the same object over multiple frames in a video')),

    ('segmentation', SEGMENTATION(description='A polygon or mask specifying the pixels in this annotation in image-space')),
    ('keypoints', KEYPOINTS(description='A set of categorized points belonging to this annotation in image space')),

    ('prob', ARRAY(NUMBER, description=ub.paragraph(
        '''
        This needs to be in the same order as categories.
        The probability order currently needs to be known a-priori,
        typically in *order* of the classes, but its hard to always keep
        that consistent. This SPEC is subject to change in the future.
        '''))),

    ('score', NUMBER(description='Typically assigned to predicted annotations')),
    ('weight', NUMBER(description='Typically given to truth annotations to indicate quality.')),

    ('iscrowd', ANYOF(INTEGER, BOOLEAN)(description=(
        'A legacy mscoco field used to indicate if an annotation contains multiple objects'))),
    ('caption', STRING(description='An annotation-level text caption')),
    ('role', (STRING | NULL)(
        description=ub.paragraph(
            '''
            A optional application specific key used to differentiate
            between annotations used for different purposes: e.g. truth /
            prediction / confusion.
            '''))),
)),
    required=['id', 'image_id'],
    description='Metadata about some semantic attribute of an image.',
    title='ANNOTATION',
)


### ------------------------
# The top-level dataset schema. No table is required.

COCO_SCHEMA = OBJECT(
    PROPERTIES=ub.odict([
        ('info', ANY),
        ('licenses', ANY),

        ('categories', ARRAY(CATEGORY)),

        ('keypoint_categories', ARRAY(KEYPOINT_CATEGORY)),

        ('videos', ARRAY(VIDEO)),

        ('tracks', ARRAY(TRACK)),

        ('images', ARRAY(IMAGE)),

        ('annotations', ARRAY(ANNOTATION)),
    ]),
    required=[],
    description='The formal kwcoco schema',
    title='KWCOCO_SCHEMA',
)


# Opt-in check, triggered from the command line, that calls validate() on the
# schema with no instance argument.
if ub.argflag('--debug') or ub.argflag('--validate'):
    COCO_SCHEMA.validate()


if __name__ == '__main__':
    """
    CommandLine:
        KWCOCO_MODPATH=$(xdev modpath kwcoco)
        python $KWCOCO_MODPATH/coco_schema.py --validate
        python $KWCOCO_MODPATH/coco_schema.py > ~/code/kwcoco/kwcoco/coco_schema.json
        jq .properties.images $KWCOCO_MODPATH/coco_schema.json
        jq .properties.categories $KWCOCO_MODPATH/coco_schema.json
        jq . $KWCOCO_MODPATH/coco_schema.json
    """
    # import json
    # Dump the schema to stdout; single quotes in the repr are swapped for
    # double quotes so the output can be redirected into a .json file.
    print(ub.urepr(COCO_SCHEMA, nl=-1, trailsep=False, sort=False).replace("'", '"'))
    # print(json.dumps(COCO_SCHEMA, indent=' '))
    # print('COCO_SCHEMA = {}'.format(ub.urepr(COCO_SCHEMA, nl=-1)))