"""
from __future__ import annotations
The place where the formal KWCOCO schema is defined.
CommandLine:
python -m kwcoco.coco_schema
xdoctest -m kwcoco.coco_schema __doc__
TODO:
- [ ] Perhaps use `voluptuous <https://pypi.org/project/voluptuous/>`_ instead?
Example:
>>> import kwcoco
>>> from kwcoco.coco_schema import COCO_SCHEMA
>>> import jsonschema
>>> dset = kwcoco.CocoDataset.demo('shapes1')
>>> # print('dset.dataset = {}'.format(ub.urepr(dset.dataset, nl=2)))
>>> COCO_SCHEMA.validate(dset.dataset)
>>> try:
>>>     jsonschema.validate(dset.dataset, schema=COCO_SCHEMA)
>>> except jsonschema.exceptions.ValidationError as ex:
>>>     vali_ex = ex
>>>     print('ex = {!r}'.format(ex))
>>>     raise
>>> except jsonschema.exceptions.SchemaError as ex:
>>>     print('ex = {!r}'.format(ex))
>>>     schema_ex = ex
>>>     print('schema_ex.instance = {}'.format(ub.urepr(schema_ex.instance, nl=-1)))
>>>     raise
>>> # Test the multispectral image definition
>>> import copy
>>> dataset = dset.copy().dataset
>>> img = dataset['images'][0]
>>> img.pop('file_name')
>>> import pytest
>>> with pytest.raises(jsonschema.ValidationError):
>>>     COCO_SCHEMA.validate(dataset)
>>> import pytest
>>> img['auxiliary'] = [{'file_name': 'foobar'}]
>>> with pytest.raises(jsonschema.ValidationError):
>>>     COCO_SCHEMA.validate(dataset)
>>> img['name'] = 'asset-only images must have a name'
>>> COCO_SCHEMA.validate(dataset)
"""
from kwcoco.util.jsonschema_elements import SchemaElements
from collections import OrderedDict
import ubelt as ub
def deprecated(*args):
    """
    Build the schema used for legacy fields.

    Args:
        *args: the schema(s) the field used to have. They are accepted but
            ignored; presumably they are kept so the old definition stays
            visible at the call site.

    Returns:
        the result of ``ANY(description='deprecated')``. The arguments have
        no influence on it.
    """
    return ANY(description='deprecated')
def TUPLE(*args, **kw):
    """
    Build an ARRAY schema sized by the number of positional arguments.

    Args:
        *args: the schema of each position. When at least one is given and
            all of them compare equal, that schema is used as the array's
            ``TYPE``; otherwise ``ANY`` is used.
        **kw: forwarded unchanged to ``ARRAY`` (e.g. ``description``).

    Returns:
        the result of ``ARRAY(TYPE=..., numItems=len(args), **kw)``.
    """
    # Mixed (or empty) tuples are not described per position; they fall
    # back to ANY for the item type.
    item_type = ub.peek(args) if args and ub.allsame(args) else ANY
    return ARRAY(TYPE=item_type, numItems=len(args), **kw)
# A single SchemaElements instance supplies every building block that the
# schema definitions below are assembled from.
elem = SchemaElements()

# Wildcard and composition helpers
ANY = elem.ANY
NOT = elem.NOT
ALLOF = elem.ALLOF
ANYOF = elem.ANYOF
ONEOF = elem.ONEOF

# Scalar types
NULL = elem.NULL
BOOLEAN = elem.BOOLEAN
INTEGER = elem.INTEGER
NUMBER = elem.NUMBER
STRING = elem.STRING

# Container types
ARRAY = elem.ARRAY
OBJECT = elem.OBJECT

# Readability aliases: as far as the schema is concerned both are STRING.
UUID = STRING
PATH = STRING
# New-style keypoint: one object per point rather than a flat number list.
# No property is marked as required.
KWCOCO_KEYPOINT = OBJECT(
    PROPERTIES={
        'xy': TUPLE(
            NUMBER,
            NUMBER,
            description='<x1, y1> in pixels',
        ),
        'visible': INTEGER(description='choice(0, 1, 2)'),
        'keypoint_category_id': INTEGER,
        'keypoint_category': STRING(
            description='only to be used as a hint',
        ),
    },
    title='KWCOCO_KEYPOINT',
    description='A new-style point',
)
# New-style polygon: one exterior ring plus a list of interior rings.
KWCOCO_POLYGON = OBJECT(
    PROPERTIES={
        # An array of two-number arrays.
        'exterior': ARRAY(
            TYPE=ARRAY(TYPE=NUMBER, numItems=2),
            description='counter-clockwise xy exterior points',
        ),
        # An array of holes; each hole is an array of two-number arrays.
        'interiors': ARRAY(
            TYPE=ARRAY(
                TYPE=ARRAY(TYPE=NUMBER, numItems=2),
                description='clockwise xy hole',
            ),
        ),
    },
    title='KWCOCO_POLYGON',
    description='A new-style polygon format that supports holes',
)
# Keypoints are accepted either as the flat old-style number list or as a
# list of KWCOCO_KEYPOINT objects.
ORIG_COCO_KEYPOINTS = ARRAY(
    TYPE=NUMBER,
    description='An old-style set of keypoints (x1,y1,v1,...,xk,yk,vk)',
    title='MSCOCO_KEYPOINTS',
)
KWCOCO_KEYPOINTS = ARRAY(TYPE=KWCOCO_KEYPOINT)
KEYPOINTS = ANYOF(ORIG_COCO_KEYPOINTS, KWCOCO_KEYPOINTS)
# Old-style (MSCOCO) polygons are flat arrays of numbers.
MSCOCO_POLYGON = ARRAY(
    TYPE=NUMBER,
    # Polygons carry alternating x and y coordinates only; unlike old-style
    # keypoints there is no per-point visibility value.
    description='an old-style polygon [x1,y1,x2,y2,...,xk,yk]',
    title='MSCOCO_POLYGON',
)
MSCOCO_MULTIPOLYGON = ARRAY(TYPE=MSCOCO_POLYGON)

# Any of these four encodings is accepted wherever a polygon is expected:
# a single new-style polygon, a list of them, a single old-style polygon,
# or a list of old-style polygons.
POLYGON = ANYOF(
    KWCOCO_POLYGON,
    ARRAY(TYPE=KWCOCO_POLYGON),
    MSCOCO_POLYGON,
    MSCOCO_MULTIPOLYGON,
)
# Mask segmentations are represented in the schema as a plain string.
RUN_LENGTH_ENCODING = STRING(
    description='A run-length-encoding mask format read by pycocotools',
)

# A box is an array of numbers declared with numItems=4; the description
# gives the meaning of each position.
BBOX = ARRAY(
    TYPE=NUMBER,
    numItems=4,
    description=(
        '[top-left x, top-left-y, width, height] in image-space pixels'
    ),
    title='BBOX',
)
### ------------------------
# A segmentation is either one of the polygon encodings or an RLE string.
SEGMENTATION = ANYOF(POLYGON, RUN_LENGTH_ENCODING)
# Names cannot contain certain special characters
# NOTE(review): JSON Schema ``pattern`` is not implicitly anchored, so
# '[^/]+' appears to accept any string that holds at least one non-'/'
# character, including strings that do contain '/'. Confirm whether
# '^[^/]+$' was intended before tightening it.
NAME = STRING(pattern='[^/]+')
# Anchored on both ends with nothing in between: the empty string.
EMPTY = STRING(pattern='^$')
# An annotation category; only ``id`` and ``name`` are mandatory.
CATEGORY = OBJECT(
    PROPERTIES={
        'id': INTEGER(description='A unique internal category id'),
        'name': NAME(
            description='A unique external category name or identifier',
        ),
        'alias': ARRAY(
            TYPE=NAME,
            description=(
                'A list of alternate names that should be resolved to '
                'this category'
            ),
        ),
        # The parent may be a name, null, or the empty string.
        'supercategory': ANYOF(
            NAME(description='A coarser category name'),
            NULL,
            EMPTY,
        ),
        'parents': ARRAY(
            TYPE=NAME,
            description='Used for multiple inheritance',
        ),
        # Legacy: deprecated() ignores its argument, so the shapes written
        # here are informational and are not enforced.
        'keypoints': deprecated(ARRAY(TYPE=STRING)),
        'skeleton': deprecated(ARRAY(TYPE=TUPLE(INTEGER, INTEGER))),
    },
    required=['id', 'name'],
    description='High level information about an annotation category',
    title='CATEGORY',
)
# A keypoint category; only ``id`` and ``name`` are mandatory.
KEYPOINT_CATEGORY = OBJECT(
    PROPERTIES={
        'name': NAME(description='The name of the keypoint category'),
        'id': INTEGER,
        # The parent may be a name, null, or the empty string.
        'supercategory': ANYOF(NAME, NULL, EMPTY),
        # TODO: should have this name changed to reflect the fact it is horizontal.
        # TODO: should add a variant of this for vertical or other transforms.
        'reflection_id': ANYOF(INTEGER, NULL)(
            description=(
                'The keypoint category this should change to if the image '
                'is horizontally flipped'
            ),
        ),
    },
    required=['id', 'name'],
    # This object describes keypoint categories, not annotation categories,
    # so the description says so explicitly.
    description='High level information about a keypoint category',
    title='KEYPOINT_CATEGORY',
)
# Extension
# A video groups images; only ``id`` and ``name`` are mandatory.
VIDEO = OBJECT(
    PROPERTIES={
        'id': INTEGER(description='An internal video identifier'),
        'name': NAME(description='A unique name for this video'),
        'caption': STRING(description='A video level text caption'),
        # May be a number, a string, or null.
        'resolution': (NUMBER | STRING | NULL)(
            description=(
                'a unit representing the size of a pixel in video space'
            ),
        ),
    },
    required=['id', 'name'],
    description=(
        'High level information about a group of temporally ordered images'
    ),
    title='VIDEO',
)
# Channel specification string shared by images and assets.
CHANNELS = STRING(
    # a simple check, full pattern is a context free grammar
    pattern='[^/]*',
    description=(
        'A human readable channel name. '
        'Must be compatible with kwcoco.ChannelSpec'
    ),
    title='CHANNEL_SPEC',
)
# One file that belongs to an image; only ``file_name`` is mandatory.
ASSET = OBJECT(
    PROPERTIES={
        'file_name': PATH,
        'channels': CHANNELS,
        'id': INTEGER(
            description=(
                'The id of the asset (option for now, but will be required in '
                'the future when assets are moved to their own table)'
            ),
        ),
        'image_id': INTEGER(
            description=(
                'The image id this asset is associated with (option for now, '
                'but will be required in the future)'
            ),
        ),
        'width': INTEGER(description='The width in asset-space pixels'),
        'height': INTEGER(description='The height in asset-space pixels'),
    },
    required=['file_name'],
    description='Information about a single file belonging to an image',
    title='ASSET',
)
# An image entry. The property order is fixed by the OrderedDict.
IMAGE = OBJECT(
    PROPERTIES=OrderedDict([
        ('id', INTEGER(description='a unique internal image identifier')),
        (
            'file_name',
            PATH(
                description=ub.paragraph(
                    """
                    A relative or absolute path to the main image file. If this file_name
                    is unspecified, then a name and auxiliary items or assets must be
                    specified. Likewise this should be null if assets are used.
                    """
                )
            )
            | NULL,
        ),
        (
            'name',
            NAME(
                description=ub.paragraph(
                    """
                    A unique name for the image.
                    If unspecified the file_name should be used as the default value
                    for the name property. Required if assets / auxiliary are
                    specified.
                    """
                )
            )
            | NULL,
        ),
        (
            'width',
            INTEGER(description='The width of the image in image space pixels'),
        ),
        (
            'height',
            INTEGER(description='The height of the image in image space pixels'),
        ),
        # Extension
        ('video_id', INTEGER(description='The video this image belongs to')),
        (
            'timestamp',
            STRING(description='An ISO-8601 timestamp')
            | NUMBER(description='A UNIX timestamp'),
        ),
        (
            'frame_index',
            INTEGER(
                description='Used to temporally order the images in a video',
            ),
        ),
        ('channels', CHANNELS | NULL),
        (
            'resolution',
            (NUMBER | STRING | NULL)(
                description=(
                    'a unit representing the size of a pixel in image space'
                ),
            ),
        ),
        (
            'auxiliary',
            ARRAY(
                TYPE=ASSET,
                description='This will be deprecated for assets in the future',
            ),
        ),
        (
            'assets',
            ARRAY(
                TYPE=ASSET,
                description=(
                    'A list of assets belonging to this image, used when image '
                    'channels are split across multiple files'
                ),
            ),
        ),
    ]),
    # Instead of a flat ``required`` list, one of three key combinations
    # must be present: id + file_name, id + name + auxiliary, or
    # id + name + assets.
    anyOf=[
        {'required': ['id', 'file_name']},
        {'required': ['id', 'name', 'auxiliary']},
        {'required': ['id', 'name', 'assets']},
    ],
    description=(
        'High level information about a image file or a collection of '
        'image files corresponding to a single point in (or small interval of) '
        'time'
    ),
    title='IMAGE',
)
# A track declares only an id and a name; no ``required`` list, title, or
# description is given for it.
TRACK = OBJECT(
    PROPERTIES=OrderedDict([
        ('id', INTEGER(description='A unique internal id for this track')),
        ('name', NAME(description='A unique external name or identifier')),
    ])
)
# An annotation entry; only ``id`` and ``image_id`` are mandatory. The
# property order is fixed by the OrderedDict.
ANNOTATION = OBJECT(
    PROPERTIES=OrderedDict([
        ('id', INTEGER(description='A unique internal id for this annotation')),
        (
            'image_id',
            INTEGER(description='The image id this annotation belongs to'),
        ),
        ('bbox', BBOX),
        (
            'category_id',
            INTEGER(description='The category id of this annotation'),
        ),
        (
            'track_id',
            ANYOF(INTEGER, STRING, UUID)(
                description=(
                    'An identifier used to group annotations belonging to the '
                    'same object over multiple frames in a video'
                ),
            ),
        ),
        (
            'segmentation',
            SEGMENTATION(
                description=(
                    'A polygon or mask specifying the pixels in this annotation '
                    'in image-space'
                ),
            ),
        ),
        (
            'keypoints',
            KEYPOINTS(
                description=(
                    'A set of categorized points belonging to this annotation '
                    'in image space'
                ),
            ),
        ),
        (
            'prob',
            ARRAY(
                TYPE=NUMBER,
                description=ub.paragraph(
                    """
                    This needs to be in the same order as categories.
                    The probability order currently needs to be known a-priori,
                    typically in *order* of the classes, but its hard to always
                    keep that consistent.
                    This SPEC is subject to change in the future.
                    """
                ),
            ),
        ),
        (
            'score',
            NUMBER(description='Typically assigned to predicted annotations'),
        ),
        (
            'weight',
            NUMBER(
                description='Typically given to truth annotations to indicate quality.',
            ),
        ),
        (
            'iscrowd',
            ANYOF(INTEGER, BOOLEAN)(
                description=(
                    'A legacy mscoco field used to indicate if an annotation '
                    'contains multiple objects'
                ),
            ),
        ),
        ('caption', STRING(description='An annotation-level text caption')),
        (
            'role',
            (STRING | NULL)(
                description=ub.paragraph(
                    """
                    A optional application specific key used to differentiate between
                    annotations used for different purposes: e.g. truth / prediction /
                    confusion.
                    """
                ),
            ),
        ),
    ]),
    required=['id', 'image_id'],
    description='Metadata about some semantic attribute of an image.',
    title='ANNOTATION',
)
# The top-level document. ``required`` is empty, so no table is mandatory;
# ``info`` and ``licenses`` accept anything.
COCO_SCHEMA = OBJECT(
    PROPERTIES=ub.odict([
        ('info', ANY),
        ('licenses', ANY),
        ('categories', ARRAY(TYPE=CATEGORY)),
        ('keypoint_categories', ARRAY(TYPE=KEYPOINT_CATEGORY)),
        ('videos', ARRAY(TYPE=VIDEO)),
        ('tracks', ARRAY(TYPE=TRACK)),
        ('images', ARRAY(TYPE=IMAGE)),
        ('annotations', ARRAY(TYPE=ANNOTATION)),
    ]),
    required=[],
    description='The formal kwcoco schema',
    title='KWCOCO_SCHEMA',
)
# Optional self-check: when --debug or --validate is given on the command
# line, validate() is called on the schema with no instance argument.
if any(ub.argflag(flag) for flag in ('--debug', '--validate')):
    COCO_SCHEMA.validate()
if __name__ == '__main__':
    """
    CommandLine:
        KWCOCO_MODPATH=$(xdev modpath kwcoco)
        python $KWCOCO_MODPATH/coco_schema.py --validate
        python $KWCOCO_MODPATH/coco_schema.py > ~/code/kwcoco/kwcoco/coco_schema.json
        jq .properties.images $KWCOCO_MODPATH/coco_schema.json
        jq .properties.categories $KWCOCO_MODPATH/coco_schema.json
        jq . $KWCOCO_MODPATH/coco_schema.json
    """
    # Render the schema with urepr, then swap single quotes for double
    # quotes before printing.
    # NOTE(review): presumably this relies on the repr being JSON-compatible
    # once the quotes are swapped -- confirm.
    schema_text = ub.urepr(COCO_SCHEMA, nl=-1, trailsep=False, sort=False)
    print(schema_text.replace("'", '"'))
    # Alternatives kept for reference:
    # import json
    # print(json.dumps(COCO_SCHEMA, indent=' '))
    # print('COCO_SCHEMA = {}'.format(ub.urepr(COCO_SCHEMA, nl=-1)))