Skip to main content

JSON Schema Viewer

Loading ....

Source :

{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "./doc/INN_antibody_schema.json",
"title": "INN Antibody Annotations",
"description": "Annotations for a therapeutic antibody that has been published in a WHO International Nonprietary Name (INN) list and has made it through pre-clinical trials. The annotations are generated by a parser (https://github.com/pySin/Antibody-Annotation-to-JSON) that reads in a flat file format and converts it to the JSON data structure described by this schema.",
"$comment": "camelCase is used for $defs names, as per the most common JSON/JSON Schema convention, but the properties names and patternProperties names are in PascalCase, for consistency with the original INN Antibody Annotation Format",
"type": "object",
"properties": {
"AbML": {
"description": "A notation language for describing antibody formats in terms of domains - their order within chains, associations in space, disulfide linkages, and various other annotations. See http://www.bioinf.org.uk/abs/abml/ for more information.",
"$comment": "A Regex pattern would be too complicated! Also note that minLength is 3 because the shortest possible AbML is `VHH` (a camelid heavy chain variable domain, a.k.a. single-domain antibody or nanobody)",
"examples": [
"X(1)-L(2)-VH(3:5)-L(4)-VL(5:3)",
"VL(1:3)-CL^(2:4){1}|VH(3:1)-CH1(4:2){1}-H*(5:12){2}-CH2(6:13)-CH3(7:14)|VL(8:10)-CL^(9:11){1}|VH(10:8)-CH1(11:9){1}-H*(12:5){2}-CH2(13:6)-CH3(14:7)",
"VL(1:3)-L(2)-VH(3:1)-H*(4:10){3}[MOD:REMCYS][NOTE: Nter 5aa extension Cter 2aa extension]-L(5)-CH3(6:12)|VL(7:9)-L(8)-VH(9:7)-H(10:4){3}[MOD:REMCYS][NOTE: Nter 5aa extension Cter 2aa extension]-L(11)-CH3(12:6)"
],
"type": "string",
"minLength": 3
},
"CDRSource": {
"description": "Used for humanized antibodies to specify the source species of the complementarity-determining regions (CDRs) for each instance or pair of instances that contribute to an antigen-combining site. If different CDRs of the same instance have come from different sources, these are listed in separate items and the optional `Regions` subproperty is used to differentiate them.",
"examples": [
[
{
"Instances": [
1,
2
],
"Note": "humanized by CDR grafting",
"Species": "Oryctolagus cuniculus"
},
{
"Instances": [
3,
8
],
"Note": "transgenic mouse with human genes",
"Species": "Homo sapiens"
},
{
"Instances": [
4,
5
],
"Note": "humanized by CDR grafting",
"Species": "Oryctolagus cuniculus"
},
{
"Instances": [
6,
7
],
"Note": "humanized by CDR grafting",
"Species": "Oryctolagus cuniculus"
}
],
[
{
"Instances": [
0
],
"Regions": [
"L1",
"L2",
"L3"
],
"Species": "Homo sapiens"
},
{
"Instances": [
0
],
"Regions": [
"H1",
"H2",
"H3"
],
"Species": "Lama glama"
}
]
],
"items": {
"type": "object",
"required": [
"Instances",
"Species"
],
"properties": {
"Instances": {
"maxItems": 2,
"minItems": 1,
"allOf": [
{
"$ref": "#/$defs/multiInstances"
}
]
},
"Note": {
"$ref": "#/$defs/note"
},
"Regions": {
"description": "The abbreviated name(s) of one or more of the six complementarity-determining regions (a.k.a. hypervariable loops)",
"examples": [
[
"L1",
"L2",
"L3"
],
[
"H1",
"H2",
"H3"
]
],
"type": "array",
"maxItems": 5,
"minItems": 1,
"uniqueItems": true,
"items": {
"enum": [
"L1",
"L2",
"L3",
"H1",
"H2",
"H3"
]
}
},
"Species": {
"$ref": "#/$defs/speciesBinomial"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"CHSRange": {
"description": "A range of residues encoded by the C gene segment which is equivalent to the CHS region or the protein. This is normally the last two residues and the standard sequence is GK. Unusual residues and deletions are listed in the `Mutations` subproperty. Commonly the last K is deleted. If both residues are deleted, the `Start` and `End` subproperties are null and the deletions are listed. If the chain is truncated beyond the two CHS residues, the whole `CHSRange` property will be absent.",
"examples": [
[
{
"End": null,
"Instance": 3,
"Mutations": [
"G712del",
"K712del"
],
"Start": null
}
],
[
{
"End": 448,
"Instance": 1,
"Mutations": [
"K449del"
],
"Start": 448
},
{
"End": 446,
"Instance": 2,
"Mutations": [
"K447del"
],
"Start": 446
}
]
],
"items": {
"type": "object",
"required": [
"Instance",
"Start",
"End"
],
"properties": {
"End": {
"anyOf": [
{
"$ref": "#/$defs/endOfRange"
},
{
"type": "null"
}
]
},
"Instance": {
"$ref": "#/$defs/singleInstance"
},
"Mutations": {
"description": "Uses standard mutation notation. Must be a substitution or deletion of one or both residues in the GK sequence",
"$comment": "`#/$defs/mutationsAllTypes` is not referenced as the pattern is more constrained here - no insertions and only one or two mutations in total",
"examples": [
[
"G712del",
"K712del"
],
[
"K447del"
]
],
"type": "array",
"maxItems": 2,
"minItems": 1,
"uniqueItems": true,
"items": {
"type": "string",
"pattern": "^[GK][0-9]{1,4}([A-Z]|del)$"
}
},
"Note": {
"$ref": "#/$defs/note"
},
"Start": {
"anyOf": [
{
"$ref": "#/$defs/startOfRange"
},
{
"type": "null"
}
]
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"ChainLength": {
"description": "The total number of amino acids in chains that cannot be assigned as predominantly heavy or light, such as in scFv/scFab/XscFv (DART) proteins, or fusions with non-antibody proteins. It is the full length of each chain including (perhaps) light and heavy domains and any fusion proteins.",
"$comment": "The items in this property always describe two or more instances, whereas in `(Heavy|Light)ChainLength` they only ever describe a single instance",
"examples": [
[
{
"Instances": [
1,
2,
3,
4,
5,
6,
7
],
"Value": 1230
}
],
[
{
"Instances": [
3,
4,
5
],
"Value": 702
},
{
"Instances": [
6,
7
],
"Value": 215
}
]
],
"items": {
"type": "object",
"required": [
"Instances",
"Value"
],
"properties": {
"Instances": {
"minItems": 2,
"allOf": [
{
"$ref": "#/$defs/multiInstances"
}
]
},
"Note": {
"$ref": "#/$defs/note"
},
"Value": {
"description": "The number of residues in the chain",
"type": "integer",
"minimum": 1
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"ConfirmedPTM": {
"description": "Like `#/patternProperties/^(Heavy|Light)ConfirmedPTM$`, but used for linker regions and chains that cannot be assigned as predominantly heavy or light. Each item refers to an array of instances, although only linker regions use two instances - in all other cases (fusion chains), the annotations refer to a single instance within the larger chain.",
"examples": [
[
{
"Instances": [
1
],
"Positions": [
59
],
"Type": "formylglycine"
}
],
[
{
"Instances": [
4,
5
],
"PositionsPartial": [
372,
377
],
"Type": "hydroxylation"
}
]
],
"items": {
"type": "object",
"allOf": [
{
"required": [
"Instances",
"Type"
]
},
{
"anyOf": [
{
"required": [
"Positions"
]
},
{
"required": [
"PositionsPartial"
]
},
{
"required": [
"PositionsRare"
]
}
]
}
],
"properties": {
"Instances": {
"minItems": 1,
"allOf": [
{
"$ref": "#/$defs/multiInstances"
}
]
},
"Note": {
"$ref": "#/$defs/note"
},
"Positions": {
"$ref": "#/$defs/ptmPositions"
},
"PositionsPartial": {
"$ref": "#/$defs/ptmPositionsPartial"
},
"PositionsRare": {
"$ref": "#/$defs/ptmPositionsRare"
},
"Type": {
"$ref": "#/$defs/ptmType"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"Conjugate": {
"description": "This property is only used when the antibody is conjugated to a drug molecule and indicates the name of the conjugated drug. Details of the linker are not included. The possibility of multi-payload antibody-drug conjugates means that this property's value is an array of objects (like all other instance-specific properties) rather than a single object.",
"examples": [
[
{
"DrugName": "tazide",
"Instances": [
0
]
}
]
],
"items": {
"type": "object",
"required": [
"Instances",
"DrugName"
],
"properties": {
"DrugName": {
"description": "The name of the conjugated drug molecule; usually the generic INN-assigned name",
"examples": [
"tazide",
"paquemicin"
],
"type": "string",
"minLength": 1
},
"Instances": {
"$comment": "An array of 1 or 2 instances is specified because of the possibility of a drug being conjugated to a linker region between two instances",
"maxItems": 2,
"minItems": 1,
"allOf": [
{
"$ref": "#/$defs/multiInstances"
}
]
},
"Note": {
"$ref": "#/$defs/note"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"CysPositions": {
"description": "An array of cysteine positions in each chain that cannot be assigned as predominantly heavy or light",
"$comment": "This subschema is the same as `#/patternProperties/^(Heavy|Light)CysPositions$` except for the plural `Instances` item subproperty.",
"examples": [
[
{
"Instances": [
1,
2
],
"Values": [
23,
92,
140,
196,
216,
222,
225,
257,
317,
350,
363,
421
]
},
{
"Instances": [
3,
4
],
"Values": [
22,
96,
147,
207,
227
]
}
]
],
"items": {
"type": "object",
"required": [
"Instances",
"Values"
],
"properties": {
"Instances": {
"minItems": 1,
"allOf": [
{
"$ref": "#/$defs/multiInstances"
}
]
},
"Note": {
"$ref": "#/$defs/note"
},
"Values": {
"description": "The cysteine position(s) for the instance(s) given in the item. Uses sequential numbering as observed in the final protein.",
"allOf": [
{
"$ref": "#/$defs/residuePositionsArray"
}
]
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"DisulfidesInter": {
"description": "Lists residue pairs involved in inter-chain disulfides between chains that cannot be assigned as predominantly heavy or light, or in cases where the same instance light chain interacts with multiple instances of heavy chains (or vice versa). Uses sequential numbering as observed in the final protein. Separate items are used in cases of multiple disulfide isoforms, and these are indicated in the optional `Isoform` subproperty at the root level of the items (not within the `Positions` items).",
"$comment": "This subschema is identical to that of `#/properties/DisulfidesInterH1H2`, except that here null is left as a valid option for the `Positions` item subproperty",
"examples": [
[
{
"InstanceX": 5,
"InstanceY": 7,
"Positions": [
{
"X": 702,
"Y": 215
}
]
}
]
],
"items": {
"type": "object",
"required": [
"InstanceX",
"InstanceY",
"Positions"
],
"properties": {
"InstanceX": {
"description": "The first instance (chain or single-origin portion of a fused chain) of a pair of chains with inter-chain disulfides",
"type": "integer",
"minimum": 0
},
"InstanceY": {
"description": "The second instance (chain or single-origin portion of a fused chain) of a pair of chains with inter-chain disulfides",
"type": "integer",
"minimum": 0
},
"Isoform": {
"$ref": "#/$defs/isoform"
},
"Note": {
"$ref": "#/$defs/note"
},
"Positions": {
"$ref": "#/$defs/disulfidesInterPositionsXY"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"DisulfidesInterH1H2": {
"description": "Lists residue pairs involved in inter-chain disulfides between the two heavy chains (which often have the same instance number, as in standard formats - which are homodimers - but this isn't the case for many other formats). Uses sequential numbering as observed in the final protein. Separate items are used in cases of multiple disulfide isoforms, and these are indicated in the optional `Isoform` subproperty at the root level of the items (not within the `Positions` items). In most cases, this property will have only one item (if there are no isoforms) with between two and four disulfide pairs.",
"$comment": "TODO: check if it's theoretically possible to have a DisulfidesInterH3H4 annotation, in which case that could be included in a pattern property. We could also consider aggregating all of the DisulfidesInter* properties (in the data itself) by moving the chain pair suffix out of the property name and into an item subproperty.",
"examples": [
[
{
"InstanceX": 3,
"InstanceY": 3,
"Isoform": "A",
"Positions": [
{
"X": 492,
"Y": 492
},
{
"X": 495,
"Y": 495
}
]
},
{
"InstanceX": 3,
"InstanceY": 3,
"Isoform": "B",
"Positions": [
{
"X": 492,
"Y": 495
},
{
"X": 495,
"Y": 498
}
]
}
]
],
"items": {
"type": "object",
"required": [
"InstanceX",
"InstanceY",
"Positions"
],
"properties": {
"InstanceX": {
"description": "The first instance (chain or single-origin portion of a fused chain) of a pair of chains with inter-chain disulfides",
"type": "integer",
"minimum": 0
},
"InstanceY": {
"description": "The second instance (chain or single-origin portion of a fused chain) of a pair of chains with inter-chain disulfides",
"type": "integer",
"minimum": 0
},
"Isoform": {
"$ref": "#/$defs/isoform"
},
"Note": {
"$ref": "#/$defs/note"
},
"Positions": {
"$comment": "TODO: check if null is in fact a valid type here (if the heavy chains can be linked/stabilised with compensatory modifications?)",
"not": {
"type": "null"
},
"allOf": [
{
"$ref": "#/$defs/disulfidesInterPositionsXY"
}
]
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"DisulfidesIntra": {
"description": "Arrays of residue pairs involved in intra-chain disulfide bonds in each instance (or between linked instances) of a chain that cannot be assigned as predominantly heavy or light. Separate items are used in cases of multiple disulfide isoforms, and these are indicated in the optional `Isoform` subproperty.",
"$comment": "TODO: check if linker regions can have disulfides; if not, this property will only ever refer to single instances, so it can be aggregated with `#/patternProperties/^(Heavy|Light)DisulfidesIntra|FusionProtein(Heavy|Light)?Disulfides$`",
"examples": [
[
{
"Instances": [
1
],
"Positions": [
[
146,
159
],
[
397,
407
]
]
}
]
],
"items": {
"type": "object",
"required": [
"Instances",
"Positions"
],
"properties": {
"Instances": {
"minItems": 1,
"allOf": [
{
"$ref": "#/$defs/multiInstances"
}
]
},
"Isoform": {
"$ref": "#/$defs/isoform"
},
"Note": {
"$ref": "#/$defs/note"
},
"Positions": {
"$ref": "#/$defs/disulfidesIntraPositions"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"Domains": {
"description": "Used with proteins such as CrossMabs to indicate (in order, from N to C terminus) the domains present in each instance or chain of fused instances. Linkers are not listed.",
"examples": [
[
{
"Instances": [
1
],
"Values": [
"VH",
"CH1",
"CH2",
"CH3"
]
},
{
"Instances": [
2
],
"Values": [
"VL",
"CL"
]
},
{
"Instances": [
3
],
"Values": [
"VH",
"CH1",
"CH2",
"CH3"
]
},
{
"Instances": [
4,
5
],
"Values": [
"VH",
"CL"
]
},
{
"Instances": [
6,
7
],
"Values": [
"VL",
"CH1"
]
}
],
[
{
"Instances": [
1,
2,
3
],
"Values": [
"OTHER",
"VH",
"VL"
]
}
]
],
"items": {
"type": "object",
"required": [
"Instances",
"Values"
],
"properties": {
"Instances": {
"minItems": 1,
"allOf": [
{
"$ref": "#/$defs/multiInstances"
}
]
},
"Note": {
"$ref": "#/$defs/note"
},
"Values": {
"description": "The list of domains, in order from N to C terminus, using their abbreviated names. VHH is a camelid heavy chain variable domain, and OTHER represents any non-antibody domain.",
"examples": [
[
"VL",
"CL"
],
[
"VH",
"CH1",
"CH2",
"CH3"
],
[
"VL",
"VH",
"OTHER"
],
[
"hinge"
]
],
"type": "array",
"minItems": 1,
"uniqueItems": true,
"items": {
"enum": [
"VL",
"VH",
"VHH",
"CL",
"CH1",
"CH2",
"CH3",
"CH4",
"hinge",
"OTHER"
]
}
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"Format": {
"description": "The overall nature of the molecule. Begins with optional indicators of conjugation, multispecificity and fusions. This is followed by the source (slash-separated if more than one), the words `monoclonal antibody`, and optional details about the type of antibody fragments/domains, details of multimerization and non-antibody fusions",
"$comment": "a more complete regex pattern could be used if the order of information is made consistent",
"examples": [
"human monoclonal antibody",
"conjugated humanized monoclonal antibody",
"bispecific humanized/human monoclonal antibody",
"human monoclonal antibody Fc fusion with human TACI fragment",
"tetraspecific humanized/chimeric scFv-Ab-scFv-scFv monoclonal antibody",
"murine monoclonal antibody scFv fusion with Shiga-like toxin 1 subunit A (stxA)"
],
"type": "string",
"pattern": ".+monoclonal.+antibody"
},
"Format-Note": {
"description": "Further context, detail or clarification on information in the `Format` property. These notes often refer to the source of the antibody chains (as this is part of the `Format` description), and so could equally be found in the optional `HeavySource-Note` and `LightSource-Note` properties or as item subproperties within the optional `Source` property.",
"examples": [
[
{
"Instances": [
1
],
"Value": "Homo sapiens synthetic phage display library"
}
],
[
{
"Instances": [
1,
3
],
"Value": "humanized by Kabat CDR grafting"
},
{
"Instances": [
5,
6
],
"Value": "transgenic rabbit with human genes"
}
]
],
"items": {
"type": "object",
"required": [
"Instances",
"Value"
],
"properties": {
"Instances": {
"minItems": 1,
"allOf": [
{
"$ref": "#/$defs/multiInstances"
}
]
},
"Value": {
"description": "The content of the note for the instance(s) given in the item",
"$comment": "The purpose of minLength is to disallow empty strings (2 is a somewhat arbitrary choice).",
"examples": [
"transgenic mouse with human genes",
"human Fab antibody phage display library"
],
"type": "string",
"minLength": 2
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"Fusion": {
"description": "Indicates instances that are fused together to form single chains",
"examples": [
[
{
"Instances": [
3,
4,
5
]
},
{
"Instances": [
6,
7
]
}
]
],
"items": {
"type": "object",
"required": [
"Instances"
],
"properties": {
"Instances": {
"minItems": 2,
"allOf": [
{
"$ref": "#/$defs/multiInstances"
}
]
},
"Note": {
"$ref": "#/$defs/note"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"HCGermline": {
"description": "The IMGT germline assignment (including species name) for the heavy chain constant fragment of each heavy chain instance. Where the constant domains of a single instance come from different germlines, each domain or group of domains is recorded in a separate item and the optional `Domains` subproperty is used.",
"$comment": "See also: `#/properties/LCGermline` and `#/patternProperties/^(HV|HJ|LV|LJ)Germline$`, which have the same the same subschema as here, except for differences in the optional `Domain` or `Domains` item subproperties.",
"examples": [
[
{
"GeneID": "IGHG1*01",
"Instance": 3,
"Species": "Homo sapiens"
},
{
"GeneID": "IGHG1*09",
"Instance": 6,
"Species": "Homo sapiens"
}
],
[
{
"Domains": [
"CH1"
],
"GeneID": "IGHG1*01",
"Instance": 3,
"Species": "Homo sapiens"
},
{
"Domains": [
"CH2",
"CH3"
],
"GeneID": "IGHG1*09",
"Instance": 3,
"Species": "Homo sapiens"
}
]
],
"items": {
"type": "object",
"required": [
"Instance",
"Species",
"GeneID"
],
"properties": {
"Domains": {
"$ref": "#/$defs/heavyDomains"
},
"GeneID": {
"$ref": "#/$defs/imgtGeneID"
},
"Instance": {
"$ref": "#/$defs/singleInstance"
},
"Note": {
"$ref": "#/$defs/note"
},
"Species": {
"$ref": "#/$defs/speciesBinomial"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"HeavyChainClass": {
"description": "The heavy chain class for the chain or domain(s) covered by the given instance. It must match the heavy chain specification given in the `Type` property item that includes the same instance. Where the heavy chain is a composite of more than one class, there are separate items for each group of one or more domains of the same class.",
"examples": [
[
{
"Instance": 0,
"Value": "Gamma1"
}
],
[
{
"Domains": [
"CH1"
],
"Instance": 2,
"Value": "Gamma4"
},
{
"Domains": [
"CH2",
"CH3"
],
"Instance": 2,
"Value": "Gamma1"
},
{
"Instance": 3,
"Value": null
}
]
],
"items": {
"type": "object",
"required": [
"Instance",
"Value"
],
"properties": {
"Domains": {
"$ref": "#/$defs/heavyDomains"
},
"Instance": {
"$ref": "#/$defs/singleInstance"
},
"Note": {
"$ref": "#/$defs/note"
},
"Value": {
"description": "The name of the class; null is optionally used for an Fv type (VH domain).",
"anyOf": [
{
"type": "string",
"pattern": "^Gamma[1-4]$"
},
{
"type": "null"
}
]
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"ID": {
"description": "Used to provide an original identifier for the antibody (e.g. clone name, lab code, etc)",
"$comment": "TODO: Sometimes the string is a comma-separated list of two or more alternative IDs. Consider changing the property type to array",
"examples": [
"IAB22M2C",
"GNC-038",
"DS-6016a",
"SI-B003, SI-3X35, SI-3"
],
"type": "string",
"minLength": 3
},
"Interaction": {
"description": "Indicates non-antibody domains that interact",
"$comment": "TODO: check if this annotation only ever applies to exactly 2 instances, in which case the `Instances` array should be further constrained",
"examples": [
[
{
"Instances": [
1,
3
],
"Note": "Experimentally-determined Kd of 10 nM"
}
]
],
"items": {
"type": "object",
"required": [
"Instances"
],
"properties": {
"Instances": {
"minItems": 2,
"allOf": [
{
"$ref": "#/$defs/multiInstances"
}
]
},
"Note": {
"$ref": "#/$defs/note"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"LCGermline": {
"description": "The IMGT germline assignment (including species name) for the light chain constant fragment of each light chain instance. Where the CL and VL domains come from different germlines (or in formats like Crossmabs), this is indicated with `CL` in the optional item subproperty `Domain`.",
"$comment": "See also: `#/properties/HCGermline` and `#/patternProperties/^(HV|HJ|LV|LJ)Germline$`, which have the same the same subschema as here, except for differences in the optional `Domain` or `Domains` item subproperties.",
"examples": [
[
{
"Domain": "CL",
"GeneID": "IGKC*01",
"Instance": 8,
"Species": "Homo sapiens"
}
]
],
"items": {
"type": "object",
"required": [
"Instance",
"Species",
"GeneID"
],
"properties": {
"Domain": {
"description": "An optional indicator of the domain, for cases when the variable and constant domains come from different germline loci",
"$comment": "`#/$defs/lightDomain` is not used because in this case `VL` is not a valid value",
"const": "CL"
},
"GeneID": {
"$ref": "#/$defs/imgtGeneID"
},
"Instance": {
"$ref": "#/$defs/singleInstance"
},
"Note": {
"$ref": "#/$defs/note"
},
"Species": {
"$ref": "#/$defs/speciesBinomial"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"LightChainClass": {
"description": "The light chain class for the chain or domain(s) covered by the given instance. It must match the light chain specification given in the `Type` property item that includes the same instance. Where the light chain is a composite of two classes, there are separate items for the VL and CL domains (see below example).",
"examples": [
[
{
"Domain": "VL",
"Instance": 1,
"Value": "kappa"
},
{
"Domain": "CL",
"Instance": 1,
"Value": "lambda"
}
],
[
{
"Instance": 1,
"Value": "kappa"
},
{
"Instance": 5,
"Value": "kappa"
},
{
"Instance": 7,
"Value": "kappa"
},
{
"Instance": 8,
"Value": "kappa"
}
]
],
"items": {
"type": "object",
"required": [
"Instance",
"Value"
],
"properties": {
"Domain": {
"$ref": "#/$defs/lightDomain"
},
"Instance": {
"$ref": "#/$defs/singleInstance"
},
"Note": {
"$ref": "#/$defs/note"
},
"Value": {
"description": "The name of the class",
"enum": [
"kappa",
"lambda"
]
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"PDB": {
"description": "The Protein Data Bank (PDB) identifier (if an experimentally-determined structure exists) for the chain or domains covered by the given instances",
"examples": [
[
{
"ID": "7ahv",
"Instances": [
1
]
},
{
"ID": "7ahu",
"Instances": [
2
]
}
]
],
"items": {
"type": "object",
"required": [
"Instances",
"ID"
],
"properties": {
"ID": {
"description": "The 4-character alphanumeric PDB identifier. Letters may be lowercase",
"$comment": "TODO: check if the ID is always written in the lower-case form",
"examples": [
"7ahv",
"7ahu"
],
"type": "string",
"pattern": "^[1-9][a-zA-Z0-9]{3}$"
},
"Instances": {
"minItems": 1,
"allOf": [
{
"$ref": "#/$defs/multiInstances"
}
]
},
"Note": {
"$ref": "#/$defs/note"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"Request": {
"description": "The unique ID assigned to the INN application by the WHO. Usually a 5 digit string.",
"$comment": "TODO: check if it's always 5 digits",
"examples": [
"11678",
"12023"
],
"type": "string",
"pattern": "^[0-9]{5}$"
},
"Source": {
"description": "States the source of two or more chains when they are of different origins and the antibody is a non-standard format. In standard antibodies, (Heavy|Light)Source is used instead.",
"examples": [
[
{
"Instances": [
1
],
"Value": "human"
},
{
"Instances": [
2,
3
],
"Value": "humanized"
}
]
],
"items": {
"type": "object",
"required": [
"Instances",
"Value"
],
"properties": {
"Instances": {
"minItems": 1,
"allOf": [
{
"$ref": "#/$defs/multiInstances"
}
]
},
"Note": {
"$ref": "#/$defs/note"
},
"Value": {
"description": "The `Source` annotation for the chain(s) comprising the instances given in the item",
"examples": [
"human",
"humanized",
"chimeric"
],
"type": "string",
"minLength": 3
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"Type": {
"description": "This is a summary of the antibody type describing the nature of both heavy and light chains. There may be multiple such descriptions (one for each instance or pair of instances) in non-standard antibody formats. The basic format for each description is the heavy and light class separated by a dash. When a chain is a fusion from different sources, there may be two or more slash-separated classes. There is an optional indicator of a special format (e.g. `Crossmab` or `Fab`). Single-chain variable fragments (scFvs) follow a slightly different format, with a separate annotation for each of the two domains, and an optional indicator of dimerization. Finally, there are various miscellaneous types that may be specified, such as `single-domain` for nanobodies or `OTHER` for non-antibody proteins.",
"examples": [
[
{
"Instances": [
0
],
"Value": "IgG4-lambda"
}
],
[
{
"Instances": [
1
],
"Value": "Fv-kappa scFv"
},
{
"Instances": [
2
],
"Value": "Fv-heavy scFv"
},
{
"Instances": [
3
],
"Value": "IgG1-kappa"
},
{
"Instances": [
4
],
"Value": "Fv-heavy scFv"
},
{
"Instances": [
5
],
"Value": "Fv-kappa scFv"
},
{
"Instances": [
6
],
"Value": "Fv-heavy scFv"
},
{
"Instances": [
7
],
"Value": "Fv-kappa scFv"
},
{
"Instances": [
8
],
"Value": "IgG1-kappa"
}
],
[
{
"Instances": [
1,
2
],
"Value": "IgG1-lambda"
},
{
"Instances": [
1,
3
],
"Value": "IgG1-kappa"
}
]
],
"items": {
"type": "object",
"required": [
"Instances",
"Value"
],
"properties": {
"Instances": {
"$comment": "TODO: check whether maxItems should be 2",
"minItems": 1,
"allOf": [
{
"$ref": "#/$defs/multiInstances"
}
]
},
"Note": {
"$ref": "#/$defs/note"
},
"Value": {
"description": "The `Type` annotation for the instance(s) given in the item. See full description in parent property.",
"$comment": "The patterns may be too restrictive, so consider extending, simplifying or removing them. Also note that the anyOf array is used for clarity, even though the same effect could be achieved in a single very long pattern.",
"examples": [
"IgG1-kappa",
"IgG2/IgG4-lambda",
"IgG1-kappa/lambda",
"Fv-heavy scFv",
"Fv-kappa scFv dimer",
"OTHER"
],
"type": "string",
"anyOf": [
{
"pattern": "^IgG[1-4](/IgG[1-4])*-(kappa|lambda)(/(kappa|lambda))*( (Crossmab|Fab|scFab|VHFc|XscFv))?$"
},
{
"pattern": "^Fv-(heavy|kappa|lambda) scFv( dimer)?$"
},
{
"$comment": "e.g. in Record 12381",
"pattern": "^Gamma[1-4]-hcAb$"
},
{
"$comment": "e.g. in Record 11678",
"pattern": "^CH[1-4]-Gamma[1-4]$"
},
{
"$comment": "`hinge` e.g. in Record 11678; `Fc` e.g. in Records 12197, 12264, and 12295; `VHH` e.g. in Record 12204",
"enum": [
"VHH",
"hinge",
"Fc",
"single-domain",
"OTHER"
]
}
],
"minLength": 1
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
}
},
"patternProperties": {
"^(CH1|Hinge|CH2|CH3|CH4|CL)Range$": {
"description": "The range of residues encoded by the C gene segment which is equivalent to one of the constant domains (CL, CH1, CH2, CH3, or CH4) or the hinge region (between CH1 and CH2) in the protein. CH4Range will only be present for IgM and IgE antibodies. If there are any differences (substitutions, insertions or deletions) from the germline-encoded sequence, these are indicated in the optional `Mutations` subproperty. (For Ctrl+F searches) Properties covered by this pattern: CH1Range, HingeRange, CH2Range, CH3Range, CH4Range, CLRange.",
"examples": [
[
{
"End": 224,
"Instance": 1,
"Mutations": [
"K156E",
"K222E"
],
"Start": 127
},
{
"End": 224,
"Instance": 3,
"Mutations": [
"K156E",
"K222E"
],
"Start": 127
}
],
[
{
"End": 260,
"Instance": 3,
"Mutations": [
"C248S",
"ins259P",
"ins260C"
],
"Start": 244
}
]
],
"items": {
"type": "object",
"required": [
"Instance",
"Start",
"End"
],
"properties": {
"End": {
"$ref": "#/$defs/endOfRange"
},
"Instance": {
"$ref": "#/$defs/singleInstance"
},
"Mutations": {
"$ref": "#/$defs/mutationsAllTypes"
},
"Note": {
"$ref": "#/$defs/note"
},
"Start": {
"$ref": "#/$defs/startOfRange"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"^(HV|HJ|LV|LJ)Germline$": {
"description": "The IMGT germline assignment (including species name) for the heavy or light chain variable or joining fragment for each instance. A separate property is used for each type of germline fragment. (For Ctrl+F searches) Properties covered by this pattern: HVGermline, HJGermline, LVGermline, LJGermline.",
"$comment": "See also: `#/properties/HCGermline` and `#/properties/LCGermline`. These subschemas are identical to this one, except for the optional `Domains` or `Domain` item subproperties, which are not valid here because these properties always relate to only one domain (VH or VL), which is already implied in the property name.",
"examples": [
[
{
"GeneID": "IGKV1-5*01",
"Instance": 1,
"Species": "Homo sapiens"
},
{
"GeneID": "IGKV1-5*03",
"Instance": 5,
"Species": "Homo sapiens"
},
{
"GeneID": "IGKV1-39*01",
"Instance": 7,
"Species": "Homo sapiens"
},
{
"GeneID": "IGKV1-13*02",
"Instance": 8,
"Species": "Homo sapiens"
}
],
[
{
"GeneID": "IGKJ4*01",
"Instance": 1,
"Species": "Homo sapiens"
},
{
"GeneID": "IGKJ4*01",
"Instance": 5,
"Species": "Homo sapiens"
},
{
"GeneID": "IGKJ4*01",
"Instance": 7,
"Species": "Homo sapiens"
},
{
"GeneID": "IGKJ3*01",
"Instance": 8,
"Species": "Homo sapiens"
}
]
],
"items": {
"type": "object",
"required": [
"Instance",
"Species",
"GeneID"
],
"properties": {
"GeneID": {
"$ref": "#/$defs/imgtGeneID"
},
"Instance": {
"$ref": "#/$defs/singleInstance"
},
"Note": {
"$ref": "#/$defs/note"
},
"Species": {
"$ref": "#/$defs/speciesBinomial"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"^(Heavy|Light)?Chain$": {
"description": "The amino acid sequence of each complete heavy or light chain (listed within separate properties), including all instances that are fused together. Chains that cannot be assigned as primarily heavy or light are listed under the property name without the `Heavy` or `Light` prefix. (For Ctrl+F searches) Properties covered by this pattern: Chain, HeavyChain, LightChain.",
"examples": [
[
{
"Instances": [
1,
2,
3,
4,
5,
6,
7
],
"Sequence": "DVVMTQSPSTLSASVGDRVTINCQASESISSWLAWYQQKPGKAPKLLIYEASKLASGVPSRFSGSGSGTEFTLTISSLQPDDFATYYCQGYFYFISRTYVNSFGGGTKVEIKGGGGSGGGGSGGGGSGGGGSEVQLVESGGGLVQPGGSLRLSCAASGFTISTNAMSWVRQAPGKGLEWIGVITGRDITYYASWAKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDGGSSAITSNNIWGQGTLVTVSSGGGGSGGGGSEVQLVQSGAEVKKPGESLKISCKGSGYSFSSSWIGWVRQAPGKGLEWMGIIYPDDSDTRYSPSFQGQVTISADKSIRTAYLQWSSLKASDTAMYYCARHVTMIWGVIIDFWGQGTLVTVSSASTKGPSVFPLAPSSKSTSGGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSSLGTQTYICNVNHKPSNTKVDKRVEPKSCDKTHTCPPCPAPEAAGAPSVFLFPPKPKDTLMISRTPEVTCVVVDVSHEDPEVKFNWYVDGVEVHNAKTKPREEQYNSTYRVVSVLTVLHQDWLNGKEYKCAVSNKALPAPIEKTISKAKGQPREPQVYTLPPSRDELTKNQVSLTCLVKGFYPSDIAVEWESNGQPENNYKTTPPVLDSDGSFFLYSKLTVDKSRWQQGNVFSCSVMHEALHNHYTQKSLSLSPGGGGGSGGGGSEVQLLESGGGLVQPGGSLRLSCAASGFSFSSGYDMCWVRQAPGKGLEWIACIAAGSAGITYDANWAKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARSAFSFDYAMDLWGQGTLVTVSSGGGGSGGGGSGGGGSGGGGSDIQMTQSPSTLSASVGDRVTITCQASQSISSHLNWYQQKPGKAPKLLIYKASTLASGVPSRFSGSGSGTEFTLTISSLQPDDFATYYCQQGYSWGNVDNVFGGGTKVEIKGGGGSGGGGSRSLVESGGGLVQPGGSLRLSCTASGFTISSYHMQWVRQAPGKGLEYIGTISSGGNVYYASSARGRFTISRPSSKNTVDLQMNSLRAEDTAVYYCARDSGYSDPMWGQGTLVTVSSGGGGSGGGGSGGGGSGGGGSDVVMTQSPSSVSASVGDRVTITCQASQNIRTYLSWYQQKPGKAPKLLIYAAANLASGVPSRFSGSGSGTDFTLTISDLEPGDAATYYCQSTYLGTDYVGGAFGGGTKVEIK"
}
],
[
{
"Instances": [
1
],
"Sequence": "QIVLTQSPATLSLSPGERATLSCSASSKHTNLYWSRHMYWYQQKPGQAPRLLIYLTSNRATGIPARFSGSGSGTDFTLTISSLEPEDFAVYYCQQWSSNPFTFGQGTKLEIKRTVAAPSVFIFPPSDEQLKSGTASVVCLLNNFYPREAKVQWKVDNALQSGNSESVTEQDSKDSTYSLSSTLTLSKADYEKHKVYACEVTHQGLSSPVTKSFNRGEC"
},
{
"Instances": [
2
],
"Sequence": "DIQMTQSPSSLSASVGDRVTITCRASQSINSYLDWYQQKPGKAPKLLIYAASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQYYSTPFTFGPGTKVEIKGQPKAAPSVTLFPPCSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEGSTVEKTVAPTEVS"
}
]
],
"items": {
"type": "object",
"required": [
"Instances",
"Sequence"
],
"properties": {
"Instances": {
"minItems": 1,
"allOf": [
{
"$ref": "#/$defs/multiInstances"
}
]
},
"Note": {
"$ref": "#/$defs/note"
},
"Sequence": {
"$ref": "#/$defs/aminoAcidSequence"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"^(Heavy|Light)ChainLength$": {
"description": "The total number of amino acids in the heavy or light chain. In the case of fusions, this property applies to the N-terminal antibody heavy or light chain. It is the full length of the chain including any fusion proteins (but these are not included in the `Instance` subproperty). In cases where you can't assign chains as (predominantly) heavy or light, the `ChainLength` property is used instead (see `#/properties/ChainLength/description` for more details). (For Ctrl+F searches) Properties covered by this pattern: HeavyChainLength, LightChainLength.",
"examples": [
[
{
"Instance": 8,
"Value": 214
}
],
[
{
"Instance": 1,
"Value": 448
},
{
"Instance": 2,
"Value": 446
}
]
],
"items": {
"type": "object",
"required": [
"Instance",
"Value"
],
"properties": {
"Instance": {
"$ref": "#/$defs/singleInstance"
},
"Note": {
"$ref": "#/$defs/note"
},
"Value": {
"description": "The number of residues in the chain",
"type": "integer",
"minimum": 1
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"^(Heavy|Light)ConfirmedPTM$": {
"description": "Arrays of residue positions in each instance of a heavy or light chain that have been confirmed by the INN applicant as sites of post-translational modifications (PTMs). Separate items are used for each type of PTM, and separate array subproperties are used for confirmed sites and optionally those where only partial or rare occupancy has been confirmed at the site. (For Ctrl+F searches) Properties covered by this pattern: HeavyConfirmedPTM, LightConfirmedPTM.",
"$comment": "This subschema is the same as `#/properties/ConfirmedPTM` except for the singular `Instance` item subproperty",
"examples": [
[
{
"Instance": 0,
"Positions": [
1
],
"Type": "nterpca"
},
{
"Instance": 0,
"PositionsPartial": [
31,
322
],
"Type": "deamidation"
},
{
"Instance": 0,
"PositionsRare": [
425
],
"Type": "oxidation"
}
],
[
{
"Instance": 0,
"PositionsPartial": [
171
],
"PositionsRare": [
25
],
"Type": "deamidation"
},
{
"Instance": 0,
"PositionsRare": [
49,
50,
52,
81,
84,
91,
95
],
"Type": "isomerization"
}
]
],
"items": {
"type": "object",
"allOf": [
{
"required": [
"Instance",
"Type"
]
},
{
"anyOf": [
{
"required": [
"Positions"
]
},
{
"required": [
"PositionsPartial"
]
},
{
"required": [
"PositionsRare"
]
}
]
}
],
"properties": {
"Instance": {
"$ref": "#/$defs/singleInstance"
},
"Note": {
"$ref": "#/$defs/note"
},
"Positions": {
"$ref": "#/$defs/ptmPositions"
},
"PositionsPartial": {
"$ref": "#/$defs/ptmPositionsPartial"
},
"PositionsRare": {
"$ref": "#/$defs/ptmPositionsRare"
},
"Type": {
"$ref": "#/$defs/ptmType"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"^(Heavy|Light)CysPositions$": {
"description": "An array of cysteine positions in each instance that makes up all or part of a heavy or light chain. (For Ctrl+F searches) Properties covered by this pattern: HeavyCysPositions, LightCysPositions.",
"$comment": "This subschema is the same as `#/properties/CysPositions` except for the singular `Instance` item subproperty",
"examples": [
[
{
"Instance": 1,
"Values": [
23,
88
]
},
{
"Instance": 5,
"Values": [
887,
952
]
},
{
"Instance": 7,
"Values": [
1142,
1207
]
},
{
"Instance": 8,
"Values": [
23,
88,
134,
194,
214
]
}
],
[
{
"Instance": 2,
"Values": [
154,
227
]
},
{
"Instance": 3,
"Values": [
284,
358,
410,
466,
486,
492,
495,
527,
587,
633,
691
]
},
{
"Instance": 4,
"Values": [
744,
758,
773,
820
]
},
{
"Instance": 6,
"Values": [
1005,
1078
]
}
]
],
"items": {
"type": "object",
"required": [
"Instance",
"Values"
],
"properties": {
"Instance": {
"$ref": "#/$defs/singleInstance"
},
"Note": {
"$ref": "#/$defs/note"
},
"Values": {
"description": "The cysteine position(s) for the instance given in the item. Uses sequential numbering as observed in the final protein.",
"allOf": [
{
"$ref": "#/$defs/residuePositionsArray"
}
]
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"^(Heavy|Light)DisulfidesIntra|FusionProtein(Heavy|Light)?Disulfides$": {
"description": "Arrays of residue pairs involved in intra-chain disulfide bonds in each instance of a heavy chain, light chain, or fusion protein. Separate items are used in cases of multiple disulfide isoforms, and these are indicated in the optional `Isoform` subproperty. (For Ctrl+F searches) Properties covered by this pattern: HeavyDisulfidesIntra, LightDisulfidesIntra, FusionProteinDisulfides, FusionProteinHeavyDisulfides, FusionProteinLightDisulfides.",
"$comment": "This subschema is the same as `#/properties/DisulfidesIntra` except for the singular `Instance` item subproperty",
"examples": [
[
{
"Instance": 2,
"Positions": [
[
154,
227
]
]
},
{
"Instance": 3,
"Positions": [
[
284,
358
],
[
410,
466
],
[
527,
587
],
[
633,
691
]
]
},
{
"Instance": 4,
"Positions": [
[
744,
820
],
[
758,
773
]
]
},
{
"Instance": 6,
"Positions": [
[
1005,
1078
]
]
}
],
[
{
"Instance": 1,
"Positions": [
[
23,
88
]
]
},
{
"Instance": 5,
"Positions": [
[
887,
952
]
]
},
{
"Instance": 7,
"Positions": [
[
1142,
1207
]
]
},
{
"Instance": 8,
"Positions": [
[
23,
88
],
[
134,
194
]
]
}
]
],
"items": {
"type": "object",
"required": [
"Instance",
"Positions"
],
"properties": {
"Instance": {
"$ref": "#/$defs/singleInstance"
},
"Isoform": {
"$ref": "#/$defs/isoform"
},
"Note": {
"$ref": "#/$defs/note"
},
"Positions": {
"$ref": "#/$defs/disulfidesIntraPositions"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"^(Heavy|Light)NGlycos|HeavyOGlycos$": {
"description": "Arrays of residue positions that are sites of N-linked glycosylation in light or heavy chains, or O-linked glycosylation in heavy chains, depending on the property name. Note that O-linked glycosylation is not observed in light chains, so `LightOGlycos` is not a valid property name. For each instance, the positions are listed separately for potential sites (predicted from sequence information, such as the characteristic `N-{P}-[ST]-{P}` motif for N-linked glycosylation) and for the subset of these sites that have been experimentally confirmed. There are also separate optional arrays for sites with only partial or rare occupancy of the glycosylation (these annotations are mutually exclusive with the sites in the basic `Confirmed` array). (For Ctrl+F searches) Properties covered by this pattern: HeavyNGlycos, LightNGlycos, HeavyOGlycos.",
"$comment": "See also: `#/patternProperties/^(N|O)Glycos$` - this covers the same annotations but for chains that can't be assigned as predominantly heavy or light (e.g. fusions), which may contain glycosylation annotations for linker regions, which require an array for the `Instances` item property",
"examples": [
[
{
"Confirmed": [
296
],
"Instance": 0,
"Potential": [
296
]
}
],
[
{
"Confirmed": [
306
],
"ConfirmedPartial": [
52
],
"Instance": 1,
"Potential": [
52,
306
]
},
{
"Confirmed": [
306
],
"Instance": 3,
"Potential": [
52,
306
]
},
{
"Confirmed": null,
"Instance": 4,
"Potential": null
},
{
"Confirmed": null,
"Instance": 7,
"Potential": null
}
]
],
"items": {
"type": "object",
"required": [
"Instance",
"Potential",
"Confirmed"
],
"properties": {
"Confirmed": {
"$ref": "#/$defs/glycosConfirmed"
},
"ConfirmedPartial": {
"$ref": "#/$defs/glycosConfirmedPartial"
},
"ConfirmedRare": {
"$ref": "#/$defs/glycosConfirmedRare"
},
"Instance": {
"$ref": "#/$defs/singleInstance"
},
"Note": {
"$ref": "#/$defs/note"
},
"Potential": {
"$ref": "#/$defs/glycosPotential"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"^(Heavy|Light)Source$": {
"description": "States the source of the heavy and light chains (in two separate properties) when they are of different origins. (For Ctrl+F searches) Properties covered by this pattern: HeavySource, LightSource.",
"$comment": "HeavySource and LightSource could be changed to arrays of instances, like all of the other Heavy/Light versions of properties. The only reason they are here is that the INN format documentation suggests they will only be used in standard antibody formats, unlike the other Heavy/Light properties, which can take instance qualifiers and are only stripped of the Heavy/Light prefix when the chain cannot be assigned as predominantly heavy or light.",
"examples": [
"human",
"humanized",
"chimeric"
],
"type": "string",
"minLength": 3
},
"^(N|O)Glycos$": {
"description": "Used instead of `#/patternProperties/^(Heavy|Light)NGlycos|HeavyOGlycos$` for linker regions and chains that cannot be assigned as predominantly heavy or light. (For Ctrl+F searches) Properties covered by this pattern: NGlycos, OGlycos.",
"$comment": "This subschema is the same as for `#/patternProperties/^(Heavy|Light)NGlycos|HeavyOGlycos$`, except for the plural `Instances` item subproperty",
"examples": [
[
{
"Confirmed": [
293
],
"Instances": [
1,
2
],
"Potential": [
293
]
},
{
"Confirmed": null,
"Instances": [
3,
4
],
"Potential": null
}
],
[
{
"Confirmed": [
6,
90,
300,
488,
512
],
"Instances": [
1
],
"Potential": [
6,
90,
119,
221,
255,
300,
488,
512
]
}
]
],
"items": {
"type": "object",
"required": [
"Instances",
"Potential",
"Confirmed"
],
"properties": {
"Confirmed": {
"$ref": "#/$defs/glycosConfirmed"
},
"ConfirmedPartial": {
"$ref": "#/$defs/glycosConfirmedPartial"
},
"ConfirmedRare": {
"$ref": "#/$defs/glycosConfirmedRare"
},
"Instances": {
"minItems": 1,
"allOf": [
{
"$ref": "#/$defs/multiInstances"
}
]
},
"Note": {
"$ref": "#/$defs/note"
},
"Potential": {
"$ref": "#/$defs/glycosPotential"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"^(Request|ID|AbML|(Heavy|Light)Source)-Note$": {
"description": "Any additional information relating to one of the properties in the name pattern. (For Ctrl+F searches) Properties covered by this pattern: Request-Note, ID-Note, AbML-Note, HeavySource-Note, LightSource-Note.",
"type": "string",
"minLength": 3
},
"^(VH|VL)Range$": {
"description": "The range of residues encoded by the V(D)J gene segments and equivalent to the VH or VL domain. Uses sequential numbering as observed in the final protein. (For Ctrl+F searches) Properties covered by this pattern: VHRange, VLRange.",
"$comment": "`Mutations` is not added as an item property (as in the constant domain range annotations), since by definition these domains are too variable (in the final protein) for the idea of a germline reference sequence to make sense",
"examples": [
[
{
"End": 252,
"Instance": 2,
"Start": 133
},
{
"End": 383,
"Instance": 3,
"Start": 263
}
]
],
"items": {
"type": "object",
"required": [
"Instance",
"Start",
"End"
],
"properties": {
"End": {
"$ref": "#/$defs/endOfRange"
},
"Instance": {
"$ref": "#/$defs/singleInstance"
},
"Note": {
"$ref": "#/$defs/note"
},
"Start": {
"$ref": "#/$defs/startOfRange"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"^Antigen|Binding$": {
"description": "The antigen (or antigens in the case of multispecifics) that is the binding target of this antibody. The `Binding` property name is a more general form of `Antigen` and is used for fused non-antibody domains (e.g. anticalins), where the binding targets are not referred to as antigens. Each annotation (item) includes the name (and pseudonyms) of the antigen, the common name of the species, the name of the gene, and the antibody instance(s) containing the variable domains that form the antigen-combining site (paratope) for that particular antigen. In rare cases (see second example below, which is from Request 12086), two different antigens bind the same instances of the antibody, in which case there may be a `Note` subproperty to indicate which CDRs are involved in each binding interaction.",
"examples": [
[
{
"Gene": "CD3E",
"Instances": [
1,
2
],
"Names": "T-cell surface glycoprotein CD3 epsilon chain, T-cell surface antigen T3/Leu-4 epsilon chain, CD3e",
"Species": "human"
},
{
"Gene": "CD19",
"Instances": [
3,
8
],
"Names": "B-lymphocyte antigen CD19, B-lymphocyte surface antigen B4, T-cell surface antigen Leu-12",
"Species": "human"
},
{
"Gene": "CD274",
"Instances": [
4,
5
],
"Names": "programmed cell death 1 ligand 1, PD-L1, programmed death ligand 1, PDCD1 ligand 1, B7 homolog 1, B7-H1",
"Species": "human"
},
{
"Gene": "TNFSF9",
"Instances": [
6,
7
],
"Names": "Tumor necrosis factor ligand superfamily member 9, 4-1BB ligand, 4-1BBL",
"Species": "human"
}
],
[
{
"Gene": "VEGFA",
"Instances": [
1,
2
],
"Names": "vascular endothelial growth factor A, VEGF-A, Vascular permeability factor, VPF",
"Note": "CDRs L1 L3 H2",
"Species": "human"
},
{
"Gene": "ANGPT2",
"Instances": [
1,
2
],
"Names": "Angiopoietin-2, ANG-2",
"Note": "CDRs L2 H1 H3",
"Species": "human"
}
]
],
"items": {
"type": "object",
"required": [
"Instances",
"Species",
"Names",
"Gene"
],
"properties": {
"Gene": {
"description": "The name of the gene (official gene symbol) of which the antigen/binding target is the product",
"examples": [
"CD3E",
"TNFSF9",
"ALB",
"C2"
],
"type": "string",
"pattern": "^[-A-Z0-9]{2,}$"
},
"Instances": {
"examples": [
[
0
],
[
1,
2
],
[
4,
5
]
],
"maxItems": 2,
"minItems": 1,
"allOf": [
{
"$ref": "#/$defs/multiInstances"
}
]
},
"Names": {
"description": "The name of the antigen/binding target, followed by any pseudonyms separated by commas. Any greek characters are expanded to their Latin-script alphabet names e.g. alpha, beta, gamma.",
"$comment": "consider splitting the string of names into an array",
"examples": [
"Interleukin 31",
"T-cell surface glycoprotein CD3 epsilon chain, T-cell surface antigen T3/Leu-4 epsilon chain, CD3e",
"Interleukin-5, IL-5, B-cell differentiation factor I, Eosinophil differentiation factor, T-cell replacing factor",
"Receptor tyrosine-protein kinase erbB-2, metastatic lymph node gene 19 protein, MLN19, Proto-oncogene Neu, Proto-oncogene c-ErbB-2, Tyrosine kinase-type cell surface receptor HER2, epidermal growth factor receptor 2, p185erbB2, CD340"
],
"type": "string",
"minLength": 3
},
"Note": {
"$ref": "#/$defs/note"
},
"Species": {
"description": "The common name of the species in which the antigen/binding target is found",
"$comment": "could add a pattern or enumerated list of possible species (`human` and `dog` are the only ones in the sample files)",
"examples": [
"human",
"dog"
],
"type": "string",
"minLength": 3
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"^CDRKabat(L|H)[1-3]$": {
"description": "Specifies the amino acid sequence of one of the complementarity-determining regions (CDRs), a.k.a. hypervariable loops, as defined by the Kabat numbering scheme (which is based on sequence variability, as opposed to other schemes like Chothia or IMGT, which are based on structural features or a combination of sequence and structure). (For Ctrl+F searches) Properties covered by this pattern: CDRKabatL1, CDRKabatL2, CDRKabatL3, CDRKabatH1, CDRKabatH2, CDRKabatH3.",
"examples": [
[
{
"End": 102,
"Instance": 1,
"Sequence": "QGYFYFISRTYVNS",
"Start": 89
},
{
"End": 964,
"Instance": 5,
"Sequence": "QQGYSWGNVDNV",
"Start": 953
},
{
"End": 1220,
"Instance": 7,
"Sequence": "QSTYLGTDYVGGA",
"Start": 1208
},
{
"End": 97,
"Instance": 8,
"Sequence": "QQFNSYPFT",
"Start": 89
}
],
[
{
"End": 197,
"Instance": 2,
"Sequence": "VITGRDITYYASWAKG",
"Start": 182
},
{
"End": 328,
"Instance": 3,
"Sequence": "IIYPDDSDTRYSPSFQG",
"Start": 312
},
{
"End": 790,
"Instance": 4,
"Sequence": "CIAAGSAGITYDANWAKG",
"Start": 773
},
{
"End": 1048,
"Instance": 6,
"Sequence": "TISSGGNVYYASSARG",
"Start": 1033
}
]
],
"items": {
"type": "object",
"required": [
"Instance",
"Sequence",
"Start",
"End"
],
"properties": {
"End": {
"$ref": "#/$defs/endOfRange"
},
"Instance": {
"$ref": "#/$defs/singleInstance"
},
"Note": {
"$ref": "#/$defs/note"
},
"Sequence": {
"$ref": "#/$defs/aminoAcidSequence"
},
"Start": {
"$ref": "#/$defs/startOfRange"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"^DisulfidesInterL[1-9]H[1-9]$": {
"description": "Lists residue pairs involved in inter-chain disulfides between a heavy and light chain (which often have the same instance number, as in standard formats, but this isn't the case for most other formats). Uses sequential numbering as observed in the final protein. Separate items are used in cases of multiple disulfide isoforms, and these are indicated in the optional `Isoform` subproperty at the root level of the items (not within the `Positions` items). In most cases, a property matching this pattern (usually with the `L1H1` or `L2H2` suffix) will have only one item (if there are no isoforms) with only one pair of positions (if no additional disulfides have been introduced compared to natural IgGs). (For Ctrl+F searches) Properties covered by this pattern: DisulfidesInterL1H1, DisulfidesInterL2H2, DisulfidesInterL3H3, (potentially more)...",
"$comment": "TODO: We could consider aggregating all of the DisulfidesInter* properties by moving the chain pair suffix out of the property name and into an item subproperty",
"examples": [
[
{
"InstanceH": 3,
"InstanceL": 8,
"Positions": [
{
"H": 486,
"L": 214
}
]
}
],
[
{
"InstanceH": 0,
"InstanceL": 0,
"Positions": null
}
]
],
"items": {
"type": "object",
"required": [
"InstanceL",
"InstanceH",
"Positions"
],
"properties": {
"InstanceH": {
"description": "The heavy chain instance (chain or single-origin portion of a fused chain) of a light-heavy pair with inter-chain disulfides",
"type": "integer",
"minimum": 0
},
"InstanceL": {
"description": "The light chain instance (chain or single-origin portion of a fused chain) of a light-heavy pair with inter-chain disulfides",
"type": "integer",
"minimum": 0
},
"Isoform": {
"$ref": "#/$defs/isoform"
},
"Note": {
"$ref": "#/$defs/note"
},
"Positions": {
"$ref": "#/$defs/disulfidesInterPositionsLH"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"^FusionProtein(Heavy|Light)?$": {
"description": "Indicates a range of residues within a chain that are part of a fusion protein. When the antibody part of the chain cannot be assigned as either predominantly heavy or light, the (Heavy|Light) suffix is left out of the property name. (For Ctrl+F searches) Properties covered by this pattern: FusionProtein, FusionProteinHeavy, FusionProteinLight.",
"examples": [
[
{
"End": 525,
"Instance": 1,
"Start": 1
}
],
[
{
"End": 970,
"Instance": 2,
"Multimer": "trimer",
"Start": 443
}
]
],
"items": {
"type": "object",
"required": [
"Instance",
"Start",
"End"
],
"properties": {
"End": {
"$ref": "#/$defs/endOfRange"
},
"Instance": {
"$ref": "#/$defs/singleInstance"
},
"Multimer": {
"description": "An optional indicator of the homo-multimerization state of the fusion protein",
"examples": [
"dimer",
"trimer",
"tetramer"
],
"type": "string",
"pattern": "^(di|tri|tetra|penta|hexa|hepta|octa|nona|deca)mer$"
},
"Note": {
"$ref": "#/$defs/note"
},
"Start": {
"$ref": "#/$defs/startOfRange"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"^FusionProtein(Heavy|Light)?Linker|Linker$": {
"description": "A range indicating the residue positions of the linker between two consecutive instances in a fused chain. `Linker` is used for connections between antibody domains, while `FusionProtein(Heavy|Light)?Linker` is used for connections between a non-antibody fusion protein and an antibody chain (heavy, light, or neither predominantly heavy or light). (For Ctrl+F searches) Properties covered by this pattern: Linker, FusionProteinLinker, FusionProteinHeavyLinker, FusionProteinLightLinker.",
"examples": [
[
{
"End": 132,
"Instances": [
1,
2
],
"Start": 113
},
{
"End": 262,
"Instances": [
2,
3
],
"Start": 253
},
{
"End": 722,
"Instances": [
3,
4
],
"Start": 712
}
]
],
"items": {
"type": "object",
"required": [
"Instances",
"Start",
"End"
],
"properties": {
"End": {
"$ref": "#/$defs/endOfRange"
},
"Instances": {
"maxItems": 2,
"minItems": 2,
"allOf": [
{
"$ref": "#/$defs/multiInstances"
}
]
},
"Note": {
"$ref": "#/$defs/note"
},
"Start": {
"$ref": "#/$defs/startOfRange"
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"^Mutation(H|L)?$": {
"description": "Records the purpose of a set of mutations. The mutations are grouped according to their shared purpose, rather than all mutations in the same instance of the antibody. The H|L suffix is used or left out of the property name depending on if the chain is predominantly heavy, light, or neither. (For Ctrl+F searches) Properties covered by this pattern: Mutation, MutationH, MutationL.",
"examples": [
[
{
"Instance": 3,
"Mutations": [
"L500A",
"L501A",
"G503A",
"K588A"
],
"Reason": "Reduce ADCC and CDC"
},
{
"Instance": 3,
"Mutations": [
"G712del",
"K712del"
],
"Reason": "Remove CHS"
}
]
],
"items": {
"type": "object",
"required": [
"Instance",
"Mutations",
"Reason"
],
"properties": {
"Instance": {
"$ref": "#/$defs/singleInstance"
},
"Mutations": {
"$ref": "#/$defs/mutationsAllTypes"
},
"Note": {
"$ref": "#/$defs/note"
},
"Reason": {
"description": "A concise summary of the purpose of the mutation(s)",
"examples": [
"Enforce pairing",
"disulfide",
"heterodimer formation hole",
"remove glycosylation site",
"hinge stabilization"
],
"type": "string",
"minLength": 1
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
}
},
"unevaluatedProperties": false,
"required": [
"Request",
"Format",
"Type"
],
"allOf": [
{
"anyOf": [
{
"required": [
"ChainLength"
]
},
{
"required": [
"HeavyChainLength"
]
},
{
"required": [
"LightChainLength"
]
}
]
},
{
"anyOf": [
{
"required": [
"NGlycos"
]
},
{
"required": [
"HeavyNGlycos"
]
},
{
"required": [
"LightNGlycos"
]
}
]
},
{
"anyOf": [
{
"required": [
"CysPositions"
]
},
{
"required": [
"HeavyCysPositions"
]
},
{
"required": [
"LightCysPositions"
]
}
]
},
{
"anyOf": [
{
"required": [
"DisulfidesIntra"
]
},
{
"required": [
"HeavyDisulfidesIntra"
]
},
{
"required": [
"LightDisulfidesIntra"
]
}
]
},
{
"anyOf": [
{
"required": [
"HVGermline"
]
},
{
"required": [
"HJGermline"
]
},
{
"required": [
"HCGermline"
]
},
{
"required": [
"LVGermline"
]
},
{
"required": [
"LJGermline"
]
},
{
"required": [
"LCGermline"
]
}
]
},
{
"anyOf": [
{
"required": [
"VHRange"
]
},
{
"required": [
"CH1Range"
]
},
{
"required": [
"CH2Range"
]
},
{
"required": [
"CH3Range"
]
},
{
"required": [
"CH4Range"
]
},
{
"required": [
"VLRange"
]
},
{
"required": [
"CLRange"
]
}
]
}
],
"$defs": {
"aminoAcidSequence": {
"description": "An amino acid sequence using single-letter codes for the 20 natural amino acids plus X for non-natural",
"examples": [
"QASQSISSHLN",
"EASKLAS"
],
"type": "string",
"pattern": "^[ACDEFGHIKLMNPQRSTVWXY]+$",
"minLength": 1
},
"disulfidesInterPositionsLH": {
"description": "Each item (if not null) describes one disulfide-linked residue pair (between a light and heavy chain) with a chain indicator name and residue position value for each of the two residues. Uses sequential numbering as observed in the final protein. null is also valid (instead of a `Positions` array).",
"$comment": "The same as `#/$defs/disulfidesInterPositionsXY`, except that the chain indicators L and H are used instead of the more generic X and Y",
"anyOf": [
{
"type": "null"
},
{
"items": {
"type": "object",
"required": [
"L",
"H"
],
"properties": {
"H": {
"type": "integer",
"minimum": 1
},
"L": {
"type": "integer",
"minimum": 1
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
}
]
},
"disulfidesInterPositionsXY": {
"description": "Each item (if not null) describes one disulfide-linked residue pair with a chain indicator name and residue position value for each of the two residues. Uses sequential numbering as observed in the final protein. null is also valid (instead of a `Positions` array).",
"$comment": "The same as `#/$defs/disulfidesInterPositionsLH`, except that the chain indicators X and Y are used instead of L and H",
"anyOf": [
{
"type": "null"
},
{
"items": {
"type": "object",
"required": [
"X",
"Y"
],
"properties": {
"X": {
"type": "integer",
"minimum": 1
},
"Y": {
"type": "integer",
"minimum": 1
}
},
"unevaluatedProperties": false
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
}
]
},
"disulfidesIntraPositions": {
"description": "The positions of pairs of residues involved in disulfide bonds. Uses sequential numbering as observed in the final protein.",
"examples": [
[
[
23,
88
]
],
[
[
284,
358
],
[
410,
466
],
[
527,
587
],
[
633,
691
]
]
],
"items": {
"type": "array",
"maxItems": 2,
"minItems": 2,
"uniqueItems": true,
"items": {
"type": "integer",
"minimum": 1
}
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"endOfRange": {
"description": "The position of the last residue in a range, using sequential numbering as observed in the final protein",
"type": "integer",
"minimum": 2
},
"glycosConfirmed": {
"description": "An array of residue positions that have been confirmed by the INN applicant as being glycosylated, or null if there are none. Uses sequential numbering as observed in the final protein.",
"anyOf": [
{
"type": "null"
},
{
"$ref": "#/$defs/residuePositionsArray"
}
]
},
"glycosConfirmedPartial": {
"description": "An optional array of residue positions that have been experimentally confirmed by the INN applicant as being glycosylated with an intermediate occupancy (less than complete but more than 'rare'). Uses sequential numbering as observed in the final protein.",
"$comment": "TODO: check the provenance of the frequency/occupancy qualifier terms ('rare' and 'partial') and if they correspond to precise ranges e.g. 1-10% for rare and 10-95% for partial",
"allOf": [
{
"$ref": "#/$defs/residuePositionsArray"
}
]
},
"glycosConfirmedRare": {
"description": "An optional array of residue positions that have been experimentally confirmed by the INN applicant as being glycosylated with a low occupancy (non-zero but less than 'partial'). Uses sequential numbering as observed in the final protein.",
"$comment": "TODO: check if other frequency terms are possible (e.g. `common`, `complete`/`full`, `uncertain`), in which case more `Confirmed[Freq]` properties are needed",
"allOf": [
{
"$ref": "#/$defs/residuePositionsArray"
}
]
},
"glycosPotential": {
"description": "An array of residue positions that have been identified by the INN applicant as being potentially glycosylated (based on sequence motifs or homology with antibodies with confirmed sites), or null if there are none. Uses sequential numbering as observed in the final protein.",
"anyOf": [
{
"type": "null"
},
{
"$ref": "#/$defs/residuePositionsArray"
}
]
},
"heavyDomains": {
"description": "An optional indicator of the domain(s), for cases when the constant heavy domains come from different germline loci",
"examples": [
[
"CH1"
],
[
"CH2",
"CH3"
]
],
"type": "array",
"minItems": 1,
"uniqueItems": true,
"items": {
"$comment": "not sure if `hinge` should be allowed",
"enum": [
"CH1",
"hinge",
"CH2",
"CH3",
"CH4"
]
}
},
"imgtGeneID": {
"description": "The IMGT gene name, including the allele number; see https://www.imgt.org/IMGTScientificChart/Nomenclature/IMGTnomenclature.php for details about the nomenclature",
"$comment": "TODO: consider a simpler pattern (e.g. `^IG[HKL][GCVJ][0-9D-]*\\*0[1-9]$`), or more specific validation - a different GeneID pattern for each of the six *Germline records",
"examples": [
"IGHG1*03",
"IGHJ3*01",
"IGHV4-31*02",
"IGHV3-30-3*01",
"IGKC*01",
"IGKV3D-7*01",
"IGLJ7*01"
],
"type": "string",
"pattern": "^IG[HKL][GCVJ][0-9]{0,2}D?(-[0-9]{0,3}){0,2}\\*0[1-9]$"
},
"isoform": {
"description": "Optional indicator used to differentiate between disulfide isoforms",
"type": "string",
"pattern": "^[A-Z]$"
},
"lightDomain": {
"description": "An optional indicator of the domain for cases when the variable and constant light domains come from different germline loci",
"enum": [
"VL",
"CL"
]
},
"multiInstances": {
"description": "The instance(s) (chain or single-origin portion of a fused chain) to which an item's other properties apply. A single-item array with the integer value 0 is used as the default for standard antibodies without any other instances.",
"$comment": "This definition is always referenced together with the `minItems` and/or `maxItems` keywords. This is to disallow an empty array and to further constrain the number of instances for some properties (e.g. `Fusion` items should have at least 2 instances, `Antigen` and `CDRSource` should have either one or two instances, and `Linker` must have exactly two instances for each annotation.",
"type": "array",
"uniqueItems": true,
"items": {
"type": "integer",
"minimum": 0
}
},
"mutationsAllTypes": {
"description": "Uses standard mutation nomenclature with 1-letter amino acid codes. Consecutive deleted residues are listed individually.",
"$comment": "Referenced in `#/patternProperties/^Mutation(H|L)?$` and `#/patternProperties/^(CH1|Hinge|CH2|CH3|CH4|CL)Range$`. The three patterns in the anyOf statement are for substitutions, insertions and deletions respectively.",
"examples": [
[
"C248S",
"ins259P",
"ins260C"
],
[
"T109A"
],
[
"Q125-126del",
"K712del"
]
],
"items": {
"type": "string",
"anyOf": [
{
"pattern": "^[A-Z][0-9]{1,4}[A-Z]$"
},
{
"pattern": "^ins[0-9]{1,4}[A-Z]$"
},
{
"pattern": "^[A-Z][0-9]{1,4}(-[0-9]{1,4})?del$"
}
]
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"note": {
"description": "An optional note that provides further context, detail, or clarification on the information in this item",
"type": "string",
"minLength": 2
},
"ptmPositions": {
"description": "An array of residue positions that have been confirmed by the INN applicant as PTM sites. Uses sequential numbering as observed in the final protein.",
"$comment": "Note that, unlike for glycosylation properties, null values are not allowed here - instead the item subproperty or whole property will simply be absent from the JSON instance if there are no such annotations",
"allOf": [
{
"$ref": "#/$defs/residuePositionsArray"
}
]
},
"ptmPositionsPartial": {
"description": "An optional array of residue positions that have been experimentally confirmed by the INN applicant as PTM sites with an intermediate occupancy (less than complete but more than 'rare'). Uses sequential numbering as observed in the final protein.",
"$comment": "TODO: check the provenance of the frequency/occupancy qualifier terms ('rare' and 'partial') and if they correspond to precise ranges e.g. 1-10% for rare and 10-95% for partial",
"allOf": [
{
"$ref": "#/$defs/residuePositionsArray"
}
]
},
"ptmPositionsRare": {
"description": "An optional array of residue positions that have been experimentally confirmed by the INN applicant as PTM sites with a low occupancy (non-zero but less than 'partial'). Uses sequential numbering as observed in the final protein.",
"$comment": "TODO: check if other frequency terms are possible (e.g. `common`, `complete`/`full`, `uncertain`), in which case more `Positions[Freq]` properties are needed",
"allOf": [
{
"$ref": "#/$defs/residuePositionsArray"
}
]
},
"ptmType": {
"description": "The type of post-translational modification (PTM)",
"$comment": "TODO: confirm that only single words are allowed (no whitespace)",
"examples": [
"cterclip",
"deamidation",
"formylglycine",
"glycation",
"hydroxylation",
"isomerization",
"nterdeformylate",
"nterpca",
"oxidation"
],
"type": "string",
"pattern": "^\\S{1,30}$"
},
"residuePositionsArray": {
"examples": [
[
6,
90,
300,
488,
512
],
[
154
]
],
"items": {
"type": "integer",
"minimum": 1
},
"allOf": [
{
"$ref": "#/$defs/uniqueNonemptyArray"
}
]
},
"singleInstance": {
"description": "The instance (chain or single-origin portion of a fused chain) to which an item's other properties apply. The integer value 0 is used as the default for standard antibodies without any other instances.",
"$comment": "Used as an item subproperty in all properties where each item (set of annotations) only ever applies to a single instance. Note that the term `instance` is specific to the antibody data and is not used here in the JSON Schema sense (a json document that is to be validated against this schema).",
"examples": [
2,
0
],
"type": "integer",
"minimum": 0
},
"speciesBinomial": {
"description": "The binomial name of a species",
"$comment": "The common name (e.g. human) is used for the `Species` subproperty of `#/patternProperties/^Antigen|Binding$` instead of this def",
"examples": [
"Homo sapiens",
"Canis lupus",
"Mus musculus",
"Rattus norvegicus",
"Lama glama",
"Oryctolagus cuniculus"
],
"type": "string",
"pattern": "^[A-Z][a-z]+\\s[a-z]+$"
},
"startOfRange": {
"description": "The position of the first residue in a range, using sequential numbering as observed in the final protein",
"type": "integer",
"minimum": 1
},
"uniqueNonemptyArray": {
"$comment": "Used as a base schema in all properties that can (but don't necessarily always) have multiple (instance-specific) annotations per antibody. The exceptions are `Request`, `Format`, `ID`, `AbML`, `HeavySource`, `LightSource`, and the patternProperties `^(Request|ID|AbML|(Heavy|Light)Source)-Note$`, which are string types as there is only one annotation per antibody. This def is also referenced in several other defs (for item properties).",
"type": "array",
"minItems": 1,
"uniqueItems": true
}
}
}