-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathEGA.analysis.json
293 lines (283 loc) · 15.3 KB
/
EGA.analysis.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://raw.githubusercontent.com/EbiEga/ega-metadata-schema/main/schemas/EGA.analysis.json",
"type": "object",
"title": "EGA analysis metadata schema",
"meta:version": "0.0.0",
"$async": true,
"description": "Metadata schema used by the European Genome-phenome Archive (EGA) to validate its analysis metadata object. This object is intended to contain metadata about a detailed examination of data (mainly data processing protocols) in order to come to some conclusion. It can be of different types (e.g. sequence variation, sequence alignment, phenotype characterization, gene expression, etc.) that will mainly differ in the protocols used to achieve the processed data of the analysis. Further details can be found in the EGA-metadata-schema GitHub repository (https://github.com/EbiEga/ega-metadata-schema/tree/main/schemas) and EGA-archive website (https://ega-archive.org/)",
"required": ["objectId", "analysisTypeSpecifications" ],
"additionalProperties": false,
"properties": {
"objectId": {
"type": "object",
"title": "Object's IDs block",
"description": "Node containing the main identifiers of the object (e.g. alias, centerName...), inherited from the common definitions.",
"allOf": [
{
"title": "Inherited objectCoreId object",
"$ref": "./EGA.common-definitions.json#/$defs/objectCoreId"
},
{
"title": "Check that analysis EGA ID (EGAZ) is correct",
"properties": {
"egaAccession": {
"$ref": "./EGA.common-definitions.json#/$defs/EGAAnalysisIdPattern"
}
}
}
]
},
"schemaDescriptor": {
"title": "Schema descriptor node",
"description": "Inherited schema descriptor node containing metadata about the schemas and standards used to create the JSON document itself.",
"$ref": "./EGA.common-definitions.json#/$defs/schemaDescriptor"
},
"objectTitle": {
"type": "string",
"title": "Title of the analysis",
"description": "An informative analysis title that should serve as an overview of the analysis, including: performed analysis, samples, purpose... (e.g. 'Variant calling analysis of tumor repressed cells'). This short text can be used to call out analyses records in searches or in displays.",
"minLength": 1,
"examples": [ "Variant calling analysis of tumor repressed cells" ]
},
"objectDescription": {
"type": "string",
"title": "Description of the analysis",
"description": "An in-depth description of the biological relevance and intent of the analysis, including its workflow.",
"minLength": 1,
"examples": [ "The analysis was conducted with the objective of... ...and for that purpose we compared untreated controls against..." ]
},
"targetedLoci": {
"type": "array",
"title": "Loci of the targeted genomic feature",
"description": "Array of items that unambiguously define the loci of targeted genomic features in the analysis. For example, if the aim of the analysis was to detect variants in the human gene TAF1 and TP53, their identifiers will be expected in two items of this array.",
"minItems": 1,
"uniqueItems": true,
"additionalProperties": false,
"items": {
"$ref": "./EGA.common-definitions.json#/$defs/locusIdentifier"
}
},
"typesOfInputData": {
"type": "array",
"title": "Types of input data",
"description": "Types of input data the analysis uses to obtain the processed files.",
"additionalProperties": false,
"uniqueItems": true,
"minItems": 1,
"items": {
"$ref": "./EGA.common-definitions.json#/$defs/typeOfData"
}
},
"typesOfOutputData": {
"type": "array",
"title": "Types of output data",
"description": "Types of output data the analysis uses to obtain the processed files.",
"additionalProperties": false,
"uniqueItems": true,
"minItems": 1,
"items": {
"$ref": "./EGA.common-definitions.json#/$defs/typeOfData"
}
},
"analysisTypeSpecifications": {
"type": "object",
"title": "Analysis type specifications",
"description": "Node containing different sets of fields that depend on the specific analysis type. Depending on the analysis types different metadata will be required.",
"additionalProperties": false,
"required": ["analysisTypes"],
"properties": {
"analysisTypes": {
"type": "array",
"title": "List of analysis types",
"description": "Array of all analysis types applicable to this analysis. Details on how the analysis was performed (instruments, software, procedure...) shall be included in the 'analysis_protocols' field, not here. For example, if the analysis includes sequence variation files (e.g. VCF) that were obtained by a sequencing assay (i.e. from the sequenced reads), at least the analysis type 'sequence variation' would be expected. Furthermore, depending on the types of analysis, different details may be required (e.g. reference sequence details in a 'sequence alignment' type).",
"additionalProperties": false,
"uniqueItems": true,
"minItems": 1,
"items": {
"type": "string",
"title": "Type of analysis",
"description": "Overall type of the analysis. Term chosen from a controlled vocabulary (CV) list. If you cannot find your term in the CV list, please create an issue at our [metadata GitHub repository](https://github.com/EbiEga/ega-metadata-schema/issues/new/choose) proposing its addition.",
"enum": [ "sequence variation", "sequence alignment", "phenotype characterization", "sequence annotation", "sequence assembly", "gene expression" ],
"meta:enum": {
"sequence variation": "sequence variation__ERO:0100211: Analysis of variations at specific loci in the genomes of organisms (mutation and polymorphism) across or within a species, population, or individual (e.g healthy vs diseased tissue).",
"sequence alignment": "sequence alignment__ERO:0100032: objective to display graphically how the sequences of two or more macromolecules align along a linear axis.",
"phenotype characterization": "phenotype characterization__ERO:0000923: The result of an organismal assay that involves characterization of a phenotype; any observable characteristic or trait of an organism: such as its morphology, development, biochemical or physiological properties, behavior, and products of behavior (such as a bird's nest). Phenotypes result from the expression of an organism's genes as well as the influence of environmental factors and the interactions between the two.",
"sequence annotation": "sequence annotation__operation:0361: Analysis where molecular sequence records are annotated with terms from a controlled vocabulary. For submitting sequence annotation files, which are usually 'tab' files. Examples include gene count and OTU tables from metagenomic studies.",
"sequence assembly": "sequence assembly__topic:0196: The assembly of fragments of a DNA sequence to reconstruct the original sequence.",
"gene expression": "gene expression__topic:0203: The analysis of levels and patterns of synthesis of gene products (proteins and functional RNA) including interpretation in functional terms of gene expression data."
}
}
},
"referenceAlignmentDetails": {
"title": "Reference assembly and sequence details",
"description": "Node containing details of the reference sequence used in the alignment of raw sequences.",
"$ref": "./EGA.common-definitions.json#/$defs/referenceAlignmentDetails"
}
}
},
"analysisFiles": {
"type": "array",
"title": "Files of the analysis",
"description": "This property contains the files derived from performing any processing or analysis over raw data (e.g. VCF, aligned BAM...) and those that add context to it (e.g. CSV, TXT...).",
"minItems": 1,
"additionalProperties": false,
"uniqueItems": true,
"items": {
"$ref": "./EGA.common-definitions.json#/$defs/fileObject"
}
},
"analysisRelationships": {
"type": "array",
"title": "Analysis relationships",
"description": "Comprises metadata (e.g. Source or Target) of a directional association between two entities. This relationships node contains all the possible relationships between metadata objects, both outside of (e.g. an Array Design Format that was submitted to ArrayExpress being linked to their microarray data within EGA) and within (e.g. an analysis being linked to a Sample) the EGA.",
"minItems": 1,
"uniqueItems": true,
"additionalProperties": false,
"items": {
"allOf": [
{
"title": "Inherited relationship node",
"$ref": "./EGA.common-definitions.json#/$defs/relationshipObject"
},
{
"title": "Relationship constraints for an analysis",
"description": "Not all possible relationships between objects are allowed (e.g. an individual should not be linked to a policy). This node contains the restricted relationships that can be given for a analysis.",
"anyOf": [
{
"title": "Allowed relationships of type referencedBy (main ones)",
"allOf": [
{
"$ref": "./EGA.common-definitions.json#/$defs/rTypeReferencedBy"
},
{
"anyOf": [
{
"$ref": "./EGA.common-definitions.json#/$defs/rSourceStudy"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rSourceSample"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rSourceExperiment"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rSourceAssay"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rTargetDataset"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rSourceSubmission"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rSourceProtocol"
},
{
"title": "Optional one, added here to simplify",
"$ref": "./EGA.common-definitions.json#/$defs/rTargetAnalysis"
}
]
}
]
},
{
"title": "Allowed relationships of type groupedWith, isAfter, sameAs (optional ones)",
"allOf": [
{
"anyOf": [
{
"$ref": "./EGA.common-definitions.json#/$defs/rTypeGroupedWith"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rTypeIsAfter"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rTypeSameAs"
}
]
},
{
"anyOf": [
{
"$ref": "./EGA.common-definitions.json#/$defs/rSourceAnalysis"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rTargetAnalysis"
}
]
}
]
},
{
"title": "Relationships of external accessions and URLs (optional ones)",
"description": "Almost any relationship is imaginable with external accessions and URLs.",
"allOf": [
{
"anyOf": [
{
"$ref": "./EGA.common-definitions.json#/$defs/rTypeChildOf"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rTypeFamilyRelationshipWith"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rTypeGroupedWith"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rTypeSameAs"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rTypeReferencedBy"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rTypeDevelopsFrom"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rTypeMemberOf"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rTypeIsAfter"
}
]
},
{
"anyOf": [
{
"$ref": "./EGA.common-definitions.json#/$defs/rSourceExternalAccession"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rSourceExternalURL"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rTargetExternalAccession"
},
{
"$ref": "./EGA.common-definitions.json#/$defs/rTargetExternalURL"
}
]
}
]
}
]
}
]
},
"contains": {
"title": "Constraint to have at least one 'submission' relationship",
"$ref": "./EGA.common-definitions.json#/$defs/rConstraintOneSourcedSubmission"
}
},
"analysisAttributes": {
"type": "array",
"title": "Analysis custom attributes",
"description": "Custom attributes of an analysis: reusable attributes to encode tag-value pairs (e.g. Tag being 'internal tag' and its Value 'this analysis is corresponds to internal tag XYZ') with optional units. Its properties are inherited from the common-definitions.json schema.",
"additionalProperties": false,
"uniqueItems": true,
"minItems": 1,
"items": {
"$ref": "./EGA.common-definitions.json#/$defs/customAttribute"
}
}
}
}