diff --git a/.env.example b/.env.example index 70b034eb..2d96dd44 100644 --- a/.env.example +++ b/.env.example @@ -23,6 +23,6 @@ API_SITE_ID=nmdc-runtime API_SITE_CLIENT_ID=generateme API_SITE_CLIENT_SECRET=generateme -MINTING_SERVICE_ID=feedme +MINTING_SERVICE_ID=nmdc:minter_service_11 DAGIT_HOST=http://dagster-dagit:3000 \ No newline at end of file diff --git a/.env.test b/.env.test index a07fa3ca..d5bf2fcd 100644 --- a/.env.test +++ b/.env.test @@ -5,7 +5,7 @@ MONGO_HOST=mongodb://mongo:27017 MONGO_USERNAME=admin MONGO_PASSWORD=root MONGO_DBNAME=nmdc -MONGO_TEST_DBNAME="nmdc-test" +MONGO_TEST_DBNAME=nmdc-test JWT_SECRET_KEY=2ff256fa4f332ef218192438bdf605f32292961afa86ce1b87a58e2cdc740fb3 @@ -18,4 +18,4 @@ API_SITE_ID=nmdc-runtime API_SITE_CLIENT_ID=chws-tk74-51 API_SITE_CLIENT_SECRET=070d56033559e3096d4f82d30aabdd51c3b8a57ac20d24f743a27444d5edc3db -MINTING_SERVICE_ID="nmdc:nt-11-zfj0tv58" \ No newline at end of file +MINTING_SERVICE_ID=nmdc:minter_service_11 \ No newline at end of file diff --git a/Makefile b/Makefile index 786057f5..ceaa97b4 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ up-test: test-build: docker compose --file docker-compose.test.yml \ - up test --build --force-recreate --detach + up test --build --force-recreate --detach --remove-orphans test-dbinit: docker compose --file docker-compose.test.yml \ diff --git a/nmdc_runtime/api/db/mongo.py b/nmdc_runtime/api/db/mongo.py index ba0f4a7b..dfd3b7b3 100644 --- a/nmdc_runtime/api/db/mongo.py +++ b/nmdc_runtime/api/db/mongo.py @@ -3,7 +3,7 @@ from typing import Set from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorDatabase -from nmdc_schema.nmdc_data import get_nmdc_jsonschema_dict +from nmdc_runtime.util import get_nmdc_jsonschema_dict from pymongo import MongoClient from pymongo.database import Database as MongoDatabase diff --git a/nmdc_runtime/api/endpoints/find.py b/nmdc_runtime/api/endpoints/find.py index 10afdf32..004633c7 100644 --- a/nmdc_runtime/api/endpoints/find.py 
+++ b/nmdc_runtime/api/endpoints/find.py @@ -2,7 +2,7 @@ from fastapi import APIRouter, Depends, Form from jinja2 import Environment, PackageLoader, select_autoescape -from nmdc_schema.nmdc_data import get_nmdc_jsonschema_dict +from nmdc_runtime.util import get_nmdc_jsonschema_dict from pymongo.database import Database as MongoDatabase from starlette.responses import HTMLResponse from toolz import merge, assoc_in diff --git a/nmdc_runtime/api/endpoints/metadata.py b/nmdc_runtime/api/endpoints/metadata.py index fcc27e58..d573ac3f 100644 --- a/nmdc_runtime/api/endpoints/metadata.py +++ b/nmdc_runtime/api/endpoints/metadata.py @@ -29,7 +29,7 @@ from nmdc_runtime.site.drsobjects.registration import specialize_activity_set_docs from nmdc_runtime.site.repository import repo, run_config_frozen__normal_env from nmdc_runtime.util import unfreeze -from nmdc_schema.nmdc_data import get_nmdc_jsonschema_dict +from nmdc_runtime.util import get_nmdc_jsonschema_dict from pymongo import ReturnDocument from pymongo.database import Database as MongoDatabase from starlette import status diff --git a/nmdc_runtime/minter/adapters/repository.py b/nmdc_runtime/minter/adapters/repository.py index f4356bd7..62211d80 100644 --- a/nmdc_runtime/minter/adapters/repository.py +++ b/nmdc_runtime/minter/adapters/repository.py @@ -19,6 +19,10 @@ from nmdc_runtime.util import find_one +class MinterError(Exception): + pass + + class IDStore(abc.ABC): @abc.abstractmethod def mint(self, req_mint: MintingRequest) -> list[Identifier]: @@ -55,15 +59,15 @@ def __init__( def mint(self, req_mint: MintingRequest) -> list[Identifier]: if not find_one({"id": req_mint.service.id}, self.services): - raise Exception(f"Unknown service {req_mint.service.id}") + raise MinterError(f"Unknown service {req_mint.service.id}") if not find_one({"id": req_mint.requester.id}, self.requesters): - raise Exception(f"Unknown requester {req_mint.requester.id}") + raise MinterError(f"Unknown requester {req_mint.requester.id}") if 
not find_one({"id": req_mint.schema_class.id}, self.schema_classes): - raise Exception(f"Unknown schema class {req_mint.schema_class.id}") + raise MinterError(f"Unknown schema class {req_mint.schema_class.id}") # ensure supplied schema class has typecode typecode = find_one({"schema_class": req_mint.schema_class.id}, self.typecodes) if not typecode: - raise Exception( + raise MinterError( f"Cannot map schema class {req_mint.schema_class.id} to a typecode" ) @@ -90,7 +94,7 @@ def mint(self, req_mint: MintingRequest) -> list[Identifier]: def bind(self, req_bind: BindingRequest) -> Identifier: id_stored = self.resolve(req_bind) if id_stored is None: - raise Exception(f"ID {req_bind.id_name} is unknown") + raise MinterError(f"ID {req_bind.id_name} is unknown") match id_stored.status: case Status.draft: @@ -99,7 +103,7 @@ def bind(self, req_bind: BindingRequest) -> Identifier: ) return Identifier(**self.db[id_stored.id]) case _: - raise Exception("Status not 'draft'. Can't change bound metadata") + raise MinterError("Status not 'draft'. Can't change bound metadata") def resolve(self, req_res: ResolutionRequest) -> Union[Identifier, None]: doc = self.db.get(req_res.id_name) @@ -108,13 +112,13 @@ def resolve(self, req_res: ResolutionRequest) -> Union[Identifier, None]: def delete(self, req_del: DeleteRequest): id_stored = self.resolve(req_del) if id_stored is None: - raise Exception(f"ID {req_del.id_name} is unknown") + raise MinterError(f"ID {req_del.id_name} is unknown") match id_stored.status: case Status.draft: self.db.pop(id_stored.id) case _: - raise Exception("Status not 'draft'. Can't delete.") + raise MinterError("Status not 'draft'. 
Can't delete.") class MongoIDStore(abc.ABC): @@ -122,20 +126,24 @@ def __init__(self, mdb: MongoDatabase): self.db = mdb def mint(self, req_mint: MintingRequest) -> list[Identifier]: - if not self.db.services.find_one({"id": req_mint.service.id}): - raise Exception(f"Unknown service {req_mint.service.id}") - if not self.db.requesters.find_one({"id": req_mint.requester.id}): - raise Exception(f"Unknown requester {req_mint.requester.id}") - if not self.db.schema_classes.find_one({"id": req_mint.schema_class.id}): - raise Exception(f"Unknown schema class {req_mint.schema_class.id}") - typecode = self.db.typecodes.find_one( + if not self.db["minter.services"].find_one({"id": req_mint.service.id}): + raise MinterError(f"Unknown service {req_mint.service.id}") + if not self.db["minter.requesters"].find_one({"id": req_mint.requester.id}): + raise MinterError(f"Unknown requester {req_mint.requester.id}") + if not self.db["minter.schema_classes"].find_one( + {"id": req_mint.schema_class.id} + ): + raise MinterError(f"Unknown schema class {req_mint.schema_class.id}") + typecode = self.db["minter.typecodes"].find_one( {"schema_class": req_mint.schema_class.id} ) if not typecode: - raise Exception( + raise MinterError( f"Cannot map schema class {req_mint.schema_class.id} to a typecode" ) - shoulder = self.db.shoulders.find_one({"assigned_to": req_mint.service.id}) + shoulder = self.db["minter.shoulders"].find_one( + {"assigned_to": req_mint.service.id} + ) collected = [] while True: id_names = set() @@ -147,7 +155,9 @@ def mint(self, req_mint: MintingRequest) -> list[Identifier]: id_names = list(id_names) taken = { d["id"] - for d in self.db.id_records.find({"id": {"$in": id_names}}, {"id": 1}) + for d in self.db["minter.id_records"].find( + {"id": {"$in": id_names}}, {"id": 1} + ) } not_taken = [n for n in id_names if n not in taken] if not_taken: @@ -163,7 +173,7 @@ def mint(self, req_mint: MintingRequest) -> list[Identifier]: ) for id_name in not_taken ] - 
self.db.id_records.insert_many([i.dict() for i in ids]) + self.db["minter.id_records"].insert_many([i.dict() for i in ids]) collected.extend(ids) if len(collected) == req_mint.how_many: break @@ -172,34 +182,34 @@ def mint(self, req_mint: MintingRequest) -> list[Identifier]: def bind(self, req_bind: BindingRequest) -> Identifier: id_stored = self.resolve(req_bind) if id_stored is None: - raise Exception(f"ID {req_bind.id_name} is unknown") + raise MinterError(f"ID {req_bind.id_name} is unknown") match id_stored.status: case Status.draft: - return self.db.id_records.find_one_and_update( + return self.db["minter.id_records"].find_one_and_update( {"id": id_stored.id}, {"$set": {"bindings": req_bind.metadata_record}}, return_document=ReturnDocument.AFTER, ) case _: - raise Exception("Status not 'draft'. Can't change bound metadata") + raise MinterError("Status not 'draft'. Can't change bound metadata") def resolve(self, req_res: ResolutionRequest) -> Union[Identifier, None]: match re.match(r"nmdc:([^-]+)-([^-]+)-.*", req_res.id_name).groups(): case (_, _): - doc = self.db.id_records.find_one({"id": req_res.id_name}) + doc = self.db["minter.id_records"].find_one({"id": req_res.id_name}) # TODO if draft ID, check requester return Identifier(**doc) if doc else None case _: - raise Exception("Invalid ID name") + raise MinterError("Invalid ID name") def delete(self, req_del: DeleteRequest): id_stored = self.resolve(req_del) if id_stored is None: - raise Exception(f"ID {req_del.id_name} is unknown") + raise MinterError(f"ID {req_del.id_name} is unknown") match id_stored.status: case Status.draft: - self.db.id_records.delete_one({"id": id_stored.id}) + self.db["minter.id_records"].delete_one({"id": id_stored.id}) case _: - raise Exception("Status not 'draft'. Can't delete.") + raise MinterError("Status not 'draft'. 
Can't delete.") diff --git a/nmdc_runtime/minter/bootstrap.py b/nmdc_runtime/minter/bootstrap.py index 8f2d9f6c..0e33d8ce 100644 --- a/nmdc_runtime/minter/bootstrap.py +++ b/nmdc_runtime/minter/bootstrap.py @@ -13,7 +13,9 @@ def bootstrap(): "schema_classes", ]: for d in getattr(config, collection_name)(): - s.db[collection_name].replace_one({"id": d["id"]}, d, upsert=True) + s.db["minter." + collection_name].replace_one( + {"id": d["id"]}, d, upsert=True + ) site_ids = [d["id"] for d in mdb.sites.find({}, {"id": 1})] for sid in site_ids: - s.db.requesters.replace_one({"id": sid}, {"id": sid}, upsert=True) + s.db["minter.requesters"].replace_one({"id": sid}, {"id": sid}, upsert=True) diff --git a/nmdc_runtime/minter/config.py b/nmdc_runtime/minter/config.py index 902d11ac..9a6d1f74 100644 --- a/nmdc_runtime/minter/config.py +++ b/nmdc_runtime/minter/config.py @@ -1,11 +1,22 @@ +import json import os from functools import lru_cache +from pathlib import Path -from nmdc_schema.nmdc_data import get_nmdc_jsonschema_dict from pymongo import MongoClient from pymongo.database import Database as MongoDatabase +def get_nmdc_jsonschema_dict(): + """Get NMDC JSON Schema. 
+ + May replace this with `from nmdc_runtime.util import get_nmdc_jsonschema_dict` + once the whole codebase uses nmdc-schema~=v7.2.0 + """ + with (Path(__file__).parent / "nmdc.schema.json").open() as f: + return json.load(f) + + @lru_cache def get_mongo_db() -> MongoDatabase: _client = MongoClient( @@ -23,18 +34,29 @@ def minting_service_id(): @lru_cache() def typecodes(): - return [ - {"id": "nmdc:nt-11-gha2fh68", "name": "bsm", "schema_class": "nmdc:Biosample"}, - {"id": "nmdc:nt-11-rb11ex57", "name": "nt", "schema_class": "nmdc:NamedThing"}, - ] + rv = [] + schema_dict = get_nmdc_jsonschema_dict() + for cls_name, defn in schema_dict["$defs"].items(): + match defn.get("properties"): + case {"id": {"pattern": p}} if p.startswith("^(nmdc):"): + rv.append( + { + "id": "nmdc:" + cls_name + "_" + "typecode", + "schema_class": "nmdc:" + cls_name, + "name": p.split(":", maxsplit=1)[-1].split("-", maxsplit=1)[0], + } + ) + case _: + pass + return rv @lru_cache() def shoulders(): return [ { - "id": "nmdc:nt-11-6weqb260", - "assigned_to": "nmdc:nt-11-zfj0tv58", + "id": "nmdc:minter_shoulder_11", + "assigned_to": "nmdc:minter_service_11", "name": "11", }, ] @@ -42,20 +64,14 @@ def shoulders(): @lru_cache def services(): - return [{"id": "nmdc:nt-11-zfj0tv58", "name": "central minting service"}] + return [{"id": "nmdc:minter_service_11", "name": "central minting service"}] -@lru_cache def requesters(): - return [ - {"id": "nmdc-runtime"}, - ] + """not cached because sites are created dynamically""" + return [{"id": s["id"]} for s in get_mongo_db().sites.find()] @lru_cache() def schema_classes(): - return [ - {"id": f"nmdc:{k}"} - for k, v in get_nmdc_jsonschema_dict()["$defs"].items() - if "required" in v and "id" in v["required"] - ] + [{"id": "nmdc:NamedThing"}] + return [{"id": d["schema_class"]} for d in typecodes()] diff --git a/nmdc_runtime/minter/domain/model.py b/nmdc_runtime/minter/domain/model.py index 77745ae7..57bc213a 100644 --- 
a/nmdc_runtime/minter/domain/model.py +++ b/nmdc_runtime/minter/domain/model.py @@ -3,6 +3,8 @@ from pydantic import BaseModel, PositiveInt +from nmdc_runtime.minter.config import schema_classes + class Entity(BaseModel): """A domain object whose attributes may change but has a recognizable identity over time.""" @@ -31,7 +33,7 @@ class MintingRequest(ValueObject): class AuthenticatedMintingRequest(ValueObject): - schema_class: Entity = Entity(id="nmdc:NamedThing") + schema_class: Entity = Entity(id=schema_classes()[0]["id"]) how_many: PositiveInt = 1 @@ -64,3 +66,8 @@ class Identifier(Entity): shoulder: Entity status: Status bindings: Optional[dict] + + +class Typecode(Entity): + schema_class: str + name: str diff --git a/nmdc_runtime/minter/entrypoints/fastapi_app.py b/nmdc_runtime/minter/entrypoints/fastapi_app.py index eaa1197a..47f17d6e 100644 --- a/nmdc_runtime/minter/entrypoints/fastapi_app.py +++ b/nmdc_runtime/minter/entrypoints/fastapi_app.py @@ -7,7 +7,7 @@ from nmdc_runtime.api.core.util import raise404_if_none from nmdc_runtime.api.db.mongo import get_mongo_db from nmdc_runtime.api.models.site import get_current_client_site, Site -from nmdc_runtime.minter.adapters.repository import MongoIDStore +from nmdc_runtime.minter.adapters.repository import MongoIDStore, MinterError from nmdc_runtime.minter.config import minting_service_id, schema_classes from nmdc_runtime.minter.domain.model import ( Identifier, @@ -40,7 +40,10 @@ def mint_ids( MintingRequest(service=service, requester=requester, **req_mint.dict()) ) return [d.id for d in minted] - except Exception as e: + except MinterError as e: + raise HTTPException(status_code=status.HTTP_406_NOT_ACCEPTABLE, detail=str(e)) + + except Exception: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=traceback.format_exc(), diff --git a/nmdc_runtime/minter/nmdc.schema.json b/nmdc_runtime/minter/nmdc.schema.json new file mode 100644 index 00000000..71442f7a --- /dev/null +++ 
b/nmdc_runtime/minter/nmdc.schema.json @@ -0,0 +1,5573 @@ +{ + "$defs": { + "Activity": { + "additionalProperties": false, + "description": "a provence-generating activity", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):act-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "$ref": "#/$defs/Agent" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "Activity", + "type": "object" + }, + "Agent": { + "additionalProperties": false, + "description": "a provence-generating agent", + "properties": { + "acted_on_behalf_of": { + "$ref": "#/$defs/Agent" + }, + "was_informed_by": { + "type": "string" + } + }, + "title": "Agent", + "type": "object" + }, + "AnalysisTypeEnum": { + "description": "", + "enum": [ + "metabolomics", + "metagenomics", + "metaproteomics", + "metatranscriptomics", + "natural organic matter" + ], + "title": 
"AnalysisTypeEnum", + "type": "string" + }, + "AnalyticalSample": { + "additionalProperties": false, + "description": "", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):ansm-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "AnalyticalSample", + "type": "object" + }, + "ArchStrucEnum": { + "description": "", + "enum": [ + "building", + "shed", + "home" + ], + "title": "ArchStrucEnum", + "type": "string" + }, + "AttributeValue": { + "additionalProperties": false, + "description": "The value for any value of a attribute for a sample. This object can hold both the un-normalized atomic value and the structured value", + "properties": { + "has_raw_value": { + "description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. 
\"2 cm\" or \"2-4 cm\"", + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "title": "AttributeValue", + "type": "object" + }, + "BiolStatEnum": { + "description": "", + "enum": [ + "wild", + "natural", + "semi-natural", + "inbred line", + "breeder's line", + "hybrid", + "clonal selection", + "mutant" + ], + "title": "BiolStatEnum", + "type": "string" + }, + "Biosample": { + "additionalProperties": false, + "description": "Biological source material which can be characterized by an experiment.", + "properties": { + "add_date": { + "description": "The date on which the information was added to the database.", + "type": "string" + }, + "agrochem_addition": { + "$ref": "#/$defs/QuantityValue", + "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications" + }, + "air_temp_regm": { + "$ref": "#/$defs/QuantityValue", + "description": "Information about treatment involving an exposure to varying temperatures; should include the temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include different temperature regimens" + }, + "al_sat": { + "$ref": "#/$defs/QuantityValue", + "description": "Aluminum saturation (esp. 
For tropical soils)" + }, + "al_sat_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining Al saturation" + }, + "alkalinity": { + "$ref": "#/$defs/QuantityValue", + "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate" + }, + "alkalinity_method": { + "$ref": "#/$defs/TextValue", + "description": "Method used for alkalinity measurement" + }, + "alkyl_diethers": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of alkyl diethers" + }, + "alt": { + "$ref": "#/$defs/QuantityValue", + "description": "Altitude is a term used to identify heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air" + }, + "alternative_identifiers": { + "description": "Unique identifier for a biosample submitted to additional resources. 
Matches the entity that has been submitted to NMDC", + "items": { + "type": "string" + }, + "type": "array" + }, + "aminopept_act": { + "$ref": "#/$defs/QuantityValue", + "description": "Measurement of aminopeptidase activity" + }, + "ammonium": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of ammonium in the sample" + }, + "ammonium_nitrogen": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of ammonium nitrogen in the sample" + }, + "analysis_type": { + "description": "Select all the data types associated or available for this biosample", + "items": { + "$ref": "#/$defs/AnalysisTypeEnum" + }, + "type": "array" + }, + "annual_precpt": { + "$ref": "#/$defs/QuantityValue", + "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps." + }, + "annual_temp": { + "$ref": "#/$defs/QuantityValue", + "description": "Mean annual temperature" + }, + "bacteria_carb_prod": { + "$ref": "#/$defs/QuantityValue", + "description": "Measurement of bacterial carbon production" + }, + "biosample_categories": { + "items": { + "$ref": "#/$defs/BiosampleCategoryEnum" + }, + "type": "array" + }, + "biotic_regm": { + "$ref": "#/$defs/TextValue", + "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi." + }, + "biotic_relationship": { + "$ref": "#/$defs/TextValue", + "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" + }, + "bishomohopanol": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of bishomohopanol" + }, + "bromide": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of bromide" + }, + "calcium": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of calcium in the sample" + }, + "carb_nitro_ratio": { + "$ref": "#/$defs/QuantityValue", + "description": "Ratio of amount or concentrations of carbon to nitrogen" + }, + "chem_administration": { + "$ref": "#/$defs/ControlledTermValue", + "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi" + }, + "chloride": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of chloride in the sample" + }, + "chlorophyll": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of chlorophyll" + }, + "climate_environment": { + "$ref": "#/$defs/TextValue", + "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates" + }, + "collected_from": { + "description": "The Site from which a Biosample was collected", + "type": "string" + }, + "collection_date": { + "$ref": "#/$defs/TimestampValue", + "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant" + }, + "collection_date_inc": { + "description": "Date the incubation was harvested/collected/ended. Only relevant for incubation samples.", + "type": "string" + }, + "collection_time": { + "description": "The time of sampling, either as an instance (single point) or interval.", + "type": "string" + }, + "collection_time_inc": { + "description": "Time the incubation was harvested/collected/ended. Only relevant for incubation samples.", + "type": "string" + }, + "community": { + "type": "string" + }, + "crop_rotation": { + "$ref": "#/$defs/TextValue", + "description": "Whether or not crop is rotated, and if yes, rotation schedule" + }, + "cur_land_use": { + "$ref": "#/$defs/TextValue", + "description": "Present state of sample site" + }, + "cur_vegetation": { + "$ref": "#/$defs/TextValue", + "description": "Vegetation classification from one or more standard classification systems, or agricultural crop" + }, + "cur_vegetation_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in vegetation classification" + }, + "density": { + "$ref": "#/$defs/QuantityValue", + "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)" + }, + "depth": { + "$ref": "#/$defs/QuantityValue", + "description": "The vertical distance below local surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples." 
+ }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "diss_carb_dioxide": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample" + }, + "diss_hydrogen": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of dissolved hydrogen" + }, + "diss_inorg_carb": { + "$ref": "#/$defs/QuantityValue", + "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter" + }, + "diss_inorg_phosp": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of dissolved inorganic phosphorus in the sample" + }, + "diss_org_carb": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid" + }, + "diss_org_nitro": { + "$ref": "#/$defs/QuantityValue", + "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2" + }, + "diss_oxygen": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of dissolved oxygen" + }, + "dna_absorb1": { + "description": "260/280 measurement of DNA sample purity", + "type": "string" + }, + "dna_absorb2": { + "description": "260/230 measurement of DNA sample purity", + "type": "string" + }, + "dna_collect_site": { + "description": "Provide information on the site your DNA sample was collected from", + "type": "string" + }, + "dna_concentration": { + "maximum": 2000, + "minimum": 0, + "type": "string" + }, + "dna_cont_type": { + "$ref": "#/$defs/DnaContTypeEnum", + "description": "Tube or plate (96-well)" + }, + "dna_cont_well": { + "pattern": "^(?!A1|A12|H1|H12)(([A-H][1-9])|([A-H]1[0-2]))$", + "type": "string" + }, + "dna_container_id": { + "type": "string" + }, + "dna_dnase": { + "$ref": "#/$defs/DnaDnaseEnum" + }, + 
"dna_isolate_meth": { + "description": "Describe the method/protocol/kit used to extract DNA/RNA.", + "type": "string" + }, + "dna_organisms": { + "description": "List any organisms known or suspected to grow in co-culture, as well as estimated % of the organism in that culture.", + "type": "string" + }, + "dna_project_contact": { + "type": "string" + }, + "dna_samp_id": { + "type": "string" + }, + "dna_sample_format": { + "$ref": "#/$defs/DnaSampleFormatEnum", + "description": "Solution in which the DNA sample has been suspended" + }, + "dna_sample_name": { + "description": "Give the DNA sample a name that is meaningful to you. Sample names must be unique across all JGI projects and contain a-z, A-Z, 0-9, - and _ only.", + "type": "string" + }, + "dna_seq_project": { + "type": "string" + }, + "dna_seq_project_name": { + "type": "string" + }, + "dna_seq_project_pi": { + "type": "string" + }, + "dna_volume": { + "maximum": 1000, + "minimum": 0, + "type": "string" + }, + "dnase_rna": { + "$ref": "#/$defs/DnaseRnaEnum" + }, + "drainage_class": { + "$ref": "#/$defs/TextValue", + "description": "Drainage classification from a standard system such as the USDA system" + }, + "ecosystem": { + "description": "An ecosystem is a combination of a physical environment (abiotic factors) and all the organisms (biotic factors) that interact with this environment. Ecosystem is in position 1/5 in a GOLD path.", + "type": "string" + }, + "ecosystem_category": { + "description": "Ecosystem categories represent divisions within the ecosystem based on specific characteristics of the environment from where an organism or sample is isolated. Ecosystem category is in position 2/5 in a GOLD path.", + "type": "string" + }, + "ecosystem_subtype": { + "description": "Ecosystem subtypes represent further subdivision of Ecosystem types into more distinct subtypes. 
Ecosystem subtype is in position 4/5 in a GOLD path.", + "type": "string" + }, + "ecosystem_type": { + "description": "Ecosystem types represent things having common characteristics within the Ecosystem Category. These common characteristics based grouping is still broad but specific to the characteristics of a given environment. Ecosystem type is in position 3/5 in a GOLD path.", + "type": "string" + }, + "elev": { + "$ref": "#/$defs/QuantityValue", + "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit." + }, + "emsl_biosample_identifiers": { + "description": "A list of identifiers for the biosample from the EMSL database. This is used to link the biosample, as modeled by NMDC, to the biosample in the planned EMSL NEXUS database.", + "items": { + "type": "string" + }, + "type": "array" + }, + "env_broad_scale": { + "$ref": "#/$defs/ControlledIdentifiedTermValue", + "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO\u2019s biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS" + }, + "env_local_scale": { + "$ref": "#/$defs/ControlledIdentifiedTermValue", + "description": "Report the entity or entities which are in the sample or specimen\u2019s local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS." + }, + "env_medium": { + "$ref": "#/$defs/ControlledIdentifiedTermValue", + "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)." + }, + "env_package": { + "$ref": "#/$defs/TextValue", + "description": "MIxS extension for reporting of measurements and observations obtained from one or more of the environments where the sample was obtained. All environmental packages listed here are further defined in separate subtables. By giving the name of the environmental package, a selection of fields can be made from the subtables and can be reported", + "pattern": "[air|built environment|host\\-associated|human\\-associated|human\\-skin|human\\-oral|human\\-gut|human\\-vaginal|hydrocarbon resources\\-cores|hydrocarbon resources\\-fluids\\/swabs|microbial mat\\/biofilm|misc environment|plant\\-associated|sediment|soil|wastewater\\/sludge|water]" + }, + "experimental_factor": { + "$ref": "#/$defs/ControlledTermValue", + "description": "Experimental factors are essentially the variable aspects of an experiment design which can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI). 
For a browser of EFO (v 2.95) terms, please see http://purl.bioontology.org/ontology/EFO; for a browser of OBI (v 2018-02-12) terms please see http://purl.bioontology.org/ontology/OBI" + }, + "experimental_factor_other": { + "description": "Other details about your sample that you feel can't be accurately represented in the available columns.", + "type": "string" + }, + "extreme_event": { + "description": "Unusual physical events that may have affected microbial populations", + "type": "string" + }, + "fao_class": { + "$ref": "#/$defs/TextValue", + "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" + }, + "filter_method": { + "description": "Type of filter used or how the sample was filtered", + "type": "string" + }, + "fire": { + "$ref": "#/$defs/TimestampValue", + "description": "Historical and/or physical evidence of fire" + }, + "flooding": { + "$ref": "#/$defs/TimestampValue", + "description": "Historical and/or physical evidence of flooding" + }, + "gaseous_environment": { + "$ref": "#/$defs/QuantityValue", + "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens" + }, + "geo_loc_name": { + "$ref": "#/$defs/TextValue", + "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)" + }, + "glucosidase_act": { + "$ref": "#/$defs/QuantityValue", + "description": "Measurement of glucosidase activity" + }, + "gold_biosample_identifiers": { + "description": "Unique identifier for a biosample submitted to GOLD that matches the NMDC submitted biosample", + "items": { + "type": "string" + }, + "pattern": "^GOLD:Gb[0-9]+$", + "type": "array" + }, + "growth_facil": { + "$ref": "#/$defs/ControlledTermValue", + "description": "Type of facility where the sampled plant was grown; controlled vocabulary: growth chamber, open top chamber, glasshouse, experimental garden, field. Alternatively use Crop Ontology (CO) terms, see http://www.cropontology.org/ontology/CO_715/Crop%20Research" + }, + "habitat": { + "type": "string" + }, + "heavy_metals": { + "$ref": "#/$defs/QuantityValue", + "description": "Heavy metals present in the sequenced sample and their concentrations. For multiple heavy metals and concentrations, add multiple copies of this field." 
+ }, + "heavy_metals_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining heavy metals" + }, + "host_name": { + "type": "string" + }, + "humidity_regm": { + "$ref": "#/$defs/QuantityValue", + "description": "Information about treatment involving an exposure to varying degree of humidity; information about treatment involving use of growth hormones; should include amount of humidity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens" + }, + "id": { + "description": "An NMDC assigned unique identifier for a biosample submitted to NMDC.", + "pattern": "^(nmdc):bsm-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "igsn_biosample_identifiers": { + "description": "A list of identifiers for the biosample from the IGSN database.", + "items": { + "type": "string" + }, + "type": "array" + }, + "img_identifiers": { + "description": "A list of identifiers that relate the biosample to records in the IMG database.", + "items": { + "type": "string" + }, + "type": "array" + }, + "insdc_biosample_identifiers": { + "description": "identifiers for corresponding sample in INSDC", + "items": { + "type": "string" + }, + "pattern": "^biosample:SAM[NED]([A-Z])?[0-9]+$", + "type": "array" + }, + "isotope_exposure": { + "description": "List isotope exposure or addition applied to your sample.", + "type": "string" + }, + "lat_lon": { + "$ref": "#/$defs/GeolocationValue", + "description": "This is currently a required field but it's not clear if this should be required for human hosts" + }, + "lbc_thirty": { + "$ref": "#/$defs/QuantityValue", + "description": "lime buffer capacity, determined after 30 minute incubation" + }, + "lbceq": { + "$ref": "#/$defs/QuantityValue", + "description": "lime buffer capacity, determined at equilibrium 
after 5 day incubation" + }, + "light_regm": { + "$ref": "#/$defs/QuantityValue", + "description": "Information about treatment(s) involving exposure to light, including both light intensity and quality." + }, + "link_addit_analys": { + "$ref": "#/$defs/TextValue", + "description": "Link to additional analysis results performed on the sample" + }, + "link_class_info": { + "$ref": "#/$defs/TextValue", + "description": "Link to digitized soil maps or other soil classification information" + }, + "link_climate_info": { + "$ref": "#/$defs/TextValue", + "description": "Link to climate resource" + }, + "local_class": { + "$ref": "#/$defs/TextValue", + "description": "Soil classification based on local soil classification system" + }, + "local_class_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining the local soil classification" + }, + "location": { + "type": "string" + }, + "magnesium": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of magnesium in the sample" + }, + "manganese": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of manganese in the sample" + }, + "mean_frict_vel": { + "$ref": "#/$defs/QuantityValue", + "description": "Measurement of mean friction velocity" + }, + "mean_peak_frict_vel": { + "$ref": "#/$defs/QuantityValue", + "description": "Measurement of mean peak friction velocity" + }, + "micro_biomass_c_meth": { + "description": "Reference or method used in determining microbial biomass", + "type": "string" + }, + "micro_biomass_n_meth": { + "description": "Reference or method used in determining microbial biomass nitrogen", + "type": "string" + }, + "microbial_biomass_c": { + "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. 
If you keep this, you would need to have correction factors used for conversion to the final units", + "type": "string" + }, + "microbial_biomass_n": { + "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", + "type": "string" + }, + "misc_param": { + "$ref": "#/$defs/QuantityValue", + "description": "Any other measurement performed or parameter collected, that is not listed here" + }, + "mod_date": { + "description": "The last date on which the database information was modified.", + "type": "string" + }, + "n_alkanes": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of n-alkanes; can include multiple n-alkanes" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "ncbi_taxonomy_name": { + "type": "string" + }, + "nitrate": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of nitrate in the sample" + }, + "nitrate_nitrogen": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of nitrate nitrogen in the sample" + }, + "nitrite": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of nitrite in the sample" + }, + "nitrite_nitrogen": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of nitrite nitrogen in the sample" + }, + "non_microb_biomass": { + "description": "Amount of biomass; should include the name for the part of biomass measured, e.g.insect, plant, total. 
Can include multiple measurements separated by ;", + "type": "string" + }, + "non_microb_biomass_method": { + "description": "Reference or method used in determining biomass", + "type": "string" + }, + "org_matter": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of organic matter" + }, + "org_nitro": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of organic nitrogen" + }, + "org_nitro_method": { + "description": "Method used for obtaining organic nitrogen", + "type": "string" + }, + "organism_count": { + "$ref": "#/$defs/QuantityValue", + "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)" + }, + "other_treatment": { + "description": "Other treatments applied to your samples that are not applicable to the provided fields", + "type": "string" + }, + "oxy_stat_samp": { + "$ref": "#/$defs/TextValue", + "description": "Oxygenation status of sample" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "part_org_carb": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of particulate organic carbon" + }, + "perturbation": { + "$ref": "#/$defs/TextValue", + "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types" + }, + "petroleum_hydrocarb": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of petroleum hydrocarbon" + }, + "ph": { + "$ref": "#/$defs/QuantityValue", + "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid" + }, + "ph_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining ph" + }, + "phaeopigments": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of phaeopigments; can include multiple phaeopigments" + }, + "phosphate": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of phosphate" + }, + "phosplipid_fatt_acid": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of phospholipid fatty acids; can include multiple values" + }, + "pool_dna_extracts": { + "$ref": "#/$defs/TextValue", + "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given" + }, + "potassium": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of potassium in the sample" + }, + "pressure": { + "$ref": "#/$defs/QuantityValue", + "description": "Pressure to which the sample is subject, in atmospheres" + }, + "profile_position": { + "$ref": "#/$defs/TextValue", + "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" + }, + "project_id": { + "description": "Proposal IDs or names associated with dataset", + "type": "string" + }, + "proport_woa_temperature": { + "type": "string" + }, + "proposal_dna": { + "type": "string" + }, + "proposal_rna": { + "type": "string" + }, + "redox_potential": { + "$ref": "#/$defs/QuantityValue", + "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential" + }, + "rel_to_oxygen": { + "$ref": "#/$defs/TextValue", + "description": "Is this organism an aerobe, anaerobe? 
Please note that aerobic and anaerobic are valid descriptors for microbial environments" + }, + "replicate_number": { + "description": "If sending biological replicates, indicate the rep number here.", + "type": "string" + }, + "rna_absorb1": { + "description": "260/280 measurement of RNA sample purity", + "type": "string" + }, + "rna_absorb2": { + "description": "260/230 measurement of RNA sample purity", + "type": "string" + }, + "rna_collect_site": { + "description": "Provide information on the site your RNA sample was collected from", + "type": "string" + }, + "rna_concentration": { + "maximum": 1000, + "minimum": 0, + "type": "string" + }, + "rna_cont_type": { + "$ref": "#/$defs/RnaContTypeEnum", + "description": "Tube or plate (96-well)" + }, + "rna_cont_well": { + "pattern": "^(?!A1|A12|H1|H12)(([A-H][1-9])|([A-H]1[0-2]))$", + "type": "string" + }, + "rna_container_id": { + "type": "string" + }, + "rna_isolate_meth": { + "description": "Describe the method/protocol/kit used to extract DNA/RNA.", + "type": "string" + }, + "rna_organisms": { + "description": "List any organisms known or suspected to grow in co-culture, as well as estimated % of the organism in that culture.", + "type": "string" + }, + "rna_project_contact": { + "type": "string" + }, + "rna_samp_id": { + "type": "string" + }, + "rna_sample_format": { + "$ref": "#/$defs/RnaSampleFormatEnum", + "description": "Solution in which the RNA sample has been suspended" + }, + "rna_sample_name": { + "description": "Give the RNA sample a name that is meaningful to you. 
Sample names must be unique across all JGI projects and contain a-z, A-Z, 0-9, - and _ only.", + "maximum": 2000, + "minimum": 0, + "type": "string" + }, + "rna_seq_project": { + "type": "string" + }, + "rna_seq_project_name": { + "type": "string" + }, + "rna_seq_project_pi": { + "type": "string" + }, + "rna_volume": { + "type": "string" + }, + "salinity": { + "$ref": "#/$defs/QuantityValue", + "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater." + }, + "salinity_category": { + "description": "Categorical description of the sample's salinity. Examples: halophile, halotolerant, hypersaline, euryhaline", + "type": "string" + }, + "salinity_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining salinity" + }, + "samp_collec_method": { + "description": "The method employed for collecting the sample.", + "type": "string" + }, + "samp_mat_process": { + "$ref": "#/$defs/ControlledTermValue", + "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed." + }, + "samp_name": { + "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name.", + "type": "string" + }, + "samp_size": { + "$ref": "#/$defs/QuantityValue", + "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected." + }, + "samp_store_dur": { + "$ref": "#/$defs/TextValue", + "description": "Duration for which the sample was stored" + }, + "samp_store_loc": { + "$ref": "#/$defs/TextValue", + "description": "Location at which sample was stored, usually name of a specific freezer/room" + }, + "samp_store_temp": { + "$ref": "#/$defs/QuantityValue", + "description": "Temperature at which sample was stored, e.g. -80 degree Celsius" + }, + "samp_vol_we_dna_ext": { + "$ref": "#/$defs/QuantityValue", + "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (mixs:0000001)." + }, + "sample_collection_site": { + "type": "string" + }, + "sample_link": { + "description": "JsonObj()", + "items": { + "type": "string" + }, + "type": "array" + }, + "sample_shipped": { + "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample sent to EMSL", + "type": "string" + }, + "sample_type": { + "$ref": "#/$defs/SampleTypeEnum", + "description": "Type of sample being submitted" + }, + "season_precpt": { + "$ref": "#/$defs/QuantityValue", + "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps." 
+ }, + "season_temp": { + "$ref": "#/$defs/QuantityValue", + "description": "Mean seasonal temperature" + }, + "sieving": { + "$ref": "#/$defs/QuantityValue", + "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved" + }, + "size_frac_low": { + "$ref": "#/$defs/QuantityValue", + "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample" + }, + "size_frac_up": { + "$ref": "#/$defs/QuantityValue", + "description": "Refers to the mesh/pore size used to retain the sample. Materials smaller than the size threshold are excluded from the sample" + }, + "slope_aspect": { + "$ref": "#/$defs/QuantityValue", + "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration." + }, + "slope_gradient": { + "$ref": "#/$defs/QuantityValue", + "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer" + }, + "sodium": { + "$ref": "#/$defs/QuantityValue", + "description": "Sodium concentration in the sample" + }, + "soil_type": { + "$ref": "#/$defs/TextValue", + "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes." 
+ }, + "soil_type_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining soil series name or other lower-level classification" + }, + "soluble_iron_micromol": { + "type": "string" + }, + "source_mat_id": { + "$ref": "#/$defs/TextValue", + "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)." + }, + "specific_ecosystem": { + "description": "Specific ecosystems represent specific features of the environment like aphotic zone in an ocean or gastric mucosa within a host digestive system. Specific ecosystem is in position 5/5 in a GOLD path.", + "type": "string" + }, + "start_date_inc": { + "description": "Date the incubation was started. Only relevant for incubation samples.", + "type": "string" + }, + "start_time_inc": { + "description": "Time the incubation was started. Only relevant for incubation samples.", + "type": "string" + }, + "store_cond": { + "$ref": "#/$defs/TextValue", + "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)." 
+ }, + "subsurface_depth": { + "$ref": "#/$defs/QuantityValue" + }, + "sulfate": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of sulfate in the sample" + }, + "sulfide": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of sulfide in the sample" + }, + "technical_reps": { + "description": "If sending multiple technical replicates of the same sample, indicate how many replicates are being sent", + "type": "string" + }, + "temp": { + "$ref": "#/$defs/QuantityValue", + "description": "Temperature of the sample at the time of sampling." + }, + "tidal_stage": { + "$ref": "#/$defs/TextValue", + "description": "Stage of tide" + }, + "tillage": { + "$ref": "#/$defs/TextValue", + "description": "Note method(s) used for tilling" + }, + "tot_carb": { + "$ref": "#/$defs/QuantityValue", + "description": "Total carbon content" + }, + "tot_depth_water_col": { + "$ref": "#/$defs/QuantityValue", + "description": "Measurement of total depth of water column" + }, + "tot_diss_nitro": { + "$ref": "#/$defs/QuantityValue", + "description": "Total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen" + }, + "tot_nitro_cont_meth": { + "description": "Reference or method used in determining the total nitrogen", + "type": "string" + }, + "tot_nitro_content": { + "$ref": "#/$defs/QuantityValue", + "description": "Total nitrogen content of the sample" + }, + "tot_org_c_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining total organic carbon" + }, + "tot_org_carb": { + "$ref": "#/$defs/QuantityValue", + "description": "Definition for soil: total organic carbon content of the soil, definition otherwise: total organic carbon content" + }, + "tot_phosp": { + "$ref": "#/$defs/QuantityValue", + "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate 
phosphorus" + }, + "type": { + "description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "water_cont_soil_meth": { + "description": "Reference or method used in determining the water content of soil", + "type": "string" + }, + "water_content": { + "$ref": "#/$defs/QuantityValue", + "description": "Water content measurement" + }, + "watering_regm": { + "$ref": "#/$defs/QuantityValue", + "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens" + }, + "zinc": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of zinc in the sample" + } + }, + "required": [ + "part_of", + "id", + "env_broad_scale", + "env_local_scale", + "env_medium" + ], + "title": "Biosample", + "type": "object" + }, + "BiosampleCategoryEnum": { + "description": "Funding-based, sample location-based, or experimental method-based defined categories", + "enum": [ + "LTER", + "SIP", + "SFA", + "FICUS", + "NEON" + ], + "title": "BiosampleCategoryEnum", + "type": "string" + }, + "BiosampleProcessing": { + "additionalProperties": false, + "description": "A process that takes one or more biosamples as inputs and generates one or more outputs. 
Examples of outputs include samples cultivated from another sample or data objects created by instrument runs.", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):bsmprc-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "BiosampleProcessing", + "type": "object" + }, + "BioticRelationshipEnum": { + "description": "", + "enum": [ + "free living", + "parasite", + "commensal", + "symbiont" + ], + "title": "BioticRelationshipEnum", + "type": "string" + }, + "BooleanValue": { + "additionalProperties": false, + "description": "A value that is a boolean", + "properties": { + "has_boolean_value": { + "description": "Links a quantity value to a boolean", + "type": "boolean" + }, + "has_raw_value": { + "description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. 
\"2 cm\" or \"2-4 cm\"", + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "title": "BooleanValue", + "type": "object" + }, + "BuildDocsEnum": { + "description": "", + "enum": [ + "building information model", + "commissioning report", + "complaint logs", + "contract administration", + "cost estimate", + "janitorial schedules or logs", + "maintenance plans", + "schedule", + "sections", + "shop drawings", + "submittals", + "ventilation system", + "windows" + ], + "title": "BuildDocsEnum", + "type": "string" + }, + "BuildOccupTypeEnum": { + "description": "", + "enum": [ + "office", + "market", + "restaurant", + "residence", + "school", + "residential", + "commercial", + "low rise", + "high rise", + "wood framed", + "health care", + "airport", + "sports complex" + ], + "title": "BuildOccupTypeEnum", + "type": "string" + }, + "BuildingSettingEnum": { + "description": "", + "enum": [ + "urban", + "suburban", + "exurban", + "rural" + ], + "title": "BuildingSettingEnum", + "type": "string" + }, + "CeilCondEnum": { + "description": "", + "enum": [ + "new", + "visible wear", + "needs repair", + "damaged", + "rupture" + ], + "title": "CeilCondEnum", + "type": "string" + }, + "CeilFinishMatEnum": { + "description": "", + "enum": [ + "drywall", + "mineral fibre", + "tiles", + "PVC", + "plasterboard", + "metal", + "fiberglass", + "stucco", + "mineral wool/calcium silicate", + "wood" + ], + "title": "CeilFinishMatEnum", + "type": "string" + }, + "CeilTextureEnum": { + "description": "", + "enum": [ + "crows feet", + "crows-foot stomp", + "double skip", + "hawk and trowel", + "knockdown", + "popcorn", + "orange peel", + "rosebud stomp", + "Santa-Fe texture", + "skip trowel", + "smooth", + "stomp knockdown", + "swirl" + ], + "title": "CeilTextureEnum", + "type": "string" + }, + "CeilTypeEnum": { + "description": "", + "enum": [ + "cathedral", + "dropped", + "concave", + "barrel-shaped", + "coffered", + "cove", + "stretched" + ], + "title": 
"CeilTypeEnum", + "type": "string" + }, + "ChemicalEntity": { + "additionalProperties": false, + "description": "An atom or molecule that can be represented with a chemical formula. Include lipids, glycans, natural products, drugs. There may be different terms for distinct acid-base forms, protonation states", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "ChemicalEntity", + "type": "object" + }, + "CollectingBiosamplesFromSite": { + "additionalProperties": false, + "description": "", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "has_inputs": { + "items": { + "type": "string" + }, + "type": "array" + }, + "has_outputs": { + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):clsite-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "participating_agent": { + "$ref": "#/$defs/Agent" + } + }, + "required": [ + "has_inputs", + "has_outputs", + "id" + ], + "title": "CollectingBiosamplesFromSite", + "type": "object" + }, + "ContainerTypeEnum": { + "description": "", + "enum": [ + "screw_top_conical" + ], + "title": "ContainerTypeEnum", + "type": "string" + }, + "ControlledIdentifiedTermValue": { + "additionalProperties": false, + "description": "A controlled term or class from an ontology, requiring the presence of term with an id", + "properties": { + "has_raw_value": { + "description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", + "type": "string" + }, + "term": { + "$ref": "#/$defs/OntologyClass", + "description": "pointer to an ontology class" + }, + "was_generated_by": { + "type": "string" + } + }, + "required": [ + "term" + ], + "title": "ControlledIdentifiedTermValue", + "type": "object" + }, + "ControlledTermValue": { + "additionalProperties": false, + "description": "A controlled term or class from an ontology", + "properties": { + "has_raw_value": { + "description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", + "type": "string" + }, + "term": { + "$ref": "#/$defs/OntologyClass", + "description": "pointer to an ontology class" + }, + "was_generated_by": { + "type": "string" + } + }, + "title": "ControlledTermValue", + "type": "object" + }, + "CreditAssociation": { + "additionalProperties": false, + "description": "This class supports binding associated researchers to studies. 
There will be at least a slot for a CRediT Contributor Role (https://casrai.org/credit/) and for a person value Specifically see the associated researchers tab on the NMDC_SampleMetadata-V4_CommentsForUpdates at https://docs.google.com/spreadsheets/d/1INlBo5eoqn2efn4H2P2i8rwRBtnbDVTqXrochJEAPko/edit#gid=0", + "properties": { + "applied_role": { + "$ref": "#/$defs/CreditEnum" + }, + "applied_roles": { + "items": { + "$ref": "#/$defs/CreditEnum" + }, + "type": "array" + }, + "applies_to_person": { + "$ref": "#/$defs/PersonValue" + }, + "type": { + "description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", + "type": "string" + } + }, + "required": [ + "applies_to_person", + "applied_roles" + ], + "title": "CreditAssociation", + "type": "object" + }, + "CreditEnum": { + "description": "", + "enum": [ + "Conceptualization", + "Data curation", + "Formal Analysis", + "Funding acquisition", + "Investigation", + "Methodology", + "Project administration", + "Resources", + "Software", + "Supervision", + "Validation", + "Visualization", + "Writing original draft", + "Writing review and editing", + "Principal Investigator", + "Submitter" + ], + "title": "CreditEnum", + "type": "string" + }, + "CurLandUseEnum": { + "description": "", + "enum": [ + "cities", + "farmstead", + "industrial areas", + "roads/railroads", + "rock", + "sand", + "gravel", + "mudflats", + "salt flats", + "badlands", + "permanent snow or ice", + "saline seeps", + "mines/quarries", + "oil waste areas", + "small grains", + "row crops", + "vegetable crops", + "horticultural plants (e.g. tulips)", + "marshlands (grass,sedges,rushes)", + "tundra (mosses,lichens)", + "rangeland", + "pastureland (grasslands used for livestock grazing)", + "hayland", + "meadows (grasses,alfalfa,fescue,bromegrass,timothy)", + "shrub land (e.g. 
mesquite,sage-brush,creosote bush,shrub oak,eucalyptus)", + "successional shrub land (tree saplings,hazels,sumacs,chokecherry,shrub dogwoods,blackberries)", + "shrub crops (blueberries,nursery ornamentals,filberts)", + "vine crops (grapes)", + "conifers (e.g. pine,spruce,fir,cypress)", + "hardwoods (e.g. oak,hickory,elm,aspen)", + "intermixed hardwood and conifers", + "tropical (e.g. mangrove,palms)", + "rainforest (evergreen forest receiving greater than 406 cm annual rainfall)", + "swamp (permanent or semi-permanent water body dominated by woody plants)", + "crop trees (nuts,fruit,christmas trees,nursery trees)" + ], + "title": "CurLandUseEnum", + "type": "string" + }, + "DataObject": { + "additionalProperties": false, + "description": "An object that primarily consists of symbols that represent information. Files, records, and omics data are examples of data objects.", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "compression_type": { + "description": "If provided, specifies the compression type", + "type": "string" + }, + "data_object_type": { + "$ref": "#/$defs/FileTypeEnum", + "description": "The type of file represented by the data object." + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "file_size_bytes": { + "description": "Size of the file in bytes", + "type": "integer" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):dobj-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "md5_checksum": { + "description": "MD5 checksum of file (pre-compressed)", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "url": { + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "required": [ + "name", + "description" + ], + "title": "DataObject", + "type": "object" + }, + "Database": { + "additionalProperties": false, + "description": "An abstract holder for any set of metadata and data. It does not need to correspond to an actual managed database top level holder class. When translated to JSON-Schema this is the 'root' object. 
It should contain pointers to other objects of interest", + "properties": { + "activity_set": { + "description": "This property links a database object to the set of workflow activities.", + "items": { + "$ref": "#/$defs/WorkflowExecutionActivity" + }, + "type": "array" + }, + "biosample_set": { + "description": "This property links a database object to the set of samples within it.", + "items": { + "$ref": "#/$defs/Biosample" + }, + "type": "array" + }, + "collecting_biosamples_from_site_set": { + "items": { + "$ref": "#/$defs/CollectingBiosamplesFromSite" + }, + "type": "array" + }, + "data_object_set": { + "description": "This property links a database object to the set of data objects within it.", + "items": { + "$ref": "#/$defs/DataObject" + }, + "type": "array" + }, + "dissolving_activity_set": { + "items": { + "$ref": "#/$defs/DissolvingActivity" + }, + "type": "array" + }, + "field_research_site_set": { + "items": { + "$ref": "#/$defs/FieldResearchSite" + }, + "type": "array" + }, + "functional_annotation_set": { + "description": "This property links a database object to the set of all functional annotations", + "items": { + "$ref": "#/$defs/FunctionalAnnotation" + }, + "type": "array" + }, + "genome_feature_set": { + "description": "This property links a database object to the set of all features", + "items": { + "$ref": "#/$defs/GenomeFeature" + }, + "type": "array" + }, + "mags_activity_set": { + "description": "This property links a database object to the set of MAGs analysis activities.", + "items": { + "$ref": "#/$defs/MagsAnalysisActivity" + }, + "type": "array" + }, + "material_sample_set": { + "items": { + "$ref": "#/$defs/MaterialSample" + }, + "type": "array" + }, + "material_sampling_activity_set": { + "items": { + "$ref": "#/$defs/MaterialSamplingActivity" + }, + "type": "array" + }, + "metabolomics_analysis_activity_set": { + "description": "This property links a database object to the set of metabolomics analysis activities.", + "items": { + 
"$ref": "#/$defs/MetabolomicsAnalysisActivity" + }, + "type": "array" + }, + "metagenome_annotation_activity_set": { + "description": "This property links a database object to the set of metagenome annotation activities.", + "items": { + "$ref": "#/$defs/MetagenomeAnnotationActivity" + }, + "type": "array" + }, + "metagenome_assembly_set": { + "description": "This property links a database object to the set of metagenome assembly activities.", + "items": { + "$ref": "#/$defs/MetagenomeAssembly" + }, + "type": "array" + }, + "metaproteomics_analysis_activity_set": { + "description": "This property links a database object to the set of metaproteomics analysis activities.", + "items": { + "$ref": "#/$defs/MetaproteomicsAnalysisActivity" + }, + "type": "array" + }, + "metatranscriptome_activity_set": { + "description": "TODO", + "items": { + "$ref": "#/$defs/MetatranscriptomeActivity" + }, + "type": "array" + }, + "nom_analysis_activity_set": { + "description": "This property links a database object to the set of natural organic matter (NOM) analysis activities.", + "items": { + "$ref": "#/$defs/NomAnalysisActivity" + }, + "type": "array" + }, + "omics_processing_set": { + "description": "This property links a database object to the set of omics processings within it.", + "items": { + "$ref": "#/$defs/OmicsProcessing" + }, + "type": "array" + }, + "reaction_activity_set": { + "items": { + "$ref": "#/$defs/ReactionActivity" + }, + "type": "array" + }, + "read_based_taxonomy_analysis_activity_set": { + "description": "This property links a database object to the set of read based analysis activities.", + "items": { + "$ref": "#/$defs/ReadBasedTaxonomyAnalysisActivity" + }, + "type": "array" + }, + "read_qc_analysis_activity_set": { + "description": "This property links a database object to the set of read QC analysis activities.", + "items": { + "$ref": "#/$defs/ReadQcAnalysisActivity" + }, + "type": "array" + }, + "study_set": { + "description": "This property links a 
database object to the set of studies within it.", + "items": { + "$ref": "#/$defs/Study" + }, + "type": "array" + } + }, + "title": "Database", + "type": "object" + }, + "DeposEnvEnum": { + "description": "", + "enum": [ + "Continental - Alluvial", + "Continental - Aeolian", + "Continental - Fluvial", + "Continental - Lacustrine", + "Transitional - Deltaic", + "Transitional - Tidal", + "Transitional - Lagoonal", + "Transitional - Beach", + "Transitional - Lake", + "Marine - Shallow", + "Marine - Deep", + "Marine - Reef", + "Other - Evaporite", + "Other - Glacial", + "Other - Volcanic", + "other" + ], + "title": "DeposEnvEnum", + "type": "string" + }, + "DeviceTypeEnum": { + "description": "", + "enum": [ + "orbital_shaker", + "thermomixer" + ], + "title": "DeviceTypeEnum", + "type": "string" + }, + "DissolvingActivity": { + "additionalProperties": false, + "description": "", + "properties": { + "dissolution_aided_by": { + "$ref": "#/$defs/LabDevice" + }, + "dissolution_reagent": { + "$ref": "#/$defs/SolventEnum" + }, + "dissolution_volume": { + "$ref": "#/$defs/QuantityValue" + }, + "dissolved_in": { + "$ref": "#/$defs/MaterialContainer" + }, + "material_input": { + "type": "string" + }, + "material_output": { + "type": "string" + } + }, + "title": "DissolvingActivity", + "type": "object" + }, + "DnaContTypeEnum": { + "description": "", + "enum": [ + "plate", + "tube" + ], + "title": "DnaContTypeEnum", + "type": "string" + }, + "DnaDnaseEnum": { + "description": "", + "enum": [ + "no", + "yes" + ], + "title": "DnaDnaseEnum", + "type": "string" + }, + "DnaSampleFormatEnum": { + "description": "", + "enum": [ + "10 mM Tris-HCl", + "DNAStable", + "Ethanol", + "Low EDTA TE", + "MDA reaction buffer", + "PBS", + "Pellet", + "RNAStable", + "TE", + "Water", + "Gentegra-DNA", + "Gentegra-RNA" + ], + "title": "DnaSampleFormatEnum", + "type": "string" + }, + "DnaseRnaEnum": { + "description": "", + "enum": [ + "no", + "yes" + ], + "title": "DnaseRnaEnum", + "type": "string" 
+ }, + "DoorCompTypeEnum": { + "description": "", + "enum": [ + "metal covered", + "revolving", + "sliding", + "telescopic" + ], + "title": "DoorCompTypeEnum", + "type": "string" + }, + "DoorCondEnum": { + "description": "", + "enum": [ + "damaged", + "needs repair", + "new", + "rupture", + "visible wear" + ], + "title": "DoorCondEnum", + "type": "string" + }, + "DoorDirectEnum": { + "description": "", + "enum": [ + "inward", + "outward", + "sideways" + ], + "title": "DoorDirectEnum", + "type": "string" + }, + "DoorLocEnum": { + "description": "", + "enum": [ + "north", + "south", + "east", + "west" + ], + "title": "DoorLocEnum", + "type": "string" + }, + "DoorMatEnum": { + "description": "", + "enum": [ + "aluminum", + "cellular PVC", + "engineered plastic", + "fiberboard", + "fiberglass", + "metal", + "thermoplastic alloy", + "vinyl", + "wood", + "wood/plastic composite" + ], + "title": "DoorMatEnum", + "type": "string" + }, + "DoorMoveEnum": { + "description": "", + "enum": [ + "collapsible", + "folding", + "revolving", + "rolling shutter", + "sliding", + "swinging" + ], + "title": "DoorMoveEnum", + "type": "string" + }, + "DoorTypeEnum": { + "description": "", + "enum": [ + "composite", + "metal", + "wooden" + ], + "title": "DoorTypeEnum", + "type": "string" + }, + "DoorTypeMetalEnum": { + "description": "", + "enum": [ + "collapsible", + "corrugated steel", + "hollow", + "rolling shutters", + "steel plate" + ], + "title": "DoorTypeMetalEnum", + "type": "string" + }, + "DoorTypeWoodEnum": { + "description": "", + "enum": [ + "bettened and ledged", + "battened", + "ledged and braced", + "ledged and framed", + "ledged, braced and frame", + "framed and paneled", + "glashed or sash", + "flush", + "louvered", + "wire gauged" + ], + "title": "DoorTypeWoodEnum", + "type": "string" + }, + "DrainageClassEnum": { + "description": "", + "enum": [ + "very poorly", + "poorly", + "somewhat poorly", + "moderately well", + "well", + "excessively drained" + ], + "title": 
"DrainageClassEnum", + "type": "string" + }, + "DrawingsEnum": { + "description": "", + "enum": [ + "operation", + "as built", + "construction", + "bid", + "design", + "building navigation map", + "diagram", + "sketch" + ], + "title": "DrawingsEnum", + "type": "string" + }, + "EnvironmentalMaterialTerm": { + "additionalProperties": false, + "description": "", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "EnvironmentalMaterialTerm", + "type": "object" + }, + "ExtWallOrientEnum": { + "description": "", + "enum": [ + "north", + "south", + "east", + "west", + "northeast", + "southeast", + "southwest", + "northwest" + ], + "title": "ExtWallOrientEnum", + "type": "string" + }, + "ExtWindowOrientEnum": { + "description": "", + "enum": [ + "north", + "south", + "east", + "west", + "northeast", + "southeast", + "southwest", + "northwest" + ], + "title": "ExtWindowOrientEnum", + "type": "string" + }, + "FaoClassEnum": { + "description": "", + "enum": [ + "Acrisols", + "Andosols", + "Arenosols", + "Cambisols", + "Chernozems", + "Ferralsols", + "Fluvisols", + "Gleysols", + "Greyzems", + "Gypsisols", + "Histosols", + "Kastanozems", + "Lithosols", + "Luvisols", + "Nitosols", + "Phaeozems", + "Planosols", + "Podzols", + "Podzoluvisols", + "Rankers", + "Regosols", + "Rendzinas", + "Solonchaks", + "Solonetz", + "Vertisols", + "Yermosols" + ], + "title": "FaoClassEnum", + "type": "string" + }, + "FieldResearchSite": { + "additionalProperties": false, + "description": "A 
site, outside of a laboratory, from which biosamples may be collected.", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):frsite-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "FieldResearchSite", + "type": "object" + }, + "FileTypeEnum": { + "description": "", + "enum": [ + "Metagenome Raw Reads", + "FT ICR-MS Analysis Results", + "GC-MS Metabolomics Results", + "Metaproteomics Workflow Statistics", + "Protein Report", + "Peptide Report", + "Unfiltered Metaproteomics Results", + "Read Count and RPKM", + "QC non-rRNA R2", + "QC non-rRNA R1", + "Metagenome Bins", + "CheckM Statistics", + "GOTTCHA2 Krona Plot", + "GOTTCHA2 Classification Report", + "GOTTCHA2 Report Full", + "Kraken2 Krona Plot", + "Centrifuge Krona Plot", + "Centrifuge output report file", + "Kraken2 Classification Report", + "Kraken2 Taxonomic Classification", + "Centrifuge Classification Report", + "Centrifuge Taxonomic Classification", + "Structural Annotation GFF", + "Functional Annotation GFF", + "Annotation Amino Acid FASTA", + "Annotation Enzyme Commission", + "Annotation KEGG Orthology", + "Assembly Coverage BAM", + "Assembly AGP", + "Assembly Scaffolds", + "Assembly Contigs", + "Assembly Coverage Stats", + "Filtered Sequencing Reads", + "QC Statistics", + "TIGRFam Annotation GFF", + "CRT Annotation GFF", + "Genmark Annotation GFF", + "Prodigal Annotation GFF", + "TRNA Annotation GFF", + "Misc Annotation GFF", + "RFAM 
Annotation GFF", + "TMRNA Annotation GFF", + "KO_EC Annotation GFF", + "Product Names", + "Gene Phylogeny tsv", + "Crisprt Terms", + "Clusters of Orthologous Groups (COG) Annotation GFF", + "CATH FunFams (Functional Families) Annotation GFF", + "SUPERFam Annotation GFF", + "SMART Annotation GFF", + "Pfam Annotation GFF", + "Direct Infusion FT ICR-MS Raw Data" + ], + "title": "FileTypeEnum", + "type": "string" + }, + "FilterTypeEnum": { + "description": "", + "enum": [ + "particulate air filter", + "chemical air filter", + "low-MERV pleated media", + "HEPA", + "electrostatic", + "gas-phase or ultraviolet air treatments" + ], + "title": "FilterTypeEnum", + "type": "string" + }, + "FloorCondEnum": { + "description": "", + "enum": [ + "new", + "visible wear", + "needs repair", + "damaged", + "rupture" + ], + "title": "FloorCondEnum", + "type": "string" + }, + "FloorFinishMatEnum": { + "description": "", + "enum": [ + "tile", + "wood strip or parquet", + "carpet", + "rug", + "laminate wood", + "lineoleum", + "vinyl composition tile", + "sheet vinyl", + "stone", + "bamboo", + "cork", + "terrazo", + "concrete", + "none", + "sealed", + "clear finish", + "paint", + "none or unfinished" + ], + "title": "FloorFinishMatEnum", + "type": "string" + }, + "FloorStrucEnum": { + "description": "", + "enum": [ + "balcony", + "floating floor", + "glass floor", + "raised floor", + "sprung floor", + "wood-framed", + "concrete" + ], + "title": "FloorStrucEnum", + "type": "string" + }, + "FloorWaterMoldEnum": { + "description": "", + "enum": [ + "mold odor", + "wet floor", + "water stains", + "wall discoloration", + "floor discoloration", + "ceiling discoloration", + "peeling paint or wallpaper", + "bulging walls", + "condensation" + ], + "title": "FloorWaterMoldEnum", + "type": "string" + }, + "FreqCleanEnum": { + "description": "", + "enum": [ + "Daily", + "Weekly", + "Monthly", + "Quarterly", + "Annually", + "other" + ], + "title": "FreqCleanEnum", + "type": "string" + }, + 
"FunctionalAnnotation": { + "additionalProperties": false, + "description": "An assignment of a function term (e.g. reaction or pathway) that is executed by a gene product, or which the gene product plays an active role in. Functional annotations can be assigned manually by curators, or automatically in workflows. In the context of NMDC, all function annotation is performed automatically, typically using HMM or Blast type methods", + "properties": { + "has_function": { + "pattern": "^(KEGG_PATHWAY:\\w{2,4}\\d{5}|KEGG.REACTION:R\\d+|RHEA:\\d{5}|MetaCyc:[A-Za-z0-9+_.%-:]+|EC:\\d{1,2}(\\.\\d{0,3}){0,3}|GO:\\d{7}|MetaNetX:(MNXR\\d+|EMPTY)|SEED:\\w+|KEGG\\.ORTHOLOGY:K\\d+|EGGNOG:\\w+|PFAM:PF\\d{5}|TIGRFAM:TIGR\\d+|SUPFAM:\\w+|CATH:[1-6]\\.[0-9]+\\.[0-9]+\\.[0-9]+|PANTHER.FAMILY:PTHR\\d{5}(\\:SF\\d{1,3})?)$", + "type": "string" + }, + "subject": { + "type": "string" + }, + "was_generated_by": { + "description": "provenance for the annotation.", + "type": "string" + } + }, + "title": "FunctionalAnnotation", + "type": "object" + }, + "FurnitureEnum": { + "description": "", + "enum": [ + "cabinet", + "chair", + "desks" + ], + "title": "FurnitureEnum", + "type": "string" + }, + "GenderRestroomEnum": { + "description": "", + "enum": [ + "all gender", + "female", + "gender neurtral", + "male", + "male and female", + "unisex" + ], + "title": "GenderRestroomEnum", + "type": "string" + }, + "GeneProduct": { + "additionalProperties": false, + "description": "A molecule encoded by a gene that has an evolved function", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "GeneProduct", + "type": "object" + }, + "GenomeFeature": { + "additionalProperties": false, + "description": "A feature localized to an interval along a genome", + "title": "GenomeFeature", + "type": "object" + }, + "GeolocationValue": { + "additionalProperties": false, + "description": "A normalized value for a location on the earth's surface", + "properties": { + "has_raw_value": { + "description": "The raw value for a geolocation should follow {lat} {long}", + "type": "string" + }, + "latitude": { + "description": "latitude", + "type": "number" + }, + "longitude": { + "description": "longitude", + "type": "number" + }, + "was_generated_by": { + "type": "string" + } + }, + "title": "GeolocationValue", + "type": "object" + }, + "GrowthHabitEnum": { + "description": "", + "enum": [ + "erect", + "semi-erect", + "spreading", + "prostrate" + ], + "title": "GrowthHabitEnum", + "type": "string" + }, + "HandidnessEnum": { + "description": "", + "enum": [ + "ambidexterity", + "left handedness", + "mixed-handedness", + "right handedness" + ], + "title": "HandidnessEnum", + "type": "string" + }, + "HcProducedEnum": { + "description": "", + "enum": [ + "Oil", + "Gas-Condensate", + "Gas", + "Bitumen", + "Coalbed Methane", + "other" + ], + "title": "HcProducedEnum", + "type": "string" + }, + "HcrEnum": { + "description": "", + "enum": [ + "Oil Reservoir", + "Gas Reservoir", + "Oil Sand", + "Coalbed", + "Shale", + "Tight Oil Reservoir", + "Tight Gas Reservoir", + "other" + ], + "title": "HcrEnum", + "type": "string" + }, + "HcrGeolAgeEnum": { + "description": "", + "enum": [ + "Archean", + "Cambrian", + "Carboniferous", + "Cenozoic", + "Cretaceous", + "Devonian", + "Jurassic", + "Mesozoic", + "Neogene", + "Ordovician", + "Paleogene", + "Paleozoic", + 
"Permian", + "Precambrian", + "Proterozoic", + "Silurian", + "Triassic", + "other" + ], + "title": "HcrGeolAgeEnum", + "type": "string" + }, + "HeatCoolTypeEnum": { + "description": "", + "enum": [ + "radiant system", + "heat pump", + "forced air system", + "steam forced heat", + "wood stove" + ], + "title": "HeatCoolTypeEnum", + "type": "string" + }, + "HeatDelivLocEnum": { + "description": "", + "enum": [ + "north", + "south", + "east", + "west" + ], + "title": "HeatDelivLocEnum", + "type": "string" + }, + "HorizonEnum": { + "description": "", + "enum": [ + "O horizon", + "A horizon", + "E horizon", + "B horizon", + "C horizon", + "R layer", + "Permafrost" + ], + "title": "HorizonEnum", + "type": "string" + }, + "HostSexEnum": { + "description": "", + "enum": [ + "female", + "hermaphrodite", + "male", + "neuter" + ], + "title": "HostSexEnum", + "type": "string" + }, + "ImageValue": { + "additionalProperties": false, + "description": "An attribute value representing an image.", + "properties": { + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "display_order": { + "description": "When rendering information, this attribute to specify the order in which the information should be rendered.", + "type": "string" + }, + "has_raw_value": { + "description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. 
\"2 cm\" or \"2-4 cm\"", + "type": "string" + }, + "url": { + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "title": "ImageValue", + "type": "object" + }, + "IndoorSpaceEnum": { + "description": "", + "enum": [ + "bedroom", + "office", + "bathroom", + "foyer", + "kitchen", + "locker room", + "hallway", + "elevator" + ], + "title": "IndoorSpaceEnum", + "type": "string" + }, + "IndoorSurfEnum": { + "description": "", + "enum": [ + "cabinet", + "ceiling", + "counter top", + "door", + "shelving", + "vent cover", + "window", + "wall" + ], + "title": "IndoorSurfEnum", + "type": "string" + }, + "Instrument": { + "additionalProperties": false, + "description": "A material entity that is designed to perform a function in a scientific investigation, but is not a reagent[OBI].", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):inst-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "Instrument", + "type": "object" + }, + "IntWallCondEnum": { + "description": "", + "enum": [ + "new", + "visible wear", + "needs repair", + "damaged", + "rupture" + ], + "title": "IntWallCondEnum", + "type": "string" + }, + "IntegerValue": { + "additionalProperties": false, + "description": "A value that is an integer", + "properties": { + "has_numeric_value": { + "description": "Links a quantity value to a number", + "type": "number" + }, + "has_raw_value": { + "description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "title": "IntegerValue", + "type": "object" + }, + "LabDevice": { + "additionalProperties": false, + "description": "", + "properties": { + "activity_speed": { + "$ref": "#/$defs/QuantityValue" + }, + "activity_temperature": { + "$ref": "#/$defs/QuantityValue" + }, + "activity_time": { + "$ref": "#/$defs/QuantityValue" + }, + "device_type": { + "$ref": "#/$defs/DeviceTypeEnum" + } + }, + "title": "LabDevice", + "type": "object" + }, + "LightTypeEnum": { + "description": "", + "enum": [ + "natural light", + "electric light", + "desk lamp", + "flourescent lights", + "none" + ], + "title": "LightTypeEnum", + "type": "string" + }, + "LithologyEnum": { + "description": "", + "enum": [ + "Basement", + "Chalk", + "Chert", + "Coal", + "Conglomerate", + "Diatomite", + "Dolomite", + "Limestone", + "Sandstone", + "Shale", + "Siltstone", + "Volcanic", + "other" + ], + "title": "LithologyEnum", + "type": "string" + }, + "MagBin": { + "additionalProperties": false, + "description": "", + 
"properties": { + "bin_name": { + "type": "string" + }, + "bin_quality": { + "type": "string" + }, + "completeness": { + "type": "number" + }, + "contamination": { + "type": "number" + }, + "gene_count": { + "type": "integer" + }, + "gtdbtk_class": { + "type": "string" + }, + "gtdbtk_domain": { + "type": "string" + }, + "gtdbtk_family": { + "type": "string" + }, + "gtdbtk_genus": { + "type": "string" + }, + "gtdbtk_order": { + "type": "string" + }, + "gtdbtk_phylum": { + "type": "string" + }, + "gtdbtk_species": { + "type": "string" + }, + "num_16s": { + "type": "integer" + }, + "num_23s": { + "type": "integer" + }, + "num_5s": { + "type": "integer" + }, + "num_t_rna": { + "type": "integer" + }, + "number_of_contig": { + "type": "integer" + }, + "type": { + "description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", + "type": "string" + } + }, + "title": "MagBin", + "type": "object" + }, + "MagsAnalysisActivity": { + "additionalProperties": false, + "description": "A workflow execution activity that uses computational binning tools to group assembled contigs into genomes", + "properties": { + "binned_contig_num": { + "type": "integer" + }, + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + 
"description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):wfmag-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "input_contig_num": { + "type": "integer" + }, + "low_depth_contig_num": { + "type": "integer" + }, + "mags_list": { + "items": { + "$ref": "#/$defs/MagBin" + }, + "type": "array" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "too_short_contig_num": { + "type": "integer" + }, + "type": { + "description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "unbinned_contig_num": { + "type": "integer" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "execution_resource", + "git_url", + "has_input", + "has_output", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "MagsAnalysisActivity", + "type": "object" + }, + "MaterialContainer": { + "additionalProperties": false, + "description": "", + "properties": { + "container_size": { + "$ref": "#/$defs/QuantityValue" + }, + "container_type": { + "$ref": "#/$defs/ContainerTypeEnum" + } + }, + "title": "MaterialContainer", + "type": "object" + }, + "MaterialSample": { + "additionalProperties": false, + "description": "", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):matsm-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "title": "MaterialSample", + "type": "object" + }, + "MaterialSamplingActivity": { + "additionalProperties": false, + "description": "", + "properties": { + "amount_collected": { + "$ref": "#/$defs/QuantityValue" + }, + "biosample_input": { + "type": "string" + }, + "collected_into": { + "$ref": "#/$defs/MaterialContainer" + }, + "material_output": { + "type": "string" + }, + "sampling_method": { + "$ref": "#/$defs/SamplingMethodEnum" + } + }, + "title": "MaterialSamplingActivity", + "type": "object" + }, + "MechStrucEnum": { + "description": "", + "enum": [ + "subway", + "coach", + "carriage", + "elevator", + "escalator", + "boat", + "train", + "car", + "bus" + ], + "title": "MechStrucEnum", + "type": "string" + }, + "MetaboliteQuantification": { + "additionalProperties": false, + "description": "This is used to link a metabolomics analysis workflow to a specific metabolite", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "title": "MetaboliteQuantification", + "type": "object" + }, + "MetabolomicsAnalysisActivity": { + "additionalProperties": false, + "description": "", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: 
NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):wfmb-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "description": "The instrument used to collect the data used in the analysis", + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "execution_resource", + "git_url", + "has_input", + "has_output", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "MetabolomicsAnalysisActivity", + "type": "object" + }, + "MetagenomeAnnotationActivity": { + "additionalProperties": false, + "description": "A workflow execution activity that provides functional and structural annotation of assembled metagenome contigs", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "gold_analysis_project_identifiers": { + "description": "identifiers for corresponding analysis project in GOLD", + "items": { + "type": "string" + }, + "pattern": "^GOLD:Ga[0-9]+$", + "type": "array" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):wfmgan-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "execution_resource", + "git_url", + "has_input", + "has_output", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "MetagenomeAnnotationActivity", + "type": "object" + }, + "MetagenomeAssembly": { + "additionalProperties": false, + "description": "A workflow execution activity that converts sequencing reads into an assembled metagenome.", + "properties": { + "asm_score": { + "description": "A score for comparing metagenomic assembly quality from same sample.", + "type": "number" + }, + "contig_bp": { + "description": "Total size in bp of all contigs.", + "type": "number" + }, + "contigs": { + "description": "The sum of the (length*log(length)) of all contigs, times some constant. 
Increase the contiguity, the score will increase", + "type": "number" + }, + "ctg_l50": { + "description": "Given a set of contigs, the L50 is defined as the sequence length of the shortest contig at 50% of the total genome length.", + "type": "number" + }, + "ctg_l90": { + "description": "The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all contigs of that length or longer contains at least 90% of the sum of the lengths of all contigs.", + "type": "number" + }, + "ctg_logsum": { + "description": "Maximum contig length.", + "type": "number" + }, + "ctg_max": { + "description": "Maximum contig length.", + "type": "number" + }, + "ctg_n50": { + "description": "Given a set of contigs, each with its own length, the N50 count is defined as the smallest number_of_contigs whose length sum makes up half of genome size.", + "type": "number" + }, + "ctg_n90": { + "description": "Given a set of contigs, each with its own length, the N90 count is defined as the smallest number of contigs whose length sum makes up 90% of genome size.", + "type": "number" + }, + "ctg_powsum": { + "description": "Powersum of all contigs is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).", + "type": "number" + }, + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "gap_pct": { + "description": "The gap size percentage of all scaffolds.", + "type": "number" + }, + "gc_avg": { + "description": "Average of GC content of all contigs.", + "type": "number" + }, + 
"gc_std": { + "description": "Standard deviation of GC content of all contigs.", + "type": "number" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):wfmgas-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "insdc_assembly_identifiers": { + "pattern": "^insdc.sra:[A-Z]+[0-9]+(\\.[0-9]+)?$", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "num_aligned_reads": { + "description": "The sequence count number of input reads aligned to assembled contigs.", + "type": "number" + }, + "num_input_reads": { + "description": "The sequence count number of input reads for assembly.", + "type": "number" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "scaf_bp": { + "description": "Total size in bp of all scaffolds.", + "type": "number" + }, + "scaf_l50": { + "description": "Given a set of scaffolds, the L50 is defined as the sequence length of the shortest scaffold at 50% of the total genome length.", + "type": "number" + }, + "scaf_l90": { + "description": "The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all scaffolds of that length or longer contains at least 90% of the sum of the lengths of all scaffolds.", + "type": "number" + }, + "scaf_l_gt50k": { + "description": 
"Total size in bp of all scaffolds greater than 50 KB.", + "type": "number" + }, + "scaf_logsum": { + "description": "The sum of the (length*log(length)) of all scaffolds, times some constant. Increase the contiguity, the score will increase", + "type": "number" + }, + "scaf_max": { + "description": "Maximum scaffold length.", + "type": "number" + }, + "scaf_n50": { + "description": "Given a set of scaffolds, each with its own length, the N50 count is defined as the smallest number of scaffolds whose length sum makes up half of genome size.", + "type": "number" + }, + "scaf_n90": { + "description": "Given a set of scaffolds, each with its own length, the N90 count is defined as the smallest number of scaffolds whose length sum makes up 90% of genome size.", + "type": "number" + }, + "scaf_n_gt50k": { + "description": "Total sequence count of scaffolds greater than 50 KB.", + "type": "number" + }, + "scaf_pct_gt50k": { + "description": "Total sequence size percentage of scaffolds greater than 50 KB.", + "type": "number" + }, + "scaf_powsum": { + "description": "Powersum of all scaffolds is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).", + "type": "number" + }, + "scaffolds": { + "description": "Total sequence count of all scaffolds.", + "type": "number" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "execution_resource", + "git_url", + "has_input", + "has_output", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "MetagenomeAssembly", + "type": "object" + }, + "MetaproteomicsAnalysisActivity": { + "additionalProperties": false, + "description": "", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):wfmp-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "description": "The instrument used to collect the data used in the analysis", + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "execution_resource", + "git_url", + "has_input", + "has_output", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "MetaproteomicsAnalysisActivity", + "type": "object" + }, + "MetatranscriptomeActivity": { + "additionalProperties": false, + "description": "A metatranscriptome activity that e.g. 
pools assembly and annotation activity.", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):wfmt-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "execution_resource", + "git_url", + "has_input", + "has_output", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "MetatranscriptomeActivity", + "type": "object" + }, + "MetatranscriptomeAnnotationActivity": { + "additionalProperties": false, + "description": "", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "gold_analysis_project_identifiers": { + "description": "identifiers for corresponding analysis project in GOLD", + "items": { + "type": "string" + }, + "pattern": "^GOLD:Ga[0-9]+$", + "type": "array" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):wfmtan-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "execution_resource", + "git_url", + "has_input", + "has_output", + "id", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "MetatranscriptomeAnnotationActivity", + "type": "object" + }, + "MetatranscriptomeAssembly": { + "additionalProperties": false, + "description": "", + "properties": { + "asm_score": { + "description": "A score for comparing metagenomic assembly quality from same sample.", + "type": "number" + }, + "contig_bp": { + "description": "Total size in bp of all contigs.", + "type": "number" + }, + "contigs": { + "description": "The sum of the (length*log(length)) of all contigs, times some constant. 
Increase the contiguity, the score will increase", + "type": "number" + }, + "ctg_l50": { + "description": "Given a set of contigs, the L50 is defined as the sequence length of the shortest contig at 50% of the total genome length.", + "type": "number" + }, + "ctg_l90": { + "description": "The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all contigs of that length or longer contains at least 90% of the sum of the lengths of all contigs.", + "type": "number" + }, + "ctg_logsum": { + "description": "Maximum contig length.", + "type": "number" + }, + "ctg_max": { + "description": "Maximum contig length.", + "type": "number" + }, + "ctg_n50": { + "description": "Given a set of contigs, each with its own length, the N50 count is defined as the smallest number_of_contigs whose length sum makes up half of genome size.", + "type": "number" + }, + "ctg_n90": { + "description": "Given a set of contigs, each with its own length, the N90 count is defined as the smallest number of contigs whose length sum makes up 90% of genome size.", + "type": "number" + }, + "ctg_powsum": { + "description": "Powersum of all contigs is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).", + "type": "number" + }, + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "gap_pct": { + "description": "The gap size percentage of all scaffolds.", + "type": "number" + }, + "gc_avg": { + "description": "Average of GC content of all contigs.", + "type": "number" + }, + 
"gc_std": { + "description": "Standard deviation of GC content of all contigs.", + "type": "number" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):wfmtas-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "insdc_assembly_identifiers": { + "pattern": "^insdc.sra:[A-Z]+[0-9]+(\\.[0-9]+)?$", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "num_aligned_reads": { + "description": "The sequence count number of input reads aligned to assembled contigs.", + "type": "number" + }, + "num_input_reads": { + "description": "The sequence count number of input reads for assembly.", + "type": "number" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "scaf_bp": { + "description": "Total size in bp of all scaffolds.", + "type": "number" + }, + "scaf_l50": { + "description": "Given a set of scaffolds, the L50 is defined as the sequence length of the shortest scaffold at 50% of the total genome length.", + "type": "number" + }, + "scaf_l90": { + "description": "The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all scaffolds of that length or longer contains at least 90% of the sum of the lengths of all scaffolds.", + "type": "number" + }, + "scaf_l_gt50k": { + "description": 
"Total size in bp of all scaffolds greater than 50 KB.", + "type": "number" + }, + "scaf_logsum": { + "description": "The sum of the (length*log(length)) of all scaffolds, times some constant. Increase the contiguity, the score will increase", + "type": "number" + }, + "scaf_max": { + "description": "Maximum scaffold length.", + "type": "number" + }, + "scaf_n50": { + "description": "Given a set of scaffolds, each with its own length, the N50 count is defined as the smallest number of scaffolds whose length sum makes up half of genome size.", + "type": "number" + }, + "scaf_n90": { + "description": "Given a set of scaffolds, each with its own length, the N90 count is defined as the smallest number of scaffolds whose length sum makes up 90% of genome size.", + "type": "number" + }, + "scaf_n_gt50k": { + "description": "Total sequence count of scaffolds greater than 50 KB.", + "type": "number" + }, + "scaf_pct_gt50k": { + "description": "Total sequence size percentage of scaffolds greater than 50 KB.", + "type": "number" + }, + "scaf_powsum": { + "description": "Powersum of all scaffolds is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).", + "type": "number" + }, + "scaffolds": { + "description": "Total sequence count of all scaffolds.", + "type": "number" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "execution_resource", + "git_url", + "has_input", + "has_output", + "id", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "MetatranscriptomeAssembly", + "type": "object" + }, + "NomAnalysisActivity": { + "additionalProperties": false, + "description": "", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):wfnom-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "description": "The instrument used to collect the data used in the analysis", + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "execution_resource", + "git_url", + "has_input", + "has_output", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "NomAnalysisActivity", + "type": "object" + }, + "OccupDocumentEnum": { + "description": "", + "enum": [ + "automated count", + "estimate", + "manual count", + "videos" + ], + "title": "OccupDocumentEnum", + "type": "string" + }, + "OmicsProcessing": { + "additionalProperties": false, + "description": "The methods and processes used to generate omics data from a biosample or organism.", + "properties": { + "add_date": { + "description": "The date on which the information was added to the 
database.", + "type": "string" + }, + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "chimera_check": { + "$ref": "#/$defs/TextValue", + "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences." + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "gold_sequencing_project_identifiers": { + "description": "identifiers for corresponding sequencing project in GOLD", + "items": { + "type": "string" + }, + "pattern": "^GOLD:Gp[0-9]+$", + "type": "array" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):omprc-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "insdc_experiment_identifiers": { + "items": { + "type": "string" + }, + "pattern": "^insdc.sra:(E|D|S)RX[0-9]{6,}$", + "type": "array" + }, + "instrument_name": { + "description": "The name of the instrument that was used for processing the sample.", + "type": "string" + }, + "mod_date": { + "description": "The last date on which the database information was modified.", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "ncbi_project_name": { + "type": "string" + }, + "nucl_acid_amp": { + "$ref": "#/$defs/TextValue", + "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids" + }, + "nucl_acid_ext": { + "$ref": "#/$defs/TextValue", + "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample" + }, + "omics_type": { + "$ref": "#/$defs/ControlledTermValue", + "description": "The type of omics data" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "pcr_cond": { + "$ref": "#/$defs/TextValue", + "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'" + }, + "pcr_primers": { + "$ref": "#/$defs/TextValue", + "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. 
This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters" + }, + "principal_investigator": { + "$ref": "#/$defs/PersonValue", + "description": "Principal Investigator who led the study and/or generated the dataset." + }, + "processing_institution": { + "$ref": "#/$defs/ProcessingInstitutionEnum", + "description": "The organization that processed the sample." + }, + "samp_vol_we_dna_ext": { + "$ref": "#/$defs/QuantityValue", + "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (mixs:0000001)." + }, + "seq_meth": { + "$ref": "#/$defs/TextValue", + "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)." + }, + "seq_quality_check": { + "$ref": "#/$defs/TextValue", + "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" + }, + "target_gene": { + "$ref": "#/$defs/TextValue", + "description": "Targeted gene or locus name for marker gene studies" + }, + "target_subfragment": { + "$ref": "#/$defs/TextValue", + "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA" + }, + "type": { + "description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", + "type": "string" + } + }, + "required": [ + "has_input" + ], + "title": "OmicsProcessing", + "type": "object" + }, + "OntologyClass": { + "additionalProperties": false, + "description": "", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "OntologyClass", + "type": "object" + }, + "OrganismCountEnum": { + "description": "", + "enum": [ + "ATP", + "MPN", + "other" + ], + "title": "OrganismCountEnum", + "type": "string" + }, + "OrthologyGroup": { + "additionalProperties": false, + "description": "A set of genes or gene products in which all members are orthologous", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "OrthologyGroup", + "type": "object" + }, + "OxyStatSampEnum": { + "description": "", + "enum": [ + "aerobic", + "anaerobic", + "other" + ], + "title": "OxyStatSampEnum", + "type": "string" + }, + "Pathway": { + "additionalProperties": false, + "description": "A pathway is a sequence of steps/reactions carried out by an organism or community of organisms", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "Pathway", + "type": "object" + }, + "PeptideQuantification": { + "additionalProperties": false, + "description": "This is used to link a metaproteomics analysis workflow to a specific peptide sequence and related information", + "title": "PeptideQuantification", + "type": "object" + }, + "Person": { + "additionalProperties": false, + "description": "represents a person, such as a researcher", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "Should be an ORCID. Specify in CURIE format. 
E.g ORCID:0000-1111-...", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "Person", + "type": "object" + }, + "PersonValue": { + "additionalProperties": false, + "description": "An attribute value representing a person", + "properties": { + "email": { + "description": "An email address for an entity such as a person. This should be the primarly email address used.", + "type": "string" + }, + "has_raw_value": { + "description": "The full name of the Investigator in format FIRST LAST.", + "type": "string" + }, + "name": { + "description": "The full name of the Investigator. It should follow the format FIRST [MIDDLE NAME| MIDDLE INITIAL] LAST, where MIDDLE NAME| MIDDLE INITIAL is optional.", + "type": "string" + }, + "orcid": { + "description": "The ORCID of a person.", + "type": "string" + }, + "profile_image_url": { + "description": "A url that points to an image of a person.", + "type": "string" + }, + "was_generated_by": { + "type": "string" + }, + "websites": { + "description": "A list of websites that are associated with the entity.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "title": "PersonValue", + "type": "object" + }, + "PlantGrowthMedEnum": { + "description": "", + "enum": [ + "other artificial liquid medium", + "other artificial solid medium", + "peat moss", + "perlite", + "pumice", + "sand", + "soil", + "vermiculite", + "water" + ], + "title": "PlantGrowthMedEnum", + "type": "string" + }, + "PlantSexEnum": { + "description": "", + "enum": [ + "Androdioecious", + "Androecious", + "Androgynous", + "Androgynomonoecious", + "Andromonoecious", + "Bisexual", + "Dichogamous", + "Diclinous", + "Dioecious", + "Gynodioecious", + "Gynoecious", + "Gynomonoecious", + "Hermaphroditic", + "Imperfect", + "Monoclinous", + "Monoecious", + "Perfect", + "Polygamodioecious", + "Polygamomonoecious", + "Polygamous", + "Protandrous", + 
"Protogynous", + "Subandroecious", + "Subdioecious", + "Subgynoecious", + "Synoecious", + "Trimonoecious", + "Trioecious", + "Unisexual" + ], + "title": "PlantSexEnum", + "type": "string" + }, + "ProcessingInstitutionEnum": { + "description": "", + "enum": [ + "UCSD", + "JGI", + "EMSL" + ], + "title": "ProcessingInstitutionEnum", + "type": "string" + }, + "ProfilePositionEnum": { + "description": "", + "enum": [ + "summit", + "shoulder", + "backslope", + "footslope", + "toeslope" + ], + "title": "ProfilePositionEnum", + "type": "string" + }, + "ProteinQuantification": { + "additionalProperties": false, + "description": "This is used to link a metaproteomics analysis workflow to a specific protein", + "title": "ProteinQuantification", + "type": "object" + }, + "QuadPosEnum": { + "description": "", + "enum": [ + "North side", + "West side", + "South side", + "East side" + ], + "title": "QuadPosEnum", + "type": "string" + }, + "QuantityValue": { + "additionalProperties": false, + "description": "A simple quantity, e.g. 
2cm", + "properties": { + "has_maximum_numeric_value": { + "description": "The maximum value part, expressed as number, of the quantity value when the value covers a range.", + "type": "number" + }, + "has_minimum_numeric_value": { + "description": "The minimum value part, expressed as number, of the quantity value when the value covers a range.", + "type": "number" + }, + "has_numeric_value": { + "description": "The number part of the quantity", + "type": "number" + }, + "has_raw_value": { + "description": "Unnormalized atomic string representation, should in syntax {number} {unit}", + "type": "string" + }, + "has_unit": { + "description": "The unit of the quantity", + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "title": "QuantityValue", + "type": "object" + }, + "Reaction": { + "additionalProperties": false, + "description": "An individual biochemical transformation carried out by a functional unit of an organism, in which a collection of substrates are transformed into a collection of products. Can also represent transporters", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "Reaction", + "type": "object" + }, + "ReactionActivity": { + "additionalProperties": false, + "description": "", + "properties": { + "material_input": { + "type": "string" + }, + "material_output": { + "type": "string" + }, + "reaction_aided_by": { + "$ref": "#/$defs/LabDevice" + }, + "reaction_temperature": { + "type": "string" + }, + "reaction_time": { + "$ref": "#/$defs/QuantityValue" + } + }, + "title": "ReactionActivity", + "type": "object" + }, + "ReactionParticipant": { + "additionalProperties": false, + "description": "Instances of this link a reaction to a chemical entity participant", + "title": "ReactionParticipant", + "type": "object" + }, + "ReadBasedTaxonomyAnalysisActivity": { + "additionalProperties": false, + "description": "A workflow execution activity that performs taxonomy classification using sequencing reads", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique 
identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):wfrbt-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "execution_resource", + "git_url", + "has_input", + "has_output", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "ReadBasedTaxonomyAnalysisActivity", + "type": "object" + }, + "ReadQcAnalysisActivity": { + "additionalProperties": false, + "description": "A workflow execution activity that performs quality control on raw Illumina reads including quality trimming, artifact removal, linker trimming, adapter trimming, spike-in removal, and human/cat/dog/mouse/microbe contaminant removal", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": 
"^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):wfrqc-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "input_base_count": { + "description": "The nucleotide base count number of input reads for QC analysis.", + "type": "number" + }, + "input_read_count": { + "description": "The sequence count number of input reads for QC analysis.", + "type": "number" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "output_base_count": { + "description": "After QC analysis nucleotide base count number.", + "type": "number" + }, + "output_read_count": { + "description": "After QC analysis sequence count number.", + "type": "number" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": 
"^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "execution_resource", + "git_url", + "has_input", + "has_output", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "ReadQcAnalysisActivity", + "type": "object" + }, + "RelSampLocEnum": { + "description": "", + "enum": [ + "edge of car", + "center of car", + "under a seat" + ], + "title": "RelSampLocEnum", + "type": "string" + }, + "RelToOxygenEnum": { + "description": "", + "enum": [ + "aerobe", + "anaerobe", + "facultative", + "microaerophilic", + "microanaerobe", + "obligate aerobe", + "obligate anaerobe" + ], + "title": "RelToOxygenEnum", + "type": "string" + }, + "RnaContTypeEnum": { + "description": "", + "enum": [ + "plate", + "tube" + ], + "title": "RnaContTypeEnum", + "type": "string" + }, + "RnaSampleFormatEnum": { + "description": "", + "enum": [ + "10 mM Tris-HCl", + "DNAStable", + "Ethanol", + "Low EDTA TE", + "MDA reaction buffer", + "PBS", + "Pellet", + "RNAStable", + "TE", + "Water", + "Gentegra-DNA", + "Gentegra-RNA" + ], + "title": "RnaSampleFormatEnum", + "type": "string" + }, + "RoomCondtEnum": { + "description": "", + "enum": [ + "new", + "visible wear", + "needs repair", + "damaged", + "rupture", + "visible signs of mold/mildew" + ], + "title": "RoomCondtEnum", + "type": "string" + }, + 
"RoomConnectedEnum": { + "description": "", + "enum": [ + "attic", + "bathroom", + "closet", + "conference room", + "elevator", + "examining room", + "hallway", + "kitchen", + "mail room", + "office", + "stairwell" + ], + "title": "RoomConnectedEnum", + "type": "string" + }, + "RoomLocEnum": { + "description": "", + "enum": [ + "corner room", + "interior room", + "exterior wall" + ], + "title": "RoomLocEnum", + "type": "string" + }, + "RoomSampPosEnum": { + "description": "", + "enum": [ + "north corner", + "south corner", + "west corner", + "east corner", + "northeast corner", + "northwest corner", + "southeast corner", + "southwest corner", + "center" + ], + "title": "RoomSampPosEnum", + "type": "string" + }, + "RoomTypeEnum": { + "description": "", + "enum": [ + "attic", + "bathroom", + "closet", + "conference room", + "elevator", + "examining room", + "hallway", + "kitchen", + "mail room", + "private office", + "open office", + "stairwell", + ",restroom", + "lobby", + "vestibule", + "mechanical or electrical room", + "data center", + "laboratory_wet", + "laboratory_dry", + "gymnasium", + "natatorium", + "auditorium", + "lockers", + "cafe", + "warehouse" + ], + "title": "RoomTypeEnum", + "type": "string" + }, + "SampCaptStatusEnum": { + "description": "", + "enum": [ + "active surveillance in response to an outbreak", + "active surveillance not initiated by an outbreak", + "farm sample", + "market sample", + "other" + ], + "title": "SampCaptStatusEnum", + "type": "string" + }, + "SampCollectPointEnum": { + "description": "", + "enum": [ + "well", + "test well", + "drilling rig", + "wellhead", + "separator", + "storage tank", + "other" + ], + "title": "SampCollectPointEnum", + "type": "string" + }, + "SampDisStageEnum": { + "description": "", + "enum": [ + "dissemination", + "growth and reproduction", + "infection", + "inoculation", + "penetration", + "other" + ], + "title": "SampDisStageEnum", + "type": "string" + }, + "SampFloorEnum": { + "description": "", + 
"enum": [ + "1st floor", + "2nd floor", + "basement", + "lobby" + ], + "title": "SampFloorEnum", + "type": "string" + }, + "SampMdEnum": { + "description": "", + "enum": [ + "DF", + "RT", + "KB", + "MSL", + "other" + ], + "title": "SampMdEnum", + "type": "string" + }, + "SampSubtypeEnum": { + "description": "", + "enum": [ + "oil phase", + "water phase", + "biofilm", + "not applicable", + "other" + ], + "title": "SampSubtypeEnum", + "type": "string" + }, + "SampWeatherEnum": { + "description": "", + "enum": [ + "clear sky", + "cloudy", + "foggy", + "hail", + "rain", + "snow", + "sleet", + "sunny", + "windy" + ], + "title": "SampWeatherEnum", + "type": "string" + }, + "SampleTypeEnum": { + "description": "", + "enum": [ + "soil", + "water_extract_soil" + ], + "title": "SampleTypeEnum", + "type": "string" + }, + "SamplingMethodEnum": { + "description": "", + "enum": [ + "weighing" + ], + "title": "SamplingMethodEnum", + "type": "string" + }, + "SeasonUseEnum": { + "description": "", + "enum": [ + "Spring", + "Summer", + "Fall", + "Winter" + ], + "title": "SeasonUseEnum", + "type": "string" + }, + "SedimentTypeEnum": { + "description": "", + "enum": [ + "biogenous", + "cosmogenous", + "hydrogenous", + "lithogenous" + ], + "title": "SedimentTypeEnum", + "type": "string" + }, + "ShadingDeviceCondEnum": { + "description": "", + "enum": [ + "damaged", + "needs repair", + "new", + "rupture", + "visible wear" + ], + "title": "ShadingDeviceCondEnum", + "type": "string" + }, + "ShadingDeviceTypeEnum": { + "description": "", + "enum": [ + "bahama shutters", + "exterior roll blind", + "gambrel awning", + "hood awning", + "porchroller awning", + "sarasota shutters", + "slatted aluminum", + "solid aluminum awning", + "sun screen", + "tree", + "trellis", + "venetian awning" + ], + "title": "ShadingDeviceTypeEnum", + "type": "string" + }, + "SoilHorizonEnum": { + "description": "", + "enum": [ + "O horizon", + "A horizon", + "E horizon", + "B horizon", + "C horizon", + "R layer", + 
"Permafrost" + ], + "title": "SoilHorizonEnum", + "type": "string" + }, + "SolventEnum": { + "description": "", + "enum": [ + "deionized_water", + "methanol", + "chloroform" + ], + "title": "SolventEnum", + "type": "string" + }, + "SpecificEnum": { + "description": "", + "enum": [ + "operation", + "as built", + "construction", + "bid", + "design", + "photos" + ], + "title": "SpecificEnum", + "type": "string" + }, + "SrDepEnvEnum": { + "description": "", + "enum": [ + "Lacustine", + "Fluvioldeltaic", + "Fluviomarine", + "Marine", + "other" + ], + "title": "SrDepEnvEnum", + "type": "string" + }, + "SrGeolAgeEnum": { + "description": "", + "enum": [ + "Archean", + "Cambrian", + "Carboniferous", + "Cenozoic", + "Cretaceous", + "Devonian", + "Jurassic", + "Mesozoic", + "Neogene", + "Ordovician", + "Paleogene", + "Paleozoic", + "Permian", + "Precambrian", + "Proterozoic", + "Silurian", + "Triassic", + "other" + ], + "title": "SrGeolAgeEnum", + "type": "string" + }, + "SrKerogTypeEnum": { + "description": "", + "enum": [ + "Type I", + "Type II", + "Type III", + "Type IV", + "other" + ], + "title": "SrKerogTypeEnum", + "type": "string" + }, + "SrLithologyEnum": { + "description": "", + "enum": [ + "Clastic", + "Carbonate", + "Coal", + "Biosilicieous", + "other" + ], + "title": "SrLithologyEnum", + "type": "string" + }, + "Study": { + "additionalProperties": false, + "description": "A study summarizes the overall goal of a research initiative and outlines the key objective of its underlying projects.", + "properties": { + "abstract": { + "description": "The abstract of manuscript/grant associated with the entity; i.e., a summary of the resource.", + "type": "string" + }, + "alternative_descriptions": { + "description": "A list of alternative descriptions for the entity. 
The distinction between description and alternative descriptions is application-specific.", + "items": { + "type": "string" + }, + "type": "array" + }, + "alternative_identifiers": { + "description": "Unique identifier for a study submitted to additional resources. Matches that which has been submitted to NMDC", + "items": { + "type": "string" + }, + "type": "array" + }, + "alternative_names": { + "description": "A list of alternative names used to refer to the entity. The distinction between name and alternative names is application-specific.", + "items": { + "type": "string" + }, + "type": "array" + }, + "alternative_titles": { + "description": "A list of alternative titles for the entity. The distinction between title and alternative titles is application-specific.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "A brief summary that details the study you're submitted to NMDC", + "type": "string" + }, + "doi": { + "$ref": "#/$defs/AttributeValue", + "description": "The dataset citation for this study" + }, + "ecosystem": { + "description": "An ecosystem is a combination of a physical environment (abiotic factors) and all the organisms (biotic factors) that interact with this environment. Ecosystem is in position 1/5 in a GOLD path.", + "type": "string" + }, + "ecosystem_category": { + "description": "Ecosystem categories represent divisions within the ecosystem based on specific characteristics of the environment from where an organism or sample is isolated. Ecosystem category is in position 2/5 in a GOLD path.", + "type": "string" + }, + "ecosystem_subtype": { + "description": "Ecosystem subtypes represent further subdivision of Ecosystem types into more distinct subtypes. Ecosystem subtype is in position 4/5 in a GOLD path.", + "type": "string" + }, + "ecosystem_type": { + "description": "Ecosystem types represent things having common characteristics within the Ecosystem Category. 
These common characteristics based grouping is still broad but specific to the characteristics of a given environment. Ecosystem type is in position 3/5 in a GOLD path.", + "type": "string" + }, + "emsl_proposal_doi": { + "description": "The DOI for the EMSL awarded study that relates to the NMDC submitted study", + "type": "string" + }, + "emsl_proposal_identifier": { + "description": "The proposal number assigned to the EMSL awarded study that relates to that which is represented in NMDC.", + "type": "string" + }, + "ess_dive_datasets": { + "description": "List of ESS-DIVE dataset DOIs", + "items": { + "type": "string" + }, + "type": "array" + }, + "funding_sources": { + "items": { + "type": "string" + }, + "type": "array" + }, + "gold_study_identifiers": { + "description": "identifiers for corresponding project(s) in GOLD", + "items": { + "type": "string" + }, + "pattern": "^GOLD:Gs[0-9]+$", + "type": "array" + }, + "has_credit_associations": { + "description": "This slot links a study to a credit association. The credit association will be linked to a person value and to a CRediT Contributor Roles term. Overall semantics: person should get credit X for their participation in the study", + "items": { + "$ref": "#/$defs/CreditAssociation" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):sty-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "mgnify_project_identifiers": { + "description": "identifiers for corresponding project in MGnify", + "pattern": "^mgnify.proj:[A-Z]+[0-9]+$", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "objective": { + "description": "The scientific objectives associated with the entity. 
It SHOULD correspond to scientific norms for objectives field in a structured abstract.", + "type": "string" + }, + "principal_investigator": { + "$ref": "#/$defs/PersonValue", + "description": "Principal Investigator who led the study and/or generated the dataset." + }, + "publications": { + "description": "A list of publications that are associated with the entity. The publications SHOULD be given using an identifier, such as a DOI or Pubmed ID, if possible.", + "items": { + "type": "string" + }, + "type": "array" + }, + "related_identifiers": { + "description": "Unique identifier for a study submitted to additional resources. Similar, but not necessarily identical to that which has been submitted to NMDC", + "type": "string" + }, + "relevant_protocols": { + "items": { + "type": "string" + }, + "type": "array" + }, + "specific_ecosystem": { + "description": "Specific ecosystems represent specific features of the environment like aphotic zone in an ocean or gastric mucosa within a host digestive system. Specific ecosystem is in position 5/5 in a GOLD path.", + "type": "string" + }, + "study_image": { + "description": "Links a study to one or more images.", + "items": { + "$ref": "#/$defs/ImageValue" + }, + "type": "array" + }, + "title": { + "description": "A name given to the entity that differs from the name/label programmatically assigned to it. For example, when extracting study information for GOLD, the GOLD system has assigned a name/label. However, for display purposes, we may also wish the capture the title of the proposal that was used to fund the study.", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "websites": { + "description": "A list of websites that are associated with the entity.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "title": "Study", + "type": "object" + }, + "SubstructureTypeEnum": { + "description": "", + "enum": [ + "crawlspace", + "slab on grade", + "basement" + ], + "title": "SubstructureTypeEnum", + "type": "string" + }, + "SurfAirContEnum": { + "description": "", + "enum": [ + "dust", + "organic matter", + "particulate matter", + "volatile organic compounds", + "biological contaminants", + "radon", + "nutrients", + "biocides" + ], + "title": "SurfAirContEnum", + "type": "string" + }, + "SurfMaterialEnum": { + "description": "", + "enum": [ + "adobe", + "carpet", + "cinder blocks", + "concrete", + "hay bales", + "glass", + "metal", + "paint", + "plastic", + "stainless steel", + "stone", + "stucco", + "tile", + "vinyl", + "wood" + ], + "title": "SurfMaterialEnum", + "type": "string" + }, + "TextValue": { + "additionalProperties": false, + "description": "A basic string value", + "properties": { + "has_raw_value": { + "description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", + "type": "string" + }, + "language": { + "description": "Should use ISO 639-1 code e.g. 
\"en\", \"fr\"", + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "title": "TextValue", + "type": "object" + }, + "TidalStageEnum": { + "description": "", + "enum": [ + "low tide", + "ebb tide", + "flood tide", + "high tide" + ], + "title": "TidalStageEnum", + "type": "string" + }, + "TillageEnum": { + "description": "", + "enum": [ + "drill", + "cutting disc", + "ridge till", + "strip tillage", + "zonal tillage", + "chisel", + "tined", + "mouldboard", + "disc plough" + ], + "title": "TillageEnum", + "type": "string" + }, + "TimestampValue": { + "additionalProperties": false, + "description": "A value that is a timestamp. The range should be ISO-8601", + "properties": { + "has_raw_value": { + "description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "title": "TimestampValue", + "type": "object" + }, + "TrainLineEnum": { + "description": "", + "enum": [ + "red", + "green", + "orange" + ], + "title": "TrainLineEnum", + "type": "string" + }, + "TrainStatLocEnum": { + "description": "", + "enum": [ + "south station above ground", + "south station underground", + "south station amtrak", + "forest hills", + "riverside" + ], + "title": "TrainStatLocEnum", + "type": "string" + }, + "TrainStopLocEnum": { + "description": "", + "enum": [ + "end", + "mid", + "downtown" + ], + "title": "TrainStopLocEnum", + "type": "string" + }, + "UrlValue": { + "additionalProperties": false, + "description": "A value that is a string that conforms to URL syntax", + "properties": { + "has_raw_value": { + "description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. 
\"2 cm\" or \"2-4 cm\"", + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "title": "UrlValue", + "type": "object" + }, + "VisMediaEnum": { + "description": "", + "enum": [ + "photos", + "videos", + "commonly of the building", + "site context (adjacent buildings, vegetation, terrain, streets)", + "interiors", + "equipment", + "3D scans" + ], + "title": "VisMediaEnum", + "type": "string" + }, + "WallConstTypeEnum": { + "description": "", + "enum": [ + "frame construction", + "joisted masonry", + "light noncombustible", + "masonry noncombustible", + "modified fire resistive", + "fire resistive" + ], + "title": "WallConstTypeEnum", + "type": "string" + }, + "WallFinishMatEnum": { + "description": "", + "enum": [ + "plaster", + "gypsum plaster", + "veneer plaster", + "gypsum board", + "tile", + "terrazzo", + "stone facing", + "acoustical treatment", + "wood", + "metal", + "masonry" + ], + "title": "WallFinishMatEnum", + "type": "string" + }, + "WallLocEnum": { + "description": "", + "enum": [ + "north", + "south", + "east", + "west" + ], + "title": "WallLocEnum", + "type": "string" + }, + "WallSurfTreatmentEnum": { + "description": "", + "enum": [ + "painted", + "wall paper", + "no treatment", + "paneling", + "stucco", + "fabric" + ], + "title": "WallSurfTreatmentEnum", + "type": "string" + }, + "WallTextureEnum": { + "description": "", + "enum": [ + "crows feet", + "crows-foot stomp", + "double skip", + "hawk and trowel", + "knockdown", + "popcorn", + "orange peel", + "rosebud stomp", + "Santa-Fe texture", + "skip trowel", + "smooth", + "stomp knockdown", + "swirl" + ], + "title": "WallTextureEnum", + "type": "string" + }, + "WaterFeatTypeEnum": { + "description": "", + "enum": [ + "fountain", + "pool", + "standing feature", + "stream", + "waterfall" + ], + "title": "WaterFeatTypeEnum", + "type": "string" + }, + "WeekdayEnum": { + "description": "", + "enum": [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", 
+ "Sunday" + ], + "title": "WeekdayEnum", + "type": "string" + }, + "WindowCondEnum": { + "description": "", + "enum": [ + "damaged", + "needs repair", + "new", + "rupture", + "visible wear" + ], + "title": "WindowCondEnum", + "type": "string" + }, + "WindowCoverEnum": { + "description": "", + "enum": [ + "blinds", + "curtains", + "none" + ], + "title": "WindowCoverEnum", + "type": "string" + }, + "WindowHorizPosEnum": { + "description": "", + "enum": [ + "left", + "middle", + "right" + ], + "title": "WindowHorizPosEnum", + "type": "string" + }, + "WindowLocEnum": { + "description": "", + "enum": [ + "north", + "south", + "east", + "west" + ], + "title": "WindowLocEnum", + "type": "string" + }, + "WindowMatEnum": { + "description": "", + "enum": [ + "clad", + "fiberglass", + "metal", + "vinyl", + "wood" + ], + "title": "WindowMatEnum", + "type": "string" + }, + "WindowTypeEnum": { + "description": "", + "enum": [ + "single-hung sash window", + "horizontal sash window", + "fixed window" + ], + "title": "WindowTypeEnum", + "type": "string" + }, + "WindowVertPosEnum": { + "description": "", + "enum": [ + "bottom", + "middle", + "top", + "low", + "high" + ], + "title": "WindowVertPosEnum", + "type": "string" + }, + "WorkflowExecutionActivity": { + "additionalProperties": false, + "description": "Represents an instance of an execution of a particular workflow", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": 
"string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):wf-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[A-Za-z0-9]{1,})*(_[A-Za-z0-9_\\.-]+)?$", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "execution_resource", + "git_url", + "has_input", + "has_output", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "WorkflowExecutionActivity", + "type": "object" + } + }, + "$id": "https://microbiomedata/schema", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": false, + "metamodel_version": "1.7.0", + "properties": { + "activity_set": { + "description": "This property links a database object to the set of workflow activities.", + "items": { + "$ref": "#/$defs/WorkflowExecutionActivity" + }, + "type": "array" + }, + "biosample_set": { + "description": "This property links a database object to the set of samples within it.", + "items": { + "$ref": "#/$defs/Biosample" + }, + "type": "array" + }, + "collecting_biosamples_from_site_set": { + "items": { + "$ref": "#/$defs/CollectingBiosamplesFromSite" + }, + "type": "array" + }, + "data_object_set": { + "description": "This property links a database object to the set of data objects within it.", + "items": { + "$ref": "#/$defs/DataObject" + }, + "type": "array" + }, + "dissolving_activity_set": { + "items": { + "$ref": "#/$defs/DissolvingActivity" + }, + "type": "array" + }, + "field_research_site_set": { + "items": { + "$ref": "#/$defs/FieldResearchSite" + }, + "type": "array" + }, + "functional_annotation_set": { + "description": "This property links a database object to the set of all functional annotations", + "items": { + "$ref": "#/$defs/FunctionalAnnotation" + }, + "type": "array" + }, + "genome_feature_set": { + "description": "This property links a database object to the set of all features", + "items": { + "$ref": 
"#/$defs/GenomeFeature" + }, + "type": "array" + }, + "mags_activity_set": { + "description": "This property links a database object to the set of MAGs analysis activities.", + "items": { + "$ref": "#/$defs/MagsAnalysisActivity" + }, + "type": "array" + }, + "material_sample_set": { + "items": { + "$ref": "#/$defs/MaterialSample" + }, + "type": "array" + }, + "material_sampling_activity_set": { + "items": { + "$ref": "#/$defs/MaterialSamplingActivity" + }, + "type": "array" + }, + "metabolomics_analysis_activity_set": { + "description": "This property links a database object to the set of metabolomics analysis activities.", + "items": { + "$ref": "#/$defs/MetabolomicsAnalysisActivity" + }, + "type": "array" + }, + "metagenome_annotation_activity_set": { + "description": "This property links a database object to the set of metagenome annotation activities.", + "items": { + "$ref": "#/$defs/MetagenomeAnnotationActivity" + }, + "type": "array" + }, + "metagenome_assembly_set": { + "description": "This property links a database object to the set of metagenome assembly activities.", + "items": { + "$ref": "#/$defs/MetagenomeAssembly" + }, + "type": "array" + }, + "metaproteomics_analysis_activity_set": { + "description": "This property links a database object to the set of metaproteomics analysis activities.", + "items": { + "$ref": "#/$defs/MetaproteomicsAnalysisActivity" + }, + "type": "array" + }, + "metatranscriptome_activity_set": { + "description": "TODO", + "items": { + "$ref": "#/$defs/MetatranscriptomeActivity" + }, + "type": "array" + }, + "nom_analysis_activity_set": { + "description": "This property links a database object to the set of natural organic matter (NOM) analysis activities.", + "items": { + "$ref": "#/$defs/NomAnalysisActivity" + }, + "type": "array" + }, + "omics_processing_set": { + "description": "This property links a database object to the set of omics processings within it.", + "items": { + "$ref": "#/$defs/OmicsProcessing" + }, + "type": 
"array" + }, + "reaction_activity_set": { + "items": { + "$ref": "#/$defs/ReactionActivity" + }, + "type": "array" + }, + "read_based_taxonomy_analysis_activity_set": { + "description": "This property links a database object to the set of read based analysis activities.", + "items": { + "$ref": "#/$defs/ReadBasedTaxonomyAnalysisActivity" + }, + "type": "array" + }, + "read_qc_analysis_activity_set": { + "description": "This property links a database object to the set of read QC analysis activities.", + "items": { + "$ref": "#/$defs/ReadQcAnalysisActivity" + }, + "type": "array" + }, + "study_set": { + "description": "This property links a database object to the set of studies within it.", + "items": { + "$ref": "#/$defs/Study" + }, + "type": "array" + } + }, + "title": "NMDC", + "type": "object", + "version": "7.0.0" +} diff --git a/nmdc_runtime/nmdc.schema.json b/nmdc_runtime/nmdc.schema.json new file mode 100644 index 00000000..3a18b598 --- /dev/null +++ b/nmdc_runtime/nmdc.schema.json @@ -0,0 +1,3629 @@ +{ + "$defs": { + "Activity": { + "additionalProperties": false, + "description": "a provence-generating activity", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "$ref": "#/$defs/Agent" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "Activity", + "type": "object" + }, + "Agent": { + "additionalProperties": false, + "description": "a provence-generating agent", + "properties": { + "acted_on_behalf_of": { + "$ref": "#/$defs/Agent" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [], + "title": "Agent", + "type": "object" + }, + "AttributeValue": { + "additionalProperties": false, + "description": "The value for any value of a attribute for a sample. This object can hold both the un-normalized atomic value and the structured value", + "properties": { + "has_raw_value": { + "description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", + "type": "string" + }, + "type": { + "description": "An optional string that specified the type of object.", + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "required": [], + "title": "AttributeValue", + "type": "object" + }, + "Biosample": { + "additionalProperties": false, + "description": "A material sample. It may be environmental (encompassing many organisms) or isolate or tissue. 
An environmental sample containing genetic material from multiple individuals is commonly referred to as a biosample.", + "properties": { + "GOLD_sample_identifiers": { + "description": "identifiers for corresponding sample in GOLD", + "items": { + "type": "string" + }, + "pattern": "^GOLD:Gb[0-9]+$", + "type": "array" + }, + "INSDC_biosample_identifiers": { + "description": "identifiers for corresponding sample in INSDC", + "items": { + "type": "string" + }, + "pattern": "^biosample:SAM[NED]([A-Z])?[0-9]+$", + "type": "array" + }, + "INSDC_secondary_sample_identifiers": { + "description": "secondary identifiers for corresponding sample in INSDC", + "items": { + "type": "string" + }, + "pattern": "^biosample:(E|D|S)RS[0-9]{6,}$", + "type": "array" + }, + "add_date": { + "description": "The date on which the information was added to the database.", + "type": "string" + }, + "agrochem_addition": { + "$ref": "#/$defs/QuantityValue", + "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications" + }, + "al_sat": { + "$ref": "#/$defs/QuantityValue", + "description": "Aluminum saturation (esp. 
For tropical soils)", + "pattern": "\\d+[.\\d+] \\S+" + }, + "al_sat_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining Al saturation" + }, + "alkalinity": { + "$ref": "#/$defs/QuantityValue", + "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", + "pattern": "\\d+[.\\d+] \\S+" + }, + "alkalinity_method": { + "$ref": "#/$defs/TextValue", + "description": "Method used for alkalinity measurement" + }, + "alkyl_diethers": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of alkyl diethers ", + "pattern": "\\d+[.\\d+] \\S+" + }, + "alt": { + "$ref": "#/$defs/QuantityValue", + "description": "Altitude is a term used to identify heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earthbs surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", + "pattern": "\\d+[.\\d+] \\S+" + }, + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "aminopept_act": { + "$ref": "#/$defs/QuantityValue", + "description": "Measurement of aminopeptidase activity", + "pattern": "\\d+[.\\d+] \\S+" + }, + "ammonium": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of ammonium in the sample", + "pattern": "\\d+[.\\d+] \\S+" + }, + "annual_precpt": { + "$ref": "#/$defs/QuantityValue", + "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps.", + "pattern": "\\d+[.\\d+] \\S+" + }, + "annual_temp": { + "$ref": "#/$defs/QuantityValue", + "description": "Mean annual 
temperature", + "pattern": "\\d+[.\\d+] \\S+" + }, + "bacteria_carb_prod": { + "$ref": "#/$defs/QuantityValue", + "description": "Measurement of bacterial carbon production", + "pattern": "\\d+[.\\d+] \\S+" + }, + "bishomohopanol": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of bishomohopanol ", + "pattern": "\\d+[.\\d+] \\S+" + }, + "bromide": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of bromide ", + "pattern": "\\d+[.\\d+] \\S+" + }, + "calcium": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of calcium in the sample", + "pattern": "\\d+[.\\d+] \\S+" + }, + "carb_nitro_ratio": { + "$ref": "#/$defs/QuantityValue", + "description": "Ratio of amount or concentrations of carbon to nitrogen", + "pattern": "\\d+[.\\d+] \\S+" + }, + "chem_administration": { + "$ref": "#/$defs/ControlledTermValue", + "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi" + }, + "chloride": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of chloride in the sample", + "pattern": "\\d+[.\\d+] \\S+" + }, + "chlorophyll": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of chlorophyll", + "pattern": "\\d+[.\\d+] \\S+" + }, + "collection_date": { + "$ref": "#/$defs/TimestampValue", + "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant" + }, + "community": { + "type": "string" + }, + "crop_rotation": { + "$ref": "#/$defs/TextValue", + "description": "Whether or not crop is rotated, and if yes, rotation schedule" + }, + "cur_land_use": { + "$ref": "#/$defs/TextValue", + "description": "Present state of sample site", + "pattern": "[cities|farmstead|industrial areas|roads\\/railroads|rock|sand|gravel|mudflats|salt flats|badlands|permanent snow or ice|saline seeps|mines\\/quarries|oil waste areas|small grains|row crops|vegetable crops|horticultural plants (e.g. tulips)|marshlands (grass,sedges,rushes)|tundra (mosses,lichens)|rangeland|pastureland (grasslands used for livestock grazing)|hayland|meadows (grasses,alfalfa,fescue,bromegrass,timothy)|shrub land (e.g. mesquite,sage\\-brush,creosote bush,shrub oak,eucalyptus)|successional shrub land (tree saplings,hazels,sumacs,chokecherry,shrub dogwoods,blackberries)|shrub crops (blueberries,nursery ornamentals,filberts)|vine crops (grapes)|conifers (e.g. pine,spruce,fir,cypress)|hardwoods (e.g. oak,hickory,elm,aspen)|intermixed hardwood and conifers|tropical (e.g. 
mangrove,palms)|rainforest (evergreen forest receiving >406 cm annual rainfall)|swamp (permanent or semi\\-permanent water body dominated by woody plants)|crop trees (nuts,fruit,christmas trees,nursery trees)]" + }, + "cur_vegetation": { + "$ref": "#/$defs/TextValue", + "description": "Vegetation classification from one or more standard classification systems, or agricultural crop" + }, + "cur_vegetation_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in vegetation classification " + }, + "density": { + "$ref": "#/$defs/QuantityValue", + "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", + "pattern": "\\d+[.\\d+] \\S+" + }, + "depth": { + "$ref": "#/$defs/QuantityValue", + "description": "Depth is defined as the vertical distance below local surface, e.g. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples" + }, + "depth2": { + "$ref": "#/$defs/QuantityValue" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "diss_carb_dioxide": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", + "pattern": "\\d+[.\\d+] \\S+" + }, + "diss_hydrogen": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of dissolved hydrogen", + "pattern": "\\d+[.\\d+] \\S+" + }, + "diss_inorg_carb": { + "$ref": "#/$defs/QuantityValue", + "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", + "pattern": "\\d+[.\\d+] \\S+" + }, + "diss_inorg_phosp": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of dissolved inorganic phosphorus in the sample", + "pattern": "\\d+[.\\d+] \\S+" + }, + "diss_org_carb": { + "$ref": 
"#/$defs/QuantityValue", + "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", + "pattern": "\\d+[.\\d+] \\S+" + }, + "diss_org_nitro": { + "$ref": "#/$defs/QuantityValue", + "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", + "pattern": "\\d+[.\\d+] \\S+" + }, + "diss_oxygen": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of dissolved oxygen", + "pattern": "\\d+[.\\d+] \\S+" + }, + "drainage_class": { + "$ref": "#/$defs/TextValue", + "description": "Drainage classification from a standard system such as the USDA system", + "pattern": "[very poorly|poorly|somewhat poorly|moderately well|well|excessively drained]" + }, + "ecosystem": { + "description": "An ecosystem is a combination of a physical environment (abiotic factors) and all the organisms (biotic factors) that interact with this environment. Ecosystem is in position 1/5 in a GOLD path.", + "type": "string" + }, + "ecosystem_category": { + "description": "Ecosystem categories represent divisions within the ecosystem based on specific characteristics of the environment from where an organism or sample is isolated. Ecosystem category is in position 2/5 in a GOLD path.", + "type": "string" + }, + "ecosystem_subtype": { + "description": "Ecosystem subtypes represent further subdivision of Ecosystem types into more distinct subtypes. Ecosystem subtype is in position 4/5 in a GOLD path.", + "type": "string" + }, + "ecosystem_type": { + "description": "Ecosystem types represent things having common characteristics within the Ecosystem Category. These common characteristics based grouping is still broad but specific to the characteristics of a given environment. 
Ecosystem type is in position 3/5 in a GOLD path.", + "type": "string" + }, + "elev": { + "$ref": "#/$defs/QuantityValue", + "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", + "pattern": "\\d+[.\\d+] \\S+" + }, + "env_broad_scale": { + "$ref": "#/$defs/ControlledTermValue", + "description": "In this field, report which major environmental system your sample or specimen came from. The systems identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. were you in the desert or a rainforest?). We recommend using subclasses of ENVOUs biome class: http://purl.obolibrary.org/obo/ENVO_00000428. Format (one term): termLabel [termID], Format (multiple terms): termLabel [termID]|termLabel [termID]|termLabel [termID]. Example: Annotating a water sample from the photic zone in middle of the Atlantic Ocean, consider: oceanic epipelagic zone biome [ENVO:01000033]. Example: Annotating a sample from the Amazon rainforest consider: tropical moist broadleaf forest biome [ENVO:01000228]. If needed, request new terms on the ENVO tracker, identified here: http://www.obofoundry.org/ontology/envo.html", + "pattern": ".* \\S+:\\S+" + }, + "env_local_scale": { + "$ref": "#/$defs/ControlledTermValue", + "description": "In this field, report the entity or entities which are in your sample or specimenUs local vicinity and which you believe have significant causal influences on your sample or specimen. Please use terms that are present in ENVO and which are of smaller spatial grain than your entry for env_broad_scale. Format (one term): termLabel [termID]; Format (multiple terms): termLabel [termID]|termLabel [termID]|termLabel [termID]. 
Example: Annotating a pooled sample taken from various vegetation layers in a forest consider: canopy [ENVO:00000047]|herb and fern layer [ENVO:01000337]|litter layer [ENVO:01000338]|understory [01000335]|shrub layer [ENVO:01000336]. If needed, request new terms on the ENVO tracker, identified here: http://www.obofoundry.org/ontology/envo.html", + "pattern": ".* \\S+:\\S+" + }, + "env_medium": { + "$ref": "#/$defs/ControlledTermValue", + "description": "In this field, report which environmental material or materials (pipe separated) immediately surrounded your sample or specimen prior to sampling, using one or more subclasses of ENVOUs environmental material class: http://purl.obolibrary.org/obo/ENVO_00010483. Format (one term): termLabel [termID]; Format (multiple terms): termLabel [termID]|termLabel [termID]|termLabel [termID]. Example: Annotating a fish swimming in the upper 100 m of the Atlantic Ocean, consider: ocean water [ENVO:00002151]. Example: Annotating a duck on a pond consider: pond water [ENVO:00002228]|air ENVO_00002005. If needed, request new terms on the ENVO tracker, identified here: http://www.obofoundry.org/ontology/envo.html", + "pattern": ".* \\S+:\\S+" + }, + "env_package": { + "$ref": "#/$defs/TextValue", + "description": "MIxS extension for reporting of measurements and observations obtained from one or more of the environments where the sample was obtained. All environmental packages listed here are further defined in separate subtables. 
By giving the name of the environmental package, a selection of fields can be made from the subtables and can be reported", + "pattern": "[air|built environment|host\\-associated|human\\-associated|human\\-skin|human\\-oral|human\\-gut|human\\-vaginal|hydrocarbon resources\\-cores|hydrocarbon resources\\-fluids\\/swabs|microbial mat\\/biofilm|misc environment|plant\\-associated|sediment|soil|wastewater\\/sludge|water]" + }, + "extreme_event": { + "$ref": "#/$defs/TimestampValue", + "description": "Unusual physical events that may have affected microbial populations" + }, + "fao_class": { + "$ref": "#/$defs/TextValue", + "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups", + "pattern": "[Acrisols|Andosols|Arenosols|Cambisols|Chernozems|Ferralsols|Fluvisols|Gleysols|Greyzems|Gypsisols|Histosols|Kastanozems|Lithosols|Luvisols|Nitosols|Phaeozems|Planosols|Podzols|Podzoluvisols|Rankers|Regosols|Rendzinas|Solonchaks|Solonetz|Vertisols|Yermosols]" + }, + "fire": { + "$ref": "#/$defs/TimestampValue", + "description": "Historical and/or physical evidence of fire" + }, + "flooding": { + "$ref": "#/$defs/TimestampValue", + "description": "Historical and/or physical evidence of flooding" + }, + "geo_loc_name": { + "$ref": "#/$defs/TextValue", + "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (v 1.512) (http://purl.bioontology.org/ontology/GAZ)" + }, + "glucosidase_act": { + "$ref": "#/$defs/QuantityValue", + "description": "Measurement of glucosidase activity", + "pattern": "\\d+[.\\d+] \\S+" + }, + "habitat": { + "type": "string" + }, + "heavy_metals": { + "$ref": "#/$defs/QuantityValue", + "description": "Heavy metals present and concentrationsany drug used by subject and the frequency of usage; can include multiple heavy metals and concentrations" + }, + "heavy_metals_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining heavy metals" + }, + "horizon": { + "$ref": "#/$defs/TextValue", + "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath", + "pattern": "[O horizon|A horizon|E horizon|B horizon|C horizon|R layer|Permafrost]" + }, + "horizon_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining the horizon" + }, + "host_name": { + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "identifier": { + "type": "string" + }, + "lat_lon": { + "$ref": "#/$defs/GeolocationValue", + "description": "This is currently a required field but it's not clear if this should be required for human hosts", + "pattern": "\\d+[.\\d+] \\d+[.\\d+]" + }, + "link_addit_analys": { + "$ref": "#/$defs/TextValue", + "description": "Link to additional analysis results performed on the sample" + }, + "link_class_info": { + "$ref": "#/$defs/TextValue", + "description": "Link to digitized soil maps or other soil classification information" + }, + "link_climate_info": { + "$ref": "#/$defs/TextValue", + "description": "Link to climate resource" + }, + "local_class": { + "$ref": "#/$defs/TextValue", + "description": "Soil classification based on local soil classification system" + }, + "local_class_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining the local soil classification " + }, + "location": { + "type": "string" + }, + "magnesium": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of magnesium in the sample", + "pattern": "\\d+[.\\d+] \\S+" + }, + "mean_frict_vel": { + "$ref": "#/$defs/QuantityValue", + "description": "Measurement of mean friction velocity", + "pattern": "\\d+[.\\d+] \\S+" + }, + "mean_peak_frict_vel": { + "$ref": "#/$defs/QuantityValue", + "description": "Measurement of mean peak friction velocity", + "pattern": "\\d+[.\\d+] \\S+" + }, + "microbial_biomass": { + "$ref": "#/$defs/QuantityValue", + "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. 
If you keep this, you would need to have correction factors used for conversion to the final units", + "pattern": "\\d+[.\\d+] \\S+" + }, + "microbial_biomass_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining microbial biomass" + }, + "misc_param": { + "$ref": "#/$defs/QuantityValue", + "description": "Any other measurement performed or parameter collected, that is not listed here" + }, + "mod_date": { + "description": "The last date on which the database information was modified.", + "type": "string" + }, + "n_alkanes": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of n-alkanes; can include multiple n-alkanes" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "ncbi_taxonomy_name": { + "type": "string" + }, + "nitrate": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of nitrate in the sample", + "pattern": "\\d+[.\\d+] \\S+" + }, + "nitrite": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of nitrite in the sample", + "pattern": "\\d+[.\\d+] \\S+" + }, + "org_matter": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of organic matter ", + "pattern": "\\d+[.\\d+] \\S+" + }, + "org_nitro": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of organic nitrogen", + "pattern": "\\d+[.\\d+] \\S+" + }, + "organism_count": { + "$ref": "#/$defs/QuantityValue", + "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)" + }, + "oxy_stat_samp": { + "$ref": "#/$defs/TextValue", + "description": "Oxygenation status of sample", + "pattern": "[aerobic|anaerobic|other]" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "part_org_carb": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of particulate organic carbon", + "pattern": "\\d+[.\\d+] \\S+" + }, + "perturbation": { + "$ref": "#/$defs/TextValue", + "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types" + }, + "petroleum_hydrocarb": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of petroleum hydrocarbon", + "pattern": "\\d+[.\\d+] \\S+" + }, + "ph": { + "$ref": "#/$defs/QuantityValue", + "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", + "pattern": "\\d+[.\\d+]" + }, + "ph_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining ph" + }, + "phaeopigments": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of phaeopigments; can include multiple phaeopigments" + }, + "phosplipid_fatt_acid": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of phospholipid fatty acids; can include multiple values" + }, + "pool_dna_extracts": { + "$ref": "#/$defs/TextValue", + "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given" + }, + "potassium": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of potassium in the sample", + "pattern": "\\d+[.\\d+] \\S+" + }, + "pressure": { + "$ref": "#/$defs/QuantityValue", + "description": "Pressure to which the sample is subject, in atmospheres", + "pattern": "\\d+[.\\d+] \\S+" + }, + "previous_land_use": { + "$ref": "#/$defs/TextValue", + "description": "Previous land use and dates" + }, + "previous_land_use_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining previous land use and dates" + }, + "profile_position": { + "$ref": "#/$defs/TextValue", + "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas", + "pattern": "[summit|shoulder|backslope|footslope|toeslope]" + }, + "proport_woa_temperature": { + "type": "string" + }, + "redox_potential": { + "$ref": "#/$defs/QuantityValue", + "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", + "pattern": "\\d+[.\\d+] \\S+" + }, + "salinity": { + "$ref": "#/$defs/QuantityValue", + "description": "Salinity is the total concentration of all dissolved salts in a water sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", + "pattern": "\\d+[.\\d+] \\S+" + }, + "salinity_category": { + "description": "Categorical description of the sample's salinity. 
Examples: halophile, halotolerant, hypersaline, euryhaline", + "type": "string" + }, + "salinity_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining salinity" + }, + "samp_collect_device": { + "$ref": "#/$defs/TextValue", + "description": "The method or device employed for collecting the sample" + }, + "samp_mat_process": { + "$ref": "#/$defs/ControlledTermValue", + "description": "Any processing applied to the sample during or after retrieving the sample from environment. This field accepts OBI, for a browser of OBI (v 2018-02-12) terms please see http://purl.bioontology.org/ontology/OBI" + }, + "samp_store_dur": { + "$ref": "#/$defs/TextValue", + "description": "Duration for which the sample was stored" + }, + "samp_store_loc": { + "$ref": "#/$defs/TextValue", + "description": "Location at which sample was stored, usually name of a specific freezer/room" + }, + "samp_store_temp": { + "$ref": "#/$defs/QuantityValue", + "description": "Temperature at which sample was stored, e.g. 
-80 degree Celsius", + "pattern": "\\d+[.\\d+] \\S+" + }, + "samp_vol_we_dna_ext": { + "$ref": "#/$defs/QuantityValue", + "description": "Volume (ml), weight (g) of processed sample, or surface area swabbed from sample for DNA extraction", + "pattern": "\\d+[.\\d+] \\S+" + }, + "sample_collection_site": { + "type": "string" + }, + "season_precpt": { + "$ref": "#/$defs/QuantityValue", + "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps.", + "pattern": "\\d+[.\\d+] \\S+" + }, + "season_temp": { + "$ref": "#/$defs/QuantityValue", + "description": "Mean seasonal temperature", + "pattern": "\\d+[.\\d+] \\S+" + }, + "sieving": { + "$ref": "#/$defs/QuantityValue", + "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved " + }, + "size_frac_low": { + "$ref": "#/$defs/QuantityValue", + "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", + "pattern": "\\d+[.\\d+] \\S+" + }, + "size_frac_up": { + "$ref": "#/$defs/QuantityValue", + "description": "Refers to the mesh/pore size used to retain the sample. Materials smaller than the size threshold are excluded from the sample", + "pattern": "\\d+[.\\d+] \\S+" + }, + "slope_aspect": { + "$ref": "#/$defs/QuantityValue", + "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration.", + "pattern": "\\d+[.\\d+] \\S+" + }, + "slope_gradient": { + "$ref": "#/$defs/QuantityValue", + "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. 
This measure is usually taken with a hand level meter or clinometer", + "pattern": "\\d+[.\\d+] \\S+" + }, + "sodium": { + "$ref": "#/$defs/QuantityValue", + "description": "Sodium concentration in the sample", + "pattern": "\\d+[.\\d+] \\S+" + }, + "soil_type": { + "$ref": "#/$defs/TextValue", + "description": "Soil series name or other lower-level classification" + }, + "soil_type_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining soil series name or other lower-level classification" + }, + "soluble_iron_micromol": { + "type": "string" + }, + "specific_ecosystem": { + "description": "Specific ecosystems represent specific features of the environment like aphotic zone in an ocean or gastric mucosa within a host digestive system. Specific ecosystem is in position 5/5 in a GOLD path.", + "type": "string" + }, + "store_cond": { + "$ref": "#/$defs/TextValue", + "description": "Explain how and for how long the soil sample was stored before DNA extraction" + }, + "subsurface_depth": { + "$ref": "#/$defs/QuantityValue" + }, + "subsurface_depth2": { + "$ref": "#/$defs/QuantityValue" + }, + "sulfate": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of sulfate in the sample", + "pattern": "\\d+[.\\d+] \\S+" + }, + "sulfide": { + "$ref": "#/$defs/QuantityValue", + "description": "Concentration of sulfide in the sample", + "pattern": "\\d+[.\\d+] \\S+" + }, + "temp": { + "$ref": "#/$defs/QuantityValue", + "description": "Temperature of the sample at the time of sampling", + "pattern": "\\d+[.\\d+] \\S+" + }, + "texture": { + "$ref": "#/$defs/QuantityValue", + "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional.", + "pattern": "\\d+[.\\d+] \\S+" + }, + "texture_meth": { + "$ref": "#/$defs/TextValue", + 
"description": "Reference or method used in determining soil texture" + }, + "tidal_stage": { + "$ref": "#/$defs/TextValue", + "description": "Stage of tide", + "pattern": "[low tide|ebb tide|flood tide|high tide]" + }, + "tillage": { + "$ref": "#/$defs/TextValue", + "description": "Note method(s) used for tilling", + "pattern": "[drill|cutting disc|ridge till|strip tillage|zonal tillage|chisel|tined|mouldboard|disc plough]" + }, + "tot_carb": { + "$ref": "#/$defs/QuantityValue", + "description": "Total carbon content", + "pattern": "\\d+[.\\d+] \\S+" + }, + "tot_depth_water_col": { + "$ref": "#/$defs/QuantityValue", + "description": "Measurement of total depth of water column", + "pattern": "\\d+[.\\d+] \\S+" + }, + "tot_diss_nitro": { + "$ref": "#/$defs/QuantityValue", + "description": "Total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen", + "pattern": "\\d+[.\\d+] \\S+" + }, + "tot_nitro_content": { + "$ref": "#/$defs/QuantityValue", + "description": "Total nitrogen content of the sample", + "pattern": "\\d+[.\\d+] \\S+" + }, + "tot_nitro_content_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining the total nitrogen" + }, + "tot_org_c_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining total organic carbon" + }, + "tot_org_carb": { + "$ref": "#/$defs/QuantityValue", + "description": "Definition for soil: total organic carbon content of the soil, definition otherwise: total organic carbon content", + "pattern": "\\d+[.\\d+] \\S+" + }, + "tot_phosp": { + "$ref": "#/$defs/QuantityValue", + "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", + "pattern": "\\d+[.\\d+] \\S+" + }, + "type": { + "description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "water_content": { + "$ref": "#/$defs/QuantityValue", + "description": "Water content measurement", + "pattern": "\\d+[.\\d+] \\S+" + }, + "water_content_soil_meth": { + "$ref": "#/$defs/TextValue", + "description": "Reference or method used in determining the water content of soil" + } + }, + "required": [ + "id", + "part_of", + "env_broad_scale", + "env_local_scale", + "env_medium" + ], + "title": "Biosample", + "type": "object" + }, + "BiosampleProcessing": { + "additionalProperties": false, + "description": "A process that takes one or more biosamples as inputs and generates one or more as outputs. Examples of outputs include samples cultivated from another sample or data objects created by instrument runs.", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "BiosampleProcessing", + "type": "object" + }, + "BooleanValue": { + "additionalProperties": false, + "description": "A value that is a boolean", + "properties": { + "has_boolean_value": { + "description": "Links a quantity value to a boolean", + "type": "boolean" + }, + "has_raw_value": { + "description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. 
\"2 cm\" or \"2-4 cm\"", + "type": "string" + }, + "type": { + "description": "An optional string that specified the type of object.", + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "required": [], + "title": "BooleanValue", + "type": "object" + }, + "ChemicalEntity": { + "additionalProperties": false, + "description": "An atom or molecule that can be represented with a chemical formula. Include lipids, glycans, natural products, drugs. There may be different terms for distinct acid-base forms, protonation states", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "chemical_formula": { + "description": "A generic grouping for miolecular formulae and empirican formulae", + "type": "string" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "inchi": { + "type": "string" + }, + "inchi_key": { + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "smiles": { + "description": "A string encoding of a molecular graph, no chiral or isotopic information. There are usually a large number of valid SMILES which represent a given structure. For example, CCO, OCC and C(O)C all specify the structure of ethanol.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id" + ], + "title": "ChemicalEntity", + "type": "object" + }, + "ControlledTermValue": { + "additionalProperties": false, + "description": "A controlled term or class from an ontology", + "properties": { + "has_raw_value": { + "description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. 
\"2 cm\" or \"2-4 cm\"", + "type": "string" + }, + "term": { + "$ref": "#/$defs/OntologyClass", + "description": "pointer to an ontology class" + }, + "type": { + "description": "An optional string that specified the type of object.", + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "required": [], + "title": "ControlledTermValue", + "type": "object" + }, + "CreditAssociation": { + "additionalProperties": false, + "description": "This class supports binding associated researchers to studies. There will be at least a slot for a CRediT Contributor Role (https://casrai.org/credit/) and for a person value Specifically see the associated researchers tab on the NMDC_SampleMetadata-V4_CommentsForUpdates at https://docs.google.com/spreadsheets/d/1INlBo5eoqn2efn4H2P2i8rwRBtnbDVTqXrochJEAPko/edit#gid=0", + "properties": { + "applied_role": { + "$ref": "#/$defs/CreditEnum" + }, + "applied_roles": { + "items": { + "$ref": "#/$defs/CreditEnum" + }, + "type": "array" + }, + "applies_to_person": { + "$ref": "#/$defs/PersonValue" + }, + "type": { + "description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", + "type": "string" + } + }, + "required": [ + "applies_to_person", + "applied_roles" + ], + "title": "CreditAssociation", + "type": "object" + }, + "CreditEnum": { + "description": "", + "enum": [ + "Conceptualization", + "Data curation", + "Formal Analysis", + "Funding acquisition", + "Investigation", + "Methodology", + "Project administration", + "Resources", + "Software", + "Supervision", + "Validation", + "Visualization", + "Writing original draft", + "Writing review and editing", + "Principal Investigator" + ], + "title": "CreditEnum", + "type": "string" + }, + "DataObject": { + "additionalProperties": false, + "description": "An object that primarily consists of symbols that represent information. 
Files, records, and omics data are examples of data objects.", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "compression_type": { + "description": "If provided, specifies the compression type", + "type": "string" + }, + "data_object_type": { + "$ref": "#/$defs/FileTypeEnum", + "description": "The type of file represented by the data object." + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "file_size_bytes": { + "description": "Size of the file in bytes", + "type": "integer" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "md5_checksum": { + "description": "MD5 checksum of file (pre-compressed)", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type of object. This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "url": { + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "required": [ + "id", + "name", + "description" + ], + "title": "DataObject", + "type": "object" + }, + "Database": { + "additionalProperties": false, + "description": "An abstract holder for any set of metadata and data. It does not need to correspond to an actual managed database top level holder class. When translated to JSON-Schema this is the 'root' object. 
It should contain pointers to other objects of interest", + "properties": { + "activity_set": { + "description": "This property links a database object to the set of workflow activities.", + "items": { + "$ref": "#/$defs/WorkflowExecutionActivity" + }, + "type": "array" + }, + "biosample_set": { + "description": "This property links a database object to the set of samples within it.", + "items": { + "$ref": "#/$defs/Biosample" + }, + "type": "array" + }, + "data_object_set": { + "description": "This property links a database object to the set of data objects within it.", + "items": { + "$ref": "#/$defs/DataObject" + }, + "type": "array" + }, + "date_created": { + "description": "TODO", + "type": "string" + }, + "etl_software_version": { + "description": "TODO", + "type": "string" + }, + "functional_annotation_set": { + "description": "This property links a database object to the set of all functional annotations", + "items": { + "$ref": "#/$defs/FunctionalAnnotation" + }, + "type": "array" + }, + "genome_feature_set": { + "description": "This property links a database object to the set of all features", + "items": { + "$ref": "#/$defs/GenomeFeature" + }, + "type": "array" + }, + "mags_activity_set": { + "description": "This property links a database object to the set of MAGs analysis activities.", + "items": { + "$ref": "#/$defs/MAGsAnalysisActivity" + }, + "type": "array" + }, + "metabolomics_analysis_activity_set": { + "description": "This property links a database object to the set of metabolomics analysis activities.", + "items": { + "$ref": "#/$defs/MetabolomicsAnalysisActivity" + }, + "type": "array" + }, + "metagenome_annotation_activity_set": { + "description": "This property links a database object to the set of metagenome annotation activities.", + "items": { + "$ref": "#/$defs/MetagenomeAnnotationActivity" + }, + "type": "array" + }, + "metagenome_assembly_set": { + "description": "This property links a database object to the set of metagenome assembly 
activities.", + "items": { + "$ref": "#/$defs/MetagenomeAssembly" + }, + "type": "array" + }, + "metaproteomics_analysis_activity_set": { + "description": "This property links a database object to the set of metaproteomics analysis activities.", + "items": { + "$ref": "#/$defs/MetaproteomicsAnalysisActivity" + }, + "type": "array" + }, + "metatranscriptome_activity_set": { + "description": "This property links a database object to the set of metatranscriptome analysis activities.", + "items": { + "$ref": "#/$defs/MetatranscriptomeActivity" + }, + "type": "array" + }, + "nmdc_schema_version": { + "description": "TODO", + "type": "string" + }, + "nom_analysis_activity_set": { + "description": "This property links a database object to the set of natural organic matter (NOM) analysis activities.", + "items": { + "$ref": "#/$defs/NomAnalysisActivity" + }, + "type": "array" + }, + "omics_processing_set": { + "description": "This property links a database object to the set of omics processings within it.", + "items": { + "$ref": "#/$defs/OmicsProcessing" + }, + "type": "array" + }, + "read_QC_analysis_activity_set": { + "description": "This property links a database object to the set of read QC analysis activities.", + "items": { + "$ref": "#/$defs/ReadQCAnalysisActivity" + }, + "type": "array" + }, + "read_based_analysis_activity_set": { + "description": "This property links a database object to the set of read based analysis activities.\n ", + "items": { + "$ref": "#/$defs/ReadBasedAnalysisActivity" + }, + "type": "array" + }, + "study_set": { + "description": "This property links a database object to the set of studies within it.", + "items": { + "$ref": "#/$defs/Study" + }, + "type": "array" + } + }, + "required": [], + "title": "Database", + "type": "object" + }, + "EnvironmentalMaterialTerm": { + "additionalProperties": false, + "description": "", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + 
"items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "EnvironmentalMaterialTerm", + "type": "object" + }, + "FileTypeEnum": { + "description": "", + "enum": [ + "FT ICR-MS Analysis Results", + "GC-MS Metabolomics Results", + "Metaproteomics Workflow Statistics", + "Protein Report", + "Peptide Report", + "Unfiltered Metaproteomics Results", + "Read Count and RPKM", + "QC non-rRNA R2", + "QC non-rRNA R1", + "Metagenome Bins", + "CheckM Statistics", + "GOTTCHA2 Krona Plot", + "Kraken2 Krona Plot", + "Centrifuge Krona Plot", + "Kraken2 Classification Report", + "Kraken2 Taxonomic Classification", + "Centrifuge Classification Report", + "Centrifuge Taxonomic Classification", + "Structural Annotation GFF", + "Functional Annotation GFF", + "Annotation Amino Acid FASTA", + "Annotation Enzyme Commission", + "Annotation KEGG Orthology", + "Assembly Coverage BAM", + "Assembly AGP", + "Assembly Scaffolds", + "Assembly Contigs", + "Assembly Coverage Stats", + "Filtered Sequencing Reads", + "QC Statistics", + "TIGRFam Annotation GFF", + "Clusters of Orthologous Groups (COG) Annotation GFF", + "CATH FunFams (Functional Families) Annotation GFF", + "SUPERFam Annotation GFF", + "SMART Annotation GFF", + "Pfam Annotation GFF", + "Direct Infusion FT ICR-MS Raw Data" + ], + "title": "FileTypeEnum", + "type": "string" + }, + "FunctionalAnnotation": { + "additionalProperties": false, + "description": "An assignment of a function term (e.g. reaction or pathway) that is executed by a gene product, or which the gene product plays an active role in. 
Functional annotations can be assigned manually by curators, or automatically in workflows. In the context of NMDC, all function annotation is performed automatically, typically using HMM or Blast type methods", + "properties": { + "has_function": { + "pattern": "^(KEGG.PATHWAY:\\w{2,4}\\d{5}|KEGG.REACTION:R\\d+|RHEA:\\d{5}|MetaCyc:[A-Za-z0-9+_.%-:]+|EC:\\d{1,2}(\\.\\d{0,3}){0,3}|GO:\\d{7}|MetaNetX:(MNXR\\d+|EMPTY)|SEED:\\w+|KEGG\\.ORTHOLOGY:K\\d+|EGGNOG:\\w+|PFAM:PF\\d{5}|TIGRFAM:TIGR\\d+|SUPFAM:\\w+|CATH:[1-6]\\.[0-9]+\\.[0-9]+\\.[0-9]+|PANTHER.FAMILY:PTHR\\d{5}(\\:SF\\d{1,3})?)$", + "type": "string" + }, + "subject": { + "type": "string" + }, + "type": { + "description": "TODO", + "type": "string" + }, + "was_generated_by": { + "description": "provenance for the annotation.", + "type": "string" + } + }, + "required": [], + "title": "FunctionalAnnotation", + "type": "object" + }, + "GeneProduct": { + "additionalProperties": false, + "description": "A molecule encoded by a gene that has an evolved function", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "GeneProduct", + "type": "object" + }, + "GenomeFeature": { + "additionalProperties": false, + "description": "A feature localized to an interval along a genome", + "properties": { + "encodes": { + "description": "The gene product encoded by this feature. Typically this is used for a CDS feature or gene feature which will encode a protein. 
It can also be used by a nc transcript or gene feature that encodes an ncRNA", + "type": "string" + }, + "end": { + "description": "The end of the feature in positive 1-based integer coordinates", + "minimum": 1, + "type": "integer" + }, + "feature_type": { + "description": "TODO: Yuri to write", + "type": "string" + }, + "phase": { + "description": "The phase for a coding sequence entity. For example, phase of a CDS as represented in a GFF3 with a value of 0, 1 or 2.", + "maximum": 0, + "minimum": 0, + "type": "integer" + }, + "seqid": { + "description": "The ID of the landmark used to establish the coordinate system for the current feature.", + "type": "string" + }, + "start": { + "description": "The start of the feature in positive 1-based integer coordinates", + "minimum": 1, + "type": "integer" + }, + "strand": { + "description": "The strand on which a feature is located. Has a value of '+' (sense strand or forward strand) or '-' (anti-sense strand or reverse strand).", + "type": "string" + }, + "type": { + "description": "A type from the sequence ontology", + "type": "string" + } + }, + "required": [ + "seqid", + "start", + "end" + ], + "title": "GenomeFeature", + "type": "object" + }, + "GeolocationValue": { + "additionalProperties": false, + "description": "A normalized value for a location on the earth's surface", + "properties": { + "has_raw_value": { + "description": "The raw value for a geolocation should follow {lat} {long}", + "type": "string" + }, + "latitude": { + "description": "latitude", + "type": "number" + }, + "longitude": { + "description": "longitude", + "type": "number" + }, + "type": { + "description": "An optional string that specified the type of object.", + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "required": [], + "title": "GeolocationValue", + "type": "object" + }, + "ImageValue": { + "additionalProperties": false, + "description": "An attribute value representing an image.", + "properties": { + 
"description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "display_order": { + "description": "When rendering information, this attribute to specify the order in which the information should be rendered.", + "type": "string" + }, + "has_raw_value": { + "description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", + "type": "string" + }, + "type": { + "description": "An optional string that specified the type of object.", + "type": "string" + }, + "url": { + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "required": [], + "title": "ImageValue", + "type": "object" + }, + "Instrument": { + "additionalProperties": false, + "description": "A material entity that is designed to perform a function in a scientific investigation, but is not a reagent[OBI].", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "Instrument", + "type": "object" + }, + "IntegerValue": { + "additionalProperties": false, + "description": "A value that is an integer", + "properties": { + "has_numeric_value": { + "description": "Links a quantity value to a number", + "type": "number" + }, + "has_raw_value": { + "description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. 
\"2 cm\" or \"2-4 cm\"", + "type": "string" + }, + "type": { + "description": "An optional string that specified the type of object.", + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "required": [], + "title": "IntegerValue", + "type": "object" + }, + "MAGBin": { + "additionalProperties": false, + "description": "", + "properties": { + "bin_name": { + "type": "string" + }, + "bin_quality": { + "type": "string" + }, + "completeness": { + "type": "number" + }, + "contamination": { + "type": "number" + }, + "gene_count": { + "type": "integer" + }, + "gtdbtk_class": { + "type": "string" + }, + "gtdbtk_domain": { + "type": "string" + }, + "gtdbtk_family": { + "type": "string" + }, + "gtdbtk_genus": { + "type": "string" + }, + "gtdbtk_order": { + "type": "string" + }, + "gtdbtk_phylum": { + "type": "string" + }, + "gtdbtk_species": { + "type": "string" + }, + "num_16s": { + "type": "integer" + }, + "num_23s": { + "type": "integer" + }, + "num_5s": { + "type": "integer" + }, + "num_tRNA": { + "type": "integer" + }, + "number_of_contig": { + "type": "integer" + }, + "type": { + "description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", + "type": "string" + } + }, + "required": [], + "title": "MAGBin", + "type": "object" + }, + "MAGsAnalysisActivity": { + "additionalProperties": false, + "description": "", + "properties": { + "binned_contig_num": { + "type": "integer" + }, + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "input_contig_num": { + "type": "integer" + }, + "lowDepth_contig_num": { + "type": "integer" + }, + "mags_list": { + "items": { + "$ref": "#/$defs/MAGBin" + }, + "type": "array" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "too_short_contig_num": { + "type": "integer" + }, + "type": { + "description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "unbinned_contig_num": { + "type": "integer" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "id", + "execution_resource", + "git_url", + "has_input", + "has_output", + "type", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "MAGsAnalysisActivity", + "type": "object" + }, + "MetaboliteQuantification": { + "additionalProperties": false, + "description": "This is used to link a metabolomics analysis workflow to a specific metabolite", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "highest_similarity_score": { + "description": "TODO: Yuri to fill in", + "type": "number" + }, + "metabolite_quantified": { + "description": "the specific metabolite identifier", + "type": "string" + } + }, + "required": [], + "title": "MetaboliteQuantification", + "type": "object" + }, + "MetabolomicsAnalysisActivity": { + "additionalProperties": false, + "description": "", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_calibration": { + "description": "TODO: Yuri 
to fill in", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_metabolite_quantifications": { + "items": { + "$ref": "#/$defs/MetaboliteQuantification" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "description": "The instrument used to collect the data used in the analysis", + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "id", + "execution_resource", + "git_url", + "has_input", + "has_output", + "type", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "MetabolomicsAnalysisActivity", + "type": "object" + }, + "MetagenomeAnnotationActivity": { + "additionalProperties": false, + "description": "", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "id", + "execution_resource", + "git_url", + "has_input", + "has_output", + "type", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "MetagenomeAnnotationActivity", + "type": "object" + }, + "MetagenomeAssembly": { + "additionalProperties": false, + "description": "", + "properties": { + "INSDC_assembly_identifiers": { + "pattern": "^insdc.sra:[A-Z]+[0-9]+(\\.[0-9]+)?$", + "type": "string" + }, + "asm_score": { + "description": "A score for comparing metagenomic assembly quality from same sample.", + "type": "number" + }, + "contig_bp": { + "description": "Total size in bp of all contigs.", + "type": "number" + }, + "contigs": { + "description": "The sum of the (length*log(length)) of all contigs, times some constant. 
Increase the contiguity, the score will increase", + "type": "number" + }, + "ctg_L50": { + "description": "Given a set of contigs, the L50 is defined as the sequence length of the shortest contig at 50% of the total genome length.", + "type": "number" + }, + "ctg_L90": { + "description": "The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all contigs of that length or longer contains at least 90% of the sum of the lengths of all contigs.", + "type": "number" + }, + "ctg_N50": { + "description": "Given a set of contigs, each with its own length, the N50 count is defined as the smallest number of contigs whose length sum makes up half of genome size.", + "type": "number" + }, + "ctg_N90": { + "description": "Given a set of contigs, each with its own length, the N90 count is defined as the smallest number of contigs whose length sum makes up 90% of genome size.", + "type": "number" + }, + "ctg_logsum": { + "description": "Maximum contig length.", + "type": "number" + }, + "ctg_max": { + "description": "Maximum contig length.", + "type": "number" + }, + "ctg_powsum": { + "description": "Powersum of all contigs is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).", + "type": "number" + }, + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "gap_pct": { + "description": "The gap size percentage of all scaffolds.", + "type": "number" + }, + "gc_avg": { + "description": "Average of GC content of all contigs.", + "type": "number" + }, + 
"gc_std": { + "description": "Standard deviation of GC content of all contigs.", + "type": "number" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "num_aligned_reads": { + "description": "The sequence count number of input reads aligned to assembled contigs.", + "type": "number" + }, + "num_input_reads": { + "description": "The sequence count number of input reads for assembly.", + "type": "number" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "scaf_L50": { + "description": "Given a set of scaffolds, the L50 is defined as the sequence length of the shortest scaffold at 50% of the total genome length.", + "type": "number" + }, + "scaf_L90": { + "description": "The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all scaffolds of that length or longer contains at least 90% of the sum of the lengths of all scaffolds.", + "type": "number" + }, + "scaf_N50": { + "description": "Given a set of scaffolds, each with its own length, the N50 count is defined as the smallest number of scaffolds whose length sum makes up half of genome size.", + "type": "number" + }, + "scaf_N90": { + "description": "Given a set of scaffolds, each with its own length, the N90 count is defined as the smallest number of 
scaffolds whose length sum makes up 90% of genome size.", + "type": "number" + }, + "scaf_bp": { + "description": "Total size in bp of all scaffolds.", + "type": "number" + }, + "scaf_l_gt50K": { + "description": "Total size in bp of all scaffolds greater than 50 KB.", + "type": "number" + }, + "scaf_logsum": { + "description": "The sum of the (length*log(length)) of all scaffolds, times some constant. Increase the contiguity, the score will increase", + "type": "number" + }, + "scaf_max": { + "description": "Maximum scaffold length.", + "type": "number" + }, + "scaf_n_gt50K": { + "description": "Total sequence count of scaffolds greater than 50 KB.", + "type": "number" + }, + "scaf_pct_gt50K": { + "description": "Total sequence size percentage of scaffolds greater than 50 KB.", + "type": "number" + }, + "scaf_powsum": { + "description": "Powersum of all scaffolds is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).", + "type": "number" + }, + "scaffolds": { + "description": "Total sequence count of all scaffolds.", + "type": "number" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "id", + "execution_resource", + "git_url", + "has_input", + "has_output", + "type", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "MetagenomeAssembly", + "type": "object" + }, + "MetaproteomicsAnalysisActivity": { + "additionalProperties": false, + "description": "", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_peptide_quantifications": { + "items": { + "$ref": "#/$defs/PeptideQuantification" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "description": "The instrument used to collect the data used in the analysis", + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "id", + "execution_resource", + "git_url", + "has_input", + "has_output", + "type", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "MetaproteomicsAnalysisActivity", + "type": "object" + }, + "MetatranscriptomeActivity": { + "additionalProperties": false, + "description": "A metatranscriptome activity that e.g. 
pools assembly and annotation activity.", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "id", + "execution_resource", + "git_url", + "has_input", + "has_output", + "type", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "MetatranscriptomeActivity", + "type": "object" + }, + "MetatranscriptomeAnnotationActivity": { + "additionalProperties": false, + "description": "", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "id", + "execution_resource", + "git_url", + "has_input", + "has_output", + "type", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "MetatranscriptomeAnnotationActivity", + "type": "object" + }, + "MetatranscriptomeAssembly": { + "additionalProperties": false, + "description": "", + "properties": { + "INSDC_assembly_identifiers": { + "pattern": "^insdc.sra:[A-Z]+[0-9]+(\\.[0-9]+)?$", + "type": "string" + }, + "asm_score": { + "description": "A score for comparing metagenomic assembly quality from same sample.", + "type": "number" + }, + "contig_bp": { + "description": "Total size in bp of all contigs.", + "type": "number" + }, + "contigs": { + "description": "The sum of the (length*log(length)) of all contigs, times some constant. 
Increase the contiguity, the score will increase", + "type": "number" + }, + "ctg_L50": { + "description": "Given a set of contigs, the L50 is defined as the sequence length of the shortest contig at 50% of the total genome length.", + "type": "number" + }, + "ctg_L90": { + "description": "The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all contigs of that length or longer contains at least 90% of the sum of the lengths of all contigs.", + "type": "number" + }, + "ctg_N50": { + "description": "Given a set of contigs, each with its own length, the N50 count is defined as the smallest number of contigs whose length sum makes up half of genome size.", + "type": "number" + }, + "ctg_N90": { + "description": "Given a set of contigs, each with its own length, the N90 count is defined as the smallest number of contigs whose length sum makes up 90% of genome size.", + "type": "number" + }, + "ctg_logsum": { + "description": "Maximum contig length.", + "type": "number" + }, + "ctg_max": { + "description": "Maximum contig length.", + "type": "number" + }, + "ctg_powsum": { + "description": "Powersum of all contigs is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).", + "type": "number" + }, + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "gap_pct": { + "description": "The gap size percentage of all scaffolds.", + "type": "number" + }, + "gc_avg": { + "description": "Average of GC content of all contigs.", + "type": "number" + }, + 
"gc_std": { + "description": "Standard deviation of GC content of all contigs.", + "type": "number" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "num_aligned_reads": { + "description": "The sequence count number of input reads aligned to assembled contigs.", + "type": "number" + }, + "num_input_reads": { + "description": "The sequence count number of input reads for assembly.", + "type": "number" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "scaf_L50": { + "description": "Given a set of scaffolds, the L50 is defined as the sequence length of the shortest scaffold at 50% of the total genome length.", + "type": "number" + }, + "scaf_L90": { + "description": "The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all scaffolds of that length or longer contains at least 90% of the sum of the lengths of all scaffolds.", + "type": "number" + }, + "scaf_N50": { + "description": "Given a set of scaffolds, each with its own length, the N50 count is defined as the smallest number of scaffolds whose length sum makes up half of genome size.", + "type": "number" + }, + "scaf_N90": { + "description": "Given a set of scaffolds, each with its own length, the N90 count is defined as the smallest number of 
scaffolds whose length sum makes up 90% of genome size.", + "type": "number" + }, + "scaf_bp": { + "description": "Total size in bp of all scaffolds.", + "type": "number" + }, + "scaf_l_gt50K": { + "description": "Total size in bp of all scaffolds greater than 50 KB.", + "type": "number" + }, + "scaf_logsum": { + "description": "The sum of the (length*log(length)) of all scaffolds, times some constant. Increase the contiguity, the score will increase", + "type": "number" + }, + "scaf_max": { + "description": "Maximum scaffold length.", + "type": "number" + }, + "scaf_n_gt50K": { + "description": "Total sequence count of scaffolds greater than 50 KB.", + "type": "number" + }, + "scaf_pct_gt50K": { + "description": "Total sequence size percentage of scaffolds greater than 50 KB.", + "type": "number" + }, + "scaf_powsum": { + "description": "Powersum of all scaffolds is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).", + "type": "number" + }, + "scaffolds": { + "description": "Total sequence count of all scaffolds.", + "type": "number" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "id", + "execution_resource", + "git_url", + "has_input", + "has_output", + "type", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "MetatranscriptomeAssembly", + "type": "object" + }, + "NomAnalysisActivity": { + "additionalProperties": false, + "description": "", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_calibration": { + "description": "A reference to a file that holds calibration information.", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "description": "The instrument used to collect the data used in the analysis", + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "id", + "execution_resource", + "git_url", + "has_input", + "has_output", + "type", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "NomAnalysisActivity", + "type": "object" + }, + "OmicsProcessing": { + "additionalProperties": false, + "description": "The methods and processes used to generate omics data from a biosample or organism.", + "properties": { + "GOLD_sequencing_project_identifiers": { + "description": "identifiers for corresponding sequencing project in GOLD", + "items": { + "type": "string" + }, + "pattern": "^GOLD:Gp[0-9]+$", + "type": "array" + }, + "INSDC_experiment_identifiers": { + "items": { + "type": "string" + }, + "pattern": "^insdc.sra:(E|D|S)RX[0-9]{6,}$", + "type": "array" + }, + 
"add_date": { + "description": "The date on which the information was added to the database.", + "type": "string" + }, + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "chimera_check": { + "$ref": "#/$defs/TextValue", + "description": "A chimeric sequence, or chimera for short, is a sequence comprised of two or more phylogenetically distinct parent sequences. Chimeras are usually PCR artifacts thought to occur when a prematurely terminated amplicon reanneals to a foreign DNA strand and is copied to completion in the following PCR cycles. The point at which the chimeric sequence changes from one parent to the next is called the breakpoint or conversion point" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "instrument_name": { + "description": "The name of the instrument that was used for processing the sample.\n ", + "type": "string" + }, + "mod_date": { + "description": "The last date on which the database information was modified.", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "ncbi_project_name": { + "type": "string" + }, + "nucl_acid_amp": { + "$ref": "#/$defs/TextValue", + "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids" + }, + "nucl_acid_ext": { + "$ref": "#/$defs/TextValue", + "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample" + }, + "omics_type": { + "$ref": "#/$defs/ControlledTermValue", + "description": "The type of omics data" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "pcr_cond": { + "$ref": "#/$defs/TextValue", + "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", + "pattern": "initial denaturation:degrees_minutes;annealing:degrees_minutes;elongation:degrees_minutes;final elongation:degrees_minutes;total cycles" + }, + "pcr_primers": { + "$ref": "#/$defs/TextValue", + "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. 
The primer sequence should be reported in uppercase letters" + }, + "principal_investigator": { + "$ref": "#/$defs/PersonValue", + "description": "Principal Investigator who led the study and/or generated the dataset." + }, + "processing_institution": { + "description": "The organization that processed the sample.", + "type": "string" + }, + "samp_vol_we_dna_ext": { + "$ref": "#/$defs/QuantityValue", + "description": "Volume (ml), weight (g) of processed sample, or surface area swabbed from sample for DNA extraction", + "pattern": "\\d+[.\\d+] \\S+" + }, + "seq_meth": { + "$ref": "#/$defs/TextValue", + "description": "Sequencing method used; e.g. Sanger, pyrosequencing, ABI-solid", + "pattern": "[MinION|GridION|PromethION|454 GS|454 GS 20|454 GS FLX|454 GS FLX+|454 GS FLX Titanium|454 GS Junior|Illumina Genome Analyzer|Illumina Genome Analyzer II|Illumina Genome Analyzer IIx|Illumina HiSeq 4000|Illumina HiSeq 3000|Illumina HiSeq 2500|Illumina HiSeq 2000|Illumina HiSeq 1500|Illumina HiSeq 1000|Illumina HiScanSQ|Illumina MiSeq|Illumina HiSeq X Five|Illumina HiSeq X Ten|Illumina NextSeq 500|Illumina NextSeq 550|AB SOLiD System|AB SOLiD System 2.0|AB SOLiD System 3.0|AB SOLiD 3 Plus System|AB SOLiD 4 System|AB SOLiD 4hq System|AB SOLiD PI System|AB 5500 Genetic Analyzer|AB 5500xl Genetic Analyzer|AB 5500xl\\-W Genetic Analysis System|Ion Torrent PGM|Ion Torrent Proton|Ion Torrent S5|Ion Torrent S5 XL|PacBio RS|PacBio RS II|Sequel|AB 3730xL Genetic Analyzer|AB 3730 Genetic Analyzer|AB 3500xL Genetic Analyzer|AB 3500 Genetic Analyzer|AB 3130xL Genetic Analyzer|AB 3130 Genetic Analyzer|AB 310 Genetic Analyzer|BGISEQ\\-500]" + }, + "seq_quality_check": { + "$ref": "#/$defs/TextValue", + "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). 
Applied only for sequences that are not submitted to SRA,ENA or DRA", + "pattern": "[none|manually edited]" + }, + "target_gene": { + "$ref": "#/$defs/TextValue", + "description": "Targeted gene or locus name for marker gene studies" + }, + "target_subfragment": { + "$ref": "#/$defs/TextValue", + "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA" + }, + "type": { + "description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "OmicsProcessing", + "type": "object" + }, + "OntologyClass": { + "additionalProperties": false, + "description": "", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "OntologyClass", + "type": "object" + }, + "OrthologyGroup": { + "additionalProperties": false, + "description": "A set of genes or gene products in which all members are orthologous", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "OrthologyGroup", + "type": "object" + }, + "Pathway": { + "additionalProperties": false, + "description": "A pathway is a sequence of steps/reactions carried out by an organism or community of organisms", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "has_part": { + "description": "A pathway can be broken down to a series of reaction step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "Pathway", + "type": "object" + }, + "PeptideQuantification": { + "additionalProperties": false, + "description": "This is used to link a metaproteomics analysis workflow to a specific peptide sequence and related information", + "properties": { + "all_proteins": { + "description": "the list of protein identifiers that are associated with the peptide sequence", + "items": { + "type": "string" + }, + "type": "array" + }, + "best_protein": { + "description": "the specific protein identifier most correctly associated with the peptide sequence", + "type": "string" + }, + "min_q_value": { + "description": "smallest Q-Value associated with the peptide sequence as provided by MSGFPlus tool", + "type": "number" + }, + "peptide_sequence": { + "type": "string" + }, + "peptide_spectral_count": { + "description": "sum of filter passing MS2 spectra 
associated with the peptide sequence within a given LC-MS/MS data file", + "type": "integer" + }, + "peptide_sum_masic_abundance": { + "description": "combined MS1 extracted ion chromatograms derived from MS2 spectra associated with the peptide sequence from a given LC-MS/MS data file using the MASIC tool", + "type": "integer" + } + }, + "required": [], + "title": "PeptideQuantification", + "type": "object" + }, + "Person": { + "additionalProperties": false, + "description": "represents a person, such as a researcher", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "id": { + "description": "Should be an ORCID. Specify in CURIE format. E.g ORCID:0000-1111-...", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "Person", + "type": "object" + }, + "PersonValue": { + "additionalProperties": false, + "description": "An attribute value representing a person", + "properties": { + "email": { + "description": "An email address for an entity such as a person. This should be the primarly email address used.", + "type": "string" + }, + "has_raw_value": { + "description": "The full name of the Investgator in format FIRST LAST.", + "type": "string" + }, + "name": { + "description": "The full name of the Investgator. 
It should follow the format FIRST [MIDDLE NAME| MIDDLE INITIAL] LAST, where MIDDLE NAME| MIDDLE INITIAL is optional.", + "type": "string" + }, + "orcid": { + "description": "The ORICD of a person.", + "type": "string" + }, + "profile_image_url": { + "description": "A url that points to an image of a person.", + "type": "string" + }, + "type": { + "description": "An optional string that specified the type of object.", + "type": "string" + }, + "was_generated_by": { + "type": "string" + }, + "websites": { + "description": "A list of websites that are assocatiated with the entity.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [], + "title": "PersonValue", + "type": "object" + }, + "ProteinQuantification": { + "additionalProperties": false, + "description": "This is used to link a metaproteomics analysis workflow to a specific protein", + "properties": { + "all_proteins": { + "description": "the grouped list of protein identifiers associated with the peptide sequences that were grouped to a best protein", + "items": { + "type": "string" + }, + "type": "array" + }, + "best_protein": { + "description": "the specific protein identifier most correctly grouped to its associated peptide sequences", + "type": "string" + }, + "peptide_sequence_count": { + "description": "count of peptide sequences grouped to the best_protein", + "type": "integer" + }, + "protein_spectral_count": { + "description": "sum of filter passing MS2 spectra associated with the best protein within a given LC-MS/MS data file", + "type": "integer" + }, + "protein_sum_masic_abundance": { + "description": "combined MS1 extracted ion chromatograms derived from MS2 spectra associated with the best protein from a given LC-MS/MS data file using the MASIC tool", + "type": "integer" + } + }, + "required": [], + "title": "ProteinQuantification", + "type": "object" + }, + "QuantityValue": { + "additionalProperties": false, + "description": "A simple quantity, e.g. 
2cm", + "properties": { + "has_maximum_numeric_value": { + "description": "The maximum value part, expressed as number, of the quantity value when the value covers a range.", + "type": "number" + }, + "has_minimum_numeric_value": { + "description": "The minimum value part, expressed as number, of the quantity value when the value covers a range.", + "type": "number" + }, + "has_numeric_value": { + "description": "The number part of the quantity", + "type": "number" + }, + "has_raw_value": { + "description": "Unnormalized atomic string representation, should in syntax {number} {unit}", + "type": "string" + }, + "has_unit": { + "description": "The unit of the quantity", + "type": "string" + }, + "type": { + "description": "An optional string that specified the type of object.", + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "required": [], + "title": "QuantityValue", + "type": "object" + }, + "Reaction": { + "additionalProperties": false, + "description": "An individual biochemical transformation carried out by a functional unit of an organism, in which a collection of substrates are transformed into a collection of products. Can also represent transporters", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "direction": { + "description": "One of l->r, r->l, bidirectional, neutral", + "type": "string" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "is_balanced": { + "type": "boolean" + }, + "is_diastereoselective": { + "type": "boolean" + }, + "is_fully_characterized": { + "description": "False if includes R-groups", + "type": "boolean" + }, + "is_stereo": { + "type": "boolean" + }, + "is_transport": { + "type": "boolean" + }, + "left_participants": { + "items": { + "$ref": "#/$defs/ReactionParticipant" + }, + "type": "array" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "right_participants": { + "items": { + "$ref": "#/$defs/ReactionParticipant" + }, + "type": "array" + }, + "smarts_string": { + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "Reaction", + "type": "object" + }, + "ReactionParticipant": { + "additionalProperties": false, + "description": "Instances of this link a reaction to a chemical entity participant", + "properties": { + "chemical": { + "type": "string" + }, + "stoichiometry": { + "type": "integer" + } + }, + "required": [], + "title": "ReactionParticipant", + "type": "object" + }, + "ReadBasedAnalysisActivity": { + "additionalProperties": false, + "description": "", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + 
"description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "id", + "execution_resource", + "git_url", + "has_input", + "has_output", + "type", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "ReadBasedAnalysisActivity", + "type": "object" + }, + "ReadQCAnalysisActivity": { + "additionalProperties": false, + "description": "", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "input_base_count": { + "description": "The nucleotide base count number of input reads for QC analysis.", + "type": "number" + }, + "input_read_bases": { + "description": "TODO", + "type": "number" + }, + "input_read_count": { + "description": "The sequence count number of input reads for QC analysis.", + "type": "number" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "output_base_count": { + "description": "After QC analysis nucleotide base count number.", + "type": "number" + }, + "output_read_bases": { + "description": "TODO", + "type": "number" + }, + "output_read_count": { + "description": "After QC analysis sequence count number. ", + "type": "number" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "id", + "execution_resource", + "git_url", + "type", + "started_at_time", + "ended_at_time", + "was_informed_by", + "has_input", + "has_output" + ], + "title": "ReadQCAnalysisActivity", + "type": "object" + }, + "Study": { + "additionalProperties": false, + "description": "A study summarizes the overall goal of a research initiative and outlines the key objective of its underlying projects.", + "properties": { + "GOLD_study_identifiers": { + "description": "identifiers for corresponding project in GOLD", + "items": { + "type": "string" + }, + "pattern": "^GOLD:Gs[0-9]+$", + "type": "array" + }, + "INSDC_SRA_ENA_study_identifiers": { + "description": "identifiers for corresponding project in INSDC SRA / ENA", + "items": { + "type": "string" + }, + "pattern": "^insdc.sra:(E|D|S)RP[0-9]{6,}$", + "type": "array" + }, + "INSDC_bioproject_identifiers": { + "description": "identifiers for corresponding project in INSDC Bioproject", + "items": { + "type": "string" + }, + "pattern": "^bioproject:PRJ[DEN][A-Z][0-9]+$", + "type": "array" + }, + "MGnify_project_identifiers": { + "description": "identifiers for corresponding project in MGnify", + "items": { + "type": "string" + }, + "pattern": "^mgnify.proj:[A-Z]+[0-9]+$", + "type": "array" + }, + "abstract": { + "description": "The abstract of manuscript/grant associated with the entity; i.e., a summary of the resource.", + "type": "string" + }, + "alternative_descriptions": { + "description": "A list of alternative descriptions for the entity. 
The distinction between desciption and alternative descriptions is application-specific.", + "items": { + "type": "string" + }, + "type": "array" + }, + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "type": "string" + }, + "type": "array" + }, + "alternative_names": { + "description": "A list of alternative names used to refer to the entity. The distinction between name and alternative names is application-specific.", + "items": { + "type": "string" + }, + "type": "array" + }, + "alternative_titles": { + "description": "A list of alternative titles for the entity. The distinction between title and alternative titles is application-specific.", + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "a human-readable description of a thing", + "type": "string" + }, + "doi": { + "$ref": "#/$defs/AttributeValue", + "description": "The dataset citation for this study" + }, + "ecosystem": { + "description": "An ecosystem is a combination of a physical environment (abiotic factors) and all the organisms (biotic factors) that interact with this environment. Ecosystem is in position 1/5 in a GOLD path.", + "type": "string" + }, + "ecosystem_category": { + "description": "Ecosystem categories represent divisions within the ecosystem based on specific characteristics of the environment from where an organism or sample is isolated. Ecosystem category is in position 2/5 in a GOLD path.", + "type": "string" + }, + "ecosystem_subtype": { + "description": "Ecosystem subtypes represent further subdivision of Ecosystem types into more distinct subtypes. Ecosystem subtype is in position 4/5 in a GOLD path.", + "type": "string" + }, + "ecosystem_type": { + "description": "Ecosystem types represent things having common characteristics within the Ecosystem Category. 
These common characteristics based grouping is still broad but specific to the characteristics of a given environment. Ecosystem type is in position 3/5 in a GOLD path.", + "type": "string" + }, + "ess_dive_datasets": { + "description": "List of ESS-DIVE dataset DOIs", + "items": { + "type": "string" + }, + "type": "array" + }, + "funding_sources": { + "items": { + "type": "string" + }, + "type": "array" + }, + "has_credit_associations": { + "description": "This slot links a study to a credit association. The credit association will be linked to a person value and to a CRediT Contributor Roles term. Overall semantics: person should get credit X for their participation in the study", + "items": { + "$ref": "#/$defs/CreditAssociation" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "objective": { + "description": "The scientific objectives associated with the entity. It SHOULD correspond to scientific norms for objectives field in a structured abstract.", + "type": "string" + }, + "principal_investigator": { + "$ref": "#/$defs/PersonValue", + "description": "Principal Investigator who led the study and/or generated the dataset." + }, + "publications": { + "description": "A list of publications that are assocatiated with the entity. The publicatons SHOULD be given using an identifier, such as a DOI or Pubmed ID, if possible.", + "items": { + "type": "string" + }, + "type": "array" + }, + "relevant_protocols": { + "items": { + "type": "string" + }, + "type": "array" + }, + "specific_ecosystem": { + "description": "Specific ecosystems represent specific features of the environment like aphotic zone in an ocean or gastric mucosa within a host digestive system. 
Specific ecosystem is in position 5/5 in a GOLD path.", + "type": "string" + }, + "study_image": { + "description": "Links a study to one or more images.", + "items": { + "$ref": "#/$defs/ImageValue" + }, + "type": "array" + }, + "title": { + "description": "A name given to the entity that differs from the name/label programatically assigned to it. For example, when extracting study information for GOLD, the GOLD system has assigned a name/label. However, for display purposes, we may also wish the capture the title of the proposal that was used to fund the study.", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "websites": { + "description": "A list of websites that are assocatiated with the entity.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id" + ], + "title": "Study", + "type": "object" + }, + "TextValue": { + "additionalProperties": false, + "description": "A basic string value", + "properties": { + "has_raw_value": { + "description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", + "type": "string" + }, + "language": { + "description": "Should use ISO 639-1 code e.g. \"en\", \"fr\"", + "type": "string" + }, + "type": { + "description": "An optional string that specified the type of object.", + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "required": [], + "title": "TextValue", + "type": "object" + }, + "TimestampValue": { + "additionalProperties": false, + "description": "A value that is a timestamp. The range should be ISO-8601", + "properties": { + "has_raw_value": { + "description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. 
\"2 cm\" or \"2-4 cm\"", + "type": "string" + }, + "type": { + "description": "An optional string that specified the type of object.", + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "required": [], + "title": "TimestampValue", + "type": "object" + }, + "UrlValue": { + "additionalProperties": false, + "description": "A value that is a string that conforms to URL syntax", + "properties": { + "has_raw_value": { + "description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", + "type": "string" + }, + "type": { + "description": "An optional string that specified the type of object.", + "type": "string" + }, + "was_generated_by": { + "type": "string" + } + }, + "required": [], + "title": "UrlValue", + "type": "object" + }, + "WorkflowExecutionActivity": { + "additionalProperties": false, + "description": "Represents an instance of an execution of a particular workflow", + "properties": { + "ended_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "execution_resource": { + "description": "Example: NERSC-Cori", + "type": "string" + }, + "git_url": { + "description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", + "type": "string" + }, + "has_input": { + "description": "An input to a process.", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_output": { + "description": "An output biosample to a processing step", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "name": { + "description": "A human readable label for an entity", + "type": "string" + }, + "part_of": { + "description": "Links a resource to another resource that either logically or physically includes it.", + "items": { + "type": "string" + }, + "type": "array" + }, + "started_at_time": { + "format": "date-time", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", + "type": "string" + }, + "type": { + "description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", + "type": "string" + }, + "used": { + "type": "string" + }, + "was_associated_with": { + "description": "the agent/entity associated with the generation of the file", + "type": "string" + }, + "was_informed_by": { + "type": "string" + } + }, + "required": [ + "id", + "execution_resource", + "git_url", + "has_input", + "has_output", + "type", + "started_at_time", + "ended_at_time", + "was_informed_by" + ], + "title": "WorkflowExecutionActivity", + "type": "object" + } + }, + "$id": "https://microbiomedata/schema", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": false, + "metamodel_version": "1.7.0", + "properties": { + "activity_set": { + "description": "This property links a database object to the set of workflow activities.", + "items": { + "$ref": "#/$defs/WorkflowExecutionActivity" + }, + "type": "array" + }, + "biosample_set": { + "description": "This property links a database object to the set of samples within it.", + "items": { + "$ref": "#/$defs/Biosample" + }, + "type": "array" + }, + "data_object_set": { + "description": "This 
property links a database object to the set of data objects within it.", + "items": { + "$ref": "#/$defs/DataObject" + }, + "type": "array" + }, + "date_created": { + "description": "TODO", + "type": "string" + }, + "etl_software_version": { + "description": "TODO", + "type": "string" + }, + "functional_annotation_set": { + "description": "This property links a database object to the set of all functional annotations", + "items": { + "$ref": "#/$defs/FunctionalAnnotation" + }, + "type": "array" + }, + "genome_feature_set": { + "description": "This property links a database object to the set of all features", + "items": { + "$ref": "#/$defs/GenomeFeature" + }, + "type": "array" + }, + "mags_activity_set": { + "description": "This property links a database object to the set of MAGs analysis activities.", + "items": { + "$ref": "#/$defs/MAGsAnalysisActivity" + }, + "type": "array" + }, + "metabolomics_analysis_activity_set": { + "description": "This property links a database object to the set of metabolomics analysis activities.", + "items": { + "$ref": "#/$defs/MetabolomicsAnalysisActivity" + }, + "type": "array" + }, + "metagenome_annotation_activity_set": { + "description": "This property links a database object to the set of metagenome annotation activities.", + "items": { + "$ref": "#/$defs/MetagenomeAnnotationActivity" + }, + "type": "array" + }, + "metagenome_assembly_set": { + "description": "This property links a database object to the set of metagenome assembly activities.", + "items": { + "$ref": "#/$defs/MetagenomeAssembly" + }, + "type": "array" + }, + "metaproteomics_analysis_activity_set": { + "description": "This property links a database object to the set of metaproteomics analysis activities.", + "items": { + "$ref": "#/$defs/MetaproteomicsAnalysisActivity" + }, + "type": "array" + }, + "metatranscriptome_activity_set": { + "description": "This property links a database object to the set of metatranscriptome analysis activities.", + "items": { + 
"$ref": "#/$defs/MetatranscriptomeActivity" + }, + "type": "array" + }, + "nmdc_schema_version": { + "description": "TODO", + "type": "string" + }, + "nom_analysis_activity_set": { + "description": "This property links a database object to the set of natural organic matter (NOM) analysis activities.", + "items": { + "$ref": "#/$defs/NomAnalysisActivity" + }, + "type": "array" + }, + "omics_processing_set": { + "description": "This property links a database object to the set of omics processings within it.", + "items": { + "$ref": "#/$defs/OmicsProcessing" + }, + "type": "array" + }, + "read_QC_analysis_activity_set": { + "description": "This property links a database object to the set of read QC analysis activities.", + "items": { + "$ref": "#/$defs/ReadQCAnalysisActivity" + }, + "type": "array" + }, + "read_based_analysis_activity_set": { + "description": "This property links a database object to the set of read based analysis activities.\n ", + "items": { + "$ref": "#/$defs/ReadBasedAnalysisActivity" + }, + "type": "array" + }, + "study_set": { + "description": "This property links a database object to the set of studies within it.", + "items": { + "$ref": "#/$defs/Study" + }, + "type": "array" + } + }, + "required": [], + "title": "NMDC", + "type": "object", + "version": "2.1.0" +} + diff --git a/nmdc_runtime/site/ops.py b/nmdc_runtime/site/ops.py index 78534974..71397957 100644 --- a/nmdc_runtime/site/ops.py +++ b/nmdc_runtime/site/ops.py @@ -48,7 +48,7 @@ from nmdc_runtime.site.resources import RuntimeApiSiteClient from nmdc_runtime.site.util import collection_indexed_on_id, run_and_log from nmdc_runtime.util import drs_object_in_for, pluralize, put_object -from nmdc_schema.nmdc_data import get_nmdc_jsonschema_dict +from nmdc_runtime.util import get_nmdc_jsonschema_dict from pydantic import BaseModel from pymongo.database import Database as MongoDatabase from starlette import status diff --git a/nmdc_runtime/site/validation/util.py 
b/nmdc_runtime/site/validation/util.py index caff486f..fd5d8bcc 100644 --- a/nmdc_runtime/site/validation/util.py +++ b/nmdc_runtime/site/validation/util.py @@ -1,6 +1,6 @@ from dagster import op, AssetMaterialization, AssetKey, EventMetadata from jsonschema import Draft7Validator -from nmdc_schema.nmdc_data import get_nmdc_jsonschema_dict +from nmdc_runtime.util import get_nmdc_jsonschema_dict from toolz import dissoc from nmdc_runtime.site.resources import mongo_resource diff --git a/nmdc_runtime/test.Dockerfile b/nmdc_runtime/test.Dockerfile index bec48630..3e12ecdb 100644 --- a/nmdc_runtime/test.Dockerfile +++ b/nmdc_runtime/test.Dockerfile @@ -28,7 +28,6 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ # Install requirements WORKDIR /code COPY ./requirements/main.txt /code/requirements.txt - RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt # Add repository code diff --git a/nmdc_runtime/util.py b/nmdc_runtime/util.py index 5440087b..6e8b7bd8 100644 --- a/nmdc_runtime/util.py +++ b/nmdc_runtime/util.py @@ -3,17 +3,29 @@ import os from collections.abc import Iterable from datetime import datetime, timezone +from functools import lru_cache from pathlib import Path import fastjsonschema import requests from frozendict import frozendict -from nmdc_schema.nmdc_data import get_nmdc_jsonschema_dict from toolz import merge, pluck from nmdc_runtime.api.core.util import sha256hash_from_file from nmdc_runtime.api.models.object import DrsObjectIn + +@lru_cache +def get_nmdc_jsonschema_dict(): + """Get the JSON Schema in use by the runtime. 
+ + Currently: + https://raw.githubusercontent.com/microbiomedata/nmdc-schema/v3.2.0/nmdc_schema/nmdc.schema.json + """ + with (Path(__file__).parent / "nmdc.schema.json").open() as f: + return json.load(f) + + nmdc_jsonschema = get_nmdc_jsonschema_dict() nmdc_jsonschema_validate = fastjsonschema.compile(nmdc_jsonschema) diff --git a/requirements/dev.txt b/requirements/dev.txt index b22c26e6..4f056338 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # pip-compile --generate-hashes --output-file=requirements/dev.txt requirements/dev.in @@ -26,9 +26,9 @@ black==22.12.0 \ # via # -c requirements/main.txt # -r requirements/dev.in -bleach==5.0.1 \ - --hash=sha256:085f7f33c15bd408dd9b17a4ad77c577db66d76203e5984b1bd59baeee948b2a \ - --hash=sha256:0d03255c47eb9bd2f26aa9bb7f2107732e7e8fe195ca2f64709fcf3b0a4a085c +bleach==6.0.0 \ + --hash=sha256:1a1a85c1595e07d8db14c5f09f09e6433502c51c595970edc090551f0db99414 \ + --hash=sha256:33c16e3353dbd13028ab4799a0f89a83f113405c766e9c122df8a06f5b85b3f4 # via # -c requirements/main.txt # readme-renderer @@ -38,74 +38,6 @@ certifi==2022.12.7 \ # via # -c requirements/main.txt # requests -cffi==1.15.1 \ - --hash=sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5 \ - --hash=sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef \ - --hash=sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104 \ - --hash=sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426 \ - --hash=sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405 \ - --hash=sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375 \ - --hash=sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a \ - --hash=sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e \ - 
--hash=sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc \ - --hash=sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf \ - --hash=sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185 \ - --hash=sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497 \ - --hash=sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3 \ - --hash=sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35 \ - --hash=sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c \ - --hash=sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83 \ - --hash=sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21 \ - --hash=sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca \ - --hash=sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984 \ - --hash=sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac \ - --hash=sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd \ - --hash=sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee \ - --hash=sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a \ - --hash=sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2 \ - --hash=sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192 \ - --hash=sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7 \ - --hash=sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585 \ - --hash=sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f \ - --hash=sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e \ - --hash=sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27 \ - --hash=sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b \ - --hash=sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e \ - 
--hash=sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e \ - --hash=sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d \ - --hash=sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c \ - --hash=sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415 \ - --hash=sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82 \ - --hash=sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02 \ - --hash=sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314 \ - --hash=sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325 \ - --hash=sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c \ - --hash=sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3 \ - --hash=sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914 \ - --hash=sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045 \ - --hash=sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d \ - --hash=sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9 \ - --hash=sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5 \ - --hash=sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2 \ - --hash=sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c \ - --hash=sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3 \ - --hash=sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2 \ - --hash=sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8 \ - --hash=sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d \ - --hash=sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d \ - --hash=sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9 \ - --hash=sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162 \ - 
--hash=sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76 \ - --hash=sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4 \ - --hash=sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e \ - --hash=sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9 \ - --hash=sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6 \ - --hash=sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b \ - --hash=sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01 \ - --hash=sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0 - # via - # -c requirements/main.txt - # cryptography charset-normalizer==3.0.1 \ --hash=sha256:00d3ffdaafe92a5dc603cb9bd5111aaa36dfa187c8285c543be562e61b755f6b \ --hash=sha256:024e606be3ed92216e2b6952ed859d86b4cfa52cd5bc5f050e7dc28f9b43ec42 \ @@ -204,88 +136,61 @@ click==8.1.3 \ # via # -c requirements/main.txt # black -coverage[toml]==7.0.5 \ - --hash=sha256:051afcbd6d2ac39298d62d340f94dbb6a1f31de06dfaf6fcef7b759dd3860c45 \ - --hash=sha256:0a1890fca2962c4f1ad16551d660b46ea77291fba2cc21c024cd527b9d9c8809 \ - --hash=sha256:0ee30375b409d9a7ea0f30c50645d436b6f5dfee254edffd27e45a980ad2c7f4 \ - --hash=sha256:13250b1f0bd023e0c9f11838bdeb60214dd5b6aaf8e8d2f110c7e232a1bff83b \ - --hash=sha256:17e01dd8666c445025c29684d4aabf5a90dc6ef1ab25328aa52bedaa95b65ad7 \ - --hash=sha256:19245c249aa711d954623d94f23cc94c0fd65865661f20b7781210cb97c471c0 \ - --hash=sha256:1caed2367b32cc80a2b7f58a9f46658218a19c6cfe5bc234021966dc3daa01f0 \ - --hash=sha256:1f66862d3a41674ebd8d1a7b6f5387fe5ce353f8719040a986551a545d7d83ea \ - --hash=sha256:220e3fa77d14c8a507b2d951e463b57a1f7810a6443a26f9b7591ef39047b1b2 \ - --hash=sha256:276f4cd0001cd83b00817c8db76730938b1ee40f4993b6a905f40a7278103b3a \ - --hash=sha256:29de916ba1099ba2aab76aca101580006adfac5646de9b7c010a0f13867cba45 \ - 
--hash=sha256:2a7f23bbaeb2a87f90f607730b45564076d870f1fb07b9318d0c21f36871932b \ - --hash=sha256:2c407b1950b2d2ffa091f4e225ca19a66a9bd81222f27c56bd12658fc5ca1209 \ - --hash=sha256:30b5fec1d34cc932c1bc04017b538ce16bf84e239378b8f75220478645d11fca \ - --hash=sha256:3c2155943896ac78b9b0fd910fb381186d0c345911f5333ee46ac44c8f0e43ab \ - --hash=sha256:411d4ff9d041be08fdfc02adf62e89c735b9468f6d8f6427f8a14b6bb0a85095 \ - --hash=sha256:436e103950d05b7d7f55e39beeb4d5be298ca3e119e0589c0227e6d0b01ee8c7 \ - --hash=sha256:49640bda9bda35b057b0e65b7c43ba706fa2335c9a9896652aebe0fa399e80e6 \ - --hash=sha256:4a950f83fd3f9bca23b77442f3a2b2ea4ac900944d8af9993743774c4fdc57af \ - --hash=sha256:50a6adc2be8edd7ee67d1abc3cd20678987c7b9d79cd265de55941e3d0d56499 \ - --hash=sha256:52ab14b9e09ce052237dfe12d6892dd39b0401690856bcfe75d5baba4bfe2831 \ - --hash=sha256:54f7e9705e14b2c9f6abdeb127c390f679f6dbe64ba732788d3015f7f76ef637 \ - --hash=sha256:66e50680e888840c0995f2ad766e726ce71ca682e3c5f4eee82272c7671d38a2 \ - --hash=sha256:790e4433962c9f454e213b21b0fd4b42310ade9c077e8edcb5113db0818450cb \ - --hash=sha256:7a38362528a9115a4e276e65eeabf67dcfaf57698e17ae388599568a78dcb029 \ - --hash=sha256:7b05ed4b35bf6ee790832f68932baf1f00caa32283d66cc4d455c9e9d115aafc \ - --hash=sha256:7e109f1c9a3ece676597831874126555997c48f62bddbcace6ed17be3e372de8 \ - --hash=sha256:949844af60ee96a376aac1ded2a27e134b8c8d35cc006a52903fc06c24a3296f \ - --hash=sha256:95304068686545aa368b35dfda1cdfbbdbe2f6fe43de4a2e9baa8ebd71be46e2 \ - --hash=sha256:9e662e6fc4f513b79da5d10a23edd2b87685815b337b1a30cd11307a6679148d \ - --hash=sha256:a9fed35ca8c6e946e877893bbac022e8563b94404a605af1d1e6accc7eb73289 \ - --hash=sha256:b69522b168a6b64edf0c33ba53eac491c0a8f5cc94fa4337f9c6f4c8f2f5296c \ - --hash=sha256:b78729038abea6a5df0d2708dce21e82073463b2d79d10884d7d591e0f385ded \ - --hash=sha256:b8c56bec53d6e3154eaff6ea941226e7bd7cc0d99f9b3756c2520fc7a94e6d96 \ - --hash=sha256:b9727ac4f5cf2cbf87880a63870b5b9730a8ae3a4a360241a0fdaa2f71240ff0 \ - 
--hash=sha256:ba3027deb7abf02859aca49c865ece538aee56dcb4871b4cced23ba4d5088904 \ - --hash=sha256:be9fcf32c010da0ba40bf4ee01889d6c737658f4ddff160bd7eb9cac8f094b21 \ - --hash=sha256:c18d47f314b950dbf24a41787ced1474e01ca816011925976d90a88b27c22b89 \ - --hash=sha256:c76a3075e96b9c9ff00df8b5f7f560f5634dffd1658bafb79eb2682867e94f78 \ - --hash=sha256:cbfcba14a3225b055a28b3199c3d81cd0ab37d2353ffd7f6fd64844cebab31ad \ - --hash=sha256:d254666d29540a72d17cc0175746cfb03d5123db33e67d1020e42dae611dc196 \ - --hash=sha256:d66187792bfe56f8c18ba986a0e4ae44856b1c645336bd2c776e3386da91e1dd \ - --hash=sha256:d8d04e755934195bdc1db45ba9e040b8d20d046d04d6d77e71b3b34a8cc002d0 \ - --hash=sha256:d8f3e2e0a1d6777e58e834fd5a04657f66affa615dae61dd67c35d1568c38882 \ - --hash=sha256:e057e74e53db78122a3979f908973e171909a58ac20df05c33998d52e6d35757 \ - --hash=sha256:e4ce984133b888cc3a46867c8b4372c7dee9cee300335e2925e197bcd45b9e16 \ - --hash=sha256:ea76dbcad0b7b0deb265d8c36e0801abcddf6cc1395940a24e3595288b405ca0 \ - --hash=sha256:ecb0f73954892f98611e183f50acdc9e21a4653f294dfbe079da73c6378a6f47 \ - --hash=sha256:ef14d75d86f104f03dea66c13188487151760ef25dd6b2dbd541885185f05f40 \ - --hash=sha256:f26648e1b3b03b6022b48a9b910d0ae209e2d51f50441db5dce5b530fad6d9b1 \ - --hash=sha256:f67472c09a0c7486e27f3275f617c964d25e35727af952869dd496b9b5b7f6a3 +coverage[toml]==7.1.0 \ + --hash=sha256:04481245ef966fbd24ae9b9e537ce899ae584d521dfbe78f89cad003c38ca2ab \ + --hash=sha256:0c45948f613d5d18c9ec5eaa203ce06a653334cf1bd47c783a12d0dd4fd9c851 \ + --hash=sha256:10188fe543560ec4874f974b5305cd1a8bdcfa885ee00ea3a03733464c4ca265 \ + --hash=sha256:218fe982371ac7387304153ecd51205f14e9d731b34fb0568181abaf7b443ba0 \ + --hash=sha256:29571503c37f2ef2138a306d23e7270687c0efb9cab4bd8038d609b5c2393a3a \ + --hash=sha256:2a60d6513781e87047c3e630b33b4d1e89f39836dac6e069ffee28c4786715f5 \ + --hash=sha256:2bf1d5f2084c3932b56b962a683074a3692bce7cabd3aa023c987a2a8e7612f6 \ + 
--hash=sha256:3164d31078fa9efe406e198aecd2a02d32a62fecbdef74f76dad6a46c7e48311 \ + --hash=sha256:32df215215f3af2c1617a55dbdfb403b772d463d54d219985ac7cd3bf124cada \ + --hash=sha256:33d1ae9d4079e05ac4cc1ef9e20c648f5afabf1a92adfaf2ccf509c50b85717f \ + --hash=sha256:33ff26d0f6cc3ca8de13d14fde1ff8efe1456b53e3f0273e63cc8b3c84a063d8 \ + --hash=sha256:38da2db80cc505a611938d8624801158e409928b136c8916cd2e203970dde4dc \ + --hash=sha256:3b155caf3760408d1cb903b21e6a97ad4e2bdad43cbc265e3ce0afb8e0057e73 \ + --hash=sha256:3b946bbcd5a8231383450b195cfb58cb01cbe7f8949f5758566b881df4b33baf \ + --hash=sha256:3baf5f126f30781b5e93dbefcc8271cb2491647f8283f20ac54d12161dff080e \ + --hash=sha256:4b14d5e09c656de5038a3f9bfe5228f53439282abcab87317c9f7f1acb280352 \ + --hash=sha256:51b236e764840a6df0661b67e50697aaa0e7d4124ca95e5058fa3d7cbc240b7c \ + --hash=sha256:63ffd21aa133ff48c4dff7adcc46b7ec8b565491bfc371212122dd999812ea1c \ + --hash=sha256:6a43c7823cd7427b4ed763aa7fb63901ca8288591323b58c9cd6ec31ad910f3c \ + --hash=sha256:755e89e32376c850f826c425ece2c35a4fc266c081490eb0a841e7c1cb0d3bda \ + --hash=sha256:7a726d742816cb3a8973c8c9a97539c734b3a309345236cd533c4883dda05b8d \ + --hash=sha256:7c7c0d0827e853315c9bbd43c1162c006dd808dbbe297db7ae66cd17b07830f0 \ + --hash=sha256:7ed681b0f8e8bcbbffa58ba26fcf5dbc8f79e7997595bf071ed5430d8c08d6f3 \ + --hash=sha256:7ee5c9bb51695f80878faaa5598040dd6c9e172ddcf490382e8aedb8ec3fec8d \ + --hash=sha256:8361be1c2c073919500b6601220a6f2f98ea0b6d2fec5014c1d9cfa23dd07038 \ + --hash=sha256:8ae125d1134bf236acba8b83e74c603d1b30e207266121e76484562bc816344c \ + --hash=sha256:9817733f0d3ea91bea80de0f79ef971ae94f81ca52f9b66500c6a2fea8e4b4f8 \ + --hash=sha256:98b85dd86514d889a2e3dd22ab3c18c9d0019e696478391d86708b805f4ea0fa \ + --hash=sha256:9ccb092c9ede70b2517a57382a601619d20981f56f440eae7e4d7eaafd1d1d09 \ + --hash=sha256:9d58885215094ab4a86a6aef044e42994a2bd76a446dc59b352622655ba6621b \ + --hash=sha256:b643cb30821e7570c0aaf54feaf0bfb630b79059f85741843e9dc23f33aaca2c \ + 
--hash=sha256:bc7c85a150501286f8b56bd8ed3aa4093f4b88fb68c0843d21ff9656f0009d6a \ + --hash=sha256:beeb129cacea34490ffd4d6153af70509aa3cda20fdda2ea1a2be870dfec8d52 \ + --hash=sha256:c31b75ae466c053a98bf26843563b3b3517b8f37da4d47b1c582fdc703112bc3 \ + --hash=sha256:c4e4881fa9e9667afcc742f0c244d9364d197490fbc91d12ac3b5de0bf2df146 \ + --hash=sha256:c5b15ed7644ae4bee0ecf74fee95808dcc34ba6ace87e8dfbf5cb0dc20eab45a \ + --hash=sha256:d12d076582507ea460ea2a89a8c85cb558f83406c8a41dd641d7be9a32e1274f \ + --hash=sha256:d248cd4a92065a4d4543b8331660121b31c4148dd00a691bfb7a5cdc7483cfa4 \ + --hash=sha256:d47dd659a4ee952e90dc56c97d78132573dc5c7b09d61b416a9deef4ebe01a0c \ + --hash=sha256:d4a5a5879a939cb84959d86869132b00176197ca561c664fc21478c1eee60d75 \ + --hash=sha256:da9b41d4539eefd408c46725fb76ecba3a50a3367cafb7dea5f250d0653c1040 \ + --hash=sha256:db61a79c07331e88b9a9974815c075fbd812bc9dbc4dc44b366b5368a2936063 \ + --hash=sha256:ddb726cb861c3117a553f940372a495fe1078249ff5f8a5478c0576c7be12050 \ + --hash=sha256:ded59300d6330be27bc6cf0b74b89ada58069ced87c48eaf9344e5e84b0072f7 \ + --hash=sha256:e2617759031dae1bf183c16cef8fcfb3de7617f394c813fa5e8e46e9b82d4222 \ + --hash=sha256:e5cdbb5cafcedea04924568d990e20ce7f1945a1dd54b560f879ee2d57226912 \ + --hash=sha256:ec8e767f13be637d056f7e07e61d089e555f719b387a7070154ad80a0ff31801 \ + --hash=sha256:ef382417db92ba23dfb5864a3fc9be27ea4894e86620d342a116b243ade5d35d \ + --hash=sha256:f2cba5c6db29ce991029b5e4ac51eb36774458f0a3b8d3137241b32d1bb91f06 \ + --hash=sha256:f5b4198d85a3755d27e64c52f8c95d6333119e49fd001ae5798dac872c95e0f8 \ + --hash=sha256:ffeeb38ee4a80a30a6877c5c4c359e5498eec095878f1581453202bfacc8fbc2 # via # -r requirements/dev.in # pytest-cov -cryptography==39.0.0 \ - --hash=sha256:1a6915075c6d3a5e1215eab5d99bcec0da26036ff2102a1038401d6ef5bef25b \ - --hash=sha256:1ee1fd0de9851ff32dbbb9362a4d833b579b4a6cc96883e8e6d2ff2a6bc7104f \ - --hash=sha256:407cec680e811b4fc829de966f88a7c62a596faa250fc1a4b520a0355b9bc190 \ - 
--hash=sha256:50386acb40fbabbceeb2986332f0287f50f29ccf1497bae31cf5c3e7b4f4b34f \ - --hash=sha256:6f97109336df5c178ee7c9c711b264c502b905c2d2a29ace99ed761533a3460f \ - --hash=sha256:754978da4d0457e7ca176f58c57b1f9de6556591c19b25b8bcce3c77d314f5eb \ - --hash=sha256:76c24dd4fd196a80f9f2f5405a778a8ca132f16b10af113474005635fe7e066c \ - --hash=sha256:7dacfdeee048814563eaaec7c4743c8aea529fe3dd53127313a792f0dadc1773 \ - --hash=sha256:80ee674c08aaef194bc4627b7f2956e5ba7ef29c3cc3ca488cf15854838a8f72 \ - --hash=sha256:844ad4d7c3850081dffba91cdd91950038ee4ac525c575509a42d3fc806b83c8 \ - --hash=sha256:875aea1039d78557c7c6b4db2fe0e9d2413439f4676310a5f269dd342ca7a717 \ - --hash=sha256:887cbc1ea60786e534b00ba8b04d1095f4272d380ebd5f7a7eb4cc274710fad9 \ - --hash=sha256:ad04f413436b0781f20c52a661660f1e23bcd89a0e9bb1d6d20822d048cf2856 \ - --hash=sha256:bae6c7f4a36a25291b619ad064a30a07110a805d08dc89984f4f441f6c1f3f96 \ - --hash=sha256:c52a1a6f81e738d07f43dab57831c29e57d21c81a942f4602fac7ee21b27f288 \ - --hash=sha256:e0a05aee6a82d944f9b4edd6a001178787d1546ec7c6223ee9a848a7ade92e39 \ - --hash=sha256:e324de6972b151f99dc078defe8fb1b0a82c6498e37bff335f5bc6b1e3ab5a1e \ - --hash=sha256:e5d71c5d5bd5b5c3eebcf7c5c2bb332d62ec68921a8c593bea8c394911a005ce \ - --hash=sha256:f3ed2d864a2fa1666e749fe52fb8e23d8e06b8012e8bd8147c73797c506e86f1 \ - --hash=sha256:f671c1bb0d6088e94d61d80c606d65baacc0d374e67bf895148883461cd848de \ - --hash=sha256:f6c0db08d81ead9576c4d94bbb27aed8d7a430fa27890f39084c2d0e2ec6b0df \ - --hash=sha256:f964c7dcf7802d133e8dbd1565914fa0194f9d683d82411989889ecd701e8adf \ - --hash=sha256:fec8b932f51ae245121c4671b4bbc030880f363354b2f0e0bd1366017d891458 - # via - # -c requirements/main.txt - # secretstorage docutils==0.17.1 \ --hash=sha256:686577d2e4c32380bb50cbb22f575ed742d58168cee37e99117a854bcd88f125 \ --hash=sha256:cf316c8370a737a022b72b56874f6602acf974a37a9fba42ec2876387549fc61 @@ -329,12 +234,6 @@ jaraco-classes==3.2.3 \ 
--hash=sha256:2353de3288bc6b82120752201c6b1c1a14b058267fa424ed5ce5984e3b922158 \ --hash=sha256:89559fa5c1d3c34eff6f631ad80bb21f378dbcbb35dd161fd2c6b93f5be2f98a # via keyring -jeepney==0.8.0 \ - --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ - --hash=sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755 - # via - # keyring - # secretstorage keyring==23.13.1 \ --hash=sha256:771ed2a91909389ed6148631de678f82ddc73737d85a927f382a8a1b157898cd \ --hash=sha256:ba2e15a9b35e21908d0aaf4e0a47acc52d6ae33444df0da2b49d41a46ef6d678 @@ -372,9 +271,9 @@ packaging==23.0 \ # -c requirements/main.txt # pytest # setuptools-scm -pathspec==0.10.3 \ - --hash=sha256:3c95343af8b756205e2aba76e843ba9520a24dd84f68c22b9f93251507509dd6 \ - --hash=sha256:56200de4077d9d0791465aa9095a01d421861e405b5096955051deefd697d6f6 +pathspec==0.11.0 \ + --hash=sha256:3a66eb970cbac598f9e5ccb5b2cf58930cd8e3ed86d393d541eaf2d8b1705229 \ + --hash=sha256:64d338d4e0914e91c1792321e6907b5a593f1ab1851de7fc269557a21b30ebbc # via # -c requirements/main.txt # black @@ -398,12 +297,6 @@ pycodestyle==2.10.0 \ --hash=sha256:347187bdb476329d98f695c213d7295a846d1152ff4fe9bacb8a9590b8ee7053 \ --hash=sha256:8a4eaf0d0495c7395bdab3589ac2db602797d76207242c17d470186815706610 # via flake8 -pycparser==2.21 \ - --hash=sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9 \ - --hash=sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206 - # via - # -c requirements/main.txt - # cffi pyflakes==3.0.1 \ --hash=sha256:ec55bf7fe21fff7f1ad2f7da62363d749e2a470500eab1b555334b67aa1ef8cf \ --hash=sha256:ec8b276a6b60bd80defed25add7e439881c19e64850afd9b346283d4165fd0fd @@ -458,10 +351,6 @@ rich==13.2.0 \ --hash=sha256:7c963f0d03819221e9ac561e1bc866e3f95a02248c1234daa48954e6d381c003 \ --hash=sha256:f1a00cdd3eebf999a15d85ec498bfe0b1a77efe9b34f645768a54132ef444ac5 # via twine -secretstorage==3.3.3 \ - 
--hash=sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77 \ - --hash=sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99 - # via keyring setuptools-scm==7.1.0 \ --hash=sha256:6c508345a771aad7d56ebff0e70628bf2b0ec7573762be9960214730de278f27 \ --hash=sha256:73988b6d848709e2af142aa48c986ea29592bbcfca5375678064708205253d8e @@ -490,7 +379,6 @@ typing-extensions==4.4.0 \ --hash=sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e # via # -c requirements/main.txt - # black # setuptools-scm urllib3==1.26.14 \ --hash=sha256:076907bf8fd355cde77728471316625a4d2f7e713c125f51953bb5b3eecf4f72 \ diff --git a/requirements/main.txt b/requirements/main.txt index ef98d027..d0b5dbc7 100644 --- a/requirements/main.txt +++ b/requirements/main.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # pip-compile --generate-hashes --output-file=requirements/main.txt requirements/main.in @@ -29,6 +29,12 @@ anyio==3.6.2 \ # jupyter-server # starlette # watchfiles +appnope==0.1.3 \ + --hash=sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24 \ + --hash=sha256:265a455292d0bd8a72453494fa24df5a11eb18373a60c7c0430889f22548605e + # via + # ipykernel + # ipython argon2-cffi==21.3.0 \ --hash=sha256:8c976986f2c5c0e5000919e6de187906cfd81fb1c72bf9d88c01177e77da7f80 \ --hash=sha256:d384164d944190a7dd7ef22c6aa3ff197da12962bd04b17f64d4e93d934dba5b @@ -143,17 +149,17 @@ black==22.12.0 \ --hash=sha256:c116eed0efb9ff870ded8b62fe9f28dd61ef6e9ddd28d83d7d264a38417dcee2 \ --hash=sha256:d30b212bffeb1e252b31dd269dfae69dd17e06d92b87ad26e23890f3efea366f # via shed -bleach==5.0.1 \ - --hash=sha256:085f7f33c15bd408dd9b17a4ad77c577db66d76203e5984b1bd59baeee948b2a \ - --hash=sha256:0d03255c47eb9bd2f26aa9bb7f2107732e7e8fe195ca2f64709fcf3b0a4a085c +bleach==6.0.0 \ + 
--hash=sha256:1a1a85c1595e07d8db14c5f09f09e6433502c51c595970edc090551f0db99414 \ + --hash=sha256:33c16e3353dbd13028ab4799a0f89a83f113405c766e9c122df8a06f5b85b3f4 # via nbconvert -boto3==1.26.54 \ - --hash=sha256:4e876ba5d64928cde0c416dd844f04f22d6b73d14002bbc3ca55591f80f49927 \ - --hash=sha256:c729bb0af76e85a2776b6bd3da8d9fa0f4b91b425eab51612aa53956f644ee23 +boto3==1.26.56 \ + --hash=sha256:0ff8667fbfda8390cab2718a4d129374a6ddd6fd1913f79777fd4498f93c84f1 \ + --hash=sha256:72214a08f337d29a1300d7861872f60ea41016b2a8ad8094fab20d783c8cf1ae # via -r requirements/main.in -botocore==1.29.54 \ - --hash=sha256:ca3ef7588daa664fe196d3234718db5f6b5dab961507500b4bb921e31133eea1 \ - --hash=sha256:f2fe17ed6b8e163769a715f81cb6ce3d4628d172918de535256bdf34d29b704f +botocore==1.29.56 \ + --hash=sha256:669ed3a256c4352f8f8a77a24b4d623ab7acc966d843b460d7ce2261a9813a79 \ + --hash=sha256:ca4d6403d745218270a20d9ca3ca9a33e3ad2fabb59a96ed8d6e1a824b274c86 # via # boto3 # s3transfer @@ -412,25 +418,25 @@ dagster-postgres==0.17.13 \ --hash=sha256:0509e56fa35d4d652e9bc4efc4f7fa762a4ab2d720fec7490069f413dbbf1b91 \ --hash=sha256:f6db35e53391a2909eb96d3452b7cf5bf9c1c765642f82d780195bd50c217846 # via -r requirements/main.in -debugpy==1.6.5 \ - --hash=sha256:048368f121c08b00bbded161e8583817af5055982d2722450a69efe2051621c2 \ - --hash=sha256:0f9afcc8cad6424695f3356dc9a7406d5b18e37ee2e73f34792881a44b02cc50 \ - --hash=sha256:15bc5febe0edc79726517b1f8d57d7ac7c784567b5ba804aab8b1c9d07a57018 \ - --hash=sha256:17039e392d6f38388a68bd02c5f823b32a92142a851e96ba3ec52aeb1ce9d900 \ - --hash=sha256:286ae0c2def18ee0dc8a61fa76d51039ca8c11485b6ed3ef83e3efe8a23926ae \ - --hash=sha256:377391341c4b86f403d93e467da8e2d05c22b683f08f9af3e16d980165b06b90 \ - --hash=sha256:500dd4a9ff818f5c52dddb4a608c7de5371c2d7d905c505eb745556c579a9f11 \ - --hash=sha256:5e55e6c79e215239dd0794ee0bf655412b934735a58e9d705e5c544f596f1603 \ - --hash=sha256:62a06eb78378292ba6c427d861246574dc8b84471904973797b29dd33c7c2495 \ - 
--hash=sha256:696165f021a6a17da08163eaae84f3faf5d8be68fb78cd78488dd347e625279c \ - --hash=sha256:74e4eca42055759032e3f1909d1374ba1d729143e0c2729bb8cb5e8b5807c458 \ - --hash=sha256:7e84d9e4420122384cb2cc762a00b4e17cbf998022890f89b195ce178f78ff47 \ - --hash=sha256:8116e40a1cd0593bd2aba01d4d560ee08f018da8e8fbd4cbd24ff09b5f0e41ef \ - --hash=sha256:8f3fab217fe7e2acb2d90732af1a871947def4e2b6654945ba1ebd94bd0bea26 \ - --hash=sha256:947c686e8adb46726f3d5f19854f6aebf66c2edb91225643c7f44b40b064a235 \ - --hash=sha256:9984fc00ab372c97f63786c400107f54224663ea293daab7b365a5b821d26309 \ - --hash=sha256:9e809ef787802c808995e5b6ade714a25fa187f892b41a412d418a15a9c4a432 \ - --hash=sha256:b5a74ecebe5253344501d9b23f74459c46428b30437fa9254cfb8cb129943242 +debugpy==1.6.6 \ + --hash=sha256:0ea1011e94416e90fb3598cc3ef5e08b0a4dd6ce6b9b33ccd436c1dffc8cd664 \ + --hash=sha256:11a0f3a106f69901e4a9a5683ce943a7a5605696024134b522aa1bfda25b5fec \ + --hash=sha256:23363e6d2a04d726bbc1400bd4e9898d54419b36b2cdf7020e3e215e1dcd0f8e \ + --hash=sha256:23c29e40e39ad7d869d408ded414f6d46d82f8a93b5857ac3ac1e915893139ca \ + --hash=sha256:549ae0cb2d34fc09d1675f9b01942499751d174381b6082279cf19cdb3c47cbe \ + --hash=sha256:70ab53918fd907a3ade01909b3ed783287ede362c80c75f41e79596d5ccacd32 \ + --hash=sha256:72687b62a54d9d9e3fb85e7a37ea67f0e803aaa31be700e61d2f3742a5683917 \ + --hash=sha256:78739f77c58048ec006e2b3eb2e0cd5a06d5f48c915e2fc7911a337354508110 \ + --hash=sha256:7aa7e103610e5867d19a7d069e02e72eb2b3045b124d051cfd1538f1d8832d1b \ + --hash=sha256:87755e173fcf2ec45f584bb9d61aa7686bb665d861b81faa366d59808bbd3494 \ + --hash=sha256:9b5d1b13d7c7bf5d7cf700e33c0b8ddb7baf030fcf502f76fc061ddd9405d16c \ + --hash=sha256:a771739902b1ae22a120dbbb6bd91b2cae6696c0e318b5007c5348519a4211c6 \ + --hash=sha256:b9c2130e1c632540fbf9c2c88341493797ddf58016e7cba02e311de9b0a96b67 \ + --hash=sha256:be596b44448aac14eb3614248c91586e2bc1728e020e82ef3197189aae556115 \ + 
--hash=sha256:c05349890804d846eca32ce0623ab66c06f8800db881af7a876dc073ac1c2225 \ + --hash=sha256:de4a045fbf388e120bb6ec66501458d3134f4729faed26ff95de52a754abddb1 \ + --hash=sha256:dff595686178b0e75580c24d316aa45a8f4d56e2418063865c114eef651a982e \ + --hash=sha256:f6383c29e796203a0bba74a250615ad262c4279d398e89d895a69d3069498305 # via ipykernel decorator==5.1.1 \ --hash=sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330 \ @@ -552,9 +558,9 @@ editorconfig==0.12.3 \ --hash=sha256:57f8ce78afcba15c8b18d46b5170848c88d56fd38f05c2ec60dbbfcb8996e89e \ --hash=sha256:6b0851425aa875b08b16789ee0eeadbd4ab59666e9ebe728e526314c4a2e52c1 # via jsbeautifier -email-validator==1.3.0 \ - --hash=sha256:553a66f8be2ec2dea641ae1d3f29017ab89e9d603d4a25cdaac39eefa283d769 \ - --hash=sha256:816073f2a7cffef786b29928f58ec16cdac42710a53bb18aa94317e3e145ec5c +email-validator==1.3.1 \ + --hash=sha256:49a72f5fa6ed26be1c964f0567d931d10bf3fdeeacdf97bc26ef1cd2a44e0bda \ + --hash=sha256:d178c5c6fa6c6824e9b04f199cf23e79ac15756786573c190d2ad13089411ad2 # via pydantic entrypoints==0.4 \ --hash=sha256:b706eddaa9218a19ebcd67b56818f05bb27589b1ca9e8d797b74affad4ccacd4 \ @@ -842,12 +848,7 @@ imagesize==1.4.1 \ importlib-metadata==4.13.0 \ --hash=sha256:8a8a81bcf996e74fee46f0d16bd3eaa382a7eb20fd82445c3ad11f4090334116 \ --hash=sha256:dd0173e8f150d6815e098fd354f6414b0f079af4644ddfe90c71e2fc6174346d - # via - # jupyterlab-server - # markdown - # mkdocs - # prefixmaps - # sphinx + # via prefixmaps iniconfig==2.0.0 \ --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \ --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 @@ -1048,9 +1049,9 @@ jupyter-console==6.4.4 \ --hash=sha256:172f5335e31d600df61613a97b7f0352f2c8250bbd1092ef2d658f77249f89fb \ --hash=sha256:756df7f4f60c986e7bc0172e4493d3830a7e6e75c08750bbe59c0a5403ad6dee # via jupyter -jupyter-core==5.1.3 \ - --hash=sha256:82e1cff0ef804c38677eff7070d5ff1d45037fef01a2d9ba9e6b7b8201831e9f \ - 
--hash=sha256:d23ab7db81ca1759f13780cd6b65f37f59bf8e0186ac422d5ca4982cc7d56716 +jupyter-core==5.1.5 \ + --hash=sha256:83064d61bb2a9bc874e8184331c117b3778c2a7e1851f60cb00d273ceb3285ae \ + --hash=sha256:8e54c48cde1e0c8345f64bcf9658b78044ddf02b273726cea9d9f59be4b02130 # via # jupyter-client # jupyter-server @@ -1077,9 +1078,9 @@ jupyter-server-terminals==0.4.4 \ --hash=sha256:57ab779797c25a7ba68e97bcfb5d7740f2b5e8a83b5e8102b10438041a7eac5d \ --hash=sha256:75779164661cec02a8758a5311e18bb8eb70c4e86c6b699403100f1585a12a36 # via jupyter-server -jupyterlab==3.5.2 \ - --hash=sha256:10ac094215ffb872ddffbe2982bf1c039a79fecc326e191e7cc5efd84f331dad \ - --hash=sha256:16e9b8320dcec469c70bb883e993e0bb84c4ea1a734063731f66922cf72add1b +jupyterlab==3.5.3 \ + --hash=sha256:51e889448ae194eeef8e50f63f5c4f487f728f477befe436e9749672f7511dbe \ + --hash=sha256:8e1a4414b681dafd3f19bd45cb0c79cb713bc78ef4e8440b95d86881c23a9fe5 # via -r requirements/main.in jupyterlab-pygments==0.2.2 \ --hash=sha256:2405800db07c9f770863bcf8049a529c3dd4d3e28536638bd7c1c01d2748309f \ @@ -1592,9 +1593,9 @@ passlib[bcrypt]==1.7.4 \ --hash=sha256:aa6bca462b8d8bda89c70b382f0c298a20b5560af6cbfa2dce410c0a2fb669f1 \ --hash=sha256:defd50f72b65c5402ab2c573830a6978e5f202ad0d984793c8dde2c4152ebe04 # via -r requirements/main.in -pathspec==0.10.3 \ - --hash=sha256:3c95343af8b756205e2aba76e843ba9520a24dd84f68c22b9f93251507509dd6 \ - --hash=sha256:56200de4077d9d0791465aa9095a01d421861e405b5096955051deefd697d6f6 +pathspec==0.11.0 \ + --hash=sha256:3a66eb970cbac598f9e5ccb5b2cf58930cd8e3ed86d393d541eaf2d8b1705229 \ + --hash=sha256:64d338d4e0914e91c1792321e6907b5a593f1ab1851de7fc269557a21b30ebbc # via black pendulum==2.1.2 \ --hash=sha256:0731f0c661a3cb779d398803655494893c9f581f6488048b3fb629c2342b5394 \ @@ -1656,9 +1657,9 @@ prefixmaps==0.1.4 \ # via # linkml # linkml-runtime -prometheus-client==0.15.0 \ - --hash=sha256:be26aa452490cfcf6da953f9436e95a9f2b4d578ca80094b4458930e5f584ab1 \ - 
--hash=sha256:db7c05cbd13a0f79975592d112320f2605a325969b270a94b71dcabc47b931d2 +prometheus-client==0.16.0 \ + --hash=sha256:0836af6eb2c8f4fed712b2f279f6c0a8bbab29f9f4aa15276b91c7cb0d1616ab \ + --hash=sha256:a03e35b359f14dd1630898543e2120addfdeacd1a6069c1367ae90fd93ad3f48 # via # jupyter-server # nbclassic @@ -2021,9 +2022,9 @@ python-dateutil==2.8.2 \ # linkml # pandas # pendulum -python-dotenv==0.21.0 \ - --hash=sha256:1684eb44636dd462b66c3ee016599815514527ad99965de77f43e0944634a7e5 \ - --hash=sha256:b77d08274639e3d34145dfa6c7008e66df0f04b7be7a75fd0d5292c191d79045 +python-dotenv==0.21.1 \ + --hash=sha256:1c93de8f636cde3ce377292818d0e440b6e45a82f215c3744979151fa8151c49 \ + --hash=sha256:41e12e0318bebc859fcc4d97d4db8d20ad21721a6aa5047dd59f090391cb549a # via # dagster # uvicorn @@ -2399,9 +2400,9 @@ sphinx-rtd-theme==1.1.1 \ --hash=sha256:31faa07d3e97c8955637fc3f1423a5ab2c44b74b8cc558a51498c202ce5cbda7 \ --hash=sha256:6146c845f1e1947b3c3dd4432c28998a1693ccc742b4f9ad7c63129f0757c103 # via linkml -sphinxcontrib-applehelp==1.0.3 \ - --hash=sha256:83749f09f6ac843b8cb685277dbc818a8bf2d76cc19602699094fe9a74db529e \ - --hash=sha256:ba0f2a22e6eeada8da6428d0d520215ee8864253f32facf958cca81e426f661d +sphinxcontrib-applehelp==1.0.4 \ + --hash=sha256:29d341f67fb0f6f586b23ad80e072c8e6ad0b48417db2bde114a4c9746feb228 \ + --hash=sha256:828f867945bbe39817c210a1abfd1bc4895c8b73fcaade56d45357a348a07d7e # via sphinx sphinxcontrib-devhelp==1.0.2 \ --hash=sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e \ @@ -2590,12 +2591,10 @@ typing-extensions==4.4.0 \ --hash=sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa \ --hash=sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e # via - # black # dagster # libcst # myst-parser # pydantic - # starlette # typing-inspect typing-inspect==0.8.0 \ --hash=sha256:5fbf9c1e65d4fa01e701fe12a5bca6c6e08a4ffd5bc60bfac028253a447c5188 \ diff --git a/tests/conftest.py b/tests/conftest.py index 
d3f36f25..04feb81a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,49 +1,43 @@ import os from functools import lru_cache -from nmdc_schema.nmdc_data import get_nmdc_jsonschema_dict +from nmdc_runtime.api.core.util import import_via_dotted_path +from nmdc_runtime.util import get_nmdc_jsonschema_dict from pymongo import MongoClient from pymongo.database import Database as MongoDatabase from nmdc_runtime.minter.adapters.repository import InMemoryIDStore +from nmdc_runtime.minter.config import ( + typecodes, + shoulders, + services, + requesters, + schema_classes, +) from nmdc_runtime.minter.domain.model import MintingRequest, Identifier -TYPECODES = [ - {"id": "nmdc:nt-11-gha2fh68", "name": "bsm", "schema_class": "nmdc:Biosample"}, - {"id": "nmdc:nt-11-rb11ex57", "name": "nt", "schema_class": "nmdc:NamedThing"}, -] -SHOULDERS = [ - {"id": "nmdc:nt-11-6weqb260", "assigned_to": "nmdc:nt-11-zfj0tv58", "name": "11"}, -] -SERVICES = [{"id": "nmdc:nt-11-zfj0tv58", "name": "central minting service"}] -REQUESTERS = [{"id": "nmdc:pers-11-pm7mfv46", "name": "Alicia"}] - -SCHEMA_CLASSES = [ - {"id": f"nmdc:{k}"} - for k, v in get_nmdc_jsonschema_dict()["$defs"].items() - if "required" in v and "id" in v["required"] -] - def minting_request(): return MintingRequest( **{ - "service": {"id": "nmdc:nt-11-zfj0tv58"}, - "requester": {"id": "nmdc:pers-11-pm7mfv46"}, - "schema_class": {"id": "nmdc:Biosample"}, + "service": services()[0], + "requester": requesters()[0], + "schema_class": schema_classes()[0], "how_many": 1, } ) def draft_identifier(): - id_ = "nmdc:nt-11-z8x8p723" + id_ = "nmdc:bsm-11-z8x8p723" return Identifier( **{ "id": id_, "name": id_, - "typecode": {"id": next(d["id"] for d in TYPECODES if d["name"] == "nt")}, - "shoulder": {"id": next(d["id"] for d in SHOULDERS if d["name"] == "11")}, + "typecode": { + "id": next(d["id"] for d in typecodes() if d["name"] == "bsm") + }, + "shoulder": {"id": next(d["id"] for d in shoulders() if d["name"] == "11")}, 
"status": "draft", } ) @@ -58,20 +52,17 @@ def get_mongo_test_db() -> MongoDatabase: ) db: MongoDatabase = _client[os.getenv("MONGO_TEST_DBNAME")] - db.typecodes.drop() - db.typecodes.insert_many(TYPECODES) - - db.shoulders.drop() - db.shoulders.insert_many(SHOULDERS) - - db.services.drop() - db.services.insert_many(SERVICES) - - db.requesters.drop() - db.requesters.insert_many(REQUESTERS) - - db.schema_classes.drop() - db.schema_classes.insert_many(SCHEMA_CLASSES) + for coll_name in [ + "typecodes", + "shoulders", + "services", + "requesters", + "schema_classes", + ]: + db[f"minter.{coll_name}"].drop() + db[f"minter.{coll_name}"].insert_many( + import_via_dotted_path(f"nmdc_runtime.minter.config.{coll_name}")() + ) return db @@ -79,9 +70,9 @@ def get_mongo_test_db() -> MongoDatabase: @lru_cache() def get_test_inmemoryidstore() -> InMemoryIDStore: return InMemoryIDStore( - services=SERVICES, - shoulders=SHOULDERS, - typecodes=TYPECODES, - requesters=REQUESTERS, - schema_classes=SCHEMA_CLASSES, + services=services(), + shoulders=shoulders(), + typecodes=typecodes(), + requesters=requesters(), + schema_classes=schema_classes(), ) diff --git a/tests/e2e/test_minter_api.py b/tests/e2e/test_minter_api.py index 4f9257bb..5ffe2443 100644 --- a/tests/e2e/test_minter_api.py +++ b/tests/e2e/test_minter_api.py @@ -1,9 +1,12 @@ import os +from nmdc_runtime.minter.config import schema_classes from nmdc_runtime.site.repository import run_config_frozen__normal_env from nmdc_runtime.site.resources import get_mongo, RuntimeApiSiteClient from tests.test_api.test_endpoints import ensure_test_resources +schema_class = schema_classes()[0] + def _get_client(): mdb = get_mongo(run_config_frozen__normal_env).db @@ -14,7 +17,7 @@ def _get_client(): def test_minter_api_mint(): client = _get_client() rv = client.request( - "POST", "/pids/mint", {"schema_class": {"id": "nmdc:NamedThing"}, "how_many": 1} + "POST", "/pids/mint", {"schema_class": schema_class, "how_many": 1} ).json() assert 
len(rv) == 1 and rv[0].startswith("nmdc:") @@ -22,7 +25,7 @@ def test_minter_api_mint(): def test_minter_api_resolve(): client = _get_client() [id_name] = client.request( - "POST", "/pids/mint", {"schema_class": {"id": "nmdc:NamedThing"}, "how_many": 1} + "POST", "/pids/mint", {"schema_class": schema_class, "how_many": 1} ).json() rv = client.request("GET", f"/pids/resolve/{id_name}").json() assert rv["id"] == id_name and rv["status"] == "draft" @@ -31,7 +34,7 @@ def test_minter_api_resolve(): def test_minter_api_bind(): client = _get_client() [id_name] = client.request( - "POST", "/pids/mint", {"schema_class": {"id": "nmdc:NamedThing"}, "how_many": 1} + "POST", "/pids/mint", {"schema_class": schema_class, "how_many": 1} ).json() rv = client.request( "POST", @@ -48,7 +51,7 @@ def test_minter_api_bind(): def test_minter_api_delete(): client = _get_client() [id_name] = client.request( - "POST", "/pids/mint", {"schema_class": {"id": "nmdc:NamedThing"}, "how_many": 1} + "POST", "/pids/mint", {"schema_class": schema_class, "how_many": 1} ).json() rv = client.request( "POST", diff --git a/tests/integration/test_minter_repository.py b/tests/integration/test_minter_repository.py index 494e2ce9..96199670 100644 --- a/tests/integration/test_minter_repository.py +++ b/tests/integration/test_minter_repository.py @@ -44,31 +44,31 @@ def test_mint_and_delete(): def test_mongo_mint_one(): s = MongoIDStore(get_mongo_test_db()) + s.db["minter.id_records"].drop() + req_mint = minting_request() assert req_mint.how_many == 1 - s.db.id_records.drop() - ids = s.mint(req_mint) assert len(ids) == 1 - assert s.db.id_records.count_documents({}) == 1 + assert s.db["minter.id_records"].count_documents({}) == 1 def test_mongo_mint_many(): s = MongoIDStore(get_mongo_test_db()) - req_mint = minting_request() - s.db.id_records.drop() + s.db["minter.id_records"].drop() + req_mint = minting_request() req_mint.how_many = 1_000 ids = s.mint(req_mint) assert len(ids) == 1_000 - assert 
s.db.id_records.count_documents({}) == 1_000 + assert s.db["minter.id_records"].count_documents({}) == 1_000 def test_mongo_mint_and_resolve(): s = MongoIDStore(get_mongo_test_db()) - req_mint = minting_request() - s.db.id_records.drop() + s.db["minter.id_records"].drop() + req_mint = minting_request() id_: Identifier = next(i for i in s.mint(req_mint)) req_res = ResolutionRequest(id_name=id_.name, **req_mint.dict()) assert s.resolve(req_res) is not None @@ -76,11 +76,11 @@ def test_mongo_mint_and_resolve(): def test_mongo_mint_and_delete(): s = MongoIDStore(get_mongo_test_db()) - req_mint = minting_request() - s.db.id_records.drop() + s.db["minter.id_records"].drop() + req_mint = minting_request() id_: Identifier = next(i for i in s.mint(req_mint)) req_del = DeleteRequest(id_name=id_.name, **req_mint.dict()) s.delete(req_del) assert s.resolve(ResolutionRequest(**req_del.dict())) is None - assert s.db.id_records.count_documents({}) == 0 + assert s.db["minter.id_records"].count_documents({}) == 0 diff --git a/tests/test_api/test_metadata.py b/tests/test_api/test_metadata.py index acb12c1f..febde956 100644 --- a/tests/test_api/test_metadata.py +++ b/tests/test_api/test_metadata.py @@ -5,7 +5,7 @@ import fastjsonschema import pandas as pd import pytest -from nmdc_schema.nmdc_data import get_nmdc_jsonschema_dict +from nmdc_runtime.util import get_nmdc_jsonschema_dict from toolz import dissoc from nmdc_runtime.api.core.metadata import ( diff --git a/tests/test_util.py b/tests/test_util.py index d9917055..a54ab465 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,23 +1,34 @@ import json import sys from pathlib import Path +from subprocess import run import fastjsonschema import pytest from fastjsonschema import JsonSchemaValueException, JsonSchemaException -# from nmdc_schema.validate_nmdc_json import jsonschema from jsonschema import ValidationError, Draft7Validator -from nmdc_schema.nmdc_data import get_nmdc_jsonschema_dict +from nmdc_runtime.util import 
get_nmdc_jsonschema_dict from pymongo.database import Database as MongoDatabase from pymongo.write_concern import WriteConcern from nmdc_runtime.site.repository import run_config_frozen__normal_env from nmdc_runtime.site.resources import get_mongo -from nmdc_runtime.util import nmdc_jsonschema_validate REPO_ROOT = Path(__file__).parent.parent +nmdc_jsonschema_validate = fastjsonschema.compile(get_nmdc_jsonschema_dict()) + + +def test_nmdc_jsonschema_using_old_id_scheme(): + if (version := get_nmdc_jsonschema_dict()["version"]) > "3": + pytest.fail(version) + for class_name, defn in get_nmdc_jsonschema_dict()["$defs"].items(): + if "properties" in defn and "id" in defn["properties"]: + if "pattern" in defn["properties"]["id"]: + if defn["properties"]["id"]["pattern"].startswith("^(nmdc):"): + pytest.fail(f"{class_name}.id: {defn['properties']['id']}") + def test_nmdc_jsonschema_validate(): with open(REPO_ROOT.joinpath("metadata-translation/examples/study_test.json")) as f: diff --git a/tests/unit/test_minter_model.py b/tests/unit/test_minter_model.py index 3bb67546..b5bc191a 100644 --- a/tests/unit/test_minter_model.py +++ b/tests/unit/test_minter_model.py @@ -2,29 +2,31 @@ from toolz import pluck +from nmdc_runtime.minter.config import ( + services, + requesters, + schema_classes, + typecodes, + shoulders, +) from tests.conftest import ( minting_request, - SERVICES, - REQUESTERS, - SCHEMA_CLASSES, - TYPECODES, draft_identifier, - SHOULDERS, ) def test_minting_request(): mr = minting_request() - assert mr.service.id in list(pluck("id", SERVICES)) - assert mr.requester.id in list(pluck("id", REQUESTERS)) - assert mr.schema_class.id in list(pluck("id", SCHEMA_CLASSES)) - assert mr.schema_class.id in list(pluck("schema_class", TYPECODES)) + assert mr.service.id in list(pluck("id", services())) + assert mr.requester.id in list(pluck("id", requesters())) + assert mr.schema_class.id in list(pluck("id", schema_classes())) + assert mr.schema_class.id in 
list(pluck("schema_class", typecodes())) assert mr.how_many > 0 def test_draft_identifier(): did = draft_identifier() assert did.status == "draft" - assert re.fullmatch(r"nmdc:..-..-.*", did.name) - assert did.typecode.id in list(pluck("id", TYPECODES)) - assert did.shoulder.id in list(pluck("id", SHOULDERS)) + assert re.fullmatch(r"nmdc:[a-z]{2,6}-..-.*", did.name) + assert did.typecode.id in list(pluck("id", typecodes())) + assert did.shoulder.id in list(pluck("id", shoulders()))