From 457114c43631d3ed037a073c8013e969d2aa3a91 Mon Sep 17 00:00:00 2001 From: "K. Joeri van der Velde" <82420+joerivandervelde@users.noreply.github.com> Date: Fri, 14 Feb 2025 10:28:53 +0100 Subject: [PATCH] Custom ontologies for HMD submissions --- fair-genomes.yml | 6 ++--- generated/art-decor/fair-genomes_en-US.xml | 10 ++++---- generated/latex/fair-genomes.tex | 6 ++--- .../markdown/fairgenomes-semantic-model.md | 6 ++--- .../molgenis-emx/HmdSubmission_attributes.tsv | 2 +- generated/molgenis-emx2/molgenis.csv | 4 ++-- generated/ontology/fair-genomes.ttl | 6 ++--- generated/palga-codebook/CODEBOOK.tsv | 4 ++-- generated/resource/FG_0000750.xml | 24 +++++++++++++++++++ generated/resource/FG_0000751.xml | 24 +++++++++++++++++++ .../implementations/ToRDFResources.java | 14 ++++++++++- 11 files changed, 83 insertions(+), 23 deletions(-) create mode 100644 generated/resource/FG_0000750.xml create mode 100644 generated/resource/FG_0000751.xml diff --git a/fair-genomes.yml b/fair-genomes.yml index 3bbab63..7bcc9bb 100644 --- a/fair-genomes.yml +++ b/fair-genomes.yml @@ -577,7 +577,7 @@ modules: values: String - name: HMD Submission description: Items specific to use in the 1+MG GDI project for metadata submission. - ontology: AFRL:0000449 [http://purl.allotrope.org/ontologies/role#AFRL_0000449] + ontology: FG:0000750 [https://w3id.org/fair-genomes/resource/FG_0000750] elements: - name: Submitter Role description: Role of the Submitter (e.g. Oncologist in charge of the patient, Research Project PI). @@ -604,8 +604,8 @@ modules: ontology: MS:1001922 [http://purl.obolibrary.org/obo/MS_1001922] values: String - name: Collection - description: Identifies collection (Biobank, Collection, Cohort, other types of projects) in which data are part of. - ontology: DCAT:Collection [http://www.w3.org/ns/prov#Collection] + description: Here goes the description + ontology: FG:0000751 [https://w3id.org/fair-genomes/resource/FG_0000751] values: String - name: Research Consortia description: Identifies Research Consortia Involved. diff --git a/generated/art-decor/fair-genomes_en-US.xml b/generated/art-decor/fair-genomes_en-US.xml index 6a6bfe0..373bbdd 100644 --- a/generated/art-decor/fair-genomes_en-US.xml +++ b/generated/art-decor/fair-genomes_en-US.xml @@ -256,7 +256,7 @@ href="http://art-decor.org/ADAR/rv/DECOR.sch" type="application/xml" schematypes HMD Submission Items specific to use in the 1+MG GDI project for metadata submission. -http://purl.allotrope.org/ontologies/role#AFRL_0000449 +https://w3id.org/fair-genomes/resource/FG_0000750 Clinical Center Refers to the Clinical Department in charge of the patient. @@ -266,8 +266,8 @@ href="http://art-decor.org/ADAR/rv/DECOR.sch" type="application/xml" schematypes Collection -Identifies collection (Biobank, Collection, Cohort, other types of projects) in which data are part of. -http://www.w3.org/ns/prov#Collection +Here goes the description +https://w3id.org/fair-genomes/resource/FG_0000751 @@ -1155,14 +1155,14 @@ href="http://art-decor.org/ADAR/rv/DECOR.sch" type="application/xml" schematypes - + - + diff --git a/generated/latex/fair-genomes.tex b/generated/latex/fair-genomes.tex index 414e89a..48612f7 100644 --- a/generated/latex/fair-genomes.tex +++ b/generated/latex/fair-genomes.tex @@ -21,7 +21,7 @@ Sample preparation & OBI:0001902 & 9 \\ Sequencing & EDAM:topic\_3168 & 12 \\ Analysis & EDAM:operation\_2945 & 11 \\ -HMD Submission & AFRL:0000449 & 8 \\ +HMD Submission & FG:0000750 & 8 \\ \hline \end{tabular} \caption[Module overview]{\label{table:table1} FAIR Genomes metadata schema v1.3-SNAPSHOT overview of all modules.} @@ -230,11 +230,11 @@ Institution Clinical & SIO:000688 & Institutes lookup (219 choices) \\ Institution Data Center & SIO:000688 & Institutes lookup (219 choices) \\ Publication description & MS:1001922 & String \\ -Collection & DCAT:Collection & String \\ +Collection & FG:0000751 & String \\ Research Consortia & NCIT:C61538 & String \\ \hline \end{tabular} -\caption[Module: HMD Submission]{\label{table:table11} Module: HMD Submission. Items specific to use in the 1+MG GDI project for metadata submission. Ontology: AFRL:0000449. } +\caption[Module: HMD Submission]{\label{table:table11} Module: HMD Submission. Items specific to use in the 1+MG GDI project for metadata submission. Ontology: FG:0000750. } \end{table} \begin{table}[htb] diff --git a/generated/markdown/fairgenomes-semantic-model.md b/generated/markdown/fairgenomes-semantic-model.md index ff05450..7c58ea0 100644 --- a/generated/markdown/fairgenomes-semantic-model.md +++ b/generated/markdown/fairgenomes-semantic-model.md @@ -15,7 +15,7 @@ The FAIR Genomes semantic metadata schema to power reuse of NGS data in research | [Sample preparation](#module-sample-preparation) | A sample preparation for a nucleic acids sequencing assay. | [OBI:0001902](http://purl.obolibrary.org/obo/OBI_0001902) | 9 | | [Sequencing](#module-sequencing) | The determination of complete (typically nucleotide) sequences, including those of genomes (full genome sequencing, de novo sequencing and resequencing), amplicons and transcriptomes. | [EDAM:topic_3168](http://edamontology.org/topic_3168) | 12 | | [Analysis](#module-analysis) | An analysis applies analytical (often computational) methods to existing data of a specific type to produce some desired output. | [EDAM:operation_2945](http://edamontology.org/operation_2945) | 11 | -| [HMD Submission](#module-hmd-submission) | Items specific to use in the 1+MG GDI project for metadata submission. | [AFRL:0000449](http://purl.allotrope.org/ontologies/role#AFRL_0000449) | 8 | +| [HMD Submission](#module-hmd-submission) | Items specific to use in the 1+MG GDI project for metadata submission. | [FG:0000750](https://w3id.org/fair-genomes/resource/FG_0000750) | 8 | ## Module: Study A detailed examination, analysis, or critical inspection of one or multiple subjects designed to discover facts. Ontology: [NCIT:C63536](http://purl.obolibrary.org/obo/NCIT_C63536). @@ -184,7 +184,7 @@ An analysis applies analytical (often computational) methods to existing data of | WGS guideline followed | Any followed systematic statement of policy rules or principles. Guidelines may be developed by government agencies at any level, institutions, professional societies, governing boards, or by convening expert panels. | [NCIT:C17564](http://purl.obolibrary.org/obo/NCIT_C17564) | String | ## Module: HMD Submission -Items specific to use in the 1+MG GDI project for metadata submission. Ontology: [AFRL:0000449](http://purl.allotrope.org/ontologies/role#AFRL_0000449). +Items specific to use in the 1+MG GDI project for metadata submission. Ontology: [FG:0000750](https://w3id.org/fair-genomes/resource/FG_0000750). | Element | Description | Ontology | Values | |---|---|---|---| @@ -194,7 +194,7 @@ Items specific to use in the 1+MG GDI project for metadata submission. Ontology: | Institution Clinical | Identifies the Institution/s involved. | [SIO:000688](http://semanticscience.org/resource/SIO_000688) | [Institutes](../../lookups/Institutes.txt) lookup (219 choices [of type](http://semanticscience.org/resource/SIO_000688)) | | Institution Data Center | Identifies the Institution/s involved. | [SIO:000688](http://semanticscience.org/resource/SIO_000688) | [Institutes](../../lookups/Institutes.txt) lookup (219 choices [of type](http://semanticscience.org/resource/SIO_000688)) | | Publication description | Linked to the instance of publication_doi; should provide a human-readably description of the publication. | [MS:1001922](http://purl.obolibrary.org/obo/MS_1001922) | String | -| Collection | Identifies collection (Biobank, Collection, Cohort, other types of projects) in which data are part of. | [DCAT:Collection](http://www.w3.org/ns/prov#Collection) | String | +| Collection | Here goes the description | [FG:0000751](https://w3id.org/fair-genomes/resource/FG_0000751) | String | | Research Consortia | Identifies Research Consortia Involved. | [NCIT:C61538](http://purl.obolibrary.org/obo/NCIT_C61538) | String | ## Null flavors diff --git a/generated/molgenis-emx/HmdSubmission_attributes.tsv b/generated/molgenis-emx/HmdSubmission_attributes.tsv index 5ba624d..85ae72d 100644 --- a/generated/molgenis-emx/HmdSubmission_attributes.tsv +++ b/generated/molgenis-emx/HmdSubmission_attributes.tsv @@ -5,5 +5,5 @@ ClinicalCenter Clinical Center Refers to the Clinical Department in charge of th InstitutionClinical Institution Clinical Identifies the Institution/s involved. (SIO:000688) HmdSubmission mref FALSE FALSE TRUE TRUE fair-genomes_HmdSubmission_InstitutionClinical InstitutionDataCenter Institution Data Center Identifies the Institution/s involved. (SIO:000688) HmdSubmission mref FALSE FALSE TRUE TRUE fair-genomes_HmdSubmission_InstitutionDataCenter PublicationDescription Publication description Linked to the instance of publication_doi; should provide a human-readably description of the publication. (MS:1001922) HmdSubmission string FALSE FALSE TRUE TRUE -Collection Collection Identifies collection (Biobank, Collection, Cohort, other types of projects) in which data are part of. (DCAT:Collection) HmdSubmission string FALSE FALSE TRUE TRUE +Collection Collection Here goes the description (FG:0000751) HmdSubmission string FALSE FALSE TRUE TRUE ResearchConsortia Research Consortia Identifies Research Consortia Involved. (NCIT:C61538) HmdSubmission string FALSE FALSE TRUE TRUE diff --git a/generated/molgenis-emx2/molgenis.csv b/generated/molgenis-emx2/molgenis.csv index d5e1cec..417f322 100644 --- a/generated/molgenis-emx2/molgenis.csv +++ b/generated/molgenis-emx2/molgenis.csv @@ -120,12 +120,12 @@ tableName,tableExtends,columnName,columnType,key,required,refSchema,refTable,ref "Analysis","","BioinformaticProtocolDeviation","string","","","","","","","","http://purl.obolibrary.org/obo/NCIT_C50996","A variation from processes or procedures defined in the bioinformatic protocol. Deviations usually do not preclude the overall evaluability of subject data for either efficacy or safety, and are often acknowledged and accepted in advance by the sponsor.","FAIR Genomes" "Analysis","","ReasonForBioinformaticProtocolDeviation","string","","","","","","","","http://purl.obolibrary.org/obo/NCIT_C93529","The rationale for why a deviation from the bioinformatic protocol has occurred.","FAIR Genomes" "Analysis","","WgsGuidelineFollowed","string","","","","","","","","http://purl.obolibrary.org/obo/NCIT_C17564","Any followed systematic statement of policy rules or principles. Guidelines may be developed by government agencies at any level, institutions, professional societies, governing boards, or by convening expert panels.","FAIR Genomes" -"HmdSubmission","","","","","","","","","","","http://purl.allotrope.org/ontologies/role#AFRL_0000449","Items specific to use in the 1+MG GDI project for metadata submission.","FAIR Genomes" +"HmdSubmission","","","","","","","","","","","https://w3id.org/fair-genomes/resource/FG_0000750","Items specific to use in the 1+MG GDI project for metadata submission.","FAIR Genomes" "HmdSubmission","","SubmitterRole","string","","","","","","","","https://www.w3.org/ns/dcat#Role","Role of the Submitter (e.g. Oncologist in charge of the patient, Research Project PI).","FAIR Genomes" "HmdSubmission","","DataCenter","string","","","","","","","","https://www.w3.org/TR/vocab-adms/#identifier","Refers to the Department in charge of Data Production and/ or analysis","FAIR Genomes" "HmdSubmission","","ClinicalCenter","string","","","","","","","","https://www.w3.org/TR/vocab-adms/#identifier","Refers to the Clinical Department in charge of the patient.","FAIR Genomes" "HmdSubmission","","InstitutionClinical","ontology_array","","","","InstitutionClinical","","","","http://semanticscience.org/resource/SIO_000688","Identifies the Institution/s involved.","FAIR Genomes" "HmdSubmission","","InstitutionDataCenter","ontology_array","","","","InstitutionDataCenter","","","","http://semanticscience.org/resource/SIO_000688","Identifies the Institution/s involved.","FAIR Genomes" "HmdSubmission","","PublicationDescription","string","","","","","","","","http://purl.obolibrary.org/obo/MS_1001922","Linked to the instance of publication_doi; should provide a human-readably description of the publication.","FAIR Genomes" -"HmdSubmission","","Collection","string","","","","","","","","http://www.w3.org/ns/prov#Collection","Identifies collection (Biobank, Collection, Cohort, other types of projects) in which data are part of.","FAIR Genomes" +"HmdSubmission","","Collection","string","","","","","","","","https://w3id.org/fair-genomes/resource/FG_0000751","Here goes the description","FAIR Genomes" "HmdSubmission","","ResearchConsortia","string","","","","","","","","http://purl.obolibrary.org/obo/NCIT_C61538","Identifies Research Consortia Involved.","FAIR Genomes" diff --git a/generated/ontology/fair-genomes.ttl b/generated/ontology/fair-genomes.ttl index 9bab994..b8c5ce7 100644 --- a/generated/ontology/fair-genomes.ttl +++ b/generated/ontology/fair-genomes.ttl @@ -828,7 +828,7 @@ fg:Analysis_WGS_guideline_followed a owl:DatatypeProperty; dc:description "Any followed systematic statement of policy rules or principles. Guidelines may be developed by government agencies at any level, institutions, professional societies, governing boards, or by convening expert panels." . fg:HMD_Submission a owl:Class; - rdfs:isDefinedBy ; + rdfs:isDefinedBy ; rdfs:label "HMD Submission"; dc:description "Items specific to use in the 1+MG GDI project for metadata submission." . @@ -873,8 +873,8 @@ fg:HMD_Submission_Publication_description a owl:DatatypeProperty; fg:HMD_Submission_Collection a owl:DatatypeProperty; rdfs:label "Collection"; rdfs:domain fg:HMD_Submission; - rdfs:isDefinedBy ; - dc:description "Identifies collection (Biobank, Collection, Cohort, other types of projects) in which data are part of." . + rdfs:isDefinedBy ; + dc:description "Here goes the description" . fg:HMD_Submission_Research_Consortia a owl:DatatypeProperty; rdfs:label "Research Consortia"; diff --git a/generated/palga-codebook/CODEBOOK.tsv b/generated/palga-codebook/CODEBOOK.tsv index 2856c3a..5ed77fe 100644 --- a/generated/palga-codebook/CODEBOOK.tsv +++ b/generated/palga-codebook/CODEBOOK.tsv @@ -120,12 +120,12 @@ Bioinformatic protocol used A human-readable collection of information about abo Bioinformatic protocol deviation A variation from processes or procedures defined in the bioinformatic protocol. Deviations usually do not preclude the overall evaluability of subject data for either efficacy or safety, and are often acknowledged and accepted in advance by the sponsor. NCIT C50996 ST Bioinformatic protocol deviation text {url=http://purl.obolibrary.org/obo/NCIT_C50996} Analysis Reason for bioinformatic protocol deviation The rationale for why a deviation from the bioinformatic protocol has occurred. NCIT C93529 ST Reason for bioinformatic protocol deviation text {url=http://purl.obolibrary.org/obo/NCIT_C93529} Analysis WGS guideline followed Any followed systematic statement of policy rules or principles. Guidelines may be developed by government agencies at any level, institutions, professional societies, governing boards, or by convening expert panels. NCIT C17564 ST WGS guideline followed text {url=http://purl.obolibrary.org/obo/NCIT_C17564} Analysis -HMD Submission Items specific to use in the 1+MG GDI project for metadata submission. AFRL 0000449 ST HMD Submission multi-select {url=http://purl.allotrope.org/ontologies/role#AFRL_0000449} +HMD Submission Items specific to use in the 1+MG GDI project for metadata submission. FG 0000750 ST HMD Submission multi-select {url=https://w3id.org/fair-genomes/resource/FG_0000750} Submitter Role Role of the Submitter (e.g. Oncologist in charge of the patient, Research Project PI). DCAT Role ST Submitter Role text {url=https://www.w3.org/ns/dcat#Role} HMD Submission Data Center Refers to the Department in charge of Data Production and/ or analysis ADMS Identifier ST Data Center text {url=https://www.w3.org/TR/vocab-adms/#identifier} HMD Submission Clinical Center Refers to the Clinical Department in charge of the patient. ADMS Identifier ST Clinical Center text {url=https://www.w3.org/TR/vocab-adms/#identifier} HMD Submission Institution Clinical Identifies the Institution/s involved. SIO 000688 ST Institution Clinical Institutes multi-select {url=http://semanticscience.org/resource/SIO_000688} HMD Submission Institution Data Center Identifies the Institution/s involved. SIO 000688 ST Institution Data Center Institutes multi-select {url=http://semanticscience.org/resource/SIO_000688} HMD Submission Publication description Linked to the instance of publication_doi; should provide a human-readably description of the publication. MS 1001922 ST Publication description text {url=http://purl.obolibrary.org/obo/MS_1001922} HMD Submission -Collection Identifies collection (Biobank, Collection, Cohort, other types of projects) in which data are part of. DCAT Collection ST Collection text {url=http://www.w3.org/ns/prov#Collection} HMD Submission +Collection Here goes the description FG 0000751 ST Collection text {url=https://w3id.org/fair-genomes/resource/FG_0000751} HMD Submission Research Consortia Identifies Research Consortia Involved. NCIT C61538 ST Research Consortia text {url=http://purl.obolibrary.org/obo/NCIT_C61538} HMD Submission diff --git a/generated/resource/FG_0000750.xml b/generated/resource/FG_0000750.xml new file mode 100644 index 0000000..4eabb50 --- /dev/null +++ b/generated/resource/FG_0000750.xml @@ -0,0 +1,24 @@ + + + + + + + Items specific to use in the 1+MG GDI project for metadata submission. + + + + + HMD Submission + + + Items specific to use in the 1+MG GDI project for metadata submission. + FG:0000750 + FG_0000750 + + + diff --git a/generated/resource/FG_0000751.xml b/generated/resource/FG_0000751.xml new file mode 100644 index 0000000..c31f76c --- /dev/null +++ b/generated/resource/FG_0000751.xml @@ -0,0 +1,24 @@ + + + + + + + Items specific to use in the 1+MG GDI project for metadata submission. + + + + + Collection + + + Here goes the description + FG:0000751 + FG_0000751 + + + diff --git a/src/main/java/org/fairgenomes/generator/implementations/ToRDFResources.java b/src/main/java/org/fairgenomes/generator/implementations/ToRDFResources.java index cb4ce67..37e6e4d 100644 --- a/src/main/java/org/fairgenomes/generator/implementations/ToRDFResources.java +++ b/src/main/java/org/fairgenomes/generator/implementations/ToRDFResources.java @@ -34,7 +34,19 @@ public void start() throws Exception { for (Module m : fg.modules) { if(m.ontology.startsWith("FG:")){ - throw new Exception("Not implemented yet!"); + String term = m.parsedOntology.codeSystem + "_" + m.parsedOntology.code; + if(uniqueTerms.contains(term)) + { + throw new Exception("Term already in use: " + term); + } + uniqueTerms.add(term); + FileWriter fw = new FileWriter(new File(outputFolder, term + ".xml")); + BufferedWriter bw = new BufferedWriter(fw); + IRI type = OWL.CLASS; + String srcTTL = fg.fileName + ".ttl"; + bw.write(toRDF(m.parsedOntology.codeSystem, m.parsedOntology.code, type, m.name, m.description, iri(m.parsedOntology.iri), m.description, srcTTL)); + bw.flush(); + bw.close(); } for(Element e : m.elements) {