From 15b3d59ad6b28accfcfcfc1e29cb8f4bea113d02 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 11 Dec 2023 17:21:28 +0100 Subject: [PATCH 01/51] pom: Some security version updates #TASK-4437 --- cellbase-lib/pom.xml | 4 ++-- .../lib/impl/core/MetaMongoDBAdaptor.java | 4 ++-- pom.xml | 17 +++++++++-------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index f76602ad3e..780ead8687 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -137,10 +137,10 @@ com.github.samtools htsjdk - + io.jsonwebtoken jjwt-api diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MetaMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MetaMongoDBAdaptor.java index 9361f48d0f..926548de16 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MetaMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MetaMongoDBAdaptor.java @@ -16,17 +16,17 @@ package org.opencb.cellbase.lib.impl.core; +import com.fasterxml.jackson.databind.ObjectMapper; import com.mongodb.client.model.Filters; import com.mongodb.client.model.Updates; import org.bson.BsonDocument; import org.bson.Document; import org.bson.conversions.Bson; -import org.codehaus.jackson.map.ObjectMapper; +import org.opencb.cellbase.core.api.key.ApiKeyStats; import org.opencb.cellbase.core.api.query.AbstractQuery; import org.opencb.cellbase.core.api.query.ProjectionQueryOptions; import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.result.CellBaseDataResult; -import org.opencb.cellbase.core.api.key.ApiKeyStats; import org.opencb.cellbase.lib.iterator.CellBaseIterator; import org.opencb.commons.datastore.core.FacetField; import org.opencb.commons.datastore.core.QueryOptions; diff --git a/pom.xml b/pom.xml index f85f5212ec..65a5d65480 100644 --- a/pom.xml +++ b/pom.xml @@ -26,15 +26,17 @@ 4.13.0-SNAPSHOT 2.13.0-SNAPSHOT 0.1.0 - 2.11.4 - 1.9.13 + 9.4.51.v20230217 + + 2.14.3 + 3.14.0 + 1.7.36 + 2.30.1 - 1.7.32 2.17.2 1.5.2 5.5.2 0.8.8 - 9.4.17.v20190418 0.11.5 1.6.5 3.1.0 @@ -46,11 +48,10 @@ 19.0 1.9.1 1.3 - 2.23.0 + 3.0.5 1.48.0 2.4 2.4 - 3.12.0 2.1.6 4.4 1.69 @@ -412,11 +413,11 @@ swagger-annotations ${swagger-annotations.version} - + io.jsonwebtoken jjwt-jackson From ab2d74de4cf0f77dc01a2ab0f49ea16dbf615065 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Thu, 21 Dec 2023 19:10:59 +0100 Subject: [PATCH 02/51] Prepare portPatch 1.10.1 -> 2.0.0 --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index 6119cebf49..af1dda8b9d 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.1 + 5.9.0-SNAPSHOT ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 5a4245010d..7e5c9a5884 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.1 + 5.9.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index b6331664e9..89f51a1ffc 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.1 + 5.9.0-SNAPSHOT ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 1feb57905d..f76602ad3e 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.1 + 5.9.0-SNAPSHOT ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index 793cbeae90..ff3868b7c7 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.1 + 5.9.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index e92b552106..41bbd12476 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.1 + 5.9.0-SNAPSHOT pom CellBase project From 611cc8bb2467ba084f904f905db92707c082ed1f Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Thu, 21 Dec 2023 19:13:24 +0100 Subject: [PATCH 03/51] Prepare portPatch 1.10.1 -> 2.0.0 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 41bbd12476..f85f5212ec 100644 --- a/pom.xml +++ b/pom.xml @@ -23,8 +23,8 @@ ${project.version} - 4.12.0 - 2.12.1 + 4.13.0-SNAPSHOT + 2.13.0-SNAPSHOT 0.1.0 2.11.4 1.9.13 From abf46226956df5d44839459aaaa039873d9c2e38 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Thu, 21 Dec 2023 19:15:07 +0100 Subject: [PATCH 04/51] Prepare portPatch 1.10.1 -> 2.0.0 --- pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/pom.xml b/pom.xml index f85f5212ec..256c88511e 100644 --- a/pom.xml +++ b/pom.xml @@ -21,6 +21,7 @@ cellbase-server + ${project.version} 4.13.0-SNAPSHOT From dff75584ef065820928064b781c76068fbb9d105 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Thu, 21 Dec 2023 19:16:39 +0100 Subject: [PATCH 05/51] Prepare portPatch 1.10.1 -> 2.0.0 --- pom.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/pom.xml b/pom.xml index 256c88511e..f85f5212ec 100644 --- a/pom.xml +++ b/pom.xml @@ -21,7 +21,6 @@ cellbase-server - ${project.version} 4.13.0-SNAPSHOT From bae792fc1766f37c082b47c090dea13e08a3517c Mon Sep 17 00:00:00 2001 From: imedina Date: Thu, 21 Dec 2023 23:43:07 +0000 Subject: [PATCH 06/51] builder: update ensembl version to 110 --- .../cloud/docker/cellbase-builder/Dockerfile | 2 +- .../app/scripts/ensembl-scripts/DB_CONFIG.pm | 8 +-- cellbase-app/app/scripts/gnomad_mt_prepare.py | 57 +++++++++++++++++++ 3 files changed, 62 insertions(+), 5 deletions(-) create mode 100755 cellbase-app/app/scripts/gnomad_mt_prepare.py diff --git a/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile b/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile index 17d5accff4..6e1657d1bf 100644 --- a/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile +++ b/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile @@ -28,4 +28,4 @@ RUN cd /opt/ensembl && \ git clone https://github.com/Ensembl/ensembl-compara.git && \ git clone https://github.com/Ensembl/ensembl-io.git -ENV PERL5LIB=$PERL5LIB:/opt/ensembl/bioperl-live:/opt/ensembl/ensembl/modules:/opt/ensembl/ensembl-variation/modules:/opt/ensembl/ensembl-funcgen/modules:/opt/ensembl/ensembl-compara/modules:/opt/ensembl/lib/perl/5.18.2:/opt/cellbase +ENV PERL5LIB=$PERL5LIB:/opt/ensembl/bioperl-live:/opt/ensembl/ensembl/modules:/opt/ensembl/ensembl-variation/modules:/opt/ensembl/ensembl-funcgen/modules:/opt/ensembl/ensembl-compara/modules:/opt/ensembl/lib/perl/5.18.2:/opt/cellbase/scripts/ensembl-scripts diff --git a/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm b/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm index aa22cf10b1..70865465e9 100755 --- a/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm +++ b/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm @@ -134,10 +134,10 @@ our $ENSEMBL_GENOMES_PORT = "4157"; our $ENSEMBL_GENOMES_USER = "anonymous"; ## Vertebrates -our $HOMO_SAPIENS_CORE = "homo_sapiens_core_104_38"; -our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_104_38"; -our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_104_38"; -our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_104_38"; +our $HOMO_SAPIENS_CORE = "homo_sapiens_core_110_38"; +our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_110_38"; +our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_110_38"; +our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_110_38"; #our $HOMO_SAPIENS_CORE = "homo_sapiens_core_78_38"; #our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_78_38"; #our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_78_38"; diff --git a/cellbase-app/app/scripts/gnomad_mt_prepare.py b/cellbase-app/app/scripts/gnomad_mt_prepare.py new file mode 100755 index 0000000000..0863370c33 --- /dev/null +++ b/cellbase-app/app/scripts/gnomad_mt_prepare.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 + +# Copyright 2015-2020 OpenCB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import requests +import sys +import json +import pathlib +from pathlib import Path + + +## Configure command-line options +parser = argparse.ArgumentParser() +parser.add_argument('-i', help="VCF file", required=True) + + +## Parse command-line parameters and init basedir, tag and build_folder +args = parser.parse_args() +print(args.i) + +if os.path.isfile(args.i) == False: + print("no existe") + + +# Opening file +vcf_file = open(args.i, 'r') +count = 0 + +# Using for loop +print("Using for loop") +for line in vcf_file: + count += 1 + if not line.startswith("#"): + line = line.strip() + cols = line.split("\t") + print(line) + info_cols = cols[7].split(";") + var = [x for x in info_cols if x.startswith("AN=")] + print("{}".format(var)) + + +# Closing files +vcf_file.close() \ No newline at end of file From 8ce81c6d42ec75bfe14ea1bb058e6bf8a5ad22ac Mon Sep 17 00:00:00 2001 From: imedina Date: Wed, 27 Dec 2023 02:56:17 +0000 Subject: [PATCH 07/51] configuration: update most of the data source versions --- .../src/main/resources/configuration.yml | 36 +++++++++++++------ 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index 0f8d199118..2945c629b9 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -62,7 +62,7 @@ download: url: host: ftp://ftp.ensemblgenomes.org/pub hgnc: - host: https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2022-01-01.txt + host: https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2023-11-01.txt refSeq: host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gtf.gz refSeqFasta: @@ -73,12 +73,15 @@ download: host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_rna.fna.gz maneSelect: # host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_0.93/MANE.GRCh38.v0.93.summary.txt.gz - host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.0/MANE.GRCh38.v1.0.summary.txt.gz - version: 0.93 +# host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.0/MANE.GRCh38.v1.0.summary.txt.gz + host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.1/MANE.GRCh38.v1.1.summary.txt.gz + version: "1.1" lrg: host: http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt + version: "2021-03-30" geneUniprotXref: host: http://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/ + version: "2023-11-08" geneExpressionAtlas: host: ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/gxa/allgenes_updown_in_organism_part_2.0.14.tab.gz mirbase: @@ -88,33 +91,43 @@ download: targetScan: host: http://hgdownload.cse.ucsc.edu/goldenPath/ miRTarBase: - host: https://mirtarbase.cuhk.edu.cn/~miRTarBase/miRTarBase_2022/cache/download/8.0/hsa_MTI.xlsx + host: https://mirtarbase.cuhk.edu.cn/~miRTarBase/miRTarBase_2022/cache/download/9.0/hsa_MTI.xlsx + version: "9.0" uniprot: host: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz + version: "2023-11-08" uniprotRelNotes: host: ftp://ftp.uniprot.org/pub/databases/uniprot/relnotes.txt + version: "2023-11-08" intact: host: ftp://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.txt interpro: - host: ftp://ftp.ebi.ac.uk/pub/databases/interpro/current/protein2ipr.dat.gz +# host: ftp://ftp.ebi.ac.uk/pub/databases/interpro/current/protein2ipr.dat.gz + host: https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/protein2ipr.dat.gz interproRelNotes: - host: ftp://ftp.ebi.ac.uk/pub/databases/interpro/current/release_notes.txt +# host: ftp://ftp.ebi.ac.uk/pub/databases/interpro/current/release_notes.txt + host: https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/release_notes.txt conservation: host: https://hgdownload.cse.ucsc.edu/goldenPath/ gerp: - host: http://ftp.ensembl.org/pub/release-104/compara/conservation_scores/90_mammals.gerp_conservation_score/gerp_conservation_scores.homo_sapiens.GRCh38.bw + host: http://ftp.ensembl.org/pub/release-110/compara/conservation_scores/91_mammals.gerp_conservation_score/gerp_conservation_scores.homo_sapiens.GRCh38.bw + version: "2023-04-22" clinvar: # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2021-07.xml.gz # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-02.xml.gz - host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-11.xml.gz +# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-11.xml.gz + host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2023-12.xml.gz clinvarVariation: # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2021-07.xml.gz # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-02.xml.gz - host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-11.xml.gz +# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-11.xml.gz + host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2023-12.xml.gz clinvarSummary: host: http://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz + version: "2023-12-17" clinvarVariationAllele: host: http://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variation_allele.txt.gz + version: "2023-12-17" clinvarEfoTerms: host: ftp://ftp.ebi.ac.uk/pub/databases/eva/ClinVar/2015/ClinVar_Traits_EFO_Names_260615.csv iarctp53: @@ -144,7 +157,8 @@ download: dgidb: host: https://dgidb.org/data/monthly_tsvs/2021-Jan/interactions.tsv cadd: - host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz +# host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz + host: https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz reactome: host: http://www.reactome.org/download/current/biopax.zip gnomadConstraints: @@ -182,7 +196,7 @@ species: - id: hsapiens scientificName: Homo sapiens assemblies: - - ensemblVersion: '104_38' + - ensemblVersion: '110_38' name: GRCh38 - ensemblVersion: '82_37' name: GRCh37 From 79726a068631abe4a820e930dcd6fd8639bafecd Mon Sep 17 00:00:00 2001 From: imedina Date: Wed, 27 Dec 2023 03:03:47 +0000 Subject: [PATCH 08/51] download: fix download fix configuration --- .../opencb/cellbase/lib/download/GenomeDownloadManager.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java index 5a0609867f..bdd68fcf00 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java @@ -47,11 +47,11 @@ public GenomeDownloadManager(String species, String assembly, Path targetDirecto public List download() throws IOException, InterruptedException { List downloadFiles = new ArrayList<>(); downloadFiles.addAll(downloadReferenceGenome()); -// downloadFiles.addAll(downloadConservation()); -// downloadFiles.addAll(downloadRepeats()); + downloadFiles.addAll(downloadConservation()); + downloadFiles.addAll(downloadRepeats()); // cytobands - runGenomeInfo(); +// runGenomeInfo(); return downloadFiles; } From f85a5c66c4e5675a9fbf4595417817d6f4214daa Mon Sep 17 00:00:00 2001 From: imedina Date: Thu, 28 Dec 2023 15:12:26 +0000 Subject: [PATCH 09/51] config: update phylop and phastcons conservation scores to 470 way --- cellbase-core/src/main/resources/configuration.yml | 3 ++- .../cellbase/lib/download/GenomeDownloadManager.java | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index 2945c629b9..70973b8c5c 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -109,9 +109,10 @@ download: host: https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/release_notes.txt conservation: host: https://hgdownload.cse.ucsc.edu/goldenPath/ + version: "2022-08-30" gerp: host: http://ftp.ensembl.org/pub/release-110/compara/conservation_scores/91_mammals.gerp_conservation_score/gerp_conservation_scores.homo_sapiens.GRCh38.bw - version: "2023-04-22" + version: "2023-05-17" clinvar: # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2021-07.xml.gz # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-02.xml.gz diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java index bdd68fcf00..a05afbf657 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java @@ -115,16 +115,16 @@ public List downloadConservation() throws IOException, Interrupted List phastconsUrls = new ArrayList<>(chromosomes.length); List phyloPUrls = new ArrayList<>(chromosomes.length); for (String chromosome : chromosomes) { - String phastConsUrl = url + "/phastCons100way/hg38.100way.phastCons/chr" + chromosome - + ".phastCons100way.wigFix.gz"; + String phastConsUrl = url + "/phastCons470way/hg38.470way.phastCons/chr" + chromosome + + ".phastCons470way.wigFix.gz"; downloadFiles.add(downloadFile(phastConsUrl, conservationFolder.resolve("phastCons") - .resolve("chr" + chromosome + ".phastCons100way.wigFix.gz").toString())); + .resolve("chr" + chromosome + ".phastCons470way.wigFix.gz").toString())); phastconsUrls.add(phastConsUrl); - String phyloPUrl = url + "/phyloP100way/hg38.100way.phyloP100way/chr" + chromosome - + ".phyloP100way.wigFix.gz"; + String phyloPUrl = url + "/phyloP470way/hg38.470way.phyloP470way/chr" + chromosome + + ".phyloP470way.wigFix.gz"; downloadFiles.add(downloadFile(phyloPUrl, conservationFolder.resolve("phylop") - .resolve("chr" + chromosome + ".phyloP100way.wigFix.gz").toString())); + .resolve("chr" + chromosome + ".phyloP470way.wigFix.gz").toString())); phyloPUrls.add(phyloPUrl); } String gerpUrl = configuration.getDownload().getGerp().getHost(); From 15a82896a3fe75fbc9fe31c0395892774c34714a Mon Sep 17 00:00:00 2001 From: imedina Date: Thu, 28 Dec 2023 15:29:25 +0000 Subject: [PATCH 10/51] config: fix phylop URL --- .../org/opencb/cellbase/lib/download/GenomeDownloadManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java index a05afbf657..0ba9f39db4 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java @@ -121,7 +121,7 @@ public List downloadConservation() throws IOException, Interrupted .resolve("chr" + chromosome + ".phastCons470way.wigFix.gz").toString())); phastconsUrls.add(phastConsUrl); - String phyloPUrl = url + "/phyloP470way/hg38.470way.phyloP470way/chr" + chromosome + String phyloPUrl = url + "/phyloP470way/hg38.470way.phyloP/chr" + chromosome + ".phyloP470way.wigFix.gz"; downloadFiles.add(downloadFile(phyloPUrl, conservationFolder.resolve("phylop") .resolve("chr" + chromosome + ".phyloP470way.wigFix.gz").toString())); From 73c49392a147d631bfa5f30d3d2a46868bfee774 Mon Sep 17 00:00:00 2001 From: imedina Date: Sun, 31 Dec 2023 14:55:05 +0000 Subject: [PATCH 11/51] download: fix HGNC in gene downloader --- cellbase-core/src/main/resources/configuration.yml | 4 ++-- .../opencb/cellbase/lib/builders/GeneBuilder.java | 2 +- .../cellbase/lib/download/GeneDownloadManager.java | 13 +++++++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index 70973b8c5c..f2dfd5913a 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -63,6 +63,7 @@ download: host: ftp://ftp.ensemblgenomes.org/pub hgnc: host: https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2023-11-01.txt + version: 2023-11-01 refSeq: host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gtf.gz refSeqFasta: @@ -158,8 +159,7 @@ download: dgidb: host: https://dgidb.org/data/monthly_tsvs/2021-Jan/interactions.tsv cadd: -# host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz - host: https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz + host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz reactome: host: http://www.reactome.org/download/current/biopax.zip gnomadConstraints: diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java index 563f76dea7..4da5a3e056 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java @@ -91,7 +91,7 @@ public GeneBuilder(Path geneDirectoryPath, Path genomeSequenceFastaFile, Species this(null, geneDirectoryPath.resolve("description.txt"), geneDirectoryPath.resolve("xrefs.txt"), geneDirectoryPath.resolve("hgnc_complete_set_2022-01-01.txt"), - geneDirectoryPath.resolve("MANE.GRCh38.v1.0.summary.txt.gz"), + geneDirectoryPath.resolve("MANE.GRCh38.v1.1.summary.txt.gz"), geneDirectoryPath.resolve("list_LRGs_transcripts_xrefs.txt"), geneDirectoryPath.resolve("idmapping_selected.tab.gz"), geneDirectoryPath.getParent().resolve("regulation/motif_features.gff.gz"), diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java index 9d2685eadf..9bd82a951f 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java @@ -82,6 +82,7 @@ public List download() throws IOException, InterruptedException { downloadFiles.addAll(downloadRefSeq(refseqFolder)); downloadFiles.add(downloadMane(geneFolder)); downloadFiles.add(downloadLrg(geneFolder)); + downloadFiles.add(downloadHgnc(geneFolder)); downloadFiles.add(downloadDrugData(geneFolder)); downloadFiles.addAll(downloadGeneUniprotXref(geneFolder)); downloadFiles.add(downloadGeneExpressionAtlas(geneFolder)); @@ -208,6 +209,18 @@ private DownloadFile downloadLrg(Path geneFolder) throws IOException, Interrupte return null; } + private DownloadFile downloadHgnc(Path geneFolder) throws IOException, InterruptedException { + if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { + logger.info("Downloading LRG ..."); + String url = configuration.getDownload().getHgnc().getHost(); + saveVersionData(EtlCommons.GENE_DATA, "HGNC_GENE", configuration.getDownload().getHgnc().getVersion(), + getTimeStamp(), Collections.singletonList(url), geneFolder.resolve("hgncVersion.json")); + String[] array = url.split("/"); + return downloadFile(url, geneFolder.resolve(array[array.length - 1]).toString()); + } + return null; + } + private DownloadFile downloadGO(Path geneFolder) throws IOException, InterruptedException { if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { logger.info("Downloading go annotation..."); From 1629ad4f8a1f9a6f55b54bb5ed132cc3e8eefd65 Mon Sep 17 00:00:00 2001 From: imedina Date: Sun, 31 Dec 2023 15:38:26 +0000 Subject: [PATCH 12/51] download: fix dgidb in gene downloader --- cellbase-core/src/main/resources/configuration.yml | 5 +++-- .../java/org/opencb/cellbase/lib/builders/GeneBuilder.java | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index f2dfd5913a..d6823aa7aa 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -157,14 +157,15 @@ download: - all_gene_disease_associations.tsv.gz - readme.txt dgidb: - host: https://dgidb.org/data/monthly_tsvs/2021-Jan/interactions.tsv + host: https://old.dgidb.org/data/monthly_tsvs/2022-Feb/interactions.tsv + version: "2022-02-01" cadd: host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz reactome: host: http://www.reactome.org/download/current/biopax.zip gnomadConstraints: host: https://storage.googleapis.com/gcp-public-data--gnomad/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_transcript.txt.bgz - version: 2.1.1 + version: "2.1.1" hpoObo: host: http://purl.obolibrary.org/obo/hp.obo goObo: diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java index 4da5a3e056..cd0863a259 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java @@ -90,7 +90,7 @@ public GeneBuilder(Path geneDirectoryPath, Path genomeSequenceFastaFile, Species boolean flexibleGTFParsing, CellBaseSerializer serializer) throws CellBaseException { this(null, geneDirectoryPath.resolve("description.txt"), geneDirectoryPath.resolve("xrefs.txt"), - geneDirectoryPath.resolve("hgnc_complete_set_2022-01-01.txt"), + geneDirectoryPath.resolve("hgnc_complete_set_2023-11-01.txt"), geneDirectoryPath.resolve("MANE.GRCh38.v1.1.summary.txt.gz"), geneDirectoryPath.resolve("list_LRGs_transcripts_xrefs.txt"), geneDirectoryPath.resolve("idmapping_selected.tab.gz"), From 2c47ee57f91b5b2f2fdf937894d06eda9cc22ad3 Mon Sep 17 00:00:00 2001 From: imedina Date: Sun, 31 Dec 2023 16:59:29 +0000 Subject: [PATCH 13/51] download: fix PFM in gene downloader --- cellbase-core/src/main/resources/configuration.yml | 3 ++- .../cellbase/lib/download/RegulationDownloadManager.java | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index d6823aa7aa..172f950f55 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -150,7 +150,8 @@ download: host: http://resources.opencb.org/opencb/cellbase/data/gwas/gwas_catalog_v1.0.2-associations_e106_r2022-05-17.tsv version: "1.0.2 associations_e106_r2022-05-17" hpo: - host: https://ci.monarchinitiative.org/view/hpo/job/hpo.annotations/lastSuccessfulBuild/artifact/rare-diseases/util/annotation/phenotype_to_genes.txt + ## Downlaod manually from here now: https://hpo.jax.org/app/data/annotations +# host: https://ci.monarchinitiative.org/view/hpo/job/hpo.annotations/lastSuccessfulBuild/artifact/rare-diseases/util/annotation/phenotype_to_genes.txt disgenet: host: https://www.disgenet.org/static/disgenet_ap1/files/downloads files: diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java index 1abb352fbe..1a6fe5f9e0 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java @@ -125,7 +125,8 @@ private void loadPfmMatrices() throws IOException, NoSuchMethodException, FileFo CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "regulatory_pfm", true); logger.info("Looking up " + motifIds.size() + " pfms"); for (String pfmId : motifIds) { - String urlString = "https://rest.ensembl.org/species/homo_sapiens/binding_matrix/" + pfmId + System.out.println(motifIds) + String urlString = "https://rest.ensembl.org/species/homo_sapiens/binding_matrix/ENSPFM" + pfmId + "?unit=frequencies;content-type=application/json"; URL url = new URL(urlString); RegulatoryPfm regulatoryPfm = mapper.readValue(url, RegulatoryPfm.class); From fe4bba7f5bf42b29e437509a95e73fef70fb8bfc Mon Sep 17 00:00:00 2001 From: imedina Date: Sun, 31 Dec 2023 17:09:41 +0000 Subject: [PATCH 14/51] download: fix PFM in gene downloader --- .../opencb/cellbase/lib/download/RegulationDownloadManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java index 1a6fe5f9e0..7fddaf6768 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java @@ -125,7 +125,7 @@ private void loadPfmMatrices() throws IOException, NoSuchMethodException, FileFo CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "regulatory_pfm", true); logger.info("Looking up " + motifIds.size() + " pfms"); for (String pfmId : motifIds) { - System.out.println(motifIds) + System.out.println(motifIds); String urlString = "https://rest.ensembl.org/species/homo_sapiens/binding_matrix/ENSPFM" + pfmId + "?unit=frequencies;content-type=application/json"; URL url = new URL(urlString); From e2170dffd7f302d308cdb1945bb4d471df279095 Mon Sep 17 00:00:00 2001 From: imedina Date: Sun, 31 Dec 2023 17:30:41 +0000 Subject: [PATCH 15/51] download: fix PFM in gene downloader --- .../cellbase/lib/download/RegulationDownloadManager.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java index 7fddaf6768..7d3a5a410b 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java @@ -116,6 +116,7 @@ private void loadPfmMatrices() throws IOException, NoSuchMethodException, FileFo while ((tfbsMotifFeature = motifsFeatureReader.read()) != null) { String pfmId = getMatrixId(filePattern, tfbsMotifFeature); if (StringUtils.isNotEmpty(pfmId)) { + System.out.println(pfmId); motifIds.add(pfmId); } } @@ -125,9 +126,9 @@ private void loadPfmMatrices() throws IOException, NoSuchMethodException, FileFo CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "regulatory_pfm", true); logger.info("Looking up " + motifIds.size() + " pfms"); for (String pfmId : motifIds) { - System.out.println(motifIds); String urlString = "https://rest.ensembl.org/species/homo_sapiens/binding_matrix/ENSPFM" + pfmId + "?unit=frequencies;content-type=application/json"; + System.out.println(urlString); URL url = new URL(urlString); RegulatoryPfm regulatoryPfm = mapper.readValue(url, RegulatoryPfm.class); serializer.serialize(regulatoryPfm); From 89831a689cf6fc969640ab2f3f9c8a7de00b3613 Mon Sep 17 00:00:00 2001 From: imedina Date: Sun, 31 Dec 2023 17:39:01 +0000 Subject: [PATCH 16/51] download: fix PFM in gene downloader --- .../cellbase/lib/download/RegulationDownloadManager.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java index 7d3a5a410b..d9e01cef4d 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java @@ -126,9 +126,8 @@ private void loadPfmMatrices() throws IOException, NoSuchMethodException, FileFo CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "regulatory_pfm", true); logger.info("Looking up " + motifIds.size() + " pfms"); for (String pfmId : motifIds) { - String urlString = "https://rest.ensembl.org/species/homo_sapiens/binding_matrix/ENSPFM" + pfmId + String urlString = "https://rest.ensembl.org/species/homo_sapiens/binding_matrix/" + pfmId + "?unit=frequencies;content-type=application/json"; - System.out.println(urlString); URL url = new URL(urlString); RegulatoryPfm regulatoryPfm = mapper.readValue(url, RegulatoryPfm.class); serializer.serialize(regulatoryPfm); From ce1767a53a30eb019f4cf77e1680c0014cf1429b Mon Sep 17 00:00:00 2001 From: imedina Date: Tue, 2 Jan 2024 02:33:13 +0000 Subject: [PATCH 17/51] downloader: add cancer hotspot --- .../cellbase/core/config/DownloadProperties.java | 10 ++++++++++ .../src/main/resources/configuration.yml | 4 ++++ .../lib/download/GeneDownloadManager.java | 15 ++++++++++++++- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java index ee4216f560..a897625eff 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java @@ -26,6 +26,7 @@ public class DownloadProperties { private EnsemblProperties ensembl; private EnsemblProperties ensemblGenomes; private URLProperties hgnc; + private URLProperties cancerHotspot; private URLProperties refSeq; private URLProperties refSeqFasta; private URLProperties refSeqProteinFasta; @@ -517,6 +518,15 @@ public DownloadProperties setHgnc(URLProperties hgnc) { return this; } + public URLProperties getCancerHotspot() { + return cancerHotspot; + } + + public DownloadProperties setCancerHotspot(URLProperties cancerHotspot) { + this.cancerHotspot = cancerHotspot; + return this; + } + public static class EnsemblProperties { private DatabaseCredentials database; diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index 172f950f55..7a5b25ea63 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -64,6 +64,9 @@ download: hgnc: host: https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2023-11-01.txt version: 2023-11-01 + cancerHotspot: + host: https://www.cancerhotspots.org/files/hotspots_v2.xls + version: "v2" refSeq: host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gtf.gz refSeqFasta: @@ -161,6 +164,7 @@ download: host: https://old.dgidb.org/data/monthly_tsvs/2022-Feb/interactions.tsv version: "2022-02-01" cadd: + ## Nacho: Move to https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz ASAP! host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz reactome: host: http://www.reactome.org/download/current/biopax.zip diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java index 9bd82a951f..260ff75427 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java @@ -83,6 +83,7 @@ public List download() throws IOException, InterruptedException { downloadFiles.add(downloadMane(geneFolder)); downloadFiles.add(downloadLrg(geneFolder)); downloadFiles.add(downloadHgnc(geneFolder)); + downloadFiles.add(downloadCancerHotspot(geneFolder)); downloadFiles.add(downloadDrugData(geneFolder)); downloadFiles.addAll(downloadGeneUniprotXref(geneFolder)); downloadFiles.add(downloadGeneExpressionAtlas(geneFolder)); @@ -211,7 +212,7 @@ private DownloadFile downloadLrg(Path geneFolder) throws IOException, Interrupte private DownloadFile downloadHgnc(Path geneFolder) throws IOException, InterruptedException { if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { - logger.info("Downloading LRG ..."); + logger.info("Downloading HGNC ..."); String url = configuration.getDownload().getHgnc().getHost(); saveVersionData(EtlCommons.GENE_DATA, "HGNC_GENE", configuration.getDownload().getHgnc().getVersion(), getTimeStamp(), Collections.singletonList(url), geneFolder.resolve("hgncVersion.json")); @@ -221,6 +222,18 @@ private DownloadFile downloadHgnc(Path geneFolder) throws IOException, Interrupt return null; } + private DownloadFile downloadCancerHotspot(Path geneFolder) throws IOException, InterruptedException { + if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { + logger.info("Downloading Cancer Hotspot ..."); + String url = configuration.getDownload().getCancerHotspot().getHost(); + saveVersionData(EtlCommons.GENE_DATA, "CANCER_HOTSPOT", configuration.getDownload().getHgnc().getVersion(), + getTimeStamp(), Collections.singletonList(url), geneFolder.resolve("cancerHotspotVersion.json")); + String[] array = url.split("/"); + return downloadFile(url, geneFolder.resolve(array[array.length - 1]).toString()); + } + return null; + } + private DownloadFile downloadGO(Path geneFolder) throws IOException, InterruptedException { if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { logger.info("Downloading go annotation..."); From 0004be62a4e21d9c712f4253db822ec3af1caca6 Mon Sep 17 00:00:00 2001 From: imedina Date: Tue, 2 Jan 2024 02:50:08 +0000 Subject: [PATCH 18/51] downloader: add MONDO ontology --- .../cellbase/core/config/DownloadProperties.java | 10 ++++++++++ cellbase-core/src/main/resources/configuration.yml | 2 ++ .../main/java/org/opencb/cellbase/lib/EtlCommons.java | 1 + .../opencb/cellbase/lib/builders/OntologyBuilder.java | 9 +++++++++ 4 files changed, 22 insertions(+) diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java index a897625eff..19f1606c91 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java @@ -71,6 +71,7 @@ public class DownloadProperties { private URLProperties hpoObo; private URLProperties goObo; private URLProperties doidObo; + private URLProperties mondoObo; private URLProperties goAnnotation; private URLProperties revel; private URLProperties pubmed; @@ -527,6 +528,15 @@ public DownloadProperties setCancerHotspot(URLProperties cancerHotspot) { return this; } + public URLProperties getMondoObo() { + return mondoObo; + } + + public DownloadProperties setMondoObo(URLProperties mondoObo) { + this.mondoObo = mondoObo; + return this; + } + public static class EnsemblProperties { private DatabaseCredentials database; diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index 7a5b25ea63..2841f135ca 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -177,6 +177,8 @@ download: host: http://purl.obolibrary.org/obo/go/go-basic.obo doidObo: host: http://purl.obolibrary.org/obo/doid.obo + mondoObo: + host: http://purl.obolibrary.org/obo/mondo.obo goAnnotation: host: http://geneontology.org/gene-associations/goa_human.gaf.gz revel: diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java index 4396f0c2f1..124ac6e6fc 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java @@ -71,6 +71,7 @@ public class EtlCommons { public static final String HPO_FILE = "hp.obo"; public static final String GO_FILE = "go-basic.obo"; public static final String DOID_FILE = "doid.obo"; + public static final String MONDO_FILE = "mondo.obo"; public static final String PFM_DATA = "regulatory_pfm"; // Build specific data options diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java index 8873dd7f93..1eabf8975a 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java @@ -32,12 +32,14 @@ public class OntologyBuilder extends CellBaseBuilder { private Path hpoFile; private Path goFile; private Path doidFile; + private Path mondoFile; public OntologyBuilder(Path oboDirectoryPath, CellBaseSerializer serializer) { super(serializer); hpoFile = oboDirectoryPath.resolve(EtlCommons.HPO_FILE); goFile = oboDirectoryPath.resolve(EtlCommons.GO_FILE); doidFile = oboDirectoryPath.resolve(EtlCommons.DOID_FILE); + mondoFile = oboDirectoryPath.resolve(EtlCommons.MONDO_FILE); } @Override @@ -64,6 +66,13 @@ public void parse() throws Exception { serializer.serialize(term); } + bufferedReader = FileUtils.newBufferedReader(mondoFile); + terms = parser.parseOBO(bufferedReader, "Mondo Ontology"); + for (OntologyTerm term : terms) { + term.setSource("MONDO"); + serializer.serialize(term); + } + serializer.close(); } } From 7e72929a1065ab1be41e3ab900b57c8150086344 Mon Sep 17 00:00:00 2001 From: imedina Date: Tue, 2 Jan 2024 02:55:03 +0000 Subject: [PATCH 19/51] downloader: add MONDO ontology --- cellbase-core/src/main/resources/configuration.yml | 4 ++++ .../cellbase/lib/download/OntologyDownloadManager.java | 10 ++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index 2841f135ca..be50a2f717 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -173,12 +173,16 @@ download: version: "2.1.1" hpoObo: host: http://purl.obolibrary.org/obo/hp.obo + version: "2023-12-01" goObo: host: http://purl.obolibrary.org/obo/go/go-basic.obo + version: "2023-12-01" doidObo: host: http://purl.obolibrary.org/obo/doid.obo + version: "2023-12-01" mondoObo: host: http://purl.obolibrary.org/obo/mondo.obo + version: "2023-12-01" goAnnotation: host: http://geneontology.org/gene-associations/goa_human.gaf.gz revel: diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/OntologyDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/OntologyDownloadManager.java index 0776354e80..522be7b27d 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/OntologyDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/OntologyDownloadManager.java @@ -36,7 +36,7 @@ public OntologyDownloadManager(String species, String assembly, Path targetDirec public List download() throws IOException, InterruptedException { - logger.info("Downloading obo files ..."); + logger.info("Downloading OBO files ..."); List downloadFiles = new ArrayList<>(); Path oboFolder = downloadFolder.resolve("ontology"); @@ -44,22 +44,24 @@ public List download() throws IOException, InterruptedException { String url = configuration.getDownload().getHpoObo().getHost(); downloadFiles.add(downloadFile(url, oboFolder.resolve("hp.obo").toString())); - saveVersionData(EtlCommons.OBO_DATA, "HPO", getTimeStamp(), getTimeStamp(), Collections.singletonList(url), buildFolder.resolve(EtlCommons.HPO_VERSION_FILE)); url = configuration.getDownload().getGoObo().getHost(); downloadFiles.add(downloadFile(url, oboFolder.resolve("go-basic.obo").toString())); - saveVersionData(EtlCommons.OBO_DATA, "GO", getTimeStamp(), getTimeStamp(), Collections.singletonList(url), buildFolder.resolve(EtlCommons.GO_VERSION_FILE)); url = configuration.getDownload().getDoidObo().getHost(); downloadFiles.add(downloadFile(url, oboFolder.resolve("doid.obo").toString())); - saveVersionData(EtlCommons.OBO_DATA, "DO", getTimeStamp(), getTimeStamp(), Collections.singletonList(url), buildFolder.resolve(EtlCommons.DO_VERSION_FILE)); + url = configuration.getDownload().getMondoObo().getHost(); + downloadFiles.add(downloadFile(url, oboFolder.resolve("mondo.obo").toString())); + saveVersionData(EtlCommons.OBO_DATA, "MONDO", getTimeStamp(), getTimeStamp(), + Collections.singletonList(url), buildFolder.resolve(EtlCommons.DO_VERSION_FILE)); + return downloadFiles; } } From 8e12d7d7df2b863bb953adf38dba5543ef7fd825 Mon Sep 17 00:00:00 2001 From: imedina Date: Tue, 2 Jan 2024 03:02:48 +0000 Subject: [PATCH 20/51] downloader: fix HPO configuration --- cellbase-core/src/main/resources/configuration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index be50a2f717..3f6bf7b928 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -154,7 +154,7 @@ download: version: "1.0.2 associations_e106_r2022-05-17" hpo: ## Downlaod manually from here now: https://hpo.jax.org/app/data/annotations -# host: https://ci.monarchinitiative.org/view/hpo/job/hpo.annotations/lastSuccessfulBuild/artifact/rare-diseases/util/annotation/phenotype_to_genes.txt + host: https://ci.monarchinitiative.org/view/hpo/job/hpo.annotations/lastSuccessfulBuild/artifact/rare-diseases/util/annotation/phenotype_to_genes.txt disgenet: host: https://www.disgenet.org/static/disgenet_ap1/files/downloads files: From 42f843af084053cabcc24c96e60a116d7a50e4f6 Mon Sep 17 00:00:00 2001 From: imedina Date: Tue, 2 Jan 2024 10:34:48 +0000 Subject: [PATCH 21/51] download: remove println --- .../opencb/cellbase/lib/download/RegulationDownloadManager.java | 1 - 1 file changed, 1 deletion(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java index d9e01cef4d..1abb352fbe 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java @@ -116,7 +116,6 @@ private void loadPfmMatrices() throws IOException, NoSuchMethodException, FileFo while ((tfbsMotifFeature = motifsFeatureReader.read()) != null) { String pfmId = getMatrixId(filePattern, tfbsMotifFeature); if (StringUtils.isNotEmpty(pfmId)) { - System.out.println(pfmId); motifIds.add(pfmId); } } From dd638e4362cde9bb2a0238b9dacfc434b99ea3b7 Mon Sep 17 00:00:00 2001 From: imedina Date: Tue, 2 Jan 2024 11:23:52 +0000 Subject: [PATCH 22/51] download: fix mirna regulation downloader --- .../opencb/cellbase/lib/download/RegulationDownloadManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java index 1abb352fbe..51152e478d 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java @@ -64,8 +64,8 @@ public List download() throws IOException, InterruptedException, N List downloadFiles = new ArrayList<>(); downloadFiles.addAll(downloadRegulatoryaAndMotifFeatures()); - downloadFiles.add(downloadMirna()); downloadFiles.add(downloadMiRTarBase()); + downloadFiles.add(downloadMirna()); return downloadFiles; } From b6ab08a4e6a338ead005a290e4dfa64ba65d9bd5 Mon Sep 17 00:00:00 2001 From: imedina Date: Tue, 2 Jan 2024 15:26:49 +0000 Subject: [PATCH 23/51] download: fix uniprot protocol --- cellbase-core/src/main/resources/configuration.yml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index 3f6bf7b928..93287439c4 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -98,7 +98,7 @@ download: host: https://mirtarbase.cuhk.edu.cn/~miRTarBase/miRTarBase_2022/cache/download/9.0/hsa_MTI.xlsx version: "9.0" uniprot: - host: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz + host: https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz version: "2023-11-08" uniprotRelNotes: host: ftp://ftp.uniprot.org/pub/databases/uniprot/relnotes.txt @@ -122,6 +122,7 @@ download: # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-02.xml.gz # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-11.xml.gz host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2023-12.xml.gz + version: "2023-12-01" clinvarVariation: # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2021-07.xml.gz # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-02.xml.gz @@ -129,10 +130,10 @@ download: host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2023-12.xml.gz clinvarSummary: host: http://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz - version: "2023-12-17" + version: "2023-12-01" clinvarVariationAllele: host: http://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variation_allele.txt.gz - version: "2023-12-17" + version: "2023-12-01" clinvarEfoTerms: host: ftp://ftp.ebi.ac.uk/pub/databases/eva/ClinVar/2015/ClinVar_Traits_EFO_Names_260615.csv iarctp53: @@ -150,8 +151,10 @@ download: genomicSuperDups: host: http://hgdownload.cse.ucsc.edu/goldenPath gwasCatalog: - host: http://resources.opencb.org/opencb/cellbase/data/gwas/gwas_catalog_v1.0.2-associations_e106_r2022-05-17.tsv - version: "1.0.2 associations_e106_r2022-05-17" +# host: http://resources.opencb.org/opencb/cellbase/data/gwas/gwas_catalog_v1.0.2-associations_e106_r2022-05-17.tsv +# version: "1.0.2 associations_e106_r2022-05-17" + host: ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/2023/12/21/gwas-catalog-associations.tsv + version: "23-12-21" hpo: ## Downlaod manually from here now: https://hpo.jax.org/app/data/annotations host: https://ci.monarchinitiative.org/view/hpo/job/hpo.annotations/lastSuccessfulBuild/artifact/rare-diseases/util/annotation/phenotype_to_genes.txt From 5c83213b467e2ef55f7ab18297e82cdef14eda75 Mon Sep 17 00:00:00 2001 From: imedina Date: Tue, 2 Jan 2024 15:42:23 +0000 Subject: [PATCH 24/51] download: fix uniprot protocol --- cellbase-core/src/main/resources/configuration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index 93287439c4..8f3d756e04 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -101,7 +101,7 @@ download: host: https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz version: "2023-11-08" uniprotRelNotes: - host: ftp://ftp.uniprot.org/pub/databases/uniprot/relnotes.txt + host: https://ftp.uniprot.org/pub/databases/uniprot/relnotes.txt version: "2023-11-08" intact: host: ftp://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.txt From 531c6462d8a0c77a1d9d3b1257f662546046e5c4 Mon Sep 17 00:00:00 2001 From: imedina Date: Wed, 3 Jan 2024 01:00:36 +0000 Subject: [PATCH 25/51] download: fix protein interpro and intact data downloader --- .../src/main/resources/configuration.yml | 12 ++++--- .../lib/download/ProteinDownloadManager.java | 34 +++++++++++-------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index 8f3d756e04..279ff8ea72 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -97,20 +97,24 @@ download: miRTarBase: host: https://mirtarbase.cuhk.edu.cn/~miRTarBase/miRTarBase_2022/cache/download/9.0/hsa_MTI.xlsx version: "9.0" + + ## Protein Data uniprot: host: https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz version: "2023-11-08" uniprotRelNotes: host: https://ftp.uniprot.org/pub/databases/uniprot/relnotes.txt version: "2023-11-08" - intact: - host: ftp://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.txt interpro: -# host: ftp://ftp.ebi.ac.uk/pub/databases/interpro/current/protein2ipr.dat.gz host: https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/protein2ipr.dat.gz + version: "2023-11-08" interproRelNotes: -# host: ftp://ftp.ebi.ac.uk/pub/databases/interpro/current/release_notes.txt host: https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/release_notes.txt + intact: + host: https://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.txt + version: "2023-10-07" + + ## Conservation Scores conservation: host: https://hgdownload.cse.ucsc.edu/goldenPath/ version: "2022-08-30" diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ProteinDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ProteinDownloadManager.java index 08f28cfdad..5a722ed448 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ProteinDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ProteinDownloadManager.java @@ -22,7 +22,6 @@ import org.opencb.commons.utils.FileUtils; import java.io.BufferedReader; -import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintWriter; import java.nio.file.Files; @@ -34,6 +33,8 @@ public class ProteinDownloadManager extends AbstractDownloadManager { private static final String UNIPROT_NAME = "UniProt"; + private static final String INTERPRO_NAME = "InterPro"; + private static final String INTACT_NAME = "IntAct"; public ProteinDownloadManager(String species, String assembly, Path targetDirectory, CellBaseConfiguration configuration) throws IOException, CellBaseException { @@ -56,6 +57,7 @@ public List download() throws IOException, InterruptedException { Files.createDirectories(proteinFolder); List downloadFiles = new ArrayList<>(); + // Uniprot String url = configuration.getDownload().getUniprot().getHost(); downloadFiles.add(downloadFile(url, proteinFolder.resolve("uniprot_sprot.xml.gz").toString())); Files.createDirectories(proteinFolder.resolve("uniprot_chunks")); @@ -63,23 +65,25 @@ public List download() throws IOException, InterruptedException { String relNotesUrl = configuration.getDownload().getUniprotRelNotes().getHost(); downloadFiles.add(downloadFile(relNotesUrl, proteinFolder.resolve("uniprotRelnotes.txt").toString())); - saveVersionData(EtlCommons.PROTEIN_DATA, UNIPROT_NAME, getLine(proteinFolder.resolve("uniprotRelnotes.txt"), 1), getTimeStamp(), Collections.singletonList(url), proteinFolder.resolve("uniprotVersion.json")); - return downloadFiles; + // Interpro + String interproUrl = configuration.getDownload().getInterpro().getHost(); + downloadFiles.add(downloadFile(interproUrl, proteinFolder.resolve("protein2ipr.dat.gz").toString())); + + relNotesUrl = configuration.getDownload().getInterproRelNotes().getHost(); + downloadFiles.add(downloadFile(relNotesUrl, proteinFolder.resolve("interproRelnotes.txt").toString())); + saveVersionData(EtlCommons.PROTEIN_DATA, INTERPRO_NAME, getLine(proteinFolder.resolve("interproRelnotes.txt"), 5), + getTimeStamp(), Collections.singletonList(interproUrl), proteinFolder.resolve("interproVersion.json")); -// url = configuration.getDownload().getIntact().getHost(); -// downloadFile(url, proteinFolder.resolve("intact.txt").toString()); -// saveVersionData(EtlCommons.PROTEIN_DATA, INTACT_NAME, null, getTimeStamp(), Collections.singletonList(url), -// proteinFolder.resolve("intactVersion.json")); -// -// url = configuration.getDownload().getInterpro().getHost(); -// downloadFile(url, proteinFolder.resolve("protein2ipr.dat.gz").toString()); -// relNotesUrl = configuration.getDownload().getInterproRelNotes().getHost(); -// downloadFile(relNotesUrl, proteinFolder.resolve("interproRelnotes.txt").toString()); -// saveVersionData(EtlCommons.PROTEIN_DATA, INTERPRO_NAME, getLine(proteinFolder.resolve("interproRelnotes.txt"), 5), -// getTimeStamp(), Collections.singletonList(url), proteinFolder.resolve("interproVersion.json")); + // Intact + String intactUrl = configuration.getDownload().getIntact().getHost(); + downloadFiles.add(downloadFile(intactUrl, proteinFolder.resolve("intact.txt").toString())); + saveVersionData(EtlCommons.PROTEIN_DATA, INTACT_NAME, configuration.getDownload().getIntact().getVersion(), + getTimeStamp(), Collections.singletonList(intactUrl), proteinFolder.resolve("intactVersion.json")); + + return downloadFiles; } private void splitUniprot(Path uniprotFilePath, Path splitOutdirPath) throws IOException { @@ -96,7 +100,7 @@ private void splitUniprot(Path uniprotFilePath, Path splitOutdirPath) throws IOE inEntry = true; beforeEntry = false; if (count % 10000 == 0) { - pw = new PrintWriter(new FileOutputStream(splitOutdirPath.resolve("chunk_" + chunk + ".xml").toFile())); + pw = new PrintWriter(Files.newOutputStream(splitOutdirPath.resolve("chunk_" + chunk + ".xml").toFile().toPath())); pw.println(header.toString().trim()); } count++; From 5d007212aa88ca07df0b6191ae6110efeb97ceee Mon Sep 17 00:00:00 2001 From: imedina Date: Wed, 3 Jan 2024 02:28:45 +0000 Subject: [PATCH 26/51] download: update CADD to 1.7-pre --- cellbase-core/src/main/resources/configuration.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index 279ff8ea72..f24827532c 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -172,7 +172,9 @@ download: version: "2022-02-01" cadd: ## Nacho: Move to https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz ASAP! - host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz +# host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz + host: https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz + version: "1.7-pre" reactome: host: http://www.reactome.org/download/current/biopax.zip gnomadConstraints: From 68a3c83abc24c017229fcd8f32abc6342a467e98 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 15 Jan 2024 16:24:25 +0100 Subject: [PATCH 27/51] pom: Restore htsjdk version #TASK-4437 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 65a5d65480..df2b3ee01a 100644 --- a/pom.xml +++ b/pom.xml @@ -48,7 +48,7 @@ 19.0 1.9.1 1.3 - 3.0.5 + 2.23.0 1.48.0 2.4 2.4 From 4954d8336408035252316e4dda21a3beea1de6f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Wed, 24 Jan 2024 13:05:51 +0100 Subject: [PATCH 28/51] app: add scritpts to preprocess original VCF from gnomAD, #TASK-5385 --- .../scripts/gnomad/mitochondrial/README.md | 10 ++ .../scripts/gnomad/mitochondrial/gnomad_mt.py | 120 ++++++++++++++++++ .../gnomad/mitochondrial/opencga_gnomad_mt.sh | 44 +++++++ 3 files changed, 174 insertions(+) create mode 100644 cellbase-app/app/scripts/gnomad/mitochondrial/README.md create mode 100644 cellbase-app/app/scripts/gnomad/mitochondrial/gnomad_mt.py create mode 100644 cellbase-app/app/scripts/gnomad/mitochondrial/opencga_gnomad_mt.sh diff --git a/cellbase-app/app/scripts/gnomad/mitochondrial/README.md b/cellbase-app/app/scripts/gnomad/mitochondrial/README.md new file mode 100644 index 0000000000..27aefcb881 --- /dev/null +++ b/cellbase-app/app/scripts/gnomad/mitochondrial/README.md @@ -0,0 +1,10 @@ +gnomAD Mitochondrial DNA (mtDNA) variants v3.1: +URL: https://storage.googleapis.com/gcp-public-data--gnomad/release/3.1/vcf/genomes/gnomad.genomes.v3.1.sites.chrM.vcf.bgz + +Mapping file in ticket BIOINFO-99: mapping_file_gnomad_mt_mod_file.txt + +Script to preprocess original VCF from gnomad: gnomad_mt.py + +Script to load gnomad mt variants into OpenCGA and export them in json format annotation.populationFrequencies object: opencga_gnomad_mt.sh + + diff --git a/cellbase-app/app/scripts/gnomad/mitochondrial/gnomad_mt.py b/cellbase-app/app/scripts/gnomad/mitochondrial/gnomad_mt.py new file mode 100644 index 0000000000..8e010ebf36 --- /dev/null +++ b/cellbase-app/app/scripts/gnomad/mitochondrial/gnomad_mt.py @@ -0,0 +1,120 @@ +import sys +import gzip + + +POPULATIONS = ['afr', 'ami', 'amr', 'asj', 'eas', 'fin', 'nfe', 'oth', 'sas', 'mid'] +HEADER_COMMON = [ + '##INFO=', + '##INFO=', + '##INFO=' +] +HEADER_POP = [ + '##INFO=', + '##INFO=', + '##INFO=', + '##INFO=' +] + + +def main(): + + # Creating custom header + custom_header = [] + custom_header += HEADER_COMMON + for pop in POPULATIONS: + custom_header += ['\n'.join(HEADER_POP).format(pop=pop)] + custom_header = '\n'.join(custom_header) + '\n' + + # Opening input/output files + vcf_input_fpath = sys.argv[1] + vcf_output_fpath = sys.argv[2] + vcf_input_fhand = gzip.open(vcf_input_fpath, 'r') + vcf_output_fhand = gzip.open(vcf_output_fpath, 'wt') + + # Calculating new INFO fields for each variant + for line in vcf_input_fhand: + line = line.decode() + + # Writing header to output + if line.startswith('##VEP'): # adding custom header before "##VEP" line + vcf_output_fhand.write(custom_header) + vcf_output_fhand.write(line) + continue + if line.startswith('#'): + vcf_output_fhand.write(line) + continue + + # Dict to store the new calculated data + new_info = {} + + # Getting variant and INFO data + variant_items = line.strip().split() + info_items = variant_items[7].split(';') + + for info_item in info_items: + + # Getting key/value for each INFO item + if len(info_item.split('=', maxsplit=1)) < 2: # skipping flags + continue + info_key, info_value = info_item.split('=', maxsplit=1) + + # Getting INFO data for calculations + if info_key == 'pop_AF_hom': + pop_AF_hom = list(map(float, info_value.split('|'))) + if info_key == 'pop_AF_het': + pop_AF_het = list(map(float, info_value.split('|'))) + if info_key == 'AF_hom': + AF_hom = float(info_value) + if info_key == 'AF_het': + AF_het = float(info_value) + if info_key == 'pop_AC_hom': + pop_AC_hom = list(map(int, info_value.split('|'))) + if info_key == 'pop_AC_het': + pop_AC_het = list(map(int, info_value.split('|'))) + if info_key == 'AC_hom': + AC_hom = int(info_value) + if info_key == 'AC_het': + AC_het = int(info_value) + if info_key == 'pop_AN': + pop_AN = list(map(int, info_value.split('|'))) + if info_key == 'AN': + AN = int(info_value) + + # Calculating AF_{pop} and AF + # e.g. AF_sas = pop_AF_hom[i] + pop_AF_het[i] (i = index of sas population) + pop_AF = [x + y for x, y in zip(pop_AF_hom, pop_AF_het)] + for i, pop in enumerate(POPULATIONS): + new_info['AF_' + pop] = pop_AF[i] + new_info['AF'] = AF_hom + AF_het + + # Calculating AC_{pop} and AC + # e.g. AC_sas = pop_AC_hom[i] + pop_AC_het[i] (i = index of sas population) + pop_AC = [x + y for x, y in zip(pop_AC_hom, pop_AC_het)] + for i, pop in enumerate(POPULATIONS): + new_info['AC_' + pop] = pop_AC[i] + new_info['AC'] = AC_hom + AC_het + + # Calculating AN_{pop} + # e.g. AN_sas = pop_AN[i] (i = index of sas population) + for i, pop in enumerate(POPULATIONS): + new_info['AN_' + pop] = pop_AN[i] + + # Calculating GTC_{pop} + # e.g. GTC_sas = (pop_AN[i] - (pop_AC_het[i] + pop_AC_hom[i])) + "," + pop_AC_het[i] + "," + pop_AC_hom[i] + pop_AC = [x + y for x, y in zip(pop_AC_hom, pop_AC_het)] + hom_ref = [x - y for x, y in zip(pop_AN, pop_AC)] + for i, pop in enumerate(POPULATIONS): + new_info['GTC_' + pop] = ','.join(map(str, [hom_ref[i], pop_AC_het[i], pop_AC_hom[i]])) + new_info['GTC'] = ','.join(map(str, [AN - AC_hom + AC_het, AC_het, AC_hom])) + + # Joining existing INFO field and new custom INFO data + custom_info_data = ';'.join(['='.join([k, str(new_info[k])]) for k in new_info]) + new_info_field = ';'.join(info_items + [custom_info_data]) + + # Replacing original INFO field + variant_items[7] = new_info_field + vcf_output_fhand.write('\t'.join(variant_items) + '\n') + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/cellbase-app/app/scripts/gnomad/mitochondrial/opencga_gnomad_mt.sh b/cellbase-app/app/scripts/gnomad/mitochondrial/opencga_gnomad_mt.sh new file mode 100644 index 0000000000..8b1f97dcac --- /dev/null +++ b/cellbase-app/app/scripts/gnomad/mitochondrial/opencga_gnomad_mt.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# Variables +user="user" +host="host_name" +project="population" +project_name="Population" +study="gnomad_mt" +study_name="gnomAD v3.1 Mitocondrial DNA Variants" +study_path="data/"$study +folder_path="/home/gnomad_mt" +mapping_file="mapping_file_gnomad_mt_mod_file.txt" +vcf_file="gnomad.genomes.v3.1.sites.chrM.mod.vcf.gz" +mapping_file_path=$folder_path$mapping_file +vcf_file_path=$folder_path$vcf_file + +# Login +/home/opencga-client-2.12.0/bin/opencga.sh login $user --host $host + +# Project creation +/home/opencga-client-2.12.0/bin/opencga.sh projects create --id $project --name $project_name --organism-scientific-name hsapiens --organism-assembly grch38 --host $host + +# Study creation +/home/opencga-client-2.12.0/bin/opencga.sh studies create --id $study --name $study_name --project $project --host $host + +# Folders creation within Catalog +/home/opencga-client-2.12.0/bin/opencga.sh files create --path $study_path --parents --study $study --type DIRECTORY --host $host + +# Uploading gnomad mt variants VCF and mapping file for gnomad mt variants +/home/opencga-client-2.12.0/bin/opencga.sh files upload -i $mapping_file_path --path $study_path --study $study --host $host + +/home/opencga-client-2.12.0/bin/opencga.sh files upload -i $vcf_file_path --path $study_path --study $study --host $host + +# Variant index for gnomad mt variants VCF +/home/opencga-client-2.12.0/bin/opencga.sh operations variant-index --study $study --file $vcf_file --load-archive NO --load-split-data CHROMOSOME --host $host + +# Variant stats index for gnomad mt variants. The corresponding cohorts and variant cohort stats will be generated using the information of interest provided in the mapping file and INFO column of the gnomad mt VCF +/home/opencga-client-2.12.0/bin/opencga.sh operations variant-stats-index --study $study --aggregation-mapping-file $mapping_file --aggregated BASIC --host $host + +# Variant cohort stats will be converted to population frequencies data model (julie-tool) +/home/opencga-client-2.12.0/bin/opencga.sh operations variant-julie-run --project $project --host $host + +# Export of annotation.populationFrequencies in json format +/home/opencga-client-2.12.0/bin/opencga.sh variant export-run --body_include annotation.populationFrequencies --body_project $project --project $project --output-file-format json --host $host From 3d9395b582f2812432f475913d9caef320703d66 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 24 Jan 2024 13:30:05 +0100 Subject: [PATCH 29/51] Fix OpenCB repository versions to conform to XetaBase 2.0.0 version #TASK-5444 --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index af1dda8b9d..adf59c092a 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.9.0-SNAPSHOT + 6.0.0-SNAPSHOT ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 7e5c9a5884..977b4b81cd 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.9.0-SNAPSHOT + 6.0.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index 89f51a1ffc..8fcd89455f 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.9.0-SNAPSHOT + 6.0.0-SNAPSHOT ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 780ead8687..59fb65dfda 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.9.0-SNAPSHOT + 6.0.0-SNAPSHOT ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index ff3868b7c7..71cdea12ee 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.9.0-SNAPSHOT + 6.0.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index df2b3ee01a..bc06bbd099 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.9.0-SNAPSHOT + 6.0.0-SNAPSHOT pom CellBase project @@ -23,8 +23,8 @@ ${project.version} - 4.13.0-SNAPSHOT - 2.13.0-SNAPSHOT + 5.0.0-SNAPSHOT + 3.0.0-SNAPSHOT 0.1.0 9.4.51.v20230217 From ad29e6cbd9dc0155d67e4166cc6758858154e682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 29 Jan 2024 11:24:40 +0100 Subject: [PATCH 30/51] app: fix script gnomad_mt.py, #TASK-5385 --- cellbase-app/app/scripts/gnomad/mitochondrial/gnomad_mt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellbase-app/app/scripts/gnomad/mitochondrial/gnomad_mt.py b/cellbase-app/app/scripts/gnomad/mitochondrial/gnomad_mt.py index 8e010ebf36..34ae614eef 100644 --- a/cellbase-app/app/scripts/gnomad/mitochondrial/gnomad_mt.py +++ b/cellbase-app/app/scripts/gnomad/mitochondrial/gnomad_mt.py @@ -105,7 +105,7 @@ def main(): hom_ref = [x - y for x, y in zip(pop_AN, pop_AC)] for i, pop in enumerate(POPULATIONS): new_info['GTC_' + pop] = ','.join(map(str, [hom_ref[i], pop_AC_het[i], pop_AC_hom[i]])) - new_info['GTC'] = ','.join(map(str, [AN - AC_hom + AC_het, AC_het, AC_hom])) + new_info['GTC'] = ','.join(map(str, [AN - (AC_hom + AC_het), AC_het, AC_hom])) # Joining existing INFO field and new custom INFO data custom_info_data = ';'.join(['='.join([k, str(new_info[k])]) for k in new_info]) From a5202e407a4964ee104c39e24b9658c6c8facd51 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 4 Mar 2024 12:23:58 +0100 Subject: [PATCH 31/51] Prepare release 6.0.0 --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index adf59c092a..d7b8c8a47e 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 977b4b81cd..566e1b40e8 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index 8fcd89455f..b628de3b21 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 59fb65dfda..dd087fb692 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index 71cdea12ee..d5e995998f 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/pom.xml b/pom.xml index bc06bbd099..ebe4aeb5f5 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 pom CellBase project @@ -23,8 +23,8 @@ ${project.version} - 5.0.0-SNAPSHOT - 3.0.0-SNAPSHOT + 5.0.0 + 3.0.0 0.1.0 9.4.51.v20230217 From 0c17b30db323cc99a2eb37c663ea5d9c357fc626 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 4 Mar 2024 12:46:40 +0100 Subject: [PATCH 32/51] Prepare release 6.0.0 --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index adf59c092a..d7b8c8a47e 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 977b4b81cd..566e1b40e8 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index 8fcd89455f..b628de3b21 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 59fb65dfda..dd087fb692 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index 71cdea12ee..d5e995998f 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/pom.xml b/pom.xml index bc06bbd099..ebe4aeb5f5 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 pom CellBase project @@ -23,8 +23,8 @@ ${project.version} - 5.0.0-SNAPSHOT - 3.0.0-SNAPSHOT + 5.0.0 + 3.0.0 0.1.0 9.4.51.v20230217 From c538275bf5353f0497a7c547e2cee89a432921ae Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 4 Mar 2024 12:52:58 +0100 Subject: [PATCH 33/51] Prepare version to release --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index d7b8c8a47e..adf59c092a 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0 + 6.0.0-SNAPSHOT ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 566e1b40e8..977b4b81cd 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0 + 6.0.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index b628de3b21..8fcd89455f 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0 + 6.0.0-SNAPSHOT ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index dd087fb692..59fb65dfda 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0 + 6.0.0-SNAPSHOT ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index d5e995998f..71cdea12ee 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0 + 6.0.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index ebe4aeb5f5..21f0dc3f84 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0 + 6.0.0-SNAPSHOT pom CellBase project From 6fca7ff314e612544025fe2ca1301d33d3965387 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 4 Mar 2024 12:55:38 +0100 Subject: [PATCH 34/51] Prepare release 6.0.0 --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index adf59c092a..d7b8c8a47e 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 977b4b81cd..566e1b40e8 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index 8fcd89455f..b628de3b21 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 59fb65dfda..dd087fb692 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index 71cdea12ee..d5e995998f 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/pom.xml b/pom.xml index 21f0dc3f84..ebe4aeb5f5 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 pom CellBase project From e236e532bf3757097ff77945139ac419b92cc326 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 4 Mar 2024 16:24:53 +0100 Subject: [PATCH 35/51] Prepare new development branch release-6.0.x --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index d7b8c8a47e..999f545d10 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0 + 6.1.0-SNAPSHOT ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 566e1b40e8..fdc2ece46b 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0 + 6.1.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index b628de3b21..d711a053f8 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0 + 6.1.0-SNAPSHOT ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index dd087fb692..71964bb36e 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0 + 6.1.0-SNAPSHOT ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index d5e995998f..3b3e1b743c 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0 + 6.1.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index ebe4aeb5f5..95ffb702c6 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0 + 6.1.0-SNAPSHOT pom CellBase project @@ -23,8 +23,8 @@ ${project.version} - 5.0.0 - 3.0.0 + 5.1.0-SNAPSHOT + 3.1.0-SNAPSHOT 0.1.0 9.4.51.v20230217 From ee4e21242c2cb3ea5eadf3e4b65c84865a3d3a39 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 4 Mar 2024 16:29:56 +0100 Subject: [PATCH 36/51] Prepare new development version 6.1.0-SNAPSHOT --- cellbase-app/pom.xml | 4 ---- cellbase-client/pom.xml | 4 ---- cellbase-core/pom.xml | 4 ---- cellbase-lib/pom.xml | 4 ---- cellbase-server/pom.xml | 4 ---- pom.xml | 10 +--------- 6 files changed, 1 insertion(+), 29 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index a2cb67967a..999f545d10 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,11 +6,7 @@ org.opencb.cellbase cellbase -<<<<<<< HEAD - 6.0.0 -======= 6.1.0-SNAPSHOT ->>>>>>> release-6.0.x ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index daed6848f7..fdc2ece46b 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,11 +6,7 @@ org.opencb.cellbase cellbase -<<<<<<< HEAD - 6.0.0 -======= 6.1.0-SNAPSHOT ->>>>>>> release-6.0.x ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index 2c7cdab95e..d711a053f8 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,11 +6,7 @@ org.opencb.cellbase cellbase -<<<<<<< HEAD - 6.0.0 -======= 6.1.0-SNAPSHOT ->>>>>>> release-6.0.x ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 02cf753a39..71964bb36e 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,11 +6,7 @@ org.opencb.cellbase cellbase -<<<<<<< HEAD - 6.0.0 -======= 6.1.0-SNAPSHOT ->>>>>>> release-6.0.x ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index ae54a2c52d..3b3e1b743c 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,11 +6,7 @@ org.opencb.cellbase cellbase -<<<<<<< HEAD - 6.0.0 -======= 6.1.0-SNAPSHOT ->>>>>>> release-6.0.x ../pom.xml diff --git a/pom.xml b/pom.xml index c63805c517..42f2e09323 100644 --- a/pom.xml +++ b/pom.xml @@ -6,11 +6,7 @@ org.opencb.cellbase cellbase -<<<<<<< HEAD - 6.0.0 -======= 6.1.0-SNAPSHOT ->>>>>>> release-6.0.x pom CellBase project @@ -27,13 +23,9 @@ ${project.version} -<<<<<<< HEAD - 5.0.0 - 3.0.0 -======= 5.1.0-SNAPSHOT 3.1.0-SNAPSHOT ->>>>>>> release-6.0.x + 0.1.0 9.4.51.v20230217 From cb72ab642970b3474df4fbc5869e463d02c76466 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 5 Mar 2024 11:54:47 +0100 Subject: [PATCH 37/51] Restore branch before aborted release --- .../cloud/docker/cellbase-builder/Dockerfile | 2 +- .../app/scripts/ensembl-scripts/DB_CONFIG.pm | 8 +- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- .../core/config/DownloadProperties.java | 20 ----- .../src/main/resources/configuration.yml | 74 +++++-------------- cellbase-lib/pom.xml | 6 +- .../org/opencb/cellbase/lib/EtlCommons.java | 1 - .../cellbase/lib/builders/GeneBuilder.java | 4 +- .../lib/builders/OntologyBuilder.java | 9 --- .../lib/download/GeneDownloadManager.java | 26 ------- .../lib/download/GenomeDownloadManager.java | 18 ++--- .../lib/download/OntologyDownloadManager.java | 10 +-- .../lib/download/ProteinDownloadManager.java | 34 ++++----- .../download/RegulationDownloadManager.java | 2 +- .../lib/impl/core/MetaMongoDBAdaptor.java | 3 +- cellbase-server/pom.xml | 2 +- pom.xml | 22 +++--- 19 files changed, 73 insertions(+), 174 deletions(-) diff --git a/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile b/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile index 6e1657d1bf..17d5accff4 100644 --- a/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile +++ b/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile @@ -28,4 +28,4 @@ RUN cd /opt/ensembl && \ git clone https://github.com/Ensembl/ensembl-compara.git && \ git clone https://github.com/Ensembl/ensembl-io.git -ENV PERL5LIB=$PERL5LIB:/opt/ensembl/bioperl-live:/opt/ensembl/ensembl/modules:/opt/ensembl/ensembl-variation/modules:/opt/ensembl/ensembl-funcgen/modules:/opt/ensembl/ensembl-compara/modules:/opt/ensembl/lib/perl/5.18.2:/opt/cellbase/scripts/ensembl-scripts +ENV PERL5LIB=$PERL5LIB:/opt/ensembl/bioperl-live:/opt/ensembl/ensembl/modules:/opt/ensembl/ensembl-variation/modules:/opt/ensembl/ensembl-funcgen/modules:/opt/ensembl/ensembl-compara/modules:/opt/ensembl/lib/perl/5.18.2:/opt/cellbase diff --git a/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm b/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm index 70865465e9..aa22cf10b1 100755 --- a/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm +++ b/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm @@ -134,10 +134,10 @@ our $ENSEMBL_GENOMES_PORT = "4157"; our $ENSEMBL_GENOMES_USER = "anonymous"; ## Vertebrates -our $HOMO_SAPIENS_CORE = "homo_sapiens_core_110_38"; -our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_110_38"; -our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_110_38"; -our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_110_38"; +our $HOMO_SAPIENS_CORE = "homo_sapiens_core_104_38"; +our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_104_38"; +our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_104_38"; +our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_104_38"; #our $HOMO_SAPIENS_CORE = "homo_sapiens_core_78_38"; #our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_78_38"; #our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_78_38"; diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index 999f545d10..289046b4cc 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.1.0-SNAPSHOT + 5.8.2 ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index fdc2ece46b..ce235ddfa6 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.1.0-SNAPSHOT + 5.8.2 ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index d711a053f8..3e68b37513 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.1.0-SNAPSHOT + 5.8.2 ../pom.xml diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java index 19f1606c91..ee4216f560 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java @@ -26,7 +26,6 @@ public class DownloadProperties { private EnsemblProperties ensembl; private EnsemblProperties ensemblGenomes; private URLProperties hgnc; - private URLProperties cancerHotspot; private URLProperties refSeq; private URLProperties refSeqFasta; private URLProperties refSeqProteinFasta; @@ -71,7 +70,6 @@ public class DownloadProperties { private URLProperties hpoObo; private URLProperties goObo; private URLProperties doidObo; - private URLProperties mondoObo; private URLProperties goAnnotation; private URLProperties revel; private URLProperties pubmed; @@ -519,24 +517,6 @@ public DownloadProperties setHgnc(URLProperties hgnc) { return this; } - public URLProperties getCancerHotspot() { - return cancerHotspot; - } - - public DownloadProperties setCancerHotspot(URLProperties cancerHotspot) { - this.cancerHotspot = cancerHotspot; - return this; - } - - public URLProperties getMondoObo() { - return mondoObo; - } - - public DownloadProperties setMondoObo(URLProperties mondoObo) { - this.mondoObo = mondoObo; - return this; - } - public static class EnsemblProperties { private DatabaseCredentials database; diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index f24827532c..0f8d199118 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -62,11 +62,7 @@ download: url: host: ftp://ftp.ensemblgenomes.org/pub hgnc: - host: https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2023-11-01.txt - version: 2023-11-01 - cancerHotspot: - host: https://www.cancerhotspots.org/files/hotspots_v2.xls - version: "v2" + host: https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2022-01-01.txt refSeq: host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gtf.gz refSeqFasta: @@ -77,15 +73,12 @@ download: host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_rna.fna.gz maneSelect: # host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_0.93/MANE.GRCh38.v0.93.summary.txt.gz -# host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.0/MANE.GRCh38.v1.0.summary.txt.gz - host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.1/MANE.GRCh38.v1.1.summary.txt.gz - version: "1.1" + host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.0/MANE.GRCh38.v1.0.summary.txt.gz + version: 0.93 lrg: host: http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt - version: "2021-03-30" geneUniprotXref: host: http://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/ - version: "2023-11-08" geneExpressionAtlas: host: ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/gxa/allgenes_updown_in_organism_part_2.0.14.tab.gz mirbase: @@ -95,49 +88,33 @@ download: targetScan: host: http://hgdownload.cse.ucsc.edu/goldenPath/ miRTarBase: - host: https://mirtarbase.cuhk.edu.cn/~miRTarBase/miRTarBase_2022/cache/download/9.0/hsa_MTI.xlsx - version: "9.0" - - ## Protein Data + host: https://mirtarbase.cuhk.edu.cn/~miRTarBase/miRTarBase_2022/cache/download/8.0/hsa_MTI.xlsx uniprot: - host: https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz - version: "2023-11-08" + host: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz uniprotRelNotes: - host: https://ftp.uniprot.org/pub/databases/uniprot/relnotes.txt - version: "2023-11-08" + host: ftp://ftp.uniprot.org/pub/databases/uniprot/relnotes.txt + intact: + host: ftp://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.txt interpro: - host: https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/protein2ipr.dat.gz - version: "2023-11-08" + host: ftp://ftp.ebi.ac.uk/pub/databases/interpro/current/protein2ipr.dat.gz interproRelNotes: - host: https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/release_notes.txt - intact: - host: https://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.txt - version: "2023-10-07" - - ## Conservation Scores + host: ftp://ftp.ebi.ac.uk/pub/databases/interpro/current/release_notes.txt conservation: host: https://hgdownload.cse.ucsc.edu/goldenPath/ - version: "2022-08-30" gerp: - host: http://ftp.ensembl.org/pub/release-110/compara/conservation_scores/91_mammals.gerp_conservation_score/gerp_conservation_scores.homo_sapiens.GRCh38.bw - version: "2023-05-17" + host: http://ftp.ensembl.org/pub/release-104/compara/conservation_scores/90_mammals.gerp_conservation_score/gerp_conservation_scores.homo_sapiens.GRCh38.bw clinvar: # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2021-07.xml.gz # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-02.xml.gz -# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-11.xml.gz - host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2023-12.xml.gz - version: "2023-12-01" + host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-11.xml.gz clinvarVariation: # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2021-07.xml.gz # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-02.xml.gz -# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-11.xml.gz - host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2023-12.xml.gz + host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-11.xml.gz clinvarSummary: host: http://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz - version: "2023-12-01" clinvarVariationAllele: host: http://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variation_allele.txt.gz - version: "2023-12-01" clinvarEfoTerms: host: ftp://ftp.ebi.ac.uk/pub/databases/eva/ClinVar/2015/ClinVar_Traits_EFO_Names_260615.csv iarctp53: @@ -155,12 +132,9 @@ download: genomicSuperDups: host: http://hgdownload.cse.ucsc.edu/goldenPath gwasCatalog: -# host: http://resources.opencb.org/opencb/cellbase/data/gwas/gwas_catalog_v1.0.2-associations_e106_r2022-05-17.tsv -# version: "1.0.2 associations_e106_r2022-05-17" - host: ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/2023/12/21/gwas-catalog-associations.tsv - version: "23-12-21" + host: http://resources.opencb.org/opencb/cellbase/data/gwas/gwas_catalog_v1.0.2-associations_e106_r2022-05-17.tsv + version: "1.0.2 associations_e106_r2022-05-17" hpo: - ## Downlaod manually from here now: https://hpo.jax.org/app/data/annotations host: https://ci.monarchinitiative.org/view/hpo/job/hpo.annotations/lastSuccessfulBuild/artifact/rare-diseases/util/annotation/phenotype_to_genes.txt disgenet: host: https://www.disgenet.org/static/disgenet_ap1/files/downloads @@ -168,30 +142,20 @@ download: - all_gene_disease_associations.tsv.gz - readme.txt dgidb: - host: https://old.dgidb.org/data/monthly_tsvs/2022-Feb/interactions.tsv - version: "2022-02-01" + host: https://dgidb.org/data/monthly_tsvs/2021-Jan/interactions.tsv cadd: - ## Nacho: Move to https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz ASAP! -# host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz - host: https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz - version: "1.7-pre" + host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz reactome: host: http://www.reactome.org/download/current/biopax.zip gnomadConstraints: host: https://storage.googleapis.com/gcp-public-data--gnomad/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_transcript.txt.bgz - version: "2.1.1" + version: 2.1.1 hpoObo: host: http://purl.obolibrary.org/obo/hp.obo - version: "2023-12-01" goObo: host: http://purl.obolibrary.org/obo/go/go-basic.obo - version: "2023-12-01" doidObo: host: http://purl.obolibrary.org/obo/doid.obo - version: "2023-12-01" - mondoObo: - host: http://purl.obolibrary.org/obo/mondo.obo - version: "2023-12-01" goAnnotation: host: http://geneontology.org/gene-associations/goa_human.gaf.gz revel: @@ -218,7 +182,7 @@ species: - id: hsapiens scientificName: Homo sapiens assemblies: - - ensemblVersion: '110_38' + - ensemblVersion: '104_38' name: GRCh38 - ensemblVersion: '82_37' name: GRCh37 diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 71964bb36e..ffb98718f8 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.1.0-SNAPSHOT + 5.8.2 ../pom.xml @@ -137,10 +137,10 @@ com.github.samtools htsjdk - + io.jsonwebtoken jjwt-api diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java index 124ac6e6fc..4396f0c2f1 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java @@ -71,7 +71,6 @@ public class EtlCommons { public static final String HPO_FILE = "hp.obo"; public static final String GO_FILE = "go-basic.obo"; public static final String DOID_FILE = "doid.obo"; - public static final String MONDO_FILE = "mondo.obo"; public static final String PFM_DATA = "regulatory_pfm"; // Build specific data options diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java index cd0863a259..563f76dea7 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java @@ -90,8 +90,8 @@ public GeneBuilder(Path geneDirectoryPath, Path genomeSequenceFastaFile, Species boolean flexibleGTFParsing, CellBaseSerializer serializer) throws CellBaseException { this(null, geneDirectoryPath.resolve("description.txt"), geneDirectoryPath.resolve("xrefs.txt"), - geneDirectoryPath.resolve("hgnc_complete_set_2023-11-01.txt"), - geneDirectoryPath.resolve("MANE.GRCh38.v1.1.summary.txt.gz"), + geneDirectoryPath.resolve("hgnc_complete_set_2022-01-01.txt"), + geneDirectoryPath.resolve("MANE.GRCh38.v1.0.summary.txt.gz"), geneDirectoryPath.resolve("list_LRGs_transcripts_xrefs.txt"), geneDirectoryPath.resolve("idmapping_selected.tab.gz"), geneDirectoryPath.getParent().resolve("regulation/motif_features.gff.gz"), diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java index 1eabf8975a..8873dd7f93 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java @@ -32,14 +32,12 @@ public class OntologyBuilder extends CellBaseBuilder { private Path hpoFile; private Path goFile; private Path doidFile; - private Path mondoFile; public OntologyBuilder(Path oboDirectoryPath, CellBaseSerializer serializer) { super(serializer); hpoFile = oboDirectoryPath.resolve(EtlCommons.HPO_FILE); goFile = oboDirectoryPath.resolve(EtlCommons.GO_FILE); doidFile = oboDirectoryPath.resolve(EtlCommons.DOID_FILE); - mondoFile = oboDirectoryPath.resolve(EtlCommons.MONDO_FILE); } @Override @@ -66,13 +64,6 @@ public void parse() throws Exception { serializer.serialize(term); } - bufferedReader = FileUtils.newBufferedReader(mondoFile); - terms = parser.parseOBO(bufferedReader, "Mondo Ontology"); - for (OntologyTerm term : terms) { - term.setSource("MONDO"); - serializer.serialize(term); - } - serializer.close(); } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java index 260ff75427..9d2685eadf 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java @@ -82,8 +82,6 @@ public List download() throws IOException, InterruptedException { downloadFiles.addAll(downloadRefSeq(refseqFolder)); downloadFiles.add(downloadMane(geneFolder)); downloadFiles.add(downloadLrg(geneFolder)); - downloadFiles.add(downloadHgnc(geneFolder)); - downloadFiles.add(downloadCancerHotspot(geneFolder)); downloadFiles.add(downloadDrugData(geneFolder)); downloadFiles.addAll(downloadGeneUniprotXref(geneFolder)); downloadFiles.add(downloadGeneExpressionAtlas(geneFolder)); @@ -210,30 +208,6 @@ private DownloadFile downloadLrg(Path geneFolder) throws IOException, Interrupte return null; } - private DownloadFile downloadHgnc(Path geneFolder) throws IOException, InterruptedException { - if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { - logger.info("Downloading HGNC ..."); - String url = configuration.getDownload().getHgnc().getHost(); - saveVersionData(EtlCommons.GENE_DATA, "HGNC_GENE", configuration.getDownload().getHgnc().getVersion(), - getTimeStamp(), Collections.singletonList(url), geneFolder.resolve("hgncVersion.json")); - String[] array = url.split("/"); - return downloadFile(url, geneFolder.resolve(array[array.length - 1]).toString()); - } - return null; - } - - private DownloadFile downloadCancerHotspot(Path geneFolder) throws IOException, InterruptedException { - if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { - logger.info("Downloading Cancer Hotspot ..."); - String url = configuration.getDownload().getCancerHotspot().getHost(); - saveVersionData(EtlCommons.GENE_DATA, "CANCER_HOTSPOT", configuration.getDownload().getHgnc().getVersion(), - getTimeStamp(), Collections.singletonList(url), geneFolder.resolve("cancerHotspotVersion.json")); - String[] array = url.split("/"); - return downloadFile(url, geneFolder.resolve(array[array.length - 1]).toString()); - } - return null; - } - private DownloadFile downloadGO(Path geneFolder) throws IOException, InterruptedException { if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { logger.info("Downloading go annotation..."); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java index 0ba9f39db4..5a0609867f 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java @@ -47,11 +47,11 @@ public GenomeDownloadManager(String species, String assembly, Path targetDirecto public List download() throws IOException, InterruptedException { List downloadFiles = new ArrayList<>(); downloadFiles.addAll(downloadReferenceGenome()); - downloadFiles.addAll(downloadConservation()); - downloadFiles.addAll(downloadRepeats()); +// downloadFiles.addAll(downloadConservation()); +// downloadFiles.addAll(downloadRepeats()); // cytobands -// runGenomeInfo(); + runGenomeInfo(); return downloadFiles; } @@ -115,16 +115,16 @@ public List downloadConservation() throws IOException, Interrupted List phastconsUrls = new ArrayList<>(chromosomes.length); List phyloPUrls = new ArrayList<>(chromosomes.length); for (String chromosome : chromosomes) { - String phastConsUrl = url + "/phastCons470way/hg38.470way.phastCons/chr" + chromosome - + ".phastCons470way.wigFix.gz"; + String phastConsUrl = url + "/phastCons100way/hg38.100way.phastCons/chr" + chromosome + + ".phastCons100way.wigFix.gz"; downloadFiles.add(downloadFile(phastConsUrl, conservationFolder.resolve("phastCons") - .resolve("chr" + chromosome + ".phastCons470way.wigFix.gz").toString())); + .resolve("chr" + chromosome + ".phastCons100way.wigFix.gz").toString())); phastconsUrls.add(phastConsUrl); - String phyloPUrl = url + "/phyloP470way/hg38.470way.phyloP/chr" + chromosome - + ".phyloP470way.wigFix.gz"; + String phyloPUrl = url + "/phyloP100way/hg38.100way.phyloP100way/chr" + chromosome + + ".phyloP100way.wigFix.gz"; downloadFiles.add(downloadFile(phyloPUrl, conservationFolder.resolve("phylop") - .resolve("chr" + chromosome + ".phyloP470way.wigFix.gz").toString())); + .resolve("chr" + chromosome + ".phyloP100way.wigFix.gz").toString())); phyloPUrls.add(phyloPUrl); } String gerpUrl = configuration.getDownload().getGerp().getHost(); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/OntologyDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/OntologyDownloadManager.java index 522be7b27d..0776354e80 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/OntologyDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/OntologyDownloadManager.java @@ -36,7 +36,7 @@ public OntologyDownloadManager(String species, String assembly, Path targetDirec public List download() throws IOException, InterruptedException { - logger.info("Downloading OBO files ..."); + logger.info("Downloading obo files ..."); List downloadFiles = new ArrayList<>(); Path oboFolder = downloadFolder.resolve("ontology"); @@ -44,22 +44,20 @@ public List download() throws IOException, InterruptedException { String url = configuration.getDownload().getHpoObo().getHost(); downloadFiles.add(downloadFile(url, oboFolder.resolve("hp.obo").toString())); + saveVersionData(EtlCommons.OBO_DATA, "HPO", getTimeStamp(), getTimeStamp(), Collections.singletonList(url), buildFolder.resolve(EtlCommons.HPO_VERSION_FILE)); url = configuration.getDownload().getGoObo().getHost(); downloadFiles.add(downloadFile(url, oboFolder.resolve("go-basic.obo").toString())); + saveVersionData(EtlCommons.OBO_DATA, "GO", getTimeStamp(), getTimeStamp(), Collections.singletonList(url), buildFolder.resolve(EtlCommons.GO_VERSION_FILE)); url = configuration.getDownload().getDoidObo().getHost(); downloadFiles.add(downloadFile(url, oboFolder.resolve("doid.obo").toString())); - saveVersionData(EtlCommons.OBO_DATA, "DO", getTimeStamp(), getTimeStamp(), - Collections.singletonList(url), buildFolder.resolve(EtlCommons.DO_VERSION_FILE)); - url = configuration.getDownload().getMondoObo().getHost(); - downloadFiles.add(downloadFile(url, oboFolder.resolve("mondo.obo").toString())); - saveVersionData(EtlCommons.OBO_DATA, "MONDO", getTimeStamp(), getTimeStamp(), + saveVersionData(EtlCommons.OBO_DATA, "DO", getTimeStamp(), getTimeStamp(), Collections.singletonList(url), buildFolder.resolve(EtlCommons.DO_VERSION_FILE)); return downloadFiles; diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ProteinDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ProteinDownloadManager.java index 5a722ed448..08f28cfdad 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ProteinDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ProteinDownloadManager.java @@ -22,6 +22,7 @@ import org.opencb.commons.utils.FileUtils; import java.io.BufferedReader; +import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintWriter; import java.nio.file.Files; @@ -33,8 +34,6 @@ public class ProteinDownloadManager extends AbstractDownloadManager { private static final String UNIPROT_NAME = "UniProt"; - private static final String INTERPRO_NAME = "InterPro"; - private static final String INTACT_NAME = "IntAct"; public ProteinDownloadManager(String species, String assembly, Path targetDirectory, CellBaseConfiguration configuration) throws IOException, CellBaseException { @@ -57,7 +56,6 @@ public List download() throws IOException, InterruptedException { Files.createDirectories(proteinFolder); List downloadFiles = new ArrayList<>(); - // Uniprot String url = configuration.getDownload().getUniprot().getHost(); downloadFiles.add(downloadFile(url, proteinFolder.resolve("uniprot_sprot.xml.gz").toString())); Files.createDirectories(proteinFolder.resolve("uniprot_chunks")); @@ -65,25 +63,23 @@ public List download() throws IOException, InterruptedException { String relNotesUrl = configuration.getDownload().getUniprotRelNotes().getHost(); downloadFiles.add(downloadFile(relNotesUrl, proteinFolder.resolve("uniprotRelnotes.txt").toString())); + saveVersionData(EtlCommons.PROTEIN_DATA, UNIPROT_NAME, getLine(proteinFolder.resolve("uniprotRelnotes.txt"), 1), getTimeStamp(), Collections.singletonList(url), proteinFolder.resolve("uniprotVersion.json")); - // Interpro - String interproUrl = configuration.getDownload().getInterpro().getHost(); - downloadFiles.add(downloadFile(interproUrl, proteinFolder.resolve("protein2ipr.dat.gz").toString())); - - relNotesUrl = configuration.getDownload().getInterproRelNotes().getHost(); - downloadFiles.add(downloadFile(relNotesUrl, proteinFolder.resolve("interproRelnotes.txt").toString())); - saveVersionData(EtlCommons.PROTEIN_DATA, INTERPRO_NAME, getLine(proteinFolder.resolve("interproRelnotes.txt"), 5), - getTimeStamp(), Collections.singletonList(interproUrl), proteinFolder.resolve("interproVersion.json")); - - // Intact - String intactUrl = configuration.getDownload().getIntact().getHost(); - downloadFiles.add(downloadFile(intactUrl, proteinFolder.resolve("intact.txt").toString())); - saveVersionData(EtlCommons.PROTEIN_DATA, INTACT_NAME, configuration.getDownload().getIntact().getVersion(), - getTimeStamp(), Collections.singletonList(intactUrl), proteinFolder.resolve("intactVersion.json")); - return downloadFiles; + +// url = configuration.getDownload().getIntact().getHost(); +// downloadFile(url, proteinFolder.resolve("intact.txt").toString()); +// saveVersionData(EtlCommons.PROTEIN_DATA, INTACT_NAME, null, getTimeStamp(), Collections.singletonList(url), +// proteinFolder.resolve("intactVersion.json")); +// +// url = configuration.getDownload().getInterpro().getHost(); +// downloadFile(url, proteinFolder.resolve("protein2ipr.dat.gz").toString()); +// relNotesUrl = configuration.getDownload().getInterproRelNotes().getHost(); +// downloadFile(relNotesUrl, proteinFolder.resolve("interproRelnotes.txt").toString()); +// saveVersionData(EtlCommons.PROTEIN_DATA, INTERPRO_NAME, getLine(proteinFolder.resolve("interproRelnotes.txt"), 5), +// getTimeStamp(), Collections.singletonList(url), proteinFolder.resolve("interproVersion.json")); } private void splitUniprot(Path uniprotFilePath, Path splitOutdirPath) throws IOException { @@ -100,7 +96,7 @@ private void splitUniprot(Path uniprotFilePath, Path splitOutdirPath) throws IOE inEntry = true; beforeEntry = false; if (count % 10000 == 0) { - pw = new PrintWriter(Files.newOutputStream(splitOutdirPath.resolve("chunk_" + chunk + ".xml").toFile().toPath())); + pw = new PrintWriter(new FileOutputStream(splitOutdirPath.resolve("chunk_" + chunk + ".xml").toFile())); pw.println(header.toString().trim()); } count++; diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java index 51152e478d..1abb352fbe 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java @@ -64,8 +64,8 @@ public List download() throws IOException, InterruptedException, N List downloadFiles = new ArrayList<>(); downloadFiles.addAll(downloadRegulatoryaAndMotifFeatures()); - downloadFiles.add(downloadMiRTarBase()); downloadFiles.add(downloadMirna()); + downloadFiles.add(downloadMiRTarBase()); return downloadFiles; } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MetaMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MetaMongoDBAdaptor.java index 5d7dbc65d0..e5cd4d38cc 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MetaMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MetaMongoDBAdaptor.java @@ -16,8 +16,6 @@ package org.opencb.cellbase.lib.impl.core; - -import com.fasterxml.jackson.databind.ObjectMapper; import com.mongodb.ReadPreference; import com.mongodb.WriteConcern; import com.mongodb.client.model.Filters; @@ -25,6 +23,7 @@ import org.bson.BsonDocument; import org.bson.Document; import org.bson.conversions.Bson; +import org.codehaus.jackson.map.ObjectMapper; import org.opencb.cellbase.core.api.key.ApiKeyStats; import org.opencb.cellbase.core.api.query.AbstractQuery; import org.opencb.cellbase.core.api.query.ProjectionQueryOptions; diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index 3b3e1b743c..691fc60712 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.1.0-SNAPSHOT + 5.8.2 ../pom.xml diff --git a/pom.xml b/pom.xml index 42f2e09323..b79658f109 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.1.0-SNAPSHOT + 5.8.2 pom CellBase project @@ -23,21 +23,18 @@ ${project.version} - 5.1.0-SNAPSHOT - 3.1.0-SNAPSHOT - + 4.12.0 + 2.12.1 0.1.0 - 9.4.51.v20230217 - - 2.14.3 - 3.14.0 - 1.7.36 - + 2.11.4 + 1.9.13 2.30.1 + 1.7.32 2.17.2 1.5.2 5.5.2 0.8.8 + 9.4.17.v20190418 0.11.5 1.6.5 3.1.0 @@ -53,6 +50,7 @@ 1.48.0 2.4 2.4 + 3.12.0 2.1.6 4.4 1.69 @@ -414,11 +412,11 @@ swagger-annotations ${swagger-annotations.version} - + io.jsonwebtoken jjwt-jackson From f95bf41c9b09a7bbb88818c5297d4908474c592c Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 5 Mar 2024 14:43:59 +0100 Subject: [PATCH 38/51] Prepare release 6.0.0 --- .../cloud/docker/cellbase-builder/Dockerfile | 2 +- .../app/scripts/ensembl-scripts/DB_CONFIG.pm | 8 +- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- .../core/config/DownloadProperties.java | 20 +++++ .../src/main/resources/configuration.yml | 74 ++++++++++++++----- cellbase-lib/pom.xml | 6 +- .../org/opencb/cellbase/lib/EtlCommons.java | 1 + .../cellbase/lib/builders/GeneBuilder.java | 4 +- .../lib/builders/OntologyBuilder.java | 9 +++ .../lib/download/GeneDownloadManager.java | 26 +++++++ .../lib/download/GenomeDownloadManager.java | 18 ++--- .../lib/download/OntologyDownloadManager.java | 10 ++- .../lib/download/ProteinDownloadManager.java | 34 +++++---- .../download/RegulationDownloadManager.java | 2 +- .../lib/impl/core/MetaMongoDBAdaptor.java | 3 +- cellbase-server/pom.xml | 2 +- pom.xml | 21 +++--- 19 files changed, 173 insertions(+), 73 deletions(-) diff --git a/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile b/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile index 17d5accff4..6e1657d1bf 100644 --- a/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile +++ b/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile @@ -28,4 +28,4 @@ RUN cd /opt/ensembl && \ git clone https://github.com/Ensembl/ensembl-compara.git && \ git clone https://github.com/Ensembl/ensembl-io.git -ENV PERL5LIB=$PERL5LIB:/opt/ensembl/bioperl-live:/opt/ensembl/ensembl/modules:/opt/ensembl/ensembl-variation/modules:/opt/ensembl/ensembl-funcgen/modules:/opt/ensembl/ensembl-compara/modules:/opt/ensembl/lib/perl/5.18.2:/opt/cellbase +ENV PERL5LIB=$PERL5LIB:/opt/ensembl/bioperl-live:/opt/ensembl/ensembl/modules:/opt/ensembl/ensembl-variation/modules:/opt/ensembl/ensembl-funcgen/modules:/opt/ensembl/ensembl-compara/modules:/opt/ensembl/lib/perl/5.18.2:/opt/cellbase/scripts/ensembl-scripts diff --git a/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm b/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm index aa22cf10b1..70865465e9 100755 --- a/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm +++ b/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm @@ -134,10 +134,10 @@ our $ENSEMBL_GENOMES_PORT = "4157"; our $ENSEMBL_GENOMES_USER = "anonymous"; ## Vertebrates -our $HOMO_SAPIENS_CORE = "homo_sapiens_core_104_38"; -our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_104_38"; -our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_104_38"; -our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_104_38"; +our $HOMO_SAPIENS_CORE = "homo_sapiens_core_110_38"; +our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_110_38"; +our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_110_38"; +our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_110_38"; #our $HOMO_SAPIENS_CORE = "homo_sapiens_core_78_38"; #our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_78_38"; #our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_78_38"; diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index 289046b4cc..adf59c092a 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.2 + 6.0.0-SNAPSHOT ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index ce235ddfa6..977b4b81cd 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.2 + 6.0.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index 3e68b37513..8fcd89455f 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.2 + 6.0.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java index ee4216f560..19f1606c91 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java @@ -26,6 +26,7 @@ public class DownloadProperties { private EnsemblProperties ensembl; private EnsemblProperties ensemblGenomes; private URLProperties hgnc; + private URLProperties cancerHotspot; private URLProperties refSeq; private URLProperties refSeqFasta; private URLProperties refSeqProteinFasta; @@ -70,6 +71,7 @@ public class DownloadProperties { private URLProperties hpoObo; private URLProperties goObo; private URLProperties doidObo; + private URLProperties mondoObo; private URLProperties goAnnotation; private URLProperties revel; private URLProperties pubmed; @@ -517,6 +519,24 @@ public DownloadProperties setHgnc(URLProperties hgnc) { return this; } + public URLProperties getCancerHotspot() { + return cancerHotspot; + } + + public DownloadProperties setCancerHotspot(URLProperties cancerHotspot) { + this.cancerHotspot = cancerHotspot; + return this; + } + + public URLProperties getMondoObo() { + return mondoObo; + } + + public DownloadProperties setMondoObo(URLProperties mondoObo) { + this.mondoObo = mondoObo; + return this; + } + public static class EnsemblProperties { private DatabaseCredentials database; diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index 0f8d199118..f24827532c 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -62,7 +62,11 @@ download: url: host: ftp://ftp.ensemblgenomes.org/pub hgnc: - host: https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2022-01-01.txt + host: https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2023-11-01.txt + version: 2023-11-01 + cancerHotspot: + host: https://www.cancerhotspots.org/files/hotspots_v2.xls + version: "v2" refSeq: host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gtf.gz refSeqFasta: @@ -73,12 +77,15 @@ download: host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_rna.fna.gz maneSelect: # host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_0.93/MANE.GRCh38.v0.93.summary.txt.gz - host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.0/MANE.GRCh38.v1.0.summary.txt.gz - version: 0.93 +# host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.0/MANE.GRCh38.v1.0.summary.txt.gz + host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.1/MANE.GRCh38.v1.1.summary.txt.gz + version: "1.1" lrg: host: http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt + version: "2021-03-30" geneUniprotXref: host: http://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/ + version: "2023-11-08" geneExpressionAtlas: host: ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/gxa/allgenes_updown_in_organism_part_2.0.14.tab.gz mirbase: @@ -88,33 +95,49 @@ download: targetScan: host: http://hgdownload.cse.ucsc.edu/goldenPath/ miRTarBase: - host: https://mirtarbase.cuhk.edu.cn/~miRTarBase/miRTarBase_2022/cache/download/8.0/hsa_MTI.xlsx + host: https://mirtarbase.cuhk.edu.cn/~miRTarBase/miRTarBase_2022/cache/download/9.0/hsa_MTI.xlsx + version: "9.0" + + ## Protein Data uniprot: - host: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz + host: https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz + version: "2023-11-08" uniprotRelNotes: - host: ftp://ftp.uniprot.org/pub/databases/uniprot/relnotes.txt - intact: - host: ftp://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.txt + host: https://ftp.uniprot.org/pub/databases/uniprot/relnotes.txt + version: "2023-11-08" interpro: - host: ftp://ftp.ebi.ac.uk/pub/databases/interpro/current/protein2ipr.dat.gz + host: https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/protein2ipr.dat.gz + version: "2023-11-08" interproRelNotes: - host: ftp://ftp.ebi.ac.uk/pub/databases/interpro/current/release_notes.txt + host: https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/release_notes.txt + intact: + host: https://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.txt + version: "2023-10-07" + + ## Conservation Scores conservation: host: https://hgdownload.cse.ucsc.edu/goldenPath/ + version: "2022-08-30" gerp: - host: http://ftp.ensembl.org/pub/release-104/compara/conservation_scores/90_mammals.gerp_conservation_score/gerp_conservation_scores.homo_sapiens.GRCh38.bw + host: http://ftp.ensembl.org/pub/release-110/compara/conservation_scores/91_mammals.gerp_conservation_score/gerp_conservation_scores.homo_sapiens.GRCh38.bw + version: "2023-05-17" clinvar: # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2021-07.xml.gz # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-02.xml.gz - host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-11.xml.gz +# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-11.xml.gz + host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2023-12.xml.gz + version: "2023-12-01" clinvarVariation: # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2021-07.xml.gz # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-02.xml.gz - host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-11.xml.gz +# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-11.xml.gz + host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2023-12.xml.gz clinvarSummary: host: http://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz + version: "2023-12-01" clinvarVariationAllele: host: http://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variation_allele.txt.gz + version: "2023-12-01" clinvarEfoTerms: host: ftp://ftp.ebi.ac.uk/pub/databases/eva/ClinVar/2015/ClinVar_Traits_EFO_Names_260615.csv iarctp53: @@ -132,9 +155,12 @@ download: genomicSuperDups: host: http://hgdownload.cse.ucsc.edu/goldenPath gwasCatalog: - host: http://resources.opencb.org/opencb/cellbase/data/gwas/gwas_catalog_v1.0.2-associations_e106_r2022-05-17.tsv - version: "1.0.2 associations_e106_r2022-05-17" +# host: http://resources.opencb.org/opencb/cellbase/data/gwas/gwas_catalog_v1.0.2-associations_e106_r2022-05-17.tsv +# version: "1.0.2 associations_e106_r2022-05-17" + host: ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/2023/12/21/gwas-catalog-associations.tsv + version: "23-12-21" hpo: + ## Downlaod manually from here now: https://hpo.jax.org/app/data/annotations host: https://ci.monarchinitiative.org/view/hpo/job/hpo.annotations/lastSuccessfulBuild/artifact/rare-diseases/util/annotation/phenotype_to_genes.txt disgenet: host: https://www.disgenet.org/static/disgenet_ap1/files/downloads @@ -142,20 +168,30 @@ download: - all_gene_disease_associations.tsv.gz - readme.txt dgidb: - host: https://dgidb.org/data/monthly_tsvs/2021-Jan/interactions.tsv + host: https://old.dgidb.org/data/monthly_tsvs/2022-Feb/interactions.tsv + version: "2022-02-01" cadd: - host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz + ## Nacho: Move to https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz ASAP! +# host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz + host: https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz + version: "1.7-pre" reactome: host: http://www.reactome.org/download/current/biopax.zip gnomadConstraints: host: https://storage.googleapis.com/gcp-public-data--gnomad/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_transcript.txt.bgz - version: 2.1.1 + version: "2.1.1" hpoObo: host: http://purl.obolibrary.org/obo/hp.obo + version: "2023-12-01" goObo: host: http://purl.obolibrary.org/obo/go/go-basic.obo + version: "2023-12-01" doidObo: host: http://purl.obolibrary.org/obo/doid.obo + version: "2023-12-01" + mondoObo: + host: http://purl.obolibrary.org/obo/mondo.obo + version: "2023-12-01" goAnnotation: host: http://geneontology.org/gene-associations/goa_human.gaf.gz revel: @@ -182,7 +218,7 @@ species: - id: hsapiens scientificName: Homo sapiens assemblies: - - ensemblVersion: '104_38' + - ensemblVersion: '110_38' name: GRCh38 - ensemblVersion: '82_37' name: GRCh37 diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index ffb98718f8..59fb65dfda 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.2 + 6.0.0-SNAPSHOT ../pom.xml @@ -137,10 +137,10 @@ com.github.samtools htsjdk - + io.jsonwebtoken jjwt-api diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java index 4396f0c2f1..124ac6e6fc 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java @@ -71,6 +71,7 @@ public class EtlCommons { public static final String HPO_FILE = "hp.obo"; public static final String GO_FILE = "go-basic.obo"; public static final String DOID_FILE = "doid.obo"; + public static final String MONDO_FILE = "mondo.obo"; public static final String PFM_DATA = "regulatory_pfm"; // Build specific data options diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java index 563f76dea7..cd0863a259 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java @@ -90,8 +90,8 @@ public GeneBuilder(Path geneDirectoryPath, Path genomeSequenceFastaFile, Species boolean flexibleGTFParsing, CellBaseSerializer serializer) throws CellBaseException { this(null, geneDirectoryPath.resolve("description.txt"), geneDirectoryPath.resolve("xrefs.txt"), - geneDirectoryPath.resolve("hgnc_complete_set_2022-01-01.txt"), - geneDirectoryPath.resolve("MANE.GRCh38.v1.0.summary.txt.gz"), + geneDirectoryPath.resolve("hgnc_complete_set_2023-11-01.txt"), + geneDirectoryPath.resolve("MANE.GRCh38.v1.1.summary.txt.gz"), geneDirectoryPath.resolve("list_LRGs_transcripts_xrefs.txt"), geneDirectoryPath.resolve("idmapping_selected.tab.gz"), geneDirectoryPath.getParent().resolve("regulation/motif_features.gff.gz"), diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java index 8873dd7f93..1eabf8975a 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java @@ -32,12 +32,14 @@ public class OntologyBuilder extends CellBaseBuilder { private Path hpoFile; private Path goFile; private Path doidFile; + private Path mondoFile; public OntologyBuilder(Path oboDirectoryPath, CellBaseSerializer serializer) { super(serializer); hpoFile = oboDirectoryPath.resolve(EtlCommons.HPO_FILE); goFile = oboDirectoryPath.resolve(EtlCommons.GO_FILE); doidFile = oboDirectoryPath.resolve(EtlCommons.DOID_FILE); + mondoFile = oboDirectoryPath.resolve(EtlCommons.MONDO_FILE); } @Override @@ -64,6 +66,13 @@ public void parse() throws Exception { serializer.serialize(term); } + bufferedReader = FileUtils.newBufferedReader(mondoFile); + terms = parser.parseOBO(bufferedReader, "Mondo Ontology"); + for (OntologyTerm term : terms) { + term.setSource("MONDO"); + serializer.serialize(term); + } + serializer.close(); } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java index 9d2685eadf..260ff75427 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java @@ -82,6 +82,8 @@ public List download() throws IOException, InterruptedException { downloadFiles.addAll(downloadRefSeq(refseqFolder)); downloadFiles.add(downloadMane(geneFolder)); downloadFiles.add(downloadLrg(geneFolder)); + downloadFiles.add(downloadHgnc(geneFolder)); + downloadFiles.add(downloadCancerHotspot(geneFolder)); downloadFiles.add(downloadDrugData(geneFolder)); downloadFiles.addAll(downloadGeneUniprotXref(geneFolder)); downloadFiles.add(downloadGeneExpressionAtlas(geneFolder)); @@ -208,6 +210,30 @@ private DownloadFile downloadLrg(Path geneFolder) throws IOException, Interrupte return null; } + private DownloadFile downloadHgnc(Path geneFolder) throws IOException, InterruptedException { + if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { + logger.info("Downloading HGNC ..."); + String url = configuration.getDownload().getHgnc().getHost(); + saveVersionData(EtlCommons.GENE_DATA, "HGNC_GENE", configuration.getDownload().getHgnc().getVersion(), + getTimeStamp(), Collections.singletonList(url), geneFolder.resolve("hgncVersion.json")); + String[] array = url.split("/"); + return downloadFile(url, geneFolder.resolve(array[array.length - 1]).toString()); + } + return null; + } + + private DownloadFile downloadCancerHotspot(Path geneFolder) throws IOException, InterruptedException { + if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { + logger.info("Downloading Cancer Hotspot ..."); + String url = configuration.getDownload().getCancerHotspot().getHost(); + saveVersionData(EtlCommons.GENE_DATA, "CANCER_HOTSPOT", configuration.getDownload().getHgnc().getVersion(), + getTimeStamp(), Collections.singletonList(url), geneFolder.resolve("cancerHotspotVersion.json")); + String[] array = url.split("/"); + return downloadFile(url, geneFolder.resolve(array[array.length - 1]).toString()); + } + return null; + } + private DownloadFile downloadGO(Path geneFolder) throws IOException, InterruptedException { if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { logger.info("Downloading go annotation..."); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java index 5a0609867f..0ba9f39db4 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java @@ -47,11 +47,11 @@ public GenomeDownloadManager(String species, String assembly, Path targetDirecto public List download() throws IOException, InterruptedException { List downloadFiles = new ArrayList<>(); downloadFiles.addAll(downloadReferenceGenome()); -// downloadFiles.addAll(downloadConservation()); -// downloadFiles.addAll(downloadRepeats()); + downloadFiles.addAll(downloadConservation()); + downloadFiles.addAll(downloadRepeats()); // cytobands - runGenomeInfo(); +// runGenomeInfo(); return downloadFiles; } @@ -115,16 +115,16 @@ public List downloadConservation() throws IOException, Interrupted List phastconsUrls = new ArrayList<>(chromosomes.length); List phyloPUrls = new ArrayList<>(chromosomes.length); for (String chromosome : chromosomes) { - String phastConsUrl = url + "/phastCons100way/hg38.100way.phastCons/chr" + chromosome - + ".phastCons100way.wigFix.gz"; + String phastConsUrl = url + "/phastCons470way/hg38.470way.phastCons/chr" + chromosome + + ".phastCons470way.wigFix.gz"; downloadFiles.add(downloadFile(phastConsUrl, conservationFolder.resolve("phastCons") - .resolve("chr" + chromosome + ".phastCons100way.wigFix.gz").toString())); + .resolve("chr" + chromosome + ".phastCons470way.wigFix.gz").toString())); phastconsUrls.add(phastConsUrl); - String phyloPUrl = url + "/phyloP100way/hg38.100way.phyloP100way/chr" + chromosome - + ".phyloP100way.wigFix.gz"; + String phyloPUrl = url + "/phyloP470way/hg38.470way.phyloP/chr" + chromosome + + ".phyloP470way.wigFix.gz"; downloadFiles.add(downloadFile(phyloPUrl, conservationFolder.resolve("phylop") - .resolve("chr" + chromosome + ".phyloP100way.wigFix.gz").toString())); + .resolve("chr" + chromosome + ".phyloP470way.wigFix.gz").toString())); phyloPUrls.add(phyloPUrl); } String gerpUrl = configuration.getDownload().getGerp().getHost(); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/OntologyDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/OntologyDownloadManager.java index 0776354e80..522be7b27d 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/OntologyDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/OntologyDownloadManager.java @@ -36,7 +36,7 @@ public OntologyDownloadManager(String species, String assembly, Path targetDirec public List download() throws IOException, InterruptedException { - logger.info("Downloading obo files ..."); + logger.info("Downloading OBO files ..."); List downloadFiles = new ArrayList<>(); Path oboFolder = downloadFolder.resolve("ontology"); @@ -44,22 +44,24 @@ public List download() throws IOException, InterruptedException { String url = configuration.getDownload().getHpoObo().getHost(); downloadFiles.add(downloadFile(url, oboFolder.resolve("hp.obo").toString())); - saveVersionData(EtlCommons.OBO_DATA, "HPO", getTimeStamp(), getTimeStamp(), Collections.singletonList(url), buildFolder.resolve(EtlCommons.HPO_VERSION_FILE)); url = configuration.getDownload().getGoObo().getHost(); downloadFiles.add(downloadFile(url, oboFolder.resolve("go-basic.obo").toString())); - saveVersionData(EtlCommons.OBO_DATA, "GO", getTimeStamp(), getTimeStamp(), Collections.singletonList(url), buildFolder.resolve(EtlCommons.GO_VERSION_FILE)); url = configuration.getDownload().getDoidObo().getHost(); downloadFiles.add(downloadFile(url, oboFolder.resolve("doid.obo").toString())); - saveVersionData(EtlCommons.OBO_DATA, "DO", getTimeStamp(), getTimeStamp(), Collections.singletonList(url), buildFolder.resolve(EtlCommons.DO_VERSION_FILE)); + url = configuration.getDownload().getMondoObo().getHost(); + downloadFiles.add(downloadFile(url, oboFolder.resolve("mondo.obo").toString())); + saveVersionData(EtlCommons.OBO_DATA, "MONDO", getTimeStamp(), getTimeStamp(), + Collections.singletonList(url), buildFolder.resolve(EtlCommons.DO_VERSION_FILE)); + return downloadFiles; } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ProteinDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ProteinDownloadManager.java index 08f28cfdad..5a722ed448 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ProteinDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ProteinDownloadManager.java @@ -22,7 +22,6 @@ import org.opencb.commons.utils.FileUtils; import java.io.BufferedReader; -import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintWriter; import java.nio.file.Files; @@ -34,6 +33,8 @@ public class ProteinDownloadManager extends AbstractDownloadManager { private static final String UNIPROT_NAME = "UniProt"; + private static final String INTERPRO_NAME = "InterPro"; + private static final String INTACT_NAME = "IntAct"; public ProteinDownloadManager(String species, String assembly, Path targetDirectory, CellBaseConfiguration configuration) throws IOException, CellBaseException { @@ -56,6 +57,7 @@ public List download() throws IOException, InterruptedException { Files.createDirectories(proteinFolder); List downloadFiles = new ArrayList<>(); + // Uniprot String url = configuration.getDownload().getUniprot().getHost(); downloadFiles.add(downloadFile(url, proteinFolder.resolve("uniprot_sprot.xml.gz").toString())); Files.createDirectories(proteinFolder.resolve("uniprot_chunks")); @@ -63,23 +65,25 @@ public List download() throws IOException, InterruptedException { String relNotesUrl = configuration.getDownload().getUniprotRelNotes().getHost(); downloadFiles.add(downloadFile(relNotesUrl, proteinFolder.resolve("uniprotRelnotes.txt").toString())); - saveVersionData(EtlCommons.PROTEIN_DATA, UNIPROT_NAME, getLine(proteinFolder.resolve("uniprotRelnotes.txt"), 1), getTimeStamp(), Collections.singletonList(url), proteinFolder.resolve("uniprotVersion.json")); - return downloadFiles; + // Interpro + String interproUrl = configuration.getDownload().getInterpro().getHost(); + downloadFiles.add(downloadFile(interproUrl, proteinFolder.resolve("protein2ipr.dat.gz").toString())); + + relNotesUrl = configuration.getDownload().getInterproRelNotes().getHost(); + downloadFiles.add(downloadFile(relNotesUrl, proteinFolder.resolve("interproRelnotes.txt").toString())); + saveVersionData(EtlCommons.PROTEIN_DATA, INTERPRO_NAME, getLine(proteinFolder.resolve("interproRelnotes.txt"), 5), + getTimeStamp(), Collections.singletonList(interproUrl), proteinFolder.resolve("interproVersion.json")); -// url = configuration.getDownload().getIntact().getHost(); -// downloadFile(url, proteinFolder.resolve("intact.txt").toString()); -// saveVersionData(EtlCommons.PROTEIN_DATA, INTACT_NAME, null, getTimeStamp(), Collections.singletonList(url), -// proteinFolder.resolve("intactVersion.json")); -// -// url = configuration.getDownload().getInterpro().getHost(); -// downloadFile(url, proteinFolder.resolve("protein2ipr.dat.gz").toString()); -// relNotesUrl = configuration.getDownload().getInterproRelNotes().getHost(); -// downloadFile(relNotesUrl, proteinFolder.resolve("interproRelnotes.txt").toString()); -// saveVersionData(EtlCommons.PROTEIN_DATA, INTERPRO_NAME, getLine(proteinFolder.resolve("interproRelnotes.txt"), 5), -// getTimeStamp(), Collections.singletonList(url), proteinFolder.resolve("interproVersion.json")); + // Intact + String intactUrl = configuration.getDownload().getIntact().getHost(); + downloadFiles.add(downloadFile(intactUrl, proteinFolder.resolve("intact.txt").toString())); + saveVersionData(EtlCommons.PROTEIN_DATA, INTACT_NAME, configuration.getDownload().getIntact().getVersion(), + getTimeStamp(), Collections.singletonList(intactUrl), proteinFolder.resolve("intactVersion.json")); + + return downloadFiles; } private void splitUniprot(Path uniprotFilePath, Path splitOutdirPath) throws IOException { @@ -96,7 +100,7 @@ private void splitUniprot(Path uniprotFilePath, Path splitOutdirPath) throws IOE inEntry = true; beforeEntry = false; if (count % 10000 == 0) { - pw = new PrintWriter(new FileOutputStream(splitOutdirPath.resolve("chunk_" + chunk + ".xml").toFile())); + pw = new PrintWriter(Files.newOutputStream(splitOutdirPath.resolve("chunk_" + chunk + ".xml").toFile().toPath())); pw.println(header.toString().trim()); } count++; diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java index 1abb352fbe..51152e478d 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java @@ -64,8 +64,8 @@ public List download() throws IOException, InterruptedException, N List downloadFiles = new ArrayList<>(); downloadFiles.addAll(downloadRegulatoryaAndMotifFeatures()); - downloadFiles.add(downloadMirna()); downloadFiles.add(downloadMiRTarBase()); + downloadFiles.add(downloadMirna()); return downloadFiles; } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MetaMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MetaMongoDBAdaptor.java index e5cd4d38cc..5d7dbc65d0 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MetaMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MetaMongoDBAdaptor.java @@ -16,6 +16,8 @@ package org.opencb.cellbase.lib.impl.core; + +import com.fasterxml.jackson.databind.ObjectMapper; import com.mongodb.ReadPreference; import com.mongodb.WriteConcern; import com.mongodb.client.model.Filters; @@ -23,7 +25,6 @@ import org.bson.BsonDocument; import org.bson.Document; import org.bson.conversions.Bson; -import org.codehaus.jackson.map.ObjectMapper; import org.opencb.cellbase.core.api.key.ApiKeyStats; import org.opencb.cellbase.core.api.query.AbstractQuery; import org.opencb.cellbase.core.api.query.ProjectionQueryOptions; diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index 691fc60712..71cdea12ee 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.2 + 6.0.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index b79658f109..bc06bbd099 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.2 + 6.0.0-SNAPSHOT pom CellBase project @@ -23,18 +23,20 @@ ${project.version} - 4.12.0 - 2.12.1 + 5.0.0-SNAPSHOT + 3.0.0-SNAPSHOT 0.1.0 - 2.11.4 - 1.9.13 + 9.4.51.v20230217 + + 2.14.3 + 3.14.0 + 1.7.36 + 2.30.1 - 1.7.32 2.17.2 1.5.2 5.5.2 0.8.8 - 9.4.17.v20190418 0.11.5 1.6.5 3.1.0 @@ -50,7 +52,6 @@ 1.48.0 2.4 2.4 - 3.12.0 2.1.6 4.4 1.69 @@ -412,11 +413,11 @@ swagger-annotations ${swagger-annotations.version} - + io.jsonwebtoken jjwt-jackson From e8999521869c8cbcbdc745447d0cc3d783b3100d Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Thu, 7 Mar 2024 18:15:08 +0100 Subject: [PATCH 39/51] Prepare release 6.0.0 --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index adf59c092a..d7b8c8a47e 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 977b4b81cd..566e1b40e8 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index 8fcd89455f..b628de3b21 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 59fb65dfda..dd087fb692 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index 71cdea12ee..d5e995998f 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 ../pom.xml diff --git a/pom.xml b/pom.xml index bc06bbd099..ebe4aeb5f5 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0-SNAPSHOT + 6.0.0 pom CellBase project @@ -23,8 +23,8 @@ ${project.version} - 5.0.0-SNAPSHOT - 3.0.0-SNAPSHOT + 5.0.0 + 3.0.0 0.1.0 9.4.51.v20230217 From 596816d9283070bee5d626cb96f5877207083657 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Thu, 7 Mar 2024 18:15:48 +0100 Subject: [PATCH 40/51] Prepare new development branch release-6.0.x --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index d7b8c8a47e..999f545d10 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0 + 6.1.0-SNAPSHOT ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 566e1b40e8..fdc2ece46b 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0 + 6.1.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index b628de3b21..d711a053f8 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0 + 6.1.0-SNAPSHOT ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index dd087fb692..71964bb36e 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0 + 6.1.0-SNAPSHOT ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index d5e995998f..3b3e1b743c 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0 + 6.1.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index ebe4aeb5f5..95ffb702c6 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.0.0 + 6.1.0-SNAPSHOT pom CellBase project @@ -23,8 +23,8 @@ ${project.version} - 5.0.0 - 3.0.0 + 5.1.0-SNAPSHOT + 3.1.0-SNAPSHOT 0.1.0 9.4.51.v20230217 From 0ce422e8297e49aba8f893632b60ad8de25babfb Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Fri, 17 May 2024 13:52:41 +0200 Subject: [PATCH 41/51] cicd: create temporal manual cicd Junit test #TASK-6252 --- .github/workflows/manual-test.yml | 44 +++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 .github/workflows/manual-test.yml diff --git a/.github/workflows/manual-test.yml b/.github/workflows/manual-test.yml new file mode 100644 index 0000000000..e57ff55328 --- /dev/null +++ b/.github/workflows/manual-test.yml @@ -0,0 +1,44 @@ +name: Manual Junit test the project + +on: + workflow_dispatch: + inputs: + branch: + description: 'Opencga branch to run the tests' + default: 'develop' + required: true + +jobs: + test: + name: Test and push Sonar analysis + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ github.event.inputs.branch }} + fetch-depth: '0' + - name: Set up JDK 11 + uses: actions/setup-java@v3 + with: + distribution: 'temurin' + java-version: '11' + cache: 'maven' + - name: K8s Tunnel MongoDB + run: | + wget https://dl.k8s.io/release/v1.28.2/bin/linux/amd64/kubectl + chmod +x ./kubectl + echo "${{ secrets.AZURE_KUBE_CONFIG }}" > admin.conf + ./kubectl -n cellbase-db port-forward services/cellbase-rs0-svc 27017:27017 --kubeconfig ./admin.conf & + - name: Install dependencies branches + run: | + if [ -f "./.github/workflows/scripts/get_same_branch.sh" ]; then + chmod +x ./.github/workflows/scripts/get_same_branch.sh + ./.github/workflows/scripts/get_same_branch.sh ${{ github.ref_name }} + else + echo "./.github/workflows/scripts/get_same_branch.sh does not exist." + fi + - name: Test and Analyze + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information, if any + SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} + run: mvn -B verify surefire-report:report --fail-never From e669e5f82cad7fdd7a1126a93f533cd9f854da1d Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Fri, 17 May 2024 13:55:16 +0200 Subject: [PATCH 42/51] cicd: change name to Junit test #TASK-6252 --- .github/workflows/manual-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/manual-test.yml b/.github/workflows/manual-test.yml index e57ff55328..dd6fcad792 100644 --- a/.github/workflows/manual-test.yml +++ b/.github/workflows/manual-test.yml @@ -10,7 +10,7 @@ on: jobs: test: - name: Test and push Sonar analysis + name: JUnit Test runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v3 From 572963bdf42c17a1a8be8bf7e8f5bca8dede4e49 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Fri, 17 May 2024 14:08:18 +0200 Subject: [PATCH 43/51] cicd: Added fail never optionally #TASK-6252 --- .github/workflows/manual-test.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/manual-test.yml b/.github/workflows/manual-test.yml index dd6fcad792..26ace545fa 100644 --- a/.github/workflows/manual-test.yml +++ b/.github/workflows/manual-test.yml @@ -7,6 +7,11 @@ on: description: 'Opencga branch to run the tests' default: 'develop' required: true + fail-never: + type: boolean + description: 'The process executes all tests even if some fail.' + default: false + required: false jobs: test: @@ -41,4 +46,8 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information, if any SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} - run: mvn -B verify surefire-report:report --fail-never + run: | + if [ "${{ github.event.inputs.fail-never }}" == "true" ]; then + FAIL_NEVER="--fail-never" + fi + mvn install surefire-report:report ${FAIL_NEVER} -Dcheckstyle.skip \ No newline at end of file From a19e7bcea8eec80a198b59667c19e63b7170f363 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Fri, 17 May 2024 14:12:03 +0200 Subject: [PATCH 44/51] cicd: Added fail never optionally #TASK-6252 --- .github/workflows/manual-test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/manual-test.yml b/.github/workflows/manual-test.yml index 26ace545fa..b77df41654 100644 --- a/.github/workflows/manual-test.yml +++ b/.github/workflows/manual-test.yml @@ -47,6 +47,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information, if any SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} run: | + FAIL_NEVER="" if [ "${{ github.event.inputs.fail-never }}" == "true" ]; then FAIL_NEVER="--fail-never" fi From b4c171c1c20a494df0671a2f52924b89d3daf3a7 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 21 May 2024 11:19:55 +0200 Subject: [PATCH 45/51] Prepare release 6.1.0 --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index 999f545d10..98f2b62ac1 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.1.0-SNAPSHOT + 6.1.0 ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index fdc2ece46b..5309364ef8 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.1.0-SNAPSHOT + 6.1.0 ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index d711a053f8..df3600043d 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.1.0-SNAPSHOT + 6.1.0 ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 71964bb36e..e847899db9 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.1.0-SNAPSHOT + 6.1.0 ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index 3b3e1b743c..c2cfdd997d 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.1.0-SNAPSHOT + 6.1.0 ../pom.xml diff --git a/pom.xml b/pom.xml index 95ffb702c6..e40c5e5cde 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.1.0-SNAPSHOT + 6.1.0 pom CellBase project @@ -23,8 +23,8 @@ ${project.version} - 5.1.0-SNAPSHOT - 3.1.0-SNAPSHOT + 5.1.0 + 3.1.0 0.1.0 9.4.51.v20230217 From ae0a55a00d0a36753857b392c5f54082a5cfd34a Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 21 May 2024 11:20:32 +0200 Subject: [PATCH 46/51] Prepare new development branch release-6.1.x --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index 98f2b62ac1..436410b49c 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.1.0 + 6.2.0-SNAPSHOT ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 5309364ef8..cc7940bfc7 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.1.0 + 6.2.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index df3600043d..612fa69673 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.1.0 + 6.2.0-SNAPSHOT ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index e847899db9..64c5a27a22 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.1.0 + 6.2.0-SNAPSHOT ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index c2cfdd997d..0ab3b2bd09 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.1.0 + 6.2.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index e40c5e5cde..0f729c9d0d 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.1.0 + 6.2.0-SNAPSHOT pom CellBase project @@ -23,8 +23,8 @@ ${project.version} - 5.1.0 - 3.1.0 + 5.2.0-SNAPSHOT + 3.2.0-SNAPSHOT 0.1.0 9.4.51.v20230217 From 71cfd27015a7c044470db9543ba22dc9a0909801 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Wed, 29 May 2024 09:55:21 +0200 Subject: [PATCH 47/51] test: modify the file configuration.test.yaml used by the JUnit tets to update it with CellBase MongoDB configuration specified from the command line On branch TASK-6026 Changes to be committed: modified: cellbase-lib/pom.xml modified: cellbase-lib/src/test/resources/configuration.test.yaml modified: pom.xml --- cellbase-lib/pom.xml | 18 +++++++++++++ .../test/resources/configuration.test.yaml | 26 +++++++++---------- pom.xml | 9 +++++++ 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 64c5a27a22..bfb68c86c0 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -187,4 +187,22 @@ + + + + + src/test/resources + + configuration.test.yaml + + + + src/test/resources + + configuration.test.yaml + + true + + + diff --git a/cellbase-lib/src/test/resources/configuration.test.yaml b/cellbase-lib/src/test/resources/configuration.test.yaml index 1322d2fa52..e1750e7b4b 100644 --- a/cellbase-lib/src/test/resources/configuration.test.yaml +++ b/cellbase-lib/src/test/resources/configuration.test.yaml @@ -7,24 +7,24 @@ maintainerContact: joaquin.tarraga@zettagenomics.com secretKey: "xPacig89igHSieEnveJEi4KCfdEslhmssC3vui1JJQGgDQ0y8v" databases: mongodb: - host: localhost:27017 - user: 'cellbase' - password: 'cellbase' + host: "${JUNIT.CELLBASE.DB.MONGODB.HOST}" + user: "${JUNIT.CELLBASE.DB.USER}" + password: "${JUNIT.CELLBASE.DB.PASSWORD}" options: - authenticationDatabase: 'admin' - authenticationMechanism: 'SCRAM-SHA-256' + authenticationDatabase: "${JUNIT.CELLBASE.DB.MONGODB.AUTHENTICATIONDATABASE}" + authenticationMechanism: "${JUNIT.CELLBASE.DB.MONGODB.AUTHENTICATION_MECHANISM}" readPreference: '' replicaSet: '' connectionsPerHost: 20 neo4j: hsapiens: - host: "${CELLBASE.DB.NEO4J.HOST}" - user: "${CELLBASE.DB.USER}" - password: "${CELLBASE.DB.PASSWORD}" + host: "${JUNIT.CELLBASE.DB.NEO4J.HOST}" + user: "${JUNIT.CELLBASE.DB.USER}" + password: "${JUNIT.CELLBASE.DB.PASSWORD}" mmusculus: - host: "${CELLBASE.DB.NEO4J.HOST}" - user: "${CELLBASE.DB.USER}" - password: "${CELLBASE.DB.PASSWORD}" + host: "${JUNIT.CELLBASE.DB.NEO4J.HOST}" + user: "${JUNIT.CELLBASE.DB.USER}" + password: "${JUNIT.CELLBASE.DB.PASSWORD}" defaultOutdir: "/tmp" download: ensembl: @@ -32,7 +32,7 @@ download: host: ensembldb.ensembl.org:3306 user: anonymous password: '' - libs: "${CELLBASE.ENSEMBL.LIBS}" + libs: "${JUNIT.CELLBASE.ENSEMBL.LIBS}" url: host: ftp://ftp.ensembl.org/pub ensemblGenomes: @@ -40,7 +40,7 @@ download: host: mysql-eg-publicsql.ebi.ac.uk:4157 user: anonymous password: '' - libs: "${CELLBASE.ENSEMBL.LIBS}" + libs: "${JUNIT.CELLBASE.ENSEMBL.LIBS}" url: host: ftp://ftp.ensemblgenomes.org/pub geneUniprotXref: diff --git a/pom.xml b/pom.xml index 0f729c9d0d..61e1f1d7fa 100644 --- a/pom.xml +++ b/pom.xml @@ -615,6 +615,15 @@ secondaryPreferred 9090 + + + localhost:27017 + cellbase + cellbase + admin + SCRAM-SHA-256 From 47f104d6d2674e7b3256f275bb5fb3dbf3213d95 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Thu, 30 May 2024 17:59:01 +0200 Subject: [PATCH 48/51] cicd: Update action version to test for compatibility with test and release process #TASK-6264 --- .github/workflows/manual-test.yml | 4 ++-- .github/workflows/test-analysis.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/manual-test.yml b/.github/workflows/manual-test.yml index b77df41654..9c0ccc7085 100644 --- a/.github/workflows/manual-test.yml +++ b/.github/workflows/manual-test.yml @@ -18,12 +18,12 @@ jobs: name: JUnit Test runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: ref: ${{ github.event.inputs.branch }} fetch-depth: '0' - name: Set up JDK 11 - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: 'temurin' java-version: '11' diff --git a/.github/workflows/test-analysis.yml b/.github/workflows/test-analysis.yml index 413f274921..7725528041 100644 --- a/.github/workflows/test-analysis.yml +++ b/.github/workflows/test-analysis.yml @@ -11,11 +11,11 @@ jobs: name: Test and push Sonar analysis runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: '0' - name: Set up JDK 11 - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: 'temurin' java-version: '11' From d0ad892c5a8818f5bc6e1358030a1532063af79b Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Fri, 14 Jun 2024 09:33:32 +0200 Subject: [PATCH 49/51] cicd: Pull request approve workflow #TASK-4970 --- .github/workflows/pull-request-approved.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 .github/workflows/pull-request-approved.yml diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml new file mode 100644 index 0000000000..eb410c9cba --- /dev/null +++ b/.github/workflows/pull-request-approved.yml @@ -0,0 +1,15 @@ +name: Pull request approve workflow + +on: + pull_request_review: + types: [ submitted ] + +jobs: + build: + uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop + + test: + name: "Test analysis" + uses: ./.github/workflows/test-analysis.yml + needs: build + secrets: inherit From 8b754fc4ad4ddcb05af79297b3519ea3d9380428 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 17 Jul 2024 10:54:46 +0200 Subject: [PATCH 50/51] Prepare release 6.2.0 --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index 436410b49c..cfd8a7de30 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.2.0-SNAPSHOT + 6.2.0 ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index cc7940bfc7..47a5c7ca9a 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.2.0-SNAPSHOT + 6.2.0 ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index 612fa69673..5eca656e8d 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.2.0-SNAPSHOT + 6.2.0 ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index bfb68c86c0..c9bb925ee6 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.2.0-SNAPSHOT + 6.2.0 ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index 0ab3b2bd09..aaa2015e0b 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.2.0-SNAPSHOT + 6.2.0 ../pom.xml diff --git a/pom.xml b/pom.xml index 61e1f1d7fa..be0613038d 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.2.0-SNAPSHOT + 6.2.0 pom CellBase project @@ -23,8 +23,8 @@ ${project.version} - 5.2.0-SNAPSHOT - 3.2.0-SNAPSHOT + 5.2.0 + 3.2.0 0.1.0 9.4.51.v20230217 From 25b9dd33ff527eb914b04307baade8456e4bae6b Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 17 Jul 2024 10:55:25 +0200 Subject: [PATCH 51/51] Prepare new development branch release-6.2.x --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index cfd8a7de30..aed90e9897 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.2.0 + 6.3.0-SNAPSHOT ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 47a5c7ca9a..7424c21bbb 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.2.0 + 6.3.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index 5eca656e8d..7c74e13d92 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.2.0 + 6.3.0-SNAPSHOT ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index c9bb925ee6..9b4dd7fbe0 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.2.0 + 6.3.0-SNAPSHOT ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index aaa2015e0b..fe4509c6fc 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.2.0 + 6.3.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index be0613038d..302ae6c6fc 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.2.0 + 6.3.0-SNAPSHOT pom CellBase project @@ -23,8 +23,8 @@ ${project.version} - 5.2.0 - 3.2.0 + 5.3.0-SNAPSHOT + 3.3.0-SNAPSHOT 0.1.0 9.4.51.v20230217