Skip to content

Commit

Permalink
lib: update configuration file to download the latest versions of the ClinVar and GWAS files, and update the Cosmic version, #TASK-6347
Browse files Browse the repository at this point in the history

On branch TASK-6347
Changes to be committed:
	modified:   cellbase-core/src/main/resources/configuration.yml
	modified:   cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/CosmicIndexer.java
	modified:   cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ClinicalDownloadManager.java
  • Loading branch information
jtarraga committed Jun 4, 2024
1 parent 9ab2fe6 commit 499407d
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 11 deletions.
8 changes: 6 additions & 2 deletions cellbase-core/src/main/resources/configuration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,15 @@ download:
clinvar:
# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2021-07.xml.gz
# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-02.xml.gz
host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-11.xml.gz
# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-11.xml.gz
host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/RCV_xml_old_format/ClinVarFullRelease_2024-05.xml.gz
version: 2024-05
clinvarVariation:
# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2021-07.xml.gz
# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-02.xml.gz
host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-11.xml.gz
# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-11.xml.gz
host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/VCV_xml_old_format/ClinVarVariationRelease_2024-05.xml.gz
version: 2024-05
clinvarSummary:
host: http://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz
clinvarVariationAllele:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public class CosmicIndexer extends ClinicalIndexer {
private Pattern mutationGRCh37GenomePositionPattern;
private Pattern snvPattern;

private static final String COSMIC_VERSION = "v95";
private static final String COSMIC_VERSION = "v99";

private static final int GENE_NAMES_COLUMN = 0;
private static final int HGNC_COLUMN = 3;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,10 @@ public List<DownloadFile> downloadClinical() throws IOException, InterruptedExce
url = configuration.getDownload().getClinvarVariationAllele().getHost();
downloadFiles.add(downloadFile(url, clinicalFolder.resolve(EtlCommons.CLINVAR_VARIATION_ALLELE_FILE).toString()));
clinvarUrls.add(url);
saveVersionData(EtlCommons.CLINICAL_VARIANTS_DATA, CLINVAR_NAME, getClinVarVersion(), getTimeStamp(), clinvarUrls,
clinicalFolder.resolve("clinvarVersion.json"));
saveVersionData(EtlCommons.CLINICAL_VARIANTS_DATA, CLINVAR_NAME, configuration.getDownload().getClinvar()
.getVersion(), getTimeStamp(), clinvarUrls, clinicalFolder.resolve("clinvarVersion.json"));

logger.info("\t\tDone");

// Gwas catalog
logger.info("\t\tDownloading GWAS catalog file ...");
Expand All @@ -91,6 +93,7 @@ public List<DownloadFile> downloadClinical() throws IOException, InterruptedExce
downloadFiles.add(downloadFile(url, clinicalFolder.resolve(EtlCommons.GWAS_FILE).toString()));
saveVersionData(EtlCommons.CLINICAL_VARIANTS_DATA, GWAS_NAME, gwasCatalog.getVersion(), getTimeStamp(),
Collections.singletonList(url), clinicalFolder.resolve("gwasVersion.json"));
logger.info("\t\tDone");

// List<String> hgvsList = getDocmHgvsList();
// if (!hgvsList.isEmpty()) {
Expand Down Expand Up @@ -236,10 +239,4 @@ private List<String> getDocmHgvsList() throws IOException {

return hgvsList;
}

private String getClinVarVersion() {
// ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2015-12.xml.gz
return configuration.getDownload().getClinvar().getHost().split("_")[1].split("\\.")[0];
}

}

0 comments on commit 499407d

Please sign in to comment.