Skip to content

Commit

Permalink
Rename get file name method (getUrlFilename → getFilenameFromUrl)
Browse files (browse the repository at this point in the history)
  • Loading branch information
imedina committed Apr 11, 2024
1 parent a3e9684 commit c7ad55d
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,10 @@ protected boolean speciesHasInfoToDownload(SpeciesConfiguration sp, String info)
}

protected DownloadFile downloadAndSaveDataSource(DownloadProperties.URLProperties props, String name, String category, String fileId,
String versionFilename, Path outPath)
throws IOException, InterruptedException {
String versionFilename, Path outPath) throws IOException, InterruptedException {
logger.info("Downloading {} ({}) file ...", name, category);
String url = props.getHost() + props.getFiles().get(fileId);
File outFile = outPath.resolve(getUrlFilename(url)).toFile();
File outFile = outPath.resolve(getFilenameFromUrl(url)).toFile();
logger.info(DOWNLOADING_LOG_MESSAGE, url, outFile);
DownloadFile downloadFile = downloadFile(url, outPath.toString());

Expand Down Expand Up @@ -270,12 +269,12 @@ private boolean validateDownloadFile(DownloadFile downloadFile, String outputFil

private long getExpectedFileSize(String outputFileLog) {
try (BufferedReader reader = new BufferedReader(new FileReader(outputFileLog))) {
String line = null;
String line;
while ((line = reader.readLine()) != null) {
// looking for: Length: 13846591 (13M)
if (line.startsWith("Length:")) {
String[] parts = line.split("\\s");
return Long.valueOf(parts[1]);
return Long.parseLong(parts[1]);
}
}
} catch (Exception e) {
Expand All @@ -294,7 +293,7 @@ private String getEnsemblURL(SpeciesConfiguration sp) {
}
}

protected String getUrlFilename(String url) {
protected String getFilenameFromUrl(String url) {
// Returns the last name element of the given URL string: the URL is parsed as a
// filesystem path with Paths.get and the final path element is converted to a String.
// NOTE(review): Paths.get applies the default filesystem's path syntax to the URL, so
// anything after the last '/' (e.g. a query string or fragment) would be kept in the
// result, and the "scheme://" part may be rejected as an invalid path on some
// platforms (presumably Windows, because of ':') - confirm against the configured URLs.
// NOTE(review): getFileName() can return null for a path with no name elements, which
// would make toString() throw - verify callers never pass such a value.
return Paths.get(url).getFileName().toString();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public List<DownloadFile> downloadClinical() throws IOException, InterruptedExce
for (String fileId : Arrays.asList(CLINVAR_FULL_RELEASE_FILE_ID, CLINVAR_SUMMARY_FILE_ID, CLINVAR_ALLELE_FILE_ID,
CLINVAR_EFO_TERMS_FILE_ID)) {
url = props.getHost() + props.getFiles().get(fileId);
outPath = clinicalFolder.resolve(getUrlFilename(url));
outPath = clinicalFolder.resolve(getFilenameFromUrl(url));
logger.info(DOWNLOADING_LOG_MESSAGE, url, outPath);
downloadFiles.add(downloadFile(url, outPath.toString()));
urls.add(url);
Expand All @@ -104,7 +104,7 @@ public List<DownloadFile> downloadClinical() throws IOException, InterruptedExce
Path chunksPath = clinicalFolder.resolve(CLINVAR_CHUNKS_SUBDIRECTORY);
if (Files.notExists(chunksPath)) {
Files.createDirectories(chunksPath);
Path clinvarPath = clinicalFolder.resolve(getUrlFilename(
Path clinvarPath = clinicalFolder.resolve(getFilenameFromUrl(
props.getHost() + props.getFiles().get(CLINVAR_FULL_RELEASE_FILE_ID)));
logger.info("Splitting {} in {} ...", clinvarPath, chunksPath);
splitClinvar(clinvarPath, chunksPath);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ private DownloadFile downloadRefSeqFile(String name, DownloadProperties.URLPrope
String versionFilename, Path refSeqFolder) throws IOException, InterruptedException {
String url = urlProperties.getHost();
String version = urlProperties.getVersion();
String filename = getUrlFilename(url);
String filename = getFilenameFromUrl(url);
Path outputPath = refSeqFolder.resolve(filename);
saveDataSource(EtlCommons.REFSEQ_DATA, name, version, timeStamp, Collections.singletonList(url),
refSeqFolder.resolve(versionFilename));
Expand All @@ -193,7 +193,7 @@ private DownloadFile downloadMane(Path geneFolder) throws IOException, Interrupt
saveDataSource(EtlCommons.GENE_DATA, MANE_SELECT_NAME, configuration.getDownload().getManeSelect().getVersion(),
getTimeStamp(), Collections.singletonList(url), geneFolder.resolve(MANE_SELECT_VERSION_FILENAME));

Path outputPath = geneFolder.resolve(getUrlFilename(url));
Path outputPath = geneFolder.resolve(getFilenameFromUrl(url));
logger.info(DOWNLOADING_LOG_MESSAGE, url, outputPath);
return downloadFile(url, outputPath.toString());
}
Expand All @@ -207,7 +207,7 @@ private DownloadFile downloadLrg(Path geneFolder) throws IOException, Interrupte
saveDataSource(EtlCommons.GENE_DATA, LRG_NAME, configuration.getDownload().getLrg().getVersion(),
getTimeStamp(), Collections.singletonList(url), geneFolder.resolve(LRG_VERSION_FILENAME));

Path outputPath = geneFolder.resolve(getUrlFilename(url));
Path outputPath = geneFolder.resolve(getFilenameFromUrl(url));
logger.info(DOWNLOADING_LOG_MESSAGE, url, outputPath);
return downloadFile(url, outputPath.toString());
}
Expand All @@ -221,7 +221,7 @@ private DownloadFile downloadHgnc(Path geneFolder) throws IOException, Interrupt
saveDataSource(GENE_DATA, HGNC_GENE_NAME, configuration.getDownload().getHgnc().getVersion(),
getTimeStamp(), Collections.singletonList(url), geneFolder.resolve(HGNC_VERSION_FILENAME));

Path outputPath = geneFolder.resolve(getUrlFilename(url));
Path outputPath = geneFolder.resolve(getFilenameFromUrl(url));
logger.info(DOWNLOADING_LOG_MESSAGE, url, outputPath);
return downloadFile(url, outputPath.toString());
}
Expand All @@ -235,7 +235,7 @@ private DownloadFile downloadCancerHotspot(Path geneFolder) throws IOException,
saveDataSource(EtlCommons.GENE_DATA, CANCER_HOTSPOT_NAME, configuration.getDownload().getHgnc().getVersion(),
getTimeStamp(), Collections.singletonList(url), geneFolder.resolve(CANCER_HOTSPOT_VERSION_FILENAME));

Path outputPath = geneFolder.resolve(getUrlFilename(url));
Path outputPath = geneFolder.resolve(getFilenameFromUrl(url));
logger.info(DOWNLOADING_LOG_MESSAGE, url, outputPath);
return downloadFile(url, outputPath.toString());
}
Expand All @@ -249,7 +249,7 @@ private DownloadFile downloadGO(Path geneFolder) throws IOException, Interrupted
saveDataSource(EtlCommons.GENE_DATA, GO_ANNOTATION_NAME, configuration.getDownload().getGoAnnotation().getVersion(),
getTimeStamp(), Collections.singletonList(url), geneFolder.resolve(GO_ANNOTATION_VERSION_FILENAME));

Path outputPath = geneFolder.resolve(getUrlFilename(url));
Path outputPath = geneFolder.resolve(getFilenameFromUrl(url));
logger.info(DOWNLOADING_LOG_MESSAGE, url, outputPath);
return downloadFile(url, outputPath.toString());
}
Expand All @@ -263,7 +263,7 @@ private DownloadFile downloadGnomadConstraints(Path geneFolder) throws IOExcepti
saveDataSource(EtlCommons.GENE_DATA, GNOMAD_NAME, configuration.getDownload().getGnomadConstraints().getVersion(),
getTimeStamp(), Collections.singletonList(url), geneFolder.resolve(GNOMAD_VERSION_FILENAME));

Path outputPath = geneFolder.resolve(getUrlFilename(url));
Path outputPath = geneFolder.resolve(getFilenameFromUrl(url));
logger.info(DOWNLOADING_LOG_MESSAGE, url, outputPath);
return downloadFile(url, outputPath.toString());
}
Expand All @@ -277,7 +277,7 @@ private DownloadFile downloadDrugData(Path geneFolder) throws IOException, Inter
saveDataSource(EtlCommons.GENE_DATA, DGIDB_NAME, configuration.getDownload().getDgidb().getVersion(), getTimeStamp(),
Collections.singletonList(url), geneFolder.resolve(DGIDB_VERSION_FILENAME));

Path outputPath = geneFolder.resolve(getUrlFilename(url));
Path outputPath = geneFolder.resolve(getFilenameFromUrl(url));
logger.info(DOWNLOADING_LOG_MESSAGE, url, outputPath);
return downloadFile(url, outputPath.toString());
}
Expand Down Expand Up @@ -309,7 +309,7 @@ private DownloadFile downloadGeneExpressionAtlas(Path geneFolder) throws IOExcep
saveDataSource(EtlCommons.GENE_DATA, GENE_EXPRESSION_ATLAS_NAME, configuration.getDownload().getGeneExpressionAtlas().getVersion(),
getTimeStamp(), Collections.singletonList(geneGtfUrl), geneFolder.resolve(GENE_EXPRESSION_ATLAS_VERSION_FILENAME));

Path outputPath = geneFolder.resolve(getUrlFilename(geneGtfUrl));
Path outputPath = geneFolder.resolve(getFilenameFromUrl(geneGtfUrl));
logger.info(DOWNLOADING_LOG_MESSAGE, geneGtfUrl, outputPath);
return downloadFile(geneGtfUrl, outputPath.toString());
}
Expand All @@ -325,7 +325,7 @@ private DownloadFile downloadGeneDiseaseAnnotation(Path geneFolder) throws IOExc
saveDataSource(EtlCommons.GENE_DISEASE_ASSOCIATION_DATA, DISGENET_NAME, configuration.getDownload().getDisgenet().getVersion(),
getTimeStamp(), Collections.singletonList(url), geneFolder.resolve(DISGINET_VERSION_FILENAME));

Path outputPath = geneFolder.resolve(getUrlFilename(url));
Path outputPath = geneFolder.resolve(getFilenameFromUrl(url));
logger.info(DOWNLOADING_LOG_MESSAGE, url, outputPath);
return downloadFile(url, outputPath.toString());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ public List<DownloadFile> downloadRepeats() throws IOException, InterruptedExcep
saveDataSource(TRF_NAME, EtlCommons.REPEATS_DATA, configuration.getDownload().getSimpleRepeats().getVersion(), getTimeStamp(),
Collections.singletonList(url), repeatsFolder.resolve(EtlCommons.TRF_VERSION_FILENAME));

Path outputPath = repeatsFolder.resolve(getUrlFilename(url));
Path outputPath = repeatsFolder.resolve(getFilenameFromUrl(url));
logger.info(DOWNLOADING_LOG_MESSAGE, url, outputPath);
downloadFiles.add(downloadFile(url, outputPath.toString()));

Expand All @@ -223,7 +223,7 @@ public List<DownloadFile> downloadRepeats() throws IOException, InterruptedExcep
saveDataSource(GSD_NAME, EtlCommons.REPEATS_DATA, configuration.getDownload().getGenomicSuperDups().getVersion(),
getTimeStamp(), Collections.singletonList(url), repeatsFolder.resolve(EtlCommons.GSD_VERSION_FILENAME));

outputPath = repeatsFolder.resolve(getUrlFilename(url));
outputPath = repeatsFolder.resolve(getFilenameFromUrl(url));
logger.info(DOWNLOADING_LOG_MESSAGE, url, outputPath);
downloadFiles.add(downloadFile(url, outputPath.toString()));

Expand All @@ -234,7 +234,7 @@ public List<DownloadFile> downloadRepeats() throws IOException, InterruptedExcep
saveDataSource(WM_NAME, EtlCommons.REPEATS_DATA, configuration.getDownload().getWindowMasker().getVersion(),
getTimeStamp(), Collections.singletonList(url), repeatsFolder.resolve(EtlCommons.WM_VERSION_FILENAME));

outputPath = repeatsFolder.resolve(getUrlFilename(url));
outputPath = repeatsFolder.resolve(getFilenameFromUrl(url));
logger.info(DOWNLOADING_LOG_MESSAGE, url, outputPath);
downloadFiles.add(downloadFile(url, outputPath.toString()));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public List<DownloadFile> download() throws IOException, InterruptedException {
downloadFile = downloadAndSaveDataSource(configuration.getDownload().getUniprot(), UNIPROT_NAME, PROTEIN_DATA, UNIPROT_FILE_ID,
UNIPROT_VERSION_FILENAME, proteinFolder);
Path chunksPath = proteinFolder.resolve(UNIPROT_CHUNKS_SUBDIRECTORY);
String uniprotFilename = getUrlFilename(configuration.getDownload().getUniprot().getFiles().get(UNIPROT_FILE_ID));
String uniprotFilename = getFilenameFromUrl(configuration.getDownload().getUniprot().getFiles().get(UNIPROT_FILE_ID));
logger.info("Split UniProt file {} into chunks at {}", uniprotFilename, chunksPath);
Files.createDirectories(chunksPath);
splitUniprot(proteinFolder.resolve(uniprotFilename), chunksPath);
Expand Down

0 comments on commit c7ad55d

Please sign in to comment.