diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/api/SnpQuery.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/api/SnpQuery.java index 94fb961bf..ade217f38 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/api/SnpQuery.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/api/SnpQuery.java @@ -30,7 +30,7 @@ public class SnpQuery extends AbstractQuery { @QueryParameter(id = "chromosome") private String chromosome; @QueryParameter(id = "position") - private String position; + private Integer position; @QueryParameter(id = "reference") private String reference; @@ -76,11 +76,11 @@ public SnpQuery setChromosome(String chromosome) { return this; } - public String getPosition() { + public Integer getPosition() { return position; } - public SnpQuery setPosition(String position) { + public SnpQuery setPosition(Integer position) { this.position = position; return this; } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java index a503ba704..5b8444e6f 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java @@ -40,6 +40,8 @@ import org.opencb.cellbase.lib.managers.*; import org.opencb.cellbase.lib.variant.VariantAnnotationUtils; import org.opencb.cellbase.lib.variant.annotation.futures.FuturePharmacogenomicsAnnotator; +import org.opencb.cellbase.lib.variant.annotation.futures.FutureSnpAnnotator; +import org.opencb.cellbase.lib.variant.annotation.futures.FutureSpliceScoreAnnotator; import org.opencb.cellbase.lib.variant.hgvs.HgvsCalculator; import org.opencb.commons.datastore.core.QueryOptions; import org.slf4j.Logger; @@ -465,6 +467,13 @@ private List runAnnotationProcess(List normalizedVar variationFuture = CACHED_THREAD_POOL.submit(futureVariationAnnotator); } + FutureSnpAnnotator futureSnpAnnotator = null; + Future>> snpFuture = null; + if (annotatorSet.contains("xrefs")) { + futureSnpAnnotator = new FutureSnpAnnotator(normalizedVariantList, dataRelease.getRelease(), variantManager, logger); + snpFuture = CACHED_THREAD_POOL.submit(futureSnpAnnotator); + } + FutureConservationAnnotator futureConservationAnnotator = null; Future>> conservationFuture = null; if (annotatorSet.contains("conservation")) { @@ -510,8 +519,8 @@ private List runAnnotationProcess(List normalizedVar FutureSpliceScoreAnnotator futureSpliceScoreAnnotator = null; Future>> spliceScoreFuture = null; if (annotatorSet.contains("consequenceType")) { - futureSpliceScoreAnnotator = new FutureSpliceScoreAnnotator(normalizedVariantList, QueryOptions.empty(), - dataRelease.getRelease()); + futureSpliceScoreAnnotator = new FutureSpliceScoreAnnotator(normalizedVariantList, dataRelease.getRelease(), apiKey, + variantManager, logger); spliceScoreFuture = CACHED_THREAD_POOL.submit(futureSpliceScoreAnnotator); } @@ -643,6 +652,9 @@ private List runAnnotationProcess(List normalizedVar if (futureVariationAnnotator != null) { futureVariationAnnotator.processResults(variationFuture, variantAnnotationList, annotatorSet); } + if (futureSnpAnnotator != null) { + futureSnpAnnotator.processResults(snpFuture, variantAnnotationList); + } if (futureConservationAnnotator != null) { futureConservationAnnotator.processResults(conservationFuture, variantAnnotationList); } @@ -1171,7 +1183,7 @@ private Set getAnnotatorSet(QueryOptions queryOptions) { // 'expression' removed in CB 5.0 annotatorSet = new HashSet<>(Arrays.asList("variation", "traitAssociation", "conservation", "functionalScore", "consequenceType", "geneDisease", "drugInteraction", "geneConstraints", "mirnaTargets", "pharmacogenomics", - "cancerGeneAssociation", "cancerHotspots", "populationFrequencies", "repeats", "cytoband", "hgvs")); + "cancerGeneAssociation", "cancerHotspots", "populationFrequencies", "repeats", "cytoband", "hgvs", "xrefs")); List excludeList = queryOptions.getAsStringList("exclude"); excludeList.forEach(annotatorSet::remove); } @@ -1909,74 +1921,6 @@ public void processResults(Future>> cytobandFu } } - class FutureSpliceScoreAnnotator implements Callable>> { - private List variantList; - private QueryOptions queryOptions; - private int dataRelease; - - FutureSpliceScoreAnnotator(List variantList, QueryOptions queryOptions, int dataRelease) { - this.variantList = variantList; - this.queryOptions = queryOptions; - this.dataRelease = dataRelease; - } - - @Override - public List> call() throws Exception { - long startTime = System.currentTimeMillis(); - - List> cellBaseDataResultList = new ArrayList<>(variantList.size()); - - logger.debug("Query splice"); - // Want to return only one CellBaseDataResult object per Variant - for (Variant variant : variantList) { - cellBaseDataResultList.add(variantManager.getSpliceScoreVariant(variant, apiKey, dataRelease)); - } - logger.debug("Splice score query performance is {}ms for {} variants", System.currentTimeMillis() - startTime, - variantList.size()); - return cellBaseDataResultList; - } - - public void processResults(Future>> spliceFuture, - List variantAnnotationList) - throws InterruptedException, ExecutionException { - List> spliceCellBaseDataResults; - try { - spliceCellBaseDataResults = spliceFuture.get(30, TimeUnit.SECONDS); - } catch (TimeoutException e) { - spliceFuture.cancel(true); - throw new ExecutionException("Unable to finish splice score query on time", e); - } - - if (CollectionUtils.isNotEmpty(spliceCellBaseDataResults)) { - for (int i = 0; i < variantAnnotationList.size(); i++) { - CellBaseDataResult spliceScoreResult = spliceCellBaseDataResults.get(i); - if (spliceScoreResult != null && CollectionUtils.isNotEmpty(spliceScoreResult.getResults())) { - for (SpliceScore spliceScore : spliceScoreResult.getResults()) { - for (ConsequenceType ct : variantAnnotationList.get(i).getConsequenceTypes()) { - for (SpliceScoreAlternate spliceScoreAlt : spliceScore.getAlternates()) { - String alt = StringUtils.isEmpty(variantAnnotationList.get(i).getAlternate()) - ? "-" - : variantAnnotationList.get(i).getAlternate(); - if (alt.equals(spliceScoreAlt.getAltAllele())) { - if (StringUtils.isEmpty(spliceScore.getTranscriptId()) - || StringUtils.isEmpty(ct.getTranscriptId()) - || spliceScore.getTranscriptId().equals(ct.getTranscriptId())) { - SpliceScores scores = new SpliceScores(spliceScore.getSource(), spliceScoreAlt.getScores()); - if (ct.getSpliceScores() == null) { - ct.setSpliceScores(new ArrayList<>()); - } - ct.getSpliceScores().add(scores); - } - } - } - } - } - } - } - } - } - } - public VariantNormalizer getNormalizer() { return normalizer; } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FutureSnpAnnotator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FutureSnpAnnotator.java new file mode 100644 index 000000000..bc982d658 --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FutureSnpAnnotator.java @@ -0,0 +1,105 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.lib.variant.annotation.futures; + +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.models.core.Snp; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.biodata.models.variant.avro.Xref; +import org.opencb.cellbase.core.api.SnpQuery; +import org.opencb.cellbase.core.result.CellBaseDataResult; +import org.opencb.cellbase.lib.managers.VariantManager; +import org.slf4j.Logger; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.*; + +public class FutureSnpAnnotator implements Callable>> { + private VariantManager variantManager; + + private List variantList; + private int dataRelease; + + private Logger logger; + + public FutureSnpAnnotator(List variantList, int dataRelease, VariantManager variantManager, Logger logger) { + this.variantManager = variantManager; + + this.variantList = variantList; + this.dataRelease = dataRelease; + + this.logger = logger; + } + + @Override + public List> call() throws Exception { + long startTime = System.currentTimeMillis(); + + List> cellBaseDataResultList = new ArrayList<>(variantList.size()); + + logger.debug("SNP queries..."); + // Want to return only one CellBaseDataResult object per Variant + List includes = new ArrayList<>(); + includes.add("id"); + includes.add("source"); + String logMsg = StringUtils.join(includes, ","); + logger.info("SNP annotation/search includes: {}", logMsg); + for (Variant variant : variantList) { + SnpQuery query = new SnpQuery(); + query.setChromosome(variant.getChromosome()); + query.setPosition(variant.getStart()); + query.setReference(variant.getReference()); + query.setDataRelease(dataRelease); + query.setIncludes(includes); + cellBaseDataResultList.add(variantManager.searchSnp(query)); + } + logger.info("SNP queries performance in {} ms for {} variants", System.currentTimeMillis() - startTime, variantList.size()); + return cellBaseDataResultList; + } + + public void processResults(Future>> snpFuture, List variantAnnotationList) + throws InterruptedException, ExecutionException { + List> snpCellBaseDataResults; + try { + snpCellBaseDataResults = snpFuture.get(30, TimeUnit.SECONDS); + } catch (TimeoutException e) { + snpFuture.cancel(true); + throw new ExecutionException("Unable to finish SNP query on time", e); + } + + if (CollectionUtils.isNotEmpty(snpCellBaseDataResults)) { + for (int i = 0; i < variantAnnotationList.size(); i++) { + CellBaseDataResult snpResult = snpCellBaseDataResults.get(i); + if (snpResult != null && CollectionUtils.isNotEmpty(snpResult.getResults())) { + List xrefs = new ArrayList<>(); + for (Snp snp : snpResult.getResults()) { + xrefs.add(new Xref(snp.getId(), snp.getSource())); + } + if (CollectionUtils.isNotEmpty(xrefs)) { + if (variantAnnotationList.get(i).getXrefs() == null) { + variantAnnotationList.get(i).setXrefs(new ArrayList<>()); + } + variantAnnotationList.get(i).getXrefs().addAll(xrefs); + } + } + } + } + } +} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FutureSpliceScoreAnnotator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FutureSpliceScoreAnnotator.java new file mode 100644 index 000000000..40523fdbc --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FutureSpliceScoreAnnotator.java @@ -0,0 +1,109 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.lib.variant.annotation.futures; + +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.models.core.SpliceScore; +import org.opencb.biodata.models.core.SpliceScoreAlternate; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.ConsequenceType; +import org.opencb.biodata.models.variant.avro.SpliceScores; +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.cellbase.core.result.CellBaseDataResult; +import org.opencb.cellbase.lib.managers.VariantManager; +import org.slf4j.Logger; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.*; + +public class FutureSpliceScoreAnnotator implements Callable>> { + private List variantList; + private int dataRelease; + private String apiKey; + + private VariantManager variantManager; + + private Logger logger; + + public FutureSpliceScoreAnnotator(List variantList, int dataRelease, String apiKey, VariantManager variantManager, + Logger logger) { + this.variantList = variantList; + this.dataRelease = dataRelease; + this.apiKey = apiKey; + + this.variantManager = variantManager; + + this.logger = logger; + } + + @Override + public List> call() throws Exception { + long startTime = System.currentTimeMillis(); + + List> cellBaseDataResultList = new ArrayList<>(variantList.size()); + + logger.debug("Query splice"); + // Want to return only one CellBaseDataResult object per Variant + for (Variant variant : variantList) { + cellBaseDataResultList.add(variantManager.getSpliceScoreVariant(variant, apiKey, dataRelease)); + } + logger.debug("Splice score query performance is {}ms for {} variants", System.currentTimeMillis() - startTime, + variantList.size()); + return cellBaseDataResultList; + } + + public void processResults(Future>> spliceFuture, List variantAnnotationList) + throws InterruptedException, ExecutionException { + List> spliceCellBaseDataResults; + try { + spliceCellBaseDataResults = spliceFuture.get(30, TimeUnit.SECONDS); + } catch (TimeoutException e) { + spliceFuture.cancel(true); + throw new ExecutionException("Unable to finish splice score query on time", e); + } + + if (CollectionUtils.isNotEmpty(spliceCellBaseDataResults)) { + for (int i = 0; i < variantAnnotationList.size(); i++) { + CellBaseDataResult spliceScoreResult = spliceCellBaseDataResults.get(i); + if (spliceScoreResult != null && CollectionUtils.isNotEmpty(spliceScoreResult.getResults())) { + for (SpliceScore spliceScore : spliceScoreResult.getResults()) { + for (ConsequenceType ct : variantAnnotationList.get(i).getConsequenceTypes()) { + for (SpliceScoreAlternate spliceScoreAlt : spliceScore.getAlternates()) { + String alt = StringUtils.isEmpty(variantAnnotationList.get(i).getAlternate()) + ? "-" + : variantAnnotationList.get(i).getAlternate(); + if (alt.equals(spliceScoreAlt.getAltAllele())) { + if (StringUtils.isEmpty(spliceScore.getTranscriptId()) + || StringUtils.isEmpty(ct.getTranscriptId()) + || spliceScore.getTranscriptId().equals(ct.getTranscriptId())) { + SpliceScores scores = new SpliceScores(spliceScore.getSource(), spliceScoreAlt.getScores()); + if (ct.getSpliceScores() == null) { + ct.setSpliceScores(new ArrayList<>()); + } + ct.getSpliceScores().add(scores); + } + } + } + } + } + } + } + } + } +}