Skip to content

Commit

Permalink
server: update variant annotator to return the dbSNP IDs in the field…
Browse files Browse the repository at this point in the history
… annotation.xrefs, #TASK-5821, #TASK-5789
  • Loading branch information
jtarraga committed Mar 13, 2024
1 parent 36c64fe commit 7670702
Show file tree
Hide file tree
Showing 4 changed files with 232 additions and 74 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public class SnpQuery extends AbstractQuery {
@QueryParameter(id = "chromosome")
private String chromosome;
@QueryParameter(id = "position")
private String position;
private Integer position;
@QueryParameter(id = "reference")
private String reference;

Expand Down Expand Up @@ -76,11 +76,11 @@ public SnpQuery setChromosome(String chromosome) {
return this;
}

public String getPosition() {
public Integer getPosition() {
return position;
}

public SnpQuery setPosition(String position) {
public SnpQuery setPosition(Integer position) {
this.position = position;
return this;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
import org.opencb.cellbase.lib.managers.*;
import org.opencb.cellbase.lib.variant.VariantAnnotationUtils;
import org.opencb.cellbase.lib.variant.annotation.futures.FuturePharmacogenomicsAnnotator;
import org.opencb.cellbase.lib.variant.annotation.futures.FutureSnpAnnotator;
import org.opencb.cellbase.lib.variant.annotation.futures.FutureSpliceScoreAnnotator;
import org.opencb.cellbase.lib.variant.hgvs.HgvsCalculator;
import org.opencb.commons.datastore.core.QueryOptions;
import org.slf4j.Logger;
Expand Down Expand Up @@ -465,6 +467,13 @@ private List<VariantAnnotation> runAnnotationProcess(List<Variant> normalizedVar
variationFuture = CACHED_THREAD_POOL.submit(futureVariationAnnotator);
}

FutureSnpAnnotator futureSnpAnnotator = null;
Future<List<CellBaseDataResult<Snp>>> snpFuture = null;
if (annotatorSet.contains("xrefs")) {
futureSnpAnnotator = new FutureSnpAnnotator(normalizedVariantList, dataRelease.getRelease(), variantManager, logger);
snpFuture = CACHED_THREAD_POOL.submit(futureSnpAnnotator);
}

FutureConservationAnnotator futureConservationAnnotator = null;
Future<List<CellBaseDataResult<Score>>> conservationFuture = null;
if (annotatorSet.contains("conservation")) {
Expand Down Expand Up @@ -510,8 +519,8 @@ private List<VariantAnnotation> runAnnotationProcess(List<Variant> normalizedVar
FutureSpliceScoreAnnotator futureSpliceScoreAnnotator = null;
Future<List<CellBaseDataResult<SpliceScore>>> spliceScoreFuture = null;
if (annotatorSet.contains("consequenceType")) {
futureSpliceScoreAnnotator = new FutureSpliceScoreAnnotator(normalizedVariantList, QueryOptions.empty(),
dataRelease.getRelease());
futureSpliceScoreAnnotator = new FutureSpliceScoreAnnotator(normalizedVariantList, dataRelease.getRelease(), apiKey,
variantManager, logger);
spliceScoreFuture = CACHED_THREAD_POOL.submit(futureSpliceScoreAnnotator);
}

Expand Down Expand Up @@ -643,6 +652,9 @@ private List<VariantAnnotation> runAnnotationProcess(List<Variant> normalizedVar
if (futureVariationAnnotator != null) {
futureVariationAnnotator.processResults(variationFuture, variantAnnotationList, annotatorSet);
}
if (futureSnpAnnotator != null) {
futureSnpAnnotator.processResults(snpFuture, variantAnnotationList);
}
if (futureConservationAnnotator != null) {
futureConservationAnnotator.processResults(conservationFuture, variantAnnotationList);
}
Expand Down Expand Up @@ -1171,7 +1183,7 @@ private Set<String> getAnnotatorSet(QueryOptions queryOptions) {
// 'expression' removed in CB 5.0
annotatorSet = new HashSet<>(Arrays.asList("variation", "traitAssociation", "conservation", "functionalScore",
"consequenceType", "geneDisease", "drugInteraction", "geneConstraints", "mirnaTargets", "pharmacogenomics",
"cancerGeneAssociation", "cancerHotspots", "populationFrequencies", "repeats", "cytoband", "hgvs"));
"cancerGeneAssociation", "cancerHotspots", "populationFrequencies", "repeats", "cytoband", "hgvs", "xrefs"));
List<String> excludeList = queryOptions.getAsStringList("exclude");
excludeList.forEach(annotatorSet::remove);
}
Expand Down Expand Up @@ -1909,74 +1921,6 @@ public void processResults(Future<List<CellBaseDataResult<Cytoband>>> cytobandFu
}
}

class FutureSpliceScoreAnnotator implements Callable<List<CellBaseDataResult<SpliceScore>>> {
private List<Variant> variantList;
private QueryOptions queryOptions;
private int dataRelease;

FutureSpliceScoreAnnotator(List<Variant> variantList, QueryOptions queryOptions, int dataRelease) {
this.variantList = variantList;
this.queryOptions = queryOptions;
this.dataRelease = dataRelease;
}

@Override
public List<CellBaseDataResult<SpliceScore>> call() throws Exception {
long startTime = System.currentTimeMillis();

List<CellBaseDataResult<SpliceScore>> cellBaseDataResultList = new ArrayList<>(variantList.size());

logger.debug("Query splice");
// Want to return only one CellBaseDataResult object per Variant
for (Variant variant : variantList) {
cellBaseDataResultList.add(variantManager.getSpliceScoreVariant(variant, apiKey, dataRelease));
}
logger.debug("Splice score query performance is {}ms for {} variants", System.currentTimeMillis() - startTime,
variantList.size());
return cellBaseDataResultList;
}

public void processResults(Future<List<CellBaseDataResult<SpliceScore>>> spliceFuture,
List<VariantAnnotation> variantAnnotationList)
throws InterruptedException, ExecutionException {
List<CellBaseDataResult<SpliceScore>> spliceCellBaseDataResults;
try {
spliceCellBaseDataResults = spliceFuture.get(30, TimeUnit.SECONDS);
} catch (TimeoutException e) {
spliceFuture.cancel(true);
throw new ExecutionException("Unable to finish splice score query on time", e);
}

if (CollectionUtils.isNotEmpty(spliceCellBaseDataResults)) {
for (int i = 0; i < variantAnnotationList.size(); i++) {
CellBaseDataResult<SpliceScore> spliceScoreResult = spliceCellBaseDataResults.get(i);
if (spliceScoreResult != null && CollectionUtils.isNotEmpty(spliceScoreResult.getResults())) {
for (SpliceScore spliceScore : spliceScoreResult.getResults()) {
for (ConsequenceType ct : variantAnnotationList.get(i).getConsequenceTypes()) {
for (SpliceScoreAlternate spliceScoreAlt : spliceScore.getAlternates()) {
String alt = StringUtils.isEmpty(variantAnnotationList.get(i).getAlternate())
? "-"
: variantAnnotationList.get(i).getAlternate();
if (alt.equals(spliceScoreAlt.getAltAllele())) {
if (StringUtils.isEmpty(spliceScore.getTranscriptId())
|| StringUtils.isEmpty(ct.getTranscriptId())
|| spliceScore.getTranscriptId().equals(ct.getTranscriptId())) {
SpliceScores scores = new SpliceScores(spliceScore.getSource(), spliceScoreAlt.getScores());
if (ct.getSpliceScores() == null) {
ct.setSpliceScores(new ArrayList<>());
}
ct.getSpliceScores().add(scores);
}
}
}
}
}
}
}
}
}
}

/**
 * Gets the variant normalizer held by this instance.
 *
 * @return the current value of the {@code normalizer} field
 */
public VariantNormalizer getNormalizer() {
    return this.normalizer;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*
* Copyright 2015-2020 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.opencb.cellbase.lib.variant.annotation.futures;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.core.Snp;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.VariantAnnotation;
import org.opencb.biodata.models.variant.avro.Xref;
import org.opencb.cellbase.core.api.SnpQuery;
import org.opencb.cellbase.core.result.CellBaseDataResult;
import org.opencb.cellbase.lib.managers.VariantManager;
import org.slf4j.Logger;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.*;

/**
 * Callable that searches the SNP records of a list of variants and copies the returned IDs into
 * the {@code xrefs} field of the matching variant annotations.
 *
 * <p>{@link #call()} adds exactly one result per variant, in list order; {@link #processResults}
 * relies on that positional pairing between results and annotations.
 */
public class FutureSnpAnnotator implements Callable<List<CellBaseDataResult<Snp>>> {
    /** Maximum time, in seconds, that {@link #processResults} waits for the SNP queries. */
    private static final int TIMEOUT_SECONDS = 30;

    private final VariantManager variantManager;

    private final List<Variant> variantList;
    private final int dataRelease;

    private final Logger logger;

    /**
     * @param variantList    variants to search SNPs for
     * @param dataRelease    data release set on every SNP query
     * @param variantManager manager used to run the SNP searches
     * @param logger         logger used for progress and timing messages
     */
    public FutureSnpAnnotator(List<Variant> variantList, int dataRelease, VariantManager variantManager, Logger logger) {
        this.variantManager = variantManager;

        this.variantList = variantList;
        this.dataRelease = dataRelease;

        this.logger = logger;
    }

    /**
     * Runs one SNP search per variant, matching on chromosome, start position and reference allele.
     *
     * @return one result per variant, in the same order as the variant list
     * @throws Exception whatever the underlying SNP search throws
     */
    @Override
    public List<CellBaseDataResult<Snp>> call() throws Exception {
        long startTime = System.currentTimeMillis();

        List<CellBaseDataResult<Snp>> cellBaseDataResultList = new ArrayList<>(variantList.size());

        logger.debug("SNP queries...");
        // Only "id" and "source" are requested: they are the only fields read when building the xrefs
        List<String> includes = new ArrayList<>();
        includes.add("id");
        includes.add("source");
        String logMsg = StringUtils.join(includes, ",");
        logger.info("SNP annotation/search includes: {}", logMsg);
        // Want to return only one CellBaseDataResult object per Variant
        for (Variant variant : variantList) {
            SnpQuery query = new SnpQuery();
            query.setChromosome(variant.getChromosome());
            query.setPosition(variant.getStart());
            query.setReference(variant.getReference());
            query.setDataRelease(dataRelease);
            query.setIncludes(includes);
            cellBaseDataResultList.add(variantManager.searchSnp(query));
        }
        logger.info("SNP queries performance in {} ms for {} variants", System.currentTimeMillis() - startTime, variantList.size());
        return cellBaseDataResultList;
    }

    /**
     * Waits for the SNP results and appends one {@code Xref} (SNP ID and source) per returned SNP
     * to the matching variant annotation.
     *
     * @param snpFuture             future returned when this callable was submitted
     * @param variantAnnotationList annotations to update; the i-th one is paired with the i-th result
     * @throws InterruptedException if the waiting thread is interrupted
     * @throws ExecutionException   if the query failed, or did not finish on time (the future is
     *                              cancelled and the timeout is attached as the cause)
     */
    public void processResults(Future<List<CellBaseDataResult<Snp>>> snpFuture, List<VariantAnnotation> variantAnnotationList)
            throws InterruptedException, ExecutionException {
        List<CellBaseDataResult<Snp>> snpCellBaseDataResults;
        try {
            snpCellBaseDataResults = snpFuture.get(TIMEOUT_SECONDS, TimeUnit.SECONDS);
        } catch (TimeoutException e) {
            snpFuture.cancel(true);
            throw new ExecutionException("Unable to finish SNP query on time", e);
        }

        if (CollectionUtils.isEmpty(snpCellBaseDataResults)) {
            return;
        }

        // Never index past the shorter list: a size mismatch must not abort the whole annotation
        int numPairs = Math.min(variantAnnotationList.size(), snpCellBaseDataResults.size());
        if (numPairs < variantAnnotationList.size()) {
            logger.warn("SNP results ({}) do not cover all the variant annotations ({})", snpCellBaseDataResults.size(),
                    variantAnnotationList.size());
        }
        for (int i = 0; i < numPairs; i++) {
            CellBaseDataResult<Snp> snpResult = snpCellBaseDataResults.get(i);
            if (snpResult == null || CollectionUtils.isEmpty(snpResult.getResults())) {
                continue;
            }

            List<Xref> xrefs = new ArrayList<>(snpResult.getResults().size());
            for (Snp snp : snpResult.getResults()) {
                if (snp != null) {
                    xrefs.add(new Xref(snp.getId(), snp.getSource()));
                }
            }
            if (xrefs.isEmpty()) {
                continue;
            }

            VariantAnnotation variantAnnotation = variantAnnotationList.get(i);
            if (variantAnnotation.getXrefs() == null) {
                variantAnnotation.setXrefs(new ArrayList<>());
            }
            variantAnnotation.getXrefs().addAll(xrefs);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*
* Copyright 2015-2020 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.opencb.cellbase.lib.variant.annotation.futures;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.core.SpliceScore;
import org.opencb.biodata.models.core.SpliceScoreAlternate;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.ConsequenceType;
import org.opencb.biodata.models.variant.avro.SpliceScores;
import org.opencb.biodata.models.variant.avro.VariantAnnotation;
import org.opencb.cellbase.core.result.CellBaseDataResult;
import org.opencb.cellbase.lib.managers.VariantManager;
import org.slf4j.Logger;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.*;

/**
 * Callable that fetches splice scores for a list of variants and attaches them to the consequence
 * types of the matching variant annotations.
 *
 * <p>{@link #call()} adds exactly one result per variant, in list order; {@link #processResults}
 * relies on that positional pairing between results and annotations.
 */
public class FutureSpliceScoreAnnotator implements Callable<List<CellBaseDataResult<SpliceScore>>> {
    /** Maximum time, in seconds, that {@link #processResults} waits for the splice score queries. */
    private static final int TIMEOUT_SECONDS = 30;

    private final List<Variant> variantList;
    private final int dataRelease;
    private final String apiKey;

    private final VariantManager variantManager;

    private final Logger logger;

    /**
     * @param variantList    variants to fetch splice scores for
     * @param dataRelease    data release passed to every splice score query
     * @param apiKey         API key passed to every splice score query
     * @param variantManager manager used to run the splice score queries
     * @param logger         logger used for progress and timing messages
     */
    public FutureSpliceScoreAnnotator(List<Variant> variantList, int dataRelease, String apiKey, VariantManager variantManager,
                                      Logger logger) {
        this.variantList = variantList;
        this.dataRelease = dataRelease;
        this.apiKey = apiKey;

        this.variantManager = variantManager;

        this.logger = logger;
    }

    /**
     * Queries the splice scores of every variant.
     *
     * @return one result per variant, in the same order as the variant list
     * @throws Exception whatever the underlying splice score query throws
     */
    @Override
    public List<CellBaseDataResult<SpliceScore>> call() throws Exception {
        long startTime = System.currentTimeMillis();

        List<CellBaseDataResult<SpliceScore>> cellBaseDataResultList = new ArrayList<>(variantList.size());

        logger.debug("Query splice");
        // Want to return only one CellBaseDataResult object per Variant
        for (Variant variant : variantList) {
            cellBaseDataResultList.add(variantManager.getSpliceScoreVariant(variant, apiKey, dataRelease));
        }
        logger.debug("Splice score query performance is {}ms for {} variants", System.currentTimeMillis() - startTime,
                variantList.size());
        return cellBaseDataResultList;
    }

    /**
     * Waits for the splice score results and adds the matching scores to the consequence types of
     * each annotation.
     *
     * @param spliceFuture          future returned when this callable was submitted
     * @param variantAnnotationList annotations to update; the i-th one is paired with the i-th result
     * @throws InterruptedException if the waiting thread is interrupted
     * @throws ExecutionException   if the query failed, or did not finish on time (the future is
     *                              cancelled and the timeout is attached as the cause)
     */
    public void processResults(Future<List<CellBaseDataResult<SpliceScore>>> spliceFuture, List<VariantAnnotation> variantAnnotationList)
            throws InterruptedException, ExecutionException {
        List<CellBaseDataResult<SpliceScore>> spliceCellBaseDataResults;
        try {
            spliceCellBaseDataResults = spliceFuture.get(TIMEOUT_SECONDS, TimeUnit.SECONDS);
        } catch (TimeoutException e) {
            spliceFuture.cancel(true);
            throw new ExecutionException("Unable to finish splice score query on time", e);
        }

        if (CollectionUtils.isEmpty(spliceCellBaseDataResults)) {
            return;
        }

        // Never index past the shorter list: a size mismatch must not abort the whole annotation
        int numPairs = Math.min(variantAnnotationList.size(), spliceCellBaseDataResults.size());
        if (numPairs < variantAnnotationList.size()) {
            logger.warn("Splice score results ({}) do not cover all the variant annotations ({})",
                    spliceCellBaseDataResults.size(), variantAnnotationList.size());
        }
        for (int i = 0; i < numPairs; i++) {
            CellBaseDataResult<SpliceScore> spliceScoreResult = spliceCellBaseDataResults.get(i);
            if (spliceScoreResult != null && CollectionUtils.isNotEmpty(spliceScoreResult.getResults())) {
                addSpliceScores(spliceScoreResult.getResults(), variantAnnotationList.get(i));
            }
        }
    }

    /** Adds to each consequence type the scores whose alternate allele and transcript match. */
    private void addSpliceScores(List<SpliceScore> spliceScores, VariantAnnotation variantAnnotation) {
        List<ConsequenceType> consequenceTypes = variantAnnotation.getConsequenceTypes();
        if (CollectionUtils.isEmpty(consequenceTypes)) {
            // Nothing to attach the scores to
            return;
        }

        // An empty alternate allele is compared as "-"
        String alt = StringUtils.isEmpty(variantAnnotation.getAlternate()) ? "-" : variantAnnotation.getAlternate();

        for (SpliceScore spliceScore : spliceScores) {
            if (spliceScore == null || CollectionUtils.isEmpty(spliceScore.getAlternates())) {
                continue;
            }
            for (ConsequenceType ct : consequenceTypes) {
                // A missing transcript ID on either side is treated as a match
                boolean transcriptMatches = StringUtils.isEmpty(spliceScore.getTranscriptId())
                        || StringUtils.isEmpty(ct.getTranscriptId())
                        || spliceScore.getTranscriptId().equals(ct.getTranscriptId());
                if (!transcriptMatches) {
                    continue;
                }
                for (SpliceScoreAlternate spliceScoreAlt : spliceScore.getAlternates()) {
                    if (alt.equals(spliceScoreAlt.getAltAllele())) {
                        SpliceScores scores = new SpliceScores(spliceScore.getSource(), spliceScoreAlt.getScores());
                        if (ct.getSpliceScores() == null) {
                            ct.setSpliceScores(new ArrayList<>());
                        }
                        ct.getSpliceScores().add(scores);
                    }
                }
            }
        }
    }
}

0 comments on commit 7670702

Please sign in to comment.