Skip to content

Commit

Permalink
core: add tests for variant, gene and protein annotation
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed Mar 2, 2018
1 parent 5c6a4bb commit 5e3a380
Show file tree
Hide file tree
Showing 9 changed files with 402 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -113,14 +113,14 @@ public void annotateVariants(List<String> variantIds) throws IOException, BioNet
networkDBAdaptor.annotateVariants(variantIds, variationClient);
}

public void annotateProtein() throws BioNetDBException, IOException {
ClientConfiguration clientConfiguration = new ClientConfiguration();
clientConfiguration.setVersion("v4");
clientConfiguration.setRest(new RestConfig(Collections.singletonList("http://bioinfo.hpc.cam.ac.uk/cellbase"), 30000));
CellBaseClient cellBaseClient = new CellBaseClient("hsapiens", clientConfiguration);
/**
 * Annotates the protein nodes selected by the given node query.
 * Thin delegate: forwards to the network DB adaptor together with the protein
 * client obtained from {@code cellBaseClient}.
 *
 * @param query   node query forwarded unchanged to the adaptor
 * @param options query options forwarded unchanged to the adaptor
 * @throws IOException       propagated from the adaptor
 * @throws BioNetDBException propagated from the adaptor
 */
public void annotateProteins(NodeQuery query, QueryOptions options) throws IOException, BioNetDBException {
    networkDBAdaptor.annotateProteins(query, options, cellBaseClient.getProteinClient());
}

networkDBAdaptor.annotateProtein(proteinClient);
/**
 * Annotates the proteins identified by the given ids.
 * Thin delegate: forwards to the network DB adaptor together with the protein
 * client obtained from {@code cellBaseClient}.
 *
 * @param proteinIds ids forwarded unchanged to the adaptor
 * @throws IOException       propagated from the adaptor
 * @throws BioNetDBException propagated from the adaptor
 */
public void annotateProteins(List<String> proteinIds) throws IOException, BioNetDBException {
    networkDBAdaptor.annotateProteins(proteinIds, cellBaseClient.getProteinClient());
}

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ enum NetworkQueryParams implements QueryParam {
void annotateGenes(NodeQuery query, QueryOptions options, GeneClient geneClient) throws BioNetDBException, IOException;
void annotateGenes(List<String> geneIds, GeneClient geneClient) throws BioNetDBException, IOException;

void annotateProtein(ProteinClient proteinClient) throws BioNetDBException, IOException;
/**
 * Annotates the protein nodes selected by the given node query, using the supplied CellBase protein client.
 */
void annotateProteins(NodeQuery query, QueryOptions options, ProteinClient proteinClient) throws BioNetDBException, IOException;
/**
 * Annotates the proteins with the given ids, using the supplied CellBase protein client.
 */
void annotateProteins(List<String> proteinIds, ProteinClient proteinClient) throws BioNetDBException, IOException;

//-------------------------------------------------------------------------
// N O D E S
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,15 @@
import org.opencb.cellbase.client.rest.GeneClient;
import org.opencb.cellbase.client.rest.ProteinClient;
import org.opencb.cellbase.client.rest.VariationClient;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.QueryResponse;
import org.opencb.commons.datastore.core.QueryResult;
import org.opencb.commons.utils.ListUtils;

import java.io.IOException;
import java.util.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static org.neo4j.driver.v1.Values.parameters;

Expand Down Expand Up @@ -179,6 +180,7 @@ public void annotateVariants(List<String> variantIds, VariationClient variationC
}
}
}

//-------------------------------------------------------------------------

public void annotateGenes(NodeQuery query, QueryOptions options, GeneClient geneClient) throws BioNetDBException,
Expand Down Expand Up @@ -211,40 +213,68 @@ public void annotateGenes(List<String> geneIds, GeneClient geneClient) throws Bi

//-------------------------------------------------------------------------

public void annotateProtein(ProteinClient proteinClient) throws BioNetDBException, IOException {
// First, get all proteins from the network
Query query = new Query();
String cypher = "MATCH path=(p:PROTEIN)-[xr:XREF]->(x:XREF) WHERE toLower(x.attr_source) = \"uniprot\" return path";
QueryResult<Network> networkResult = networkQuery(cypher);

if (ListUtils.isEmpty(networkResult.getResult())) {
System.out.println("Network not found!!");
return;
}
Network network = networkResult.getResult().get(0);

// Get proteins annotations from Cellbase...
// ... prepare list of protein id/names from xref/protein nodes
List<String> proteinIds = new ArrayList<>();
for (Node node: network.getNodes()) {
if (node.getType() == Node.Type.XREF) {
proteinIds.add(node.getId());
/**
 * Annotates every node produced by {@code nodeIterator(query, options)}.
 * Node ids are accumulated and passed to {@link #annotateProteins(List, ProteinClient)}
 * in batches of at most 1000 ids, so the client is never asked for more than 1000 ids at once.
 *
 * @param query         node query selecting the nodes to annotate
 * @param options       query options passed to the node iterator
 * @param proteinClient client passed on to the per-batch annotation call
 * @throws BioNetDBException propagated from the per-batch annotation call
 * @throws IOException       propagated from the per-batch annotation call
 */
public void annotateProteins(NodeQuery query, QueryOptions options, ProteinClient proteinClient) throws BioNetDBException,
IOException {
NodeIterator nodeIterator = nodeIterator(query, options);
List<String> proteinIds = new ArrayList<>(1000);
while (nodeIterator.hasNext()) {
// NOTE(review): the local is called geneNode but, given the method name, presumably holds a protein node.
Node geneNode = nodeIterator.next();
proteinIds.add(geneNode.getId());
// Flush a full batch, then reuse the same list for the next one.
if (proteinIds.size() >= 1000) {
annotateProteins(proteinIds, proteinClient);
proteinIds.clear();
}
}

// ... finally, call Cellbase service
Map<String, Entry> proteinMap = new HashMap<>();
QueryResponse<Entry> entryQueryResponse = proteinClient.get(proteinIds, new QueryOptions(QueryOptions.EXCLUDE,
"reference,organism,comment,evidence,sequence"));
for (QueryResult<Entry> queryResult: entryQueryResponse.getResponse()) {
proteinMap.put(queryResult.getId(), queryResult.getResult().get(0));
// Annotate the ids left over after the last full batch.
if (ListUtils.isNotEmpty(proteinIds)) {
annotateProteins(proteinIds, proteinClient);
}
}

for (String key: proteinMap.keySet()) {
System.out.println(key + " -> " + proteinMap.get(key));
public void annotateProteins(List<String> proteinIds, ProteinClient proteinClient) throws BioNetDBException, IOException {
Neo4JVariantLoader variantLoader = new Neo4JVariantLoader(this);
QueryOptions options = new QueryOptions("EXCLUDE", "transcripts.exons,transcripts.cDnaSequence");
QueryResponse<Entry> entryQueryResponse = proteinClient.get(proteinIds, options);
for (QueryResult<Entry> queryResult: entryQueryResponse.getResponse()) {
if (ListUtils.isNotEmpty(queryResult.getResult())) {
variantLoader.loadProteins(queryResult.getResult());
}
}
}

// public void annotateProtein(ProteinClient proteinClient) throws BioNetDBException, IOException {
// // First, get all proteins from the network
// Query query = new Query();
// String cypher = "MATCH path=(p:PROTEIN)-[xr:XREF]->(x:XREF) WHERE toLower(x.attr_source) = \"uniprot\" return path";
// QueryResult<Network> networkResult = networkQuery(cypher);
//
// if (ListUtils.isEmpty(networkResult.getResult())) {
// System.out.println("Network not found!!");
// return;
// }
// Network network = networkResult.getResult().get(0);
//
// // Get proteins annotations from Cellbase...
// // ... prepare list of protein id/names from xref/protein nodes
// List<String> proteinIds = new ArrayList<>();
// for (Node node: network.getNodes()) {
// if (node.getType() == Node.Type.XREF) {
// proteinIds.add(node.getId());
// }
// }
//
// // ... finally, call Cellbase service
// Map<String, Entry> proteinMap = new HashMap<>();
// QueryResponse<Entry> entryQueryResponse = proteinClient.get(proteinIds, new QueryOptions(QueryOptions.EXCLUDE,
// "reference,organism,comment,evidence,sequence"));
// for (QueryResult<Entry> queryResult: entryQueryResponse.getResponse()) {
// proteinMap.put(queryResult.getId(), queryResult.getResult().get(0));
// }
//
// for (String key: proteinMap.keySet()) {
// System.out.println(key + " -> " + proteinMap.get(key));
// }
// }

//-------------------------------------------------------------------------
// N O D E Q U E R I E S
//-------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ public void loadVariants(List<Variant> variants) {
session.close();
}


public void loadGenes(List<Gene> genes) {
Session session = networkDBAdaptor.getDriver().session();
for (Gene gene: genes) {
Expand All @@ -91,6 +90,17 @@ public void loadGenes(List<Gene> genes) {
session.close();
}

/**
 * Loads the given protein entries into the database, one write transaction per entry.
 *
 * @param proteins entries to load; each is handed to {@code loadProtein} inside its own transaction
 */
public void loadProteins(List<Entry> proteins) {
    // try-with-resources: the session is closed even when a transaction throws,
    // instead of leaking as it did with a trailing session.close().
    try (Session session = networkDBAdaptor.getDriver().session()) {
        for (Entry protein : proteins) {
            session.writeTransaction(tx -> {
                loadProtein(protein, tx);
                // The transaction function must return a value; it is not used.
                return 1;
            });
        }
    }
}

//-------------------------------------------------------------------------
// P R I V A T E M E T H O D S
//-------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ public static Network toNetwork(StatementResult statementResult) {
// Then, we can process relationships and insert them into the network
for (long key: relationshipMap.keySet()) {
Relationship neoRelation = relationshipMap.get(key);
Relation relation = new Relation(neoRelation.get("uid").asLong(), neoRelation.get("name").asString(),
Relation relation = new Relation(neoRelation.id(), neoRelation.get("name").asString(),
nodeMap.get(neoRelation.startNodeId()).getUid(), nodeMap.get(neoRelation.endNodeId()).getUid(),
Relation.Type.valueOf(neoRelation.type()));
network.addRelation(relation);
Expand All @@ -104,9 +104,16 @@ public static Network toNetwork(StatementResult statementResult) {

private static Node toNode(org.neo4j.driver.v1.types.Node neoNode) {
// Set uid, id and name
Node node = new Node(neoNode.get("uid").asLong());
node.setId(neoNode.get("id").asString());
node.setName(neoNode.get("name").asString());
Node node = new Node(neoNode.id());
if (neoNode.containsKey("id")) {
node.setId(neoNode.get("id").asString());
}
if (neoNode.containsKey("name")) {
node.setName(neoNode.get("name").asString());
}
if (neoNode.containsKey("source")) {
node.setSource(neoNode.get("source").asString());
}

// Set type and tags
boolean first = true;
Expand Down Expand Up @@ -157,7 +164,7 @@ private static org.opencb.bionetdb.core.network.Path toPath(Path neoPath) {
// Then, we can process relationships and insert them into the path
for (long key: relationshipMap.keySet()) {
Relationship neoRelation = relationshipMap.get(key);
Relation relation = new Relation(neoRelation.get("uid").asLong(), neoRelation.get("name").asString(),
Relation relation = new Relation(neoRelation.id(), neoRelation.get("name").asString(),
nodeMap.get(neoRelation.startNodeId()).getUid(), nodeMap.get(neoRelation.endNodeId()).getUid(),
Relation.Type.valueOf(neoRelation.type()));
path.addRelation(relation);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,16 @@
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.QueryResult;
import org.opencb.commons.utils.ListUtils;

import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

import static org.junit.Assert.fail;

public class BioNetDBManagerTest {

private String database = "scerevisiae";
Expand Down Expand Up @@ -50,14 +54,17 @@ public void initialize () {

// Loads a BioPAX file from the local filesystem through bioNetDBManager.loadBiopax(Path).
@Test
public void loadBiopax() throws BioNetDBException, IOException {
String filename = "/home/jtarraga/data150/neo4j/hsapiens.meiosis.biopax3";
// NOTE(review): Paths.get() does not expand "~"; this is a relative path whose first element is
// a directory literally named "~". TODO: build it from System.getProperty("user.home") or ship
// the file as a test resource.
String filename = "~/neo4j/hsapiens.meiosis.biopax3";
bioNetDBManager.loadBiopax(Paths.get(filename));
}

// Loads a VCF file through bioNetDBManager.loadVcf(Path); also called directly by annotateVariants().
@Test
public void loadVCF() throws BioNetDBException {
String filename = "/home/jtarraga/data150/vcf/2.vcf";
bioNetDBManager.loadVcf(Paths.get(filename));
try {
// The VCF is looked up on the classpath ("/3.vcf"), so no machine-specific path is involved.
// NOTE(review): getResource() returns null when the resource is missing, which would surface
// here as a NullPointerException rather than a clear test failure.
bioNetDBManager.loadVcf(Paths.get(getClass().getResource("/3.vcf").toURI()));
} catch (URISyntaxException e) {
// NOTE(review): fail() without a message drops the exception, so the cause is not reported.
fail();
}
}

//-------------------------------------------------------------------------
Expand All @@ -67,24 +74,57 @@ public void loadVCF() throws BioNetDBException {
// NOTE(review): this test makes no assertions; it only checks that annotation runs without throwing.
@Test
public void annotateVariants() throws BioNetDBException, IOException {
// Load the VCF first; presumably this is what creates the VARIANT nodes queried below.
loadVCF();
List<String> variantIds = new ArrayList<>();
variantIds.add("rs540431307");
variantIds.add("rs367896724");
bioNetDBManager.annotateVariants(variantIds);

List<String> ids = new ArrayList<>();

// variantIds.add("rs540431307");
// variantIds.add("rs367896724");
// variantIds.add("rs429358");

// Collect the id of every VARIANT node returned by the Cypher query.
QueryResult<Node> queryResult = bioNetDBManager.nodeQuery("MATCH (n:VARIANT) return n");
if (queryResult != null && ListUtils.isNotEmpty(queryResult.getResult())) {
for (Node node: queryResult.getResult()) {
ids.add(node.getId());
}
}

bioNetDBManager.annotateVariants(ids);
}

// NOTE(review): this test makes no assertions; it only checks that annotation runs without throwing.
@Test
public void annotateGenes() throws BioNetDBException, IOException {
//loadVCF();
List<String> geneIds = new ArrayList<>();
geneIds.add("");
//variantIds.add("rs367896724");
bioNetDBManager.annotateGenes(geneIds);
// Run the variant annotation test first; presumably it produces the GENE nodes queried below.
annotateVariants();
List<String> ids = new ArrayList<>();

// geneIds.add("ENSG00000227232");
// geneIds.add("ENSG00000223972");

// Collect the id of every GENE node returned by the Cypher query.
QueryResult<Node> queryResult = bioNetDBManager.nodeQuery("MATCH (n:GENE) return n");
if (queryResult != null && ListUtils.isNotEmpty(queryResult.getResult())) {
for (Node node: queryResult.getResult()) {
ids.add(node.getId());
}
}

bioNetDBManager.annotateGenes(ids);
}

// NOTE(review): this test makes no assertions; it only checks that annotation runs without throwing.
@Test
public void annotateProtein() throws BioNetDBException, IOException {
bioNetDBManager.annotateProtein();
// Run the gene annotation test first; presumably it produces the PROTEIN nodes queried below.
annotateGenes();

List<String> ids = new ArrayList<>();

// Collect the id of every PROTEIN node returned by the Cypher query.
QueryResult<Node> queryResult = bioNetDBManager.nodeQuery("MATCH (n:PROTEIN) return n");
if (queryResult != null && ListUtils.isNotEmpty(queryResult.getResult())) {
for (Node node: queryResult.getResult()) {
ids.add(node.getId());
}
}

// proteinIds.add("P02649");

bioNetDBManager.annotateProteins(ids);
}

//-------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ public class DemoTest {

//String reactomeBiopaxFilename = "~/data150/neo4j/vesicle.mediated.transport.biopax3";
// String reactomeBiopaxFilename = "~/data150/neo4j/pathway.biopax";
String reactomeBiopaxFilename = "/home/jtarraga/data150/neo4j/pathway1.biopax3";
String reactomeBiopaxFilename = "~/data150/neo4j/pathway1.biopax3";

String variantJsonFilename = "/home/jtarraga/data150/neo4j/test2.json";
String variantJsonFilename = "~/data150/neo4j/test2.json";

@Rule
public final ExpectedException exception = ExpectedException.none();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public void close() throws Exception {
@Test
public void testInsert() throws Exception {
BioPaxParser bioPaxParser = new BioPaxParser("L3");
String reactomeBiopaxFilename = "/home/jtarraga/data150/neo4j/pathway1.biopax3";
String reactomeBiopaxFilename = "~/jtarraga/data150/neo4j/pathway1.biopax3";
//Path inputPath = Paths.get(getClass().getResource("/Saccharomyces_cerevisiae.owl.gz").toURI());
Path inputPath = Paths.get(reactomeBiopaxFilename);
Network network = bioPaxParser.parse(inputPath);
Expand Down
Loading

0 comments on commit 5e3a380

Please sign in to comment.