diff --git a/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/CliOptionsParser.java b/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/CliOptionsParser.java index ebbe482..17bb4e1 100644 --- a/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/CliOptionsParser.java +++ b/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/CliOptionsParser.java @@ -148,6 +148,9 @@ public class QueryCommandOptions { @Parameter(names = {"--betweenness"}, description = "", required = false, arity = 0) public boolean betweenness; + @Parameter(names = {"--clusteringCoeff"}, description = "", required = false, arity = 0) + public boolean clusteringCoeff; + @Parameter(names = {"-o", "--output-file"}, description = "", required = false, arity = 1) public String outputFile; diff --git a/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/QueryCommandExecutor.java b/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/QueryCommandExecutor.java index 56485e5..3f8cae4 100644 --- a/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/QueryCommandExecutor.java +++ b/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/QueryCommandExecutor.java @@ -30,6 +30,13 @@ public void execute() { networkDBAdaptor.betweenness(query); } + if (queryCommandOptions.clusteringCoeff) { + Query query = new Query("id", queryCommandOptions.id); + query.put("nodeLabel", queryCommandOptions.nodeType); + + networkDBAdaptor.clusteringCoefficient(query); + } + } catch (Exception e) { e.printStackTrace(); } diff --git a/bionetdb-core/pom.xml b/bionetdb-core/pom.xml index 891d483..4325760 100644 --- a/bionetdb-core/pom.xml +++ b/bionetdb-core/pom.xml @@ -55,6 +55,10 @@ com.fasterxml.jackson.core jackson-databind + + org.apache.commons + commons-math3 + junit junit diff --git a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/neo4j/Neo4JNetworkDBAdaptor.java b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/neo4j/Neo4JNetworkDBAdaptor.java index bee47a1..eaa1c06 100644 --- 
a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/neo4j/Neo4JNetworkDBAdaptor.java +++ b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/neo4j/Neo4JNetworkDBAdaptor.java @@ -1,5 +1,6 @@ package org.opencb.bionetdb.core.neo4j; +import org.apache.commons.math3.util.CombinatoricsUtils; import org.neo4j.graphdb.*; import org.neo4j.graphdb.factory.GraphDatabaseFactory; import org.neo4j.graphdb.index.IndexManager; @@ -12,6 +13,7 @@ import org.opencb.datastore.core.QueryOptions; import org.opencb.datastore.core.QueryResult; +import java.text.DecimalFormat; import java.util.*; /** @@ -278,7 +280,7 @@ private void insertPhysicalEntities(List physicalEntityList, Que addRelationship(n, ont, RelTypes.ONTOLOGY); } - /* Insert the cellular locations */ + /* Insert the cellular locations */ for (CellularLocation c : p.getCellularLocation()) { Node cel = getOrCreateCellularLocationNode(parseCellularLocation(c)); addRelationship(n, cel, RelTypes.CELLULARLOCATION);
@@ -847,6 +849,59 @@ public QueryResult betweenness(Query query) {
 
     @Override
     public QueryResult clusteringCoefficient(Query query) {
+        // The clustering coefficient of a node is the probability that two randomly selected
+        // neighbours are connected to each other. With n neighbours and r mutual connections
+        // between those neighbours: clusteringCoefficient = r / C(n, 2), where
+        // C(n, 2) = n! / (2! (n - 2)!) is the number of possible neighbour pairs.
+        //
+        // One tab-separated row (ID, LOCATION, CLUSTERING_COEFFICIENT) is printed to standard
+        // output per result row. No QueryResult is built here; the method returns null.
+
+        // TODO multiple ids
+        String id = query.getString("id");
+
+        // The id is bound as the Cypher parameter {id} rather than concatenated into the
+        // statement, so quotes or Cypher fragments inside the id cannot alter the query.
+        // NOTE(review): the plain MATCH clauses presumably drop entities with no closed
+        // neighbour path or no cellular location, so they are reported as not found;
+        // confirm whether OPTIONAL MATCH is wanted.
+        StringBuilder cypherQuery = new StringBuilder();
+        cypherQuery.append("MATCH (a { name: {id} })--(:Interaction)--(b)");
+        cypherQuery.append(" WITH a, count(DISTINCT b) AS n");
+        cypherQuery.append(" MATCH (a)--(:Interaction)--(:PhysicalEntity)"
+                + "--(:Interaction)-[r]-(:PhysicalEntity)--(:Interaction)--(a)");
+        cypherQuery.append(" MATCH (a)-[:CELLULARLOCATION]-(c:CellularLocation)");
+        cypherQuery.append(" RETURN a.name, c.id, n, count(DISTINCT r) AS r");
+        Map<String, Object> parameters = Collections.<String, Object>singletonMap("id", id);
+
+        // Explicit pattern and symbols: always emits a leading zero and a '.' decimal
+        // separator regardless of the JVM default locale.
+        DecimalFormat df = new DecimalFormat("0.##",
+                java.text.DecimalFormatSymbols.getInstance(Locale.ROOT));
+
+        // Result holds database resources; try-with-resources releases them on every path.
+        try (Result execute = this.database.execute(cypherQuery.toString(), parameters)) {
+            if (execute.hasNext()) {
+                System.out.println("#ID\tLOCATION\tCLUSTERING_COEFFICIENT");
+                while (execute.hasNext()) {
+                    Map<String, Object> row = execute.next();
+                    int r = ((Number) row.get("r")).intValue();
+                    int n = ((Number) row.get("n")).intValue();
+
+                    // With fewer than two neighbours there is no possible pair: coefficient 0.
+                    // C(n, 2) is computed directly instead of through n!, so n > 170 is fine.
+                    double coefficient = 0;
+                    if (n > 1) {
+                        coefficient = r / CombinatoricsUtils.binomialCoefficientDouble(n, 2);
+                    }
+                    System.out.println("\"" + row.get("a.name") + "\"\t"
+                            + "\"" + row.get("c.id") + "\"\t"
+                            + "\"" + df.format(coefficient) + "\"");
+                }
+            } else {
+                System.out.println("Physical Entity not found");
+            }
+        }
         return null;
     }
 
diff --git a/bionetdb-core/src/test/java/org/opencb/bionetdb/core/neo4j/Neo4JNetworkDBAdaptorTest.java b/bionetdb-core/src/test/java/org/opencb/bionetdb/core/neo4j/Neo4JNetworkDBAdaptorTest.java index 8ec667c..54df804 100644 --- 
a/bionetdb-core/src/test/java/org/opencb/bionetdb/core/neo4j/Neo4JNetworkDBAdaptorTest.java +++ b/bionetdb-core/src/test/java/org/opencb/bionetdb/core/neo4j/Neo4JNetworkDBAdaptorTest.java @@ -217,6 +217,13 @@ public void testBetweenness() throws Exception { } + @Test + public void testClusteringCoefficient() throws Exception { + loadTestData(); + networkDBAdaptor.clusteringCoefficient(new Query("id", "PEP")); + + } + private void loadTestData() throws URISyntaxException, IOException, NetworkDBException { BioPaxParser bioPaxParser = new BioPaxParser("L3"); Path inputPath = Paths.get(getClass().getResource("/Saccharomyces_cerevisiae.owl.gz").toURI()); diff --git a/pom.xml b/pom.xml index 4792f06..b969fc9 100644 --- a/pom.xml +++ b/pom.xml @@ -151,6 +151,11 @@ jersey-client ${jersey.version} + + org.apache.commons + commons-math3 + 3.5 + junit junit