diff --git a/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/admin/AdminCliOptionsParser.java b/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/admin/AdminCliOptionsParser.java
index d991a6e..fa710c5 100644
--- a/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/admin/AdminCliOptionsParser.java
+++ b/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/admin/AdminCliOptionsParser.java
@@ -109,6 +109,9 @@ public class BuildCommandOptions {
         @Parameter(names = {"-o", "--output"}, description = "Output directory where to save the CSV files to import", required = true, arity = 1)
         public String output;
 
+        @Parameter(names = {"--add-network-file"}, description = "JSON file containing a BioNetDB network", arity = 1)
+        public List<String> networkFiles;
+
         @Parameter(names = {"--exclude"}, description = "Exclude information separated by comma, e.g.:'XREF_DBNAME:Reactome Database ID Release 63'", arity = 1)
         public List<String> exclude;
     }
diff --git a/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/admin/executors/BuildCommandExecutor.java b/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/admin/executors/BuildCommandExecutor.java
index 03335d6..bb7b471 100644
--- a/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/admin/executors/BuildCommandExecutor.java
+++ b/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/admin/executors/BuildCommandExecutor.java
@@ -1,7 +1,10 @@
 package org.opencb.bionetdb.app.cli.admin.executors;
 
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.databind.MapperFeature;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.SerializationFeature;
+import htsjdk.samtools.util.StringUtil;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.opencb.bionetdb.app.cli.CommandExecutor;
@@ -11,12 +14,15 @@
 import org.opencb.bionetdb.core.io.SbmlParser;
 import org.opencb.bionetdb.core.io.SifParser;
 import org.opencb.bionetdb.core.models.network.Network;
+import org.opencb.bionetdb.core.models.network.Node;
+import org.opencb.bionetdb.core.models.network.Relation;
 import org.opencb.bionetdb.lib.BioNetDbManager;
 import org.opencb.bionetdb.lib.utils.Builder;
 import org.opencb.commons.utils.FileUtils;
 import org.opencb.commons.utils.ListUtils;
 
 import java.io.BufferedWriter;
+import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.nio.file.Path;
@@ -47,7 +53,7 @@ public void execute() {
             FileUtils.checkDirectory(outputPath);
 
             BioNetDbManager manager = new BioNetDbManager(configuration);
-            manager.build(inputPath, outputPath, buildCommandOptions.exclude);
+            manager.build(inputPath, outputPath, buildCommandOptions.networkFiles, buildCommandOptions.exclude);
         } catch (IOException | BioNetDBException e) {
             e.printStackTrace();
         }
diff --git a/bionetdb-app/src/test/java/org/opencb/bionetdb/app/BioNetDBMainTest.java b/bionetdb-app/src/test/java/org/opencb/bionetdb/app/BioNetDBMainTest.java
index f753b36..e0af41c 100644
--- a/bionetdb-app/src/test/java/org/opencb/bionetdb/app/BioNetDBMainTest.java
+++ b/bionetdb-app/src/test/java/org/opencb/bionetdb/app/BioNetDBMainTest.java
@@ -1,8 +1,16 @@
 package org.opencb.bionetdb.app;
 
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.databind.MapperFeature;
+import com.fasterxml.jackson.databind.ObjectMapper;
 import org.junit.Test;
+import org.opencb.bionetdb.core.models.network.Network;
+import org.opencb.bionetdb.core.models.network.Node;
+import org.opencb.bionetdb.core.models.network.Relation;
 
-import static org.junit.Assert.*;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
 
 public class BioNetDBMainTest {
 
@@ -12,4 +20,58 @@ public void createCsvClinicalAnalysis() {
         String cmdLine = "~/appl/bionetdb/build/bin/bionetdb.sh create-csv -i " + caPath + "/input/ -o csv/ --clinical-analysis";
     }
 
+    private void createNetworks() {
+        long uid = 0;
+
+        ObjectMapper mapper = new ObjectMapper();
+        mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
+        mapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);
+
+        Network network;
+        Node node1, node2, node3;
+        Relation relation1, relation2, relation3;
+
+        network = new Network("net1", "net1", "Network #1");
+        network.setNodes(new ArrayList<>());
+        network.setRelations(new ArrayList<>());
+
+        node1 = new Node(uid++, "ENSG00000078808", "SDF4", Node.Type.GENE);
+        network.getNodes().add(node1);
+        node2 = new Node(uid++, null, "COCA", Node.Type.DRUG);
+        network.getNodes().add(node2);
+        relation1 = new Relation(uid++, "rel1", node1.getUid(), Node.Type.GENE, node2.getUid(), Node.Type.DRUG,
+                Relation.Type.GENE__DRUG);
+        network.getRelations().add(relation1);
+
+        try {
+            mapper.writer().writeValue(new File("/tmp/network1.json"), network);
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+
+
+        network = new Network("net2", "net2", "Network #2");
+        network.setNodes(new ArrayList<>());
+        network.setRelations(new ArrayList<>());
+
+        node1 = new Node(uid++, "ENSG00000066666", "SDF666", Node.Type.GENE);
+        network.getNodes().add(node1);
+        node2 = new Node(uid++, null, "COCA", Node.Type.DRUG);
+        network.getNodes().add(node2);
+        node3 = new Node(uid++, "ALCOHOL", "ALCOHOL", Node.Type.DRUG);
+        network.getNodes().add(node3);
+        relation2 = new Relation(uid++, "rel2", node1.getUid(), Node.Type.GENE, node2.getUid(), Node.Type.DRUG,
+                Relation.Type.GENE__DRUG);
+        network.getRelations().add(relation2);
+        relation3 = new Relation(uid++, "rel3", node1.getUid(), Node.Type.GENE, node3.getUid(), Node.Type.DRUG,
+                Relation.Type.GENE__DRUG);
+        network.getRelations().add(relation3);
+
+        try {
+            mapper.writer().writeValue(new File("/tmp/network2.json"), network);
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+
+    }
 }
\ No newline at end of file
diff --git a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/models/network/Node.java b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/models/network/Node.java
index 845c1e5..595d04e 100644
--- a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/models/network/Node.java
+++ b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/models/network/Node.java
@@ -171,6 +171,10 @@ public static boolean isPhysicalEntity(Node node) {
         }
     }
 
+    public Node() {
+        this(-1, null, null, null, null);
+    }
+
     public Node(long uid) {
         this(uid, null, null, null, null);
     }
diff --git a/bionetdb-lib/src/main/java/org/opencb/bionetdb/lib/BioNetDbManager.java b/bionetdb-lib/src/main/java/org/opencb/bionetdb/lib/BioNetDbManager.java
index 5555b11..1a04406 100644
--- a/bionetdb-lib/src/main/java/org/opencb/bionetdb/lib/BioNetDbManager.java
+++ b/bionetdb-lib/src/main/java/org/opencb/bionetdb/lib/BioNetDbManager.java
@@ -97,7 +97,14 @@ public void download(Path outDir) throws IOException {
     //-------------------------------------------------------------------------
 
     public void build(Path inputPath, Path outputPath, List<String> exclude) throws IOException {
+        build(inputPath, outputPath, null, exclude);
+    }
+
+    public void build(Path inputPath, Path outputPath, List<String> networkFiles, List<String> exclude) throws IOException {
         Builder builder = new Builder(inputPath, outputPath, parseFilters(exclude));
+        if (CollectionUtils.isNotEmpty(networkFiles)) {
+            builder.setAdditionalNeworkFiles(networkFiles);
+        }
         builder.build();
     }
 
diff --git a/bionetdb-lib/src/main/java/org/opencb/bionetdb/lib/utils/Builder.java b/bionetdb-lib/src/main/java/org/opencb/bionetdb/lib/utils/Builder.java
index f42f89c..5b01f26 100644
--- a/bionetdb-lib/src/main/java/org/opencb/bionetdb/lib/utils/Builder.java
+++ b/bionetdb-lib/src/main/java/org/opencb/bionetdb/lib/utils/Builder.java
@@ -12,6 +12,7 @@
 import org.opencb.biodata.models.core.Xref;
 import org.opencb.biodata.models.variant.Variant;
 import org.opencb.biodata.models.variant.avro.*;
+import org.opencb.bionetdb.core.models.network.Network;
 import org.opencb.bionetdb.core.models.network.Node;
 import org.opencb.bionetdb.core.models.network.Relation;
 import org.opencb.bionetdb.lib.db.Neo4jBioPaxBuilder;
@@ -26,6 +27,7 @@
 import java.io.IOException;
 import java.io.PrintWriter;
 import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.*;
 
 public class Builder {
@@ -45,6 +47,8 @@ public class Builder {
 
     public static final Object CLINICAL_VARIANT_FILENAME = "clinical_variants.full.json";
 
+    private List<String> additionalNeworkFiles;
+
     private CsvInfo csv;
     private Path inputPath;
     private Path outputPath;
@@ -53,20 +57,30 @@ public class Builder {
 
     protected static Logger logger;
 
+    public Builder(Path inputPath, Path outputPath, Map<String, Set<String>> filters) {
+
+        this.inputPath = inputPath;
+        this.outputPath = outputPath;
+        this.filters = filters;
+
+
+        // Prepare CSV object
+        csv = new CsvInfo(inputPath, outputPath);
+
+        // Prepare jackson writer (object to string)
+        mapper = new ObjectMapper();
+        mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
+        mapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);
+
+        this.logger = LoggerFactory.getLogger(this.getClass().toString());
+    }
+
     public void build() throws IOException {
         long start;
 
         // Open CSV files
         csv.openCSVFiles();
 
-        long ensemblGeneBuildTime = 0;
-        long refSeqGeneBuildTime = 0;
-        long proteinBuildTime = 0;
-        long genePanelBuildTime = 0;
-        long bioPaxBuildTime = 0;
-        long clinvarBuildTime = 0;
-
-
         // Check input files
         File ensemblGeneFile = new File(inputPath + "/" + ENSEMBL_GENE_FILENAME);
         if (!ensemblGeneFile.exists()) {
@@ -105,31 +119,27 @@ public void build() throws IOException {
             logger.info("Processing Ensembl genes...");
             start = System.currentTimeMillis();
             buildGenes(ensemblGeneFile.toPath());
-            ensemblGeneBuildTime = (System.currentTimeMillis() - start) / 1000;
-            logger.info("Ensembl gene processing done in {} s", ensemblGeneBuildTime);
+            logger.info("Ensembl gene processing done in {} s", (System.currentTimeMillis() - start) / 1000);
         }
 
         if (refSeqGeneFile.exists()) {
             logger.info("Processing RefSeq genes...");
             start = System.currentTimeMillis();
             buildGenes(refSeqGeneFile.toPath());
-            refSeqGeneBuildTime = (System.currentTimeMillis() - start) / 1000;
-            logger.info("RefSeq gene processing done in {} s", refSeqGeneBuildTime);
+            logger.info("RefSeq gene processing done in {} s", (System.currentTimeMillis() - start) / 1000);
         }
 
         // Processing proteins
         logger.info("Processing proteins...");
         start = System.currentTimeMillis();
         buildProteins(proteinFile.toPath());
-        proteinBuildTime = (System.currentTimeMillis() - start) / 1000;
-        logger.info("Protein processing done in {} s", proteinBuildTime);
+        logger.info("Protein processing done in {} s", (System.currentTimeMillis() - start) / 1000);
 
         // Gene panels support
         logger.info("Processing gene panels...");
         start = System.currentTimeMillis();
         buildGenePanels(panelFile.toPath());
-        genePanelBuildTime = (System.currentTimeMillis() - start) / 1000;
-        logger.info("Gene panel processing done in {} s", genePanelBuildTime);
+        logger.info("Gene panel processing done in {} s", (System.currentTimeMillis() - start) / 1000);
 
 
         // Procesing BioPAX file
@@ -138,44 +148,27 @@ public void build() throws IOException {
         start = System.currentTimeMillis();
         bioPAXImporter.build(networkFile.toPath());
         biopaxProcessing.post();
-        bioPaxBuildTime = (System.currentTimeMillis() - start) / 1000;
+        logger.info("Processing BioPax/reactome file done in {} s", (System.currentTimeMillis() - start) / 1000);
 
 
         // Processing clinical variants
         logger.info("Processing clinical variants...");
         start = System.currentTimeMillis();
         buildClinicalVariants(clinicalVariantFile.toPath());
-        clinvarBuildTime = (System.currentTimeMillis() - start) / 1000;
-        logger.info("Processing clinical variants done in {} s", clinvarBuildTime);
+        logger.info("Processing clinical variants done in {} s", (System.currentTimeMillis() - start) / 1000);
+
+        // Processing additional networks
+        if (CollectionUtils.isNotEmpty(additionalNeworkFiles)) {
+            for (String additionalNeworkFile: additionalNeworkFiles) {
+                logger.info("Processing additional network file {}...", additionalNeworkFile);
+                start = System.currentTimeMillis();
+                processAdditionalNetwork(additionalNeworkFile);
+                logger.info("Additional network file {} done in {} s", additionalNeworkFile, (System.currentTimeMillis() - start) / 1000);
+            }
+        }
 
         // Close CSV files
         csv.close();
-
-        logger.info("Ensembl gene build time: {} s", ensemblGeneBuildTime);
-        logger.info("RefSeq gene build time: {} s", refSeqGeneBuildTime);
-        logger.info("Protein build time: {} s", proteinBuildTime);
-        logger.info("Gene panel build time: {} s", genePanelBuildTime);
-        logger.info("BioPAX build time: {} s", bioPaxBuildTime);
-        logger.info("Clinical variant build time: {} s", clinvarBuildTime);
-    }
-
-
-    public Builder(Path inputPath, Path outputPath, Map<String, Set<String>> filters) {
-
-        this.inputPath = inputPath;
-        this.outputPath = outputPath;
-        this.filters = filters;
-
-
-        // Prepare CSV object
-        csv = new CsvInfo(inputPath, outputPath);
-
-        // Prepare jackson writer (object to string)
-        mapper = new ObjectMapper();
-        mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
-        mapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);
-
-        this.logger = LoggerFactory.getLogger(this.getClass().toString());
     }
 
     //-------------------------------------------------------------------------
@@ -1018,6 +1011,65 @@ private Node createVariantNode(Variant variant, Long varUid) {
         return varNode;
     }
 
+
+    /**
+     * Appends the nodes and relations of a network stored in a JSON file to the CSV files.
+     * Nodes that the CSV object already knows keep their UID, any other node gets a new one;
+     * relations are written using the resulting UIDs.
+     *
+     * @param additionalNeworkFilename JSON file containing the network
+     * @throws IOException if the JSON file cannot be read
+     */
+    private void processAdditionalNetwork(String additionalNeworkFilename) throws IOException {
+        // Check file
+        File addNetworkFile = Paths.get(additionalNeworkFilename).toFile();
+        if (!addNetworkFile.exists()) {
+            logger.warn("Additional network file {} does not exist", additionalNeworkFilename);
+            return;
+        }
+
+        ObjectMapper objectMapper = new ObjectMapper();
+        Network network = objectMapper.readValue(addNetworkFile, Network.class);
+
+        // UID used in the JSON file -> UID used in the CSV files
+        Map<Long, Long> nodeUidMap = new HashMap<>();
+
+        // First, nodes
+        if (CollectionUtils.isNotEmpty(network.getNodes())) {
+            for (Node node: network.getNodes()) {
+                Long uid = csv.getLong(node.getId(), node.getType().name());
+                if (uid == null) {
+                    // Node does not exist yet: assign it a new UID
+                    nodeUidMap.put(node.getUid(), csv.getAndIncUid());
+                    // Update UID and append node to the CSV file
+                    node.setUid(nodeUidMap.get(node.getUid()));
+                    csv.getCsvWriters().get(node.getType().toString()).println(csv.nodeLine(node));
+                } else {
+                    // Node already exists: reuse its UID
+                    nodeUidMap.put(node.getUid(), uid);
+                }
+            }
+        }
+
+        // Second, relations
+        if (CollectionUtils.isNotEmpty(network.getRelations())) {
+            for (Relation relation: network.getRelations()) {
+                relation.setUid(csv.getAndIncUid());
+
+                Long origUid = nodeUidMap.get(relation.getOrigUid());
+                Long destUid = nodeUidMap.get(relation.getDestUid());
+                PrintWriter pw = csv.getCsvWriters().get(relation.getType().toString());
+                if (origUid == null || destUid == null || pw == null) {
+                    logger.warn("Skipping relation of type {} ({} -> {}): unknown node or no CSV writer for that type",
+                            relation.getType(), relation.getOrigUid(), relation.getDestUid());
+                    continue;
+                }
+                pw.println(csv.relationLine(origUid, destUid));
+            }
+        }
+    }
+
+
 //
 //    public Long processClinicalAnalysis(ClinicalAnalysis clinicalAnalysis) throws IOException {
 //        Node clinicalAnalysisNode = null;
@@ -1710,4 +1762,13 @@ private void createVariantObjectNode(Variant variant, Node variantNode) throws I
         pw = csv.getCsvWriters().get(Relation.Type.VARIANT__VARIANT_OBJECT.toString());
         pw.println(variantNode.getUid() + CsvInfo.SEPARATOR + variantObjectNode.getUid());
     }
+
+    public List<String> getAdditionalNeworkFiles() {
+        return additionalNeworkFiles;
+    }
+
+    public Builder setAdditionalNeworkFiles(List<String> additionalNeworkFiles) {
+        this.additionalNeworkFiles = additionalNeworkFiles;
+        return this;
+    }
 }