diff --git a/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/ImportCommandExecutor.java b/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/ImportCommandExecutor.java index 57854df..d3726a6 100644 --- a/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/ImportCommandExecutor.java +++ b/bionetdb-app/src/main/java/org/opencb/bionetdb/app/cli/ImportCommandExecutor.java @@ -96,6 +96,7 @@ private void createCsvFiles() { long geneIndexingTime = 0; long proteinIndexingTime = 0; + long genePanelsTime = 0; long miRnaIndexingTime = 0; long bioPaxTime = 0; @@ -123,6 +124,16 @@ private void createCsvFiles() { proteinIndexingTime = (System.currentTimeMillis() - start) / 1000; logger.info("Protein indexing done in {} s", proteinIndexingTime); + // Gene panels support + if (Paths.get(inputPath + "/" + Neo4jCsvImporter.PANEL_DIRNAME).toFile().exists()) { + logger.info("Starting gene panels processing..."); + start = System.currentTimeMillis(); + importer.addGenePanels(Paths.get(inputPath + "/" + Neo4jCsvImporter.PANEL_DIRNAME), outputPath); + genePanelsTime = (System.currentTimeMillis() - start) / 1000; + logger.info("Gene panels processing done in {} s", genePanelsTime); + } + + // Indexing miRNA if (Paths.get(inputPath + "/" + Neo4jCsvImporter.MIRNA_FILENAME).toFile().exists()) { logger.info("Starting miRNA indexing..."); @@ -153,6 +164,7 @@ private void createCsvFiles() { logger.info("Gene indexing in {} s", geneIndexingTime); logger.info("Protein indexing in {} s", proteinIndexingTime); + logger.info("Gene panels processing in {} s", genePanelsTime); logger.info("miRNA indexing in {} s", miRnaIndexingTime); logger.info("BioPAX processing in {} s", bioPaxTime); logger.info("Variant processing in {} s", variantTime); diff --git a/bionetdb-core/pom.xml b/bionetdb-core/pom.xml index 0bc6eee..6b62c78 100644 --- a/bionetdb-core/pom.xml +++ b/bionetdb-core/pom.xml @@ -20,6 +20,10 @@ org.opencb.biodata biodata-formats + + org.opencb.biodata + biodata-models + org.opencb.commons commons-lib diff --git a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/network/Node.java b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/network/Node.java index ada42c4..2a10d92 100644 --- a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/network/Node.java +++ b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/network/Node.java @@ -85,9 +85,7 @@ public enum Type { EXON_OVERLAP("EXON_OVERLAP"), PROTEIN_KEYWORD("PROTEIN_KEYWORD"), - PANEL("PANEL"), - DISEASE_GROUP("DISEASE_GROUP"), - DISEASE_SUBGROUP("DISEASE_SUBGROUP"); + PANEL("PANEL"); private final String type; private final String parentType; diff --git a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/network/Relation.java b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/network/Relation.java index b5a578d..cfc0557 100644 --- a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/network/Relation.java +++ b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/network/Relation.java @@ -128,11 +128,12 @@ public enum Type { PROTEIN__PROTEIN_FEATURE("PROTEIN__PROTEIN_FEATURE"), SAMPLE__VARIANT_CALL("SAMPLE__VARIANT_CALL"), VARIANT_FILE_INFO__FILE("VARIANT_FILE_INFO__FILE"), - VARIANT_CALL__VARIANT_FILE_INFO("VARIANT_CALL__VARIANT_FILE_INFO"); + VARIANT_CALL__VARIANT_FILE_INFO("VARIANT_CALL__VARIANT_FILE_INFO"), // DNA__GENE("DNA__GENE"); -// PANEL__GENE("PANEL__GENE"), + PANEL__GENE("PANEL__GENE"); + // DISEASE_GROUP__PANEL("DISEASE_GROUP__PANEL"), // DISEASE_SUBGROUP__PANEL("DISEASE_SUBGROUP__PANEL"), // DISEASE_SUBGROUP__DISEASE_GROUP("DISEASE_SUBGROUP__DISEASE_GROUP"), diff --git a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/utils/CsvInfo.java b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/utils/CsvInfo.java index c6cda9d..56401c0 100644 --- a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/utils/CsvInfo.java +++ b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/utils/CsvInfo.java @@ -592,6 +592,11 @@ private Map> createNodeAttributes() { "source", "status"); nodeAttributes.put(Node.Type.GENE.toString(), new ArrayList<>(attrs)); + // Panel + attrs = Arrays.asList("panelId", "id", "name", "description", "phenotypeNames", "sourceId", "sourceName", + "sourceAuthor", "sourceProject", "sourceVersion", "creationDate", "modificationDate"); + nodeAttributes.put(Node.Type.PANEL.toString(), new ArrayList<>(attrs)); + // Drug attrs = Arrays.asList("drugId", "id", "name", "source", "type", "studyType"); nodeAttributes.put(Node.Type.DRUG.toString(), new ArrayList<>(attrs)); diff --git a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/utils/Neo4jCsvImporter.java b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/utils/Neo4jCsvImporter.java index 2691dcc..d181b42 100644 --- a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/utils/Neo4jCsvImporter.java +++ b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/utils/Neo4jCsvImporter.java @@ -1,11 +1,13 @@ package org.opencb.bionetdb.core.utils; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; import org.apache.commons.lang.StringUtils; import org.opencb.biodata.formats.protein.uniprot.v201504jaxb.DbReferenceType; import org.opencb.biodata.formats.protein.uniprot.v201504jaxb.Entry; import org.opencb.biodata.formats.protein.uniprot.v201504jaxb.FeatureType; import org.opencb.biodata.formats.protein.uniprot.v201504jaxb.KeywordType; +import org.opencb.biodata.models.clinical.interpretation.DiseasePanel; import org.opencb.biodata.models.core.Gene; import org.opencb.biodata.models.core.Transcript; import org.opencb.biodata.models.core.TranscriptTfbs; @@ -21,10 +23,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.BufferedReader; -import java.io.File; -import java.io.IOException; -import java.io.PrintWriter; +import java.io.*; import java.nio.file.Path; import java.util.*; @@ -32,6 +31,8 @@ public class Neo4jCsvImporter { public static final Object GENE_FILENAME = "genes.json"; public static final Object GENE_DBNAME = "genes.rocksdb"; + public static final Object PANEL_DIRNAME = "panels"; + public static final Object PROTEIN_FILENAME = "proteins.json"; public static final Object PROTEIN_DBNAME = "proteins.rocksdb"; @@ -675,6 +676,43 @@ public void indexingProteins(Path proteinPath, Path indexPath) throws IOExceptio csv.indexingProteins(proteinPath, indexPath); } + public void addGenePanels(Path panelPath, Path indexPath) throws IOException { + File[] panelFiles = panelPath.toFile().listFiles(); + + ObjectMapper mapper = new ObjectMapper(); + ObjectReader reader = mapper.reader(DiseasePanel.class); + + // Get CSV file writers + PrintWriter pwNode = csv.getCsvWriters().get(Node.Type.PANEL.toString()); + PrintWriter pwRel = csv.getCsvWriters().get(Relation.Type.PANEL__GENE.toString()); + + for (File panelFile: panelFiles) { + if (panelFile.getName().endsWith("json")) { + FileInputStream fis = new FileInputStream(panelFile); + byte[] data = new byte[(int) panelFile.length()]; + fis.read(data); + fis.close(); + + //String str = new String(data, "UTF-8"); + DiseasePanel panel = reader.readValue(data); + + // Create node and save CSV file + Node node = NodeBuilder.newNode(csv.getAndIncUid(), panel); + pwNode.println(csv.nodeLine(node)); + + for (DiseasePanel.GenePanel gene: panel.getGenes()) { + if (StringUtils.isNotEmpty(gene.getId())) { + Long geneUid = processGene(gene.getId(), gene.getName()); + if (geneUid != null) { + // Add relation to CSV file + pwRel.println(node.getUid() + "," + geneUid); + } + } + } + } + } + } + public void indexingMiRnas(Path miRnaPath, Path indexPath, boolean toImport) throws IOException { csv.indexingMiRnas(miRnaPath, indexPath); diff --git a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/utils/Neo4jImporter.java b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/utils/Neo4jImporter.java index 42bad77..c21c264 100644 --- a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/utils/Neo4jImporter.java +++ b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/utils/Neo4jImporter.java @@ -314,13 +314,13 @@ public Neo4jImporter() { attrs = Arrays.asList("panelId", "id", "name", "author", "version", "date", "sourceProject", "sourceId", "sourceVersion"); nodeAttributes.put(Node.Type.PANEL.toString(), new ArrayList<>(attrs)); - // Disease group - attrs = Arrays.asList("diseaseGroupId", "id", "name"); - nodeAttributes.put(Node.Type.DISEASE_GROUP.toString(), new ArrayList<>(attrs)); - - // Disease subgroup - attrs = Arrays.asList("diseaseSubGroupId", "id", "name"); - nodeAttributes.put(Node.Type.DISEASE_SUBGROUP.toString(), new ArrayList<>(attrs)); +// // Disease group +// attrs = Arrays.asList("diseaseGroupId", "id", "name"); +// nodeAttributes.put(Node.Type.DISEASE_GROUP.toString(), new ArrayList<>(attrs)); +// +// // Disease subgroup +// attrs = Arrays.asList("diseaseSubGroupId", "id", "name"); +// nodeAttributes.put(Node.Type.DISEASE_SUBGROUP.toString(), new ArrayList<>(attrs)); // Ontology attrs = Arrays.asList("ontologyId", "id", "name", "source"); diff --git a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/utils/NodeBuilder.java b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/utils/NodeBuilder.java index 570a723..5d175f0 100644 --- a/bionetdb-core/src/main/java/org/opencb/bionetdb/core/utils/NodeBuilder.java +++ b/bionetdb-core/src/main/java/org/opencb/bionetdb/core/utils/NodeBuilder.java @@ -2,6 +2,8 @@ import org.apache.commons.lang.StringUtils; import org.opencb.biodata.formats.protein.uniprot.v201504jaxb.*; +import org.opencb.biodata.models.clinical.interpretation.DiseasePanel; +import org.opencb.biodata.models.commons.Phenotype; import org.opencb.biodata.models.core.Gene; import org.opencb.biodata.models.core.Transcript; import org.opencb.biodata.models.core.TranscriptTfbs; @@ -299,24 +301,33 @@ public static Node newNode(long uid, DbReferenceType xref) { node.addAttribute("dbName", xref.getType()); return node; } -/* - public static Node newNode(long uid, Panel panel) { + + public static Node newNode(long uid, DiseasePanel panel) { Node node = new Node(uid, panel.getId(), panel.getName(), Node.Type.PANEL); - node.addAttribute("author", panel.getAuthor()); - node.addAttribute("version", panel.getVersion()); - node.addAttribute("date", panel.getDate()); + node.addAttribute("description", panel.getDescription()); + node.addAttribute("creationDate", panel.getCreationDate()); + node.addAttribute("modificationDate", panel.getModificationDate()); + if (ListUtils.isNotEmpty(panel.getPhenotypes())) { + StringBuilder sb = new StringBuilder(); + for (Phenotype phenotype: panel.getPhenotypes()) { + if (StringUtils.isNotEmpty(phenotype.getName())) { + if (sb.length() > 0) { + sb.append("--"); + } + sb.append(phenotype.getName()); + } + } + if (sb.length() > 0) { + node.addAttribute("phenotypeNames", sb.toString()); + } + } if (panel.getSource() != null) { - node.addAttribute("sourceProject", panel.getSource().getProject()); node.addAttribute("sourceId", panel.getSource().getId()); + node.addAttribute("sourceName", panel.getSource().getId()); + node.addAttribute("sourceAuthor", panel.getSource().getAuthor()); + node.addAttribute("sourceProject", panel.getSource().getProject()); node.addAttribute("sourceVersion", panel.getSource().getVersion()); } return node; } - - public static Node newNode(long uid, OntologyTerm term) { - Node node = new Node(uid, term.getId(), term.getName(), Node.Type.ONTOLOGY); - node.addAttribute("source", term.getSource()); - return node; - } - */ } diff --git a/pom.xml b/pom.xml index cb206d9..e3973c7 100644 --- a/pom.xml +++ b/pom.xml @@ -23,7 +23,7 @@ 0.2.0-SNAPSHOT 1.8 3.7.0-SNAPSHOT - 1.4.0-SNAPSHOT + 1.4.2-SNAPSHOT 4.6.0-SNAPSHOT 2.8.10 2.23