Skip to content

Commit

Permalink
core: add gene panels
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed Oct 22, 2018
1 parent 1d68b15 commit 158bdc6
Show file tree
Hide file tree
Showing 9 changed files with 99 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ private void createCsvFiles() {

long geneIndexingTime = 0;
long proteinIndexingTime = 0;
long genePanelsTime = 0;
long miRnaIndexingTime = 0;
long bioPaxTime = 0;

Expand Down Expand Up @@ -123,6 +124,16 @@ private void createCsvFiles() {
proteinIndexingTime = (System.currentTimeMillis() - start) / 1000;
logger.info("Protein indexing done in {} s", proteinIndexingTime);

// Gene panels support
if (Paths.get(inputPath + "/" + Neo4jCsvImporter.PANEL_DIRNAME).toFile().exists()) {
logger.info("Starting gene panels processing...");
start = System.currentTimeMillis();
importer.addGenePanels(Paths.get(inputPath + "/" + Neo4jCsvImporter.PANEL_DIRNAME), outputPath);
genePanelsTime = (System.currentTimeMillis() - start) / 1000;
logger.info("Gene panels processing done in {} s", genePanelsTime);
}


// Indexing miRNA
if (Paths.get(inputPath + "/" + Neo4jCsvImporter.MIRNA_FILENAME).toFile().exists()) {
logger.info("Starting miRNA indexing...");
Expand Down Expand Up @@ -153,6 +164,7 @@ private void createCsvFiles() {

logger.info("Gene indexing in {} s", geneIndexingTime);
logger.info("Protein indexing in {} s", proteinIndexingTime);
logger.info("Gene panels processing in {} s", genePanelsTime);
logger.info("miRNA indexing in {} s", miRnaIndexingTime);
logger.info("BioPAX processing in {} s", bioPaxTime);
logger.info("Variant processing in {} s", variantTime);
Expand Down
4 changes: 4 additions & 0 deletions bionetdb-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
<groupId>org.opencb.biodata</groupId>
<artifactId>biodata-formats</artifactId>
</dependency>
<dependency>
<groupId>org.opencb.biodata</groupId>
<artifactId>biodata-models</artifactId>
</dependency>
<dependency>
<groupId>org.opencb.commons</groupId>
<artifactId>commons-lib</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,7 @@ public enum Type {
EXON_OVERLAP("EXON_OVERLAP"),
PROTEIN_KEYWORD("PROTEIN_KEYWORD"),

PANEL("PANEL"),
DISEASE_GROUP("DISEASE_GROUP"),
DISEASE_SUBGROUP("DISEASE_SUBGROUP");
PANEL("PANEL");

private final String type;
private final String parentType;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,12 @@ public enum Type {
PROTEIN__PROTEIN_FEATURE("PROTEIN__PROTEIN_FEATURE"),
SAMPLE__VARIANT_CALL("SAMPLE__VARIANT_CALL"),
VARIANT_FILE_INFO__FILE("VARIANT_FILE_INFO__FILE"),
VARIANT_CALL__VARIANT_FILE_INFO("VARIANT_CALL__VARIANT_FILE_INFO");
VARIANT_CALL__VARIANT_FILE_INFO("VARIANT_CALL__VARIANT_FILE_INFO"),

// DNA__GENE("DNA__GENE");

// PANEL__GENE("PANEL__GENE"),
PANEL__GENE("PANEL__GENE");

// DISEASE_GROUP__PANEL("DISEASE_GROUP__PANEL"),
// DISEASE_SUBGROUP__PANEL("DISEASE_SUBGROUP__PANEL"),
// DISEASE_SUBGROUP__DISEASE_GROUP("DISEASE_SUBGROUP__DISEASE_GROUP"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,11 @@ private Map<String, List<String>> createNodeAttributes() {
"source", "status");
nodeAttributes.put(Node.Type.GENE.toString(), new ArrayList<>(attrs));

// Panel
attrs = Arrays.asList("panelId", "id", "name", "description", "phenotypeNames", "sourceId", "sourceName",
"sourceAuthor", "sourceProject", "sourceVersion", "creationDate", "modificationDate");
nodeAttributes.put(Node.Type.PANEL.toString(), new ArrayList<>(attrs));

// Drug
attrs = Arrays.asList("drugId", "id", "name", "source", "type", "studyType");
nodeAttributes.put(Node.Type.DRUG.toString(), new ArrayList<>(attrs));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package org.opencb.bionetdb.core.utils;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectReader;
import org.apache.commons.lang.StringUtils;
import org.opencb.biodata.formats.protein.uniprot.v201504jaxb.DbReferenceType;
import org.opencb.biodata.formats.protein.uniprot.v201504jaxb.Entry;
import org.opencb.biodata.formats.protein.uniprot.v201504jaxb.FeatureType;
import org.opencb.biodata.formats.protein.uniprot.v201504jaxb.KeywordType;
import org.opencb.biodata.models.clinical.interpretation.DiseasePanel;
import org.opencb.biodata.models.core.Gene;
import org.opencb.biodata.models.core.Transcript;
import org.opencb.biodata.models.core.TranscriptTfbs;
Expand All @@ -21,17 +23,16 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.*;
import java.nio.file.Path;
import java.util.*;

public class Neo4jCsvImporter {
public static final Object GENE_FILENAME = "genes.json";
public static final Object GENE_DBNAME = "genes.rocksdb";

public static final Object PANEL_DIRNAME = "panels";

public static final Object PROTEIN_FILENAME = "proteins.json";
public static final Object PROTEIN_DBNAME = "proteins.rocksdb";

Expand Down Expand Up @@ -675,6 +676,43 @@ public void indexingProteins(Path proteinPath, Path indexPath) throws IOExceptio
csv.indexingProteins(proteinPath, indexPath);
}

/**
 * Reads the gene panel JSON files found in a directory and appends them to the CSV output:
 * one PANEL node line per panel, plus one PANEL__GENE relation line for every panel gene
 * that has a non-empty ID and for which {@code processGene} returns a UID.
 *
 * @param panelPath directory holding the panel files; only files whose name ends with "json" are read
 * @param indexPath not used by this method
 * @throws IOException if the directory cannot be listed, or a panel file cannot be read or parsed
 */
public void addGenePanels(Path panelPath, Path indexPath) throws IOException {
    File[] panelFiles = panelPath.toFile().listFiles();
    if (panelFiles == null) {
        // File.listFiles() returns null (rather than an empty array) when the path is not a
        // directory or an I/O error occurs; fail with a clear message instead of an NPE below
        throw new IOException("Unable to list gene panel files in " + panelPath);
    }

    ObjectMapper mapper = new ObjectMapper();
    ObjectReader reader = mapper.readerFor(DiseasePanel.class);

    // Get CSV file writers
    PrintWriter pwNode = csv.getCsvWriters().get(Node.Type.PANEL.toString());
    PrintWriter pwRel = csv.getCsvWriters().get(Relation.Type.PANEL__GENE.toString());

    for (File panelFile: panelFiles) {
        if (!panelFile.getName().endsWith("json")) {
            continue;
        }

        // Parse straight from the stream: a single InputStream.read(byte[]) call is not
        // guaranteed to fill the buffer, and try-with-resources closes the file even when
        // reading or parsing fails
        DiseasePanel panel;
        try (InputStream is = new FileInputStream(panelFile)) {
            panel = reader.readValue(is);
        }

        // Create node and save CSV file
        Node node = NodeBuilder.newNode(csv.getAndIncUid(), panel);
        pwNode.println(csv.nodeLine(node));

        // A panel without a gene list still gets its node, just no relations
        if (panel.getGenes() == null) {
            continue;
        }
        for (DiseasePanel.GenePanel gene: panel.getGenes()) {
            if (StringUtils.isNotEmpty(gene.getId())) {
                Long geneUid = processGene(gene.getId(), gene.getName());
                if (geneUid != null) {
                    // Add relation to CSV file
                    pwRel.println(node.getUid() + "," + geneUid);
                }
            }
        }
    }
}

public void indexingMiRnas(Path miRnaPath, Path indexPath, boolean toImport) throws IOException {
csv.indexingMiRnas(miRnaPath, indexPath);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -314,13 +314,13 @@ public Neo4jImporter() {
attrs = Arrays.asList("panelId", "id", "name", "author", "version", "date", "sourceProject", "sourceId", "sourceVersion");
nodeAttributes.put(Node.Type.PANEL.toString(), new ArrayList<>(attrs));

// Disease group
attrs = Arrays.asList("diseaseGroupId", "id", "name");
nodeAttributes.put(Node.Type.DISEASE_GROUP.toString(), new ArrayList<>(attrs));

// Disease subgroup
attrs = Arrays.asList("diseaseSubGroupId", "id", "name");
nodeAttributes.put(Node.Type.DISEASE_SUBGROUP.toString(), new ArrayList<>(attrs));
// // Disease group
// attrs = Arrays.asList("diseaseGroupId", "id", "name");
// nodeAttributes.put(Node.Type.DISEASE_GROUP.toString(), new ArrayList<>(attrs));
//
// // Disease subgroup
// attrs = Arrays.asList("diseaseSubGroupId", "id", "name");
// nodeAttributes.put(Node.Type.DISEASE_SUBGROUP.toString(), new ArrayList<>(attrs));

// Ontology
attrs = Arrays.asList("ontologyId", "id", "name", "source");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import org.apache.commons.lang.StringUtils;
import org.opencb.biodata.formats.protein.uniprot.v201504jaxb.*;
import org.opencb.biodata.models.clinical.interpretation.DiseasePanel;
import org.opencb.biodata.models.commons.Phenotype;
import org.opencb.biodata.models.core.Gene;
import org.opencb.biodata.models.core.Transcript;
import org.opencb.biodata.models.core.TranscriptTfbs;
Expand Down Expand Up @@ -299,24 +301,33 @@ public static Node newNode(long uid, DbReferenceType xref) {
node.addAttribute("dbName", xref.getType());
return node;
}
/*
public static Node newNode(long uid, Panel panel) {

/**
 * Creates a PANEL node from a disease panel.
 *
 * The node takes its ID and name from the panel and carries the panel description, creation and
 * modification dates. The non-empty phenotype names are joined with "--" into the
 * "phenotypeNames" attribute (omitted when there are none), and the source fields are copied
 * when the panel has a source.
 *
 * @param uid   UID to assign to the new node
 * @param panel disease panel to convert
 * @return the new PANEL node
 */
public static Node newNode(long uid, DiseasePanel panel) {
    Node node = new Node(uid, panel.getId(), panel.getName(), Node.Type.PANEL);
    node.addAttribute("description", panel.getDescription());
    node.addAttribute("creationDate", panel.getCreationDate());
    node.addAttribute("modificationDate", panel.getModificationDate());
    if (ListUtils.isNotEmpty(panel.getPhenotypes())) {
        StringBuilder sb = new StringBuilder();
        for (Phenotype phenotype: panel.getPhenotypes()) {
            if (StringUtils.isNotEmpty(phenotype.getName())) {
                if (sb.length() > 0) {
                    sb.append("--");
                }
                sb.append(phenotype.getName());
            }
        }
        if (sb.length() > 0) {
            node.addAttribute("phenotypeNames", sb.toString());
        }
    }
    if (panel.getSource() != null) {
        node.addAttribute("sourceId", panel.getSource().getId());
        // Use the source name here; the ID is already stored under "sourceId"
        node.addAttribute("sourceName", panel.getSource().getName());
        node.addAttribute("sourceAuthor", panel.getSource().getAuthor());
        node.addAttribute("sourceProject", panel.getSource().getProject());
        node.addAttribute("sourceVersion", panel.getSource().getVersion());
    }
    return node;
}
public static Node newNode(long uid, OntologyTerm term) {
Node node = new Node(uid, term.getId(), term.getName(), Node.Type.ONTOLOGY);
node.addAttribute("source", term.getSource());
return node;
}
*/
}
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
<bionetdb.version>0.2.0-SNAPSHOT</bionetdb.version>
<java.version>1.8</java.version>
<java-common-libs.version>3.7.0-SNAPSHOT</java-common-libs.version>
<biodata.version>1.4.0-SNAPSHOT</biodata.version>
<biodata.version>1.4.2-SNAPSHOT</biodata.version>
<cellbase.version>4.6.0-SNAPSHOT</cellbase.version>
<jackson.version>2.8.10</jackson.version>
<jersey.version>2.23</jersey.version>
Expand Down

0 comments on commit 158bdc6

Please sign in to comment.