diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..70a550b
Binary files /dev/null and b/.DS_Store differ
diff --git a/build.xml b/build.xml
new file mode 100644
index 0000000..1af8d10
--- /dev/null
+++ b/build.xml
@@ -0,0 +1,74 @@
+
+
+
+
+
+
+
+
+
+
+ Builds, tests, and runs the project ExpressionTable.
+
+
+
diff --git a/build/built-jar.properties b/build/built-jar.properties
new file mode 100644
index 0000000..23598fd
--- /dev/null
+++ b/build/built-jar.properties
@@ -0,0 +1,12 @@
+#Thu, 20 Jun 2013 16:45:22 +0400
+
+
+/Users/dashazhernakova/Documents/NetBeansProjects/ExpressionTable=
+
+/Users/dashazhernakova/Documents/NetBeansProjects/Correlation=
+
+/Users/dashazhernakova/Documents/NetBeansProjects/GeneticaLibraries=
+
+/Users/dashazhernakova/Documents/NetBeansProjects/eqtlmappingpipeline=
+
+/Users/dashazhernakova/Documents/NetBeansProjects/processTmap=
diff --git a/build/classes/expressiontable/Coexpression.class b/build/classes/expressiontable/Coexpression.class
new file mode 100644
index 0000000..fb5772e
Binary files /dev/null and b/build/classes/expressiontable/Coexpression.class differ
diff --git a/build/classes/expressiontable/ExpressionTable.class b/build/classes/expressiontable/ExpressionTable.class
new file mode 100644
index 0000000..9e65a70
Binary files /dev/null and b/build/classes/expressiontable/ExpressionTable.class differ
diff --git a/build/classes/expressiontable/Joiner.class b/build/classes/expressiontable/Joiner.class
new file mode 100644
index 0000000..0b03a44
Binary files /dev/null and b/build/classes/expressiontable/Joiner.class differ
diff --git a/build/classes/expressiontable/Normalizer.class b/build/classes/expressiontable/Normalizer.class
new file mode 100644
index 0000000..7606bcf
Binary files /dev/null and b/build/classes/expressiontable/Normalizer.class differ
diff --git a/build/classes/expressiontable/ProbeToGeneConverter.class b/build/classes/expressiontable/ProbeToGeneConverter.class
new file mode 100644
index 0000000..2daea92
Binary files /dev/null and b/build/classes/expressiontable/ProbeToGeneConverter.class differ
diff --git a/build/classes/expressiontable/Sorter$ValueComparator.class b/build/classes/expressiontable/Sorter$ValueComparator.class
new file mode 100644
index 0000000..88fb5fc
Binary files /dev/null and b/build/classes/expressiontable/Sorter$ValueComparator.class differ
diff --git a/build/classes/expressiontable/Sorter.class b/build/classes/expressiontable/Sorter.class
new file mode 100644
index 0000000..cbb43be
Binary files /dev/null and b/build/classes/expressiontable/Sorter.class differ
diff --git a/build/classes/expressiontable/Subtable.class b/build/classes/expressiontable/Subtable.class
new file mode 100644
index 0000000..e1434f8
Binary files /dev/null and b/build/classes/expressiontable/Subtable.class differ
diff --git a/dist/ExpressionTable.jar b/dist/ExpressionTable.jar
new file mode 100644
index 0000000..9247a52
Binary files /dev/null and b/dist/ExpressionTable.jar differ
diff --git a/dist/README.TXT b/dist/README.TXT
new file mode 100644
index 0000000..e9e3676
--- /dev/null
+++ b/dist/README.TXT
@@ -0,0 +1,32 @@
+========================
+BUILD OUTPUT DESCRIPTION
+========================
+
+When you build a Java application project that has a main class, the IDE
+automatically copies all of the JAR
+files on the project's classpath to your project's dist/lib folder. The IDE
+also adds each of the JAR files to the Class-Path element in the application
+JAR file's manifest file (MANIFEST.MF).
+
+To run the project from the command line, go to the dist folder and
+type the following:
+
+java -jar "ExpressionTable.jar"
+
+To distribute this project, zip up the dist folder (including the lib folder)
+and distribute the ZIP file.
+
+Notes:
+
+* If two JAR files on the project classpath have the same name, only the first
+JAR file is copied to the lib folder.
+* Only JAR files are copied to the lib folder.
+If the classpath contains other types of files or folders, these files (folders)
+are not copied.
+* If a library on the project's classpath also has a Class-Path element
+specified in the manifest, the content of the Class-Path element has to be on
+the project's runtime path.
+* To set a main class in a standard Java project, right-click the project node
+in the Projects window and choose Properties. Then click Run and enter the
+class name in the Main Class field. Alternatively, you can manually type the
+class name in the manifest Main-Class element.
diff --git a/dist/lib/Correlation.jar b/dist/lib/Correlation.jar
new file mode 100644
index 0000000..33e0caf
Binary files /dev/null and b/dist/lib/Correlation.jar differ
diff --git a/dist/lib/GeneticaLibraries.jar b/dist/lib/GeneticaLibraries.jar
new file mode 100644
index 0000000..78afa14
Binary files /dev/null and b/dist/lib/GeneticaLibraries.jar differ
diff --git a/dist/lib/colt.jar b/dist/lib/colt.jar
new file mode 100644
index 0000000..a7192f6
Binary files /dev/null and b/dist/lib/colt.jar differ
diff --git a/dist/lib/commons-math-2.1.jar b/dist/lib/commons-math-2.1.jar
new file mode 100644
index 0000000..43b4b36
Binary files /dev/null and b/dist/lib/commons-math-2.1.jar differ
diff --git a/dist/lib/eQTLMappingPipeline.jar b/dist/lib/eQTLMappingPipeline.jar
new file mode 100644
index 0000000..99708aa
Binary files /dev/null and b/dist/lib/eQTLMappingPipeline.jar differ
diff --git a/dist/lib/jsc.jar b/dist/lib/jsc.jar
new file mode 100644
index 0000000..88d2af0
Binary files /dev/null and b/dist/lib/jsc.jar differ
diff --git a/dist/lib/jscicore.jar b/dist/lib/jscicore.jar
new file mode 100644
index 0000000..2fdca39
Binary files /dev/null and b/dist/lib/jscicore.jar differ
diff --git a/manifest.mf b/manifest.mf
new file mode 100644
index 0000000..328e8e5
--- /dev/null
+++ b/manifest.mf
@@ -0,0 +1,3 @@
+Manifest-Version: 1.0
+X-COMMENT: Main-Class will be added automatically by build
+
diff --git a/nbproject/build-impl.xml b/nbproject/build-impl.xml
new file mode 100644
index 0000000..404c3b0
--- /dev/null
+++ b/nbproject/build-impl.xml
@@ -0,0 +1,1453 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Must set src.dir
+ Must set test.src.dir
+ Must set build.dir
+ Must set dist.dir
+ Must set build.classes.dir
+ Must set dist.javadoc.dir
+ Must set build.test.classes.dir
+ Must set build.test.results.dir
+ Must set build.classes.excludes
+ Must set dist.jar
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Must set javac.includes
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ No tests executed.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Must set JVM to use for profiling in profiler.info.jvm
+ Must set profiler agent JVM arguments in profiler.info.jvmargs.agent
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Must select some files in the IDE or set javac.includes
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ To run this application from the command line without Ant, try:
+
+
+
+
+
+
+ java -cp "${run.classpath.with.dist.jar}" ${main.class}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ To run this application from the command line without Ant, try:
+
+ java -jar "${dist.jar.resolved}"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Must select one file in the IDE or set run.class
+
+
+
+ Must select one file in the IDE or set run.class
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Must select one file in the IDE or set debug.class
+
+
+
+
+ Must select one file in the IDE or set debug.class
+
+
+
+
+ Must set fix.includes
+
+
+
+
+
+
+
+
+
+ This target only works when run from inside the NetBeans IDE.
+
+
+
+
+
+
+
+
+ Must select one file in the IDE or set profile.class
+ This target only works when run from inside the NetBeans IDE.
+
+
+
+
+
+
+
+
+ This target only works when run from inside the NetBeans IDE.
+
+
+
+
+
+
+
+
+
+
+
+
+ This target only works when run from inside the NetBeans IDE.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Must select one file in the IDE or set run.class
+
+
+
+
+
+ Must select some files in the IDE or set test.includes
+
+
+
+
+ Must select one file in the IDE or set run.class
+
+
+
+
+ Must select one file in the IDE or set applet.url
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Must select some files in the IDE or set javac.includes
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Some tests failed; see details above.
+
+
+
+
+
+
+
+
+ Must select some files in the IDE or set test.includes
+
+
+
+ Some tests failed; see details above.
+
+
+
+ Must select some files in the IDE or set test.class
+ Must select some method in the IDE or set test.method
+
+
+
+ Some tests failed; see details above.
+
+
+
+
+ Must select one file in the IDE or set test.class
+
+
+
+ Must select one file in the IDE or set test.class
+ Must select some method in the IDE or set test.method
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Must select one file in the IDE or set applet.url
+
+
+
+
+
+
+
+
+ Must select one file in the IDE or set applet.url
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/nbproject/genfiles.properties b/nbproject/genfiles.properties
new file mode 100644
index 0000000..15fcb32
--- /dev/null
+++ b/nbproject/genfiles.properties
@@ -0,0 +1,8 @@
+build.xml.data.CRC32=b6a6e4b5
+build.xml.script.CRC32=6d71407f
+build.xml.stylesheet.CRC32=28e38971@1.44.1.45
+# This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml.
+# Do not edit this file. You may delete it but then the IDE will never regenerate such files for you.
+nbproject/build-impl.xml.data.CRC32=b6a6e4b5
+nbproject/build-impl.xml.script.CRC32=0e5d9718
+nbproject/build-impl.xml.stylesheet.CRC32=c6d2a60f@1.56.0.46
diff --git a/nbproject/private/private.properties b/nbproject/private/private.properties
new file mode 100644
index 0000000..b451a9b
--- /dev/null
+++ b/nbproject/private/private.properties
@@ -0,0 +1,2 @@
+compile.on.save=true
+user.properties.file=/Users/dashazhernakova/Library/Application Support/NetBeans/7.2.1/build.properties
diff --git a/nbproject/private/private.xml b/nbproject/private/private.xml
new file mode 100644
index 0000000..8505fc1
--- /dev/null
+++ b/nbproject/private/private.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/nbproject/private/profiler/configurations.xml b/nbproject/private/profiler/configurations.xml
new file mode 100644
index 0000000..b01b9a1
--- /dev/null
+++ b/nbproject/private/profiler/configurations.xml
@@ -0,0 +1,110 @@
+
+
+
+1000
+false
+profiler.simple.filter
+false
+
+8
+true
+
+false
+0
+false
+true
+1
+false
+false
+false
+profiler.simple.filter
+32
+false
+1
+true
+3
+10
+1
+true
+Analyze Memory
+false
+1
+true
+10
+0
+profiler.simple.filter
+0
+false
+true
+
+
+1
+true
+
+
+false
+false
+true
+false
+false
+32
+Quick filter...
+0
+false
+0
+{$project.classes.only}
+10
+0
+true
+true
+
+true
+10
+
+1000
+0
+profiler.simple.filter
+false
+Analyze Performance
+
+1
+0
+
+0
+false
+profiler.simple.filter
+Quick filter...
+true
+false
+0
+true
+
+2
+32
+
+0
+false
+Profile only project classes
+0
+0
+profiler.simple.filter
+true
+1
+false
+10
+false
+10
+false
+true
+true
+false
+Quick filter...
+0
+false
+
+2
+Monitor Application
+1000
+true
+true
+
diff --git a/nbproject/project.properties b/nbproject/project.properties
new file mode 100644
index 0000000..383121d
--- /dev/null
+++ b/nbproject/project.properties
@@ -0,0 +1,88 @@
+annotation.processing.enabled=true
+annotation.processing.enabled.in.editor=false
+annotation.processing.processor.options=
+annotation.processing.processors.list=
+annotation.processing.run.all.processors=true
+annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output
+build.classes.dir=${build.dir}/classes
+build.classes.excludes=**/*.java,**/*.form
+# This directory is removed when the project is cleaned:
+build.dir=build
+build.generated.dir=${build.dir}/generated
+build.generated.sources.dir=${build.dir}/generated-sources
+# Only compile against the classpath explicitly listed here:
+build.sysclasspath=ignore
+build.test.classes.dir=${build.dir}/test/classes
+build.test.results.dir=${build.dir}/test/results
+# Uncomment to specify the preferred debugger connection transport:
+#debug.transport=dt_socket
+debug.classpath=\
+ ${run.classpath}
+debug.test.classpath=\
+ ${run.test.classpath}
+# This directory is removed when the project is cleaned:
+dist.dir=dist
+dist.jar=${dist.dir}/ExpressionTable.jar
+dist.javadoc.dir=${dist.dir}/javadoc
+excludes=
+file.reference.colt.jar=/Users/dashazhernakova/lib/colt.jar
+file.reference.commons-math-2.1.jar=/Users/dashazhernakova/lib/commons-math-2.1.jar
+file.reference.jsc.jar=/Users/dashazhernakova/lib/jsc.jar
+file.reference.jscicore.jar=/Users/dashazhernakova/lib/jscicore.jar
+includes=**
+jar.compress=false
+javac.classpath=\
+ ${reference.GeneticaLibraries.jar}:\
+ ${reference.Correlation.jar}:\
+ ${reference.eQTLMappingPipeline.jar}:\
+ ${file.reference.colt.jar}:\
+ ${file.reference.jsc.jar}:\
+ ${file.reference.jscicore.jar}:\
+ ${file.reference.commons-math-2.1.jar}
+# Space-separated list of extra javac options
+javac.compilerargs=
+javac.deprecation=false
+javac.processorpath=\
+ ${javac.classpath}
+javac.source=1.6
+javac.target=1.6
+javac.test.classpath=\
+ ${javac.classpath}:\
+ ${build.classes.dir}
+javac.test.processorpath=\
+ ${javac.test.classpath}
+javadoc.additionalparam=
+javadoc.author=false
+javadoc.encoding=${source.encoding}
+javadoc.noindex=false
+javadoc.nonavbar=false
+javadoc.notree=false
+javadoc.private=false
+javadoc.splitindex=true
+javadoc.use=true
+javadoc.version=false
+javadoc.windowtitle=
+main.class=expressiontable.ExpressionTable
+manifest.file=manifest.mf
+meta.inf.dir=${src.dir}/META-INF
+mkdist.disabled=false
+platform.active=default_platform
+project.Correlation=../Correlation
+project.eQTLMappingPipeline=../eqtlmappingpipeline
+project.GeneticaLibraries=../GeneticaLibraries
+reference.Correlation.jar=${project.Correlation}/dist/Correlation.jar
+reference.eQTLMappingPipeline.jar=${project.eQTLMappingPipeline}/dist/eQTLMappingPipeline.jar
+reference.GeneticaLibraries.jar=${project.GeneticaLibraries}/dist/GeneticaLibraries.jar
+run.classpath=\
+ ${javac.classpath}:\
+ ${build.classes.dir}
+# Space-separated list of JVM arguments used when running the project
+# (you may also define separate properties like run-sys-prop.name=value instead of -Dname=value
+# or test-sys-prop.name=value to set system properties for unit tests):
+run.jvmargs=
+run.test.classpath=\
+ ${javac.test.classpath}:\
+ ${build.test.classes.dir}
+source.encoding=UTF-8
+src.dir=src
+test.src.dir=test
diff --git a/nbproject/project.xml b/nbproject/project.xml
new file mode 100644
index 0000000..fe206b8
--- /dev/null
+++ b/nbproject/project.xml
@@ -0,0 +1,41 @@
+
+
+ org.netbeans.modules.java.j2seproject
+
+
+ ExpressionTable
+
+
+
+
+
+
+
+
+
+ Correlation
+ jar
+
+ jar
+ clean
+ jar
+
+
+ GeneticaLibraries
+ jar
+
+ jar
+ clean
+ jar
+
+
+ eQTLMappingPipeline
+ jar
+
+ jar
+ clean
+ jar
+
+
+
+
diff --git a/src/expressiontable/Coexpression.java b/src/expressiontable/Coexpression.java
new file mode 100644
index 0000000..2b99d82
--- /dev/null
+++ b/src/expressiontable/Coexpression.java
@@ -0,0 +1,43 @@
+
+package expressiontable;
+
+import java.io.IOException;
+import java.util.HashMap;
+import org.apache.commons.math.stat.correlation.SpearmansCorrelation;
+import umcg.genetica.io.ExpressionDataset;
+import umcg.genetica.io.text.TextFile;
+import umcg.genetica.math.matrix.DoubleMatrixDataset;
+
+/**
+ *
+ * @author dashazhernakova
+ */
+/**
+ * Computes Spearman correlations between one reference probe and every other
+ * probe (row) of an expression table, writing one tab-separated line per pair.
+ *
+ * @author dashazhernakova
+ */
+public class Coexpression {
+
+    /** Reference probe used when the caller does not name one. */
+    private static final String DEFAULT_PROBE = "7_50472431";
+
+    /**
+     * Correlates the default reference probe against all other probes.
+     *
+     * @param fname path of the expression table to load
+     * @param out_fname path of the result file
+     * @throws IOException if reading the table or writing the result fails
+     */
+    public void calculateCoexpression(String fname, String out_fname) throws IOException{
+        calculateCoexpression(fname, out_fname, DEFAULT_PROBE);
+    }
+
+    /**
+     * Correlates {@code probe1} against all other probes of the table and
+     * writes {@code probe1 <TAB> probe2 <TAB> correlation} for each pair.
+     *
+     * @param fname path of the expression table to load
+     * @param out_fname path of the result file
+     * @param probe1 name of the reference probe; must be a row of the table
+     * @throws IOException if reading the table or writing the result fails
+     * @throws IllegalArgumentException if {@code probe1} is not a row of the table
+     */
+    public void calculateCoexpression(String fname, String out_fname, String probe1) throws IOException{
+        DoubleMatrixDataset<String, String> dataset = new DoubleMatrixDataset<String, String>(fname);
+        double[][] rawData = dataset.getRawData();
+        dataset.recalculateHashMaps();
+        HashMap<String, Integer> hashProbes = new HashMap<String, Integer>(dataset.hashRows);//probes to indices in rawData
+
+        // Check before the output file is created: a missing probe used to
+        // surface as a NullPointerException on unboxing, after the file existed.
+        Integer referenceRow = hashProbes.get(probe1);
+        if (referenceRow == null) {
+            throw new IllegalArgumentException("Probe " + probe1 + " not found in " + fname);
+        }
+        // The reference row and the correlation object do not change per pair.
+        double[] pr1_expr = rawData[referenceRow];
+        SpearmansCorrelation spearman = new SpearmansCorrelation();
+
+        TextFile out = new TextFile(out_fname, true);
+        try {
+            for (String probe2 : hashProbes.keySet()){
+                if (! probe1.equals(probe2)){
+                    double[] pr2_expr = rawData[hashProbes.get(probe2)];
+                    double cor = spearman.correlation(pr1_expr, pr2_expr);
+                    out.writeln(probe1 + "\t" + probe2 + "\t" + cor);
+                }
+            }
+        } finally {
+            // Close the writer even when a correlation or write fails.
+            out.close();
+        }
+    }
+
+    public static void main(String[] args) throws IOException {
+        Coexpression c = new Coexpression();
+        c.calculateCoexpression("/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/new/deepSAGE_tag/tagwise_expression_table_SNP_in_recognition_sequence_tags_excluded.txt.QuantileNormalized.Log2Transformed.ProbesCentered.SamplesZTransformed.txt.gz",
+                "/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/new/deepSAGE_tag/coexpression_noPCA_7_50472431");
+    }
+}
diff --git a/src/expressiontable/ExpressionTable.java b/src/expressiontable/ExpressionTable.java
new file mode 100644
index 0000000..8cc9f75
--- /dev/null
+++ b/src/expressiontable/ExpressionTable.java
@@ -0,0 +1,181 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package expressiontable;
+
+import java.io.IOException;
+//import eqtlmappingpipeline.normalization.Normalizer;
+/**
+ *
+ * @author dashazhernakova
+ */
+/**
+ * Command-line front end that dispatches on {@code --mode} to the table
+ * utilities of this package.
+ */
+public class ExpressionTable {
+
+    /** Prints the values accepted by {@code --mode}. */
+    public static void usage(){
+        System.out.println("--mode\n\t"
+                + "ProbeToGeneConverter\n\t"
+                + "getExpressedInAllSamples\n\t"
+                + "getTopExpressed\n\t"
+                + "sort\n\t"
+                + "normalize");
+    }
+
+    /**
+     * Returns the token following the last occurrence of {@code name} in
+     * {@code args[from..]}, or {@code null} when the option is absent or is
+     * the final token.
+     */
+    private static String optionValue(String[] args, int from, String name) {
+        String value = null;
+        for (int j = from; j < args.length; j++) {
+            if (args[j].equals(name)) {
+                value = (j + 1 < args.length) ? args[j + 1] : null;
+            }
+        }
+        return value;
+    }
+
+    /**
+     * Builds a default output name from the input name plus {@code suffix}.
+     * NOTE(review): the pattern removes ".gz" followed by ".txt", so
+     * "x.txt.gz" becomes "x.txt" - confirm whether "(\\.txt)?(\\.gz)?$" was meant.
+     */
+    private static String defaultOutput(String in, String suffix) {
+        return in.replaceAll("(\\.gz)?(\\.txt)?$", "") + suffix;
+    }
+
+    /** Reports missing mandatory options; returns {@code true} when any value is null. */
+    private static boolean missing(String... required) {
+        for (String value : required) {
+            if (value == null) {
+                System.out.println("Not enough arguments!!!");
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Entry point. Options are read from the position of {@code --mode}
+     * onwards, so anything placed before {@code --mode} is ignored.
+     *
+     * @param args the command line arguments
+     * @throws IOException if the selected operation fails to read or write a file
+     */
+    public static void main(String[] args) throws IOException {
+        int modePos = 0;
+        String mode = null;
+        for (; modePos < args.length; modePos++) {
+            if (args[modePos].equals("--mode")) {
+                mode = (modePos + 1 < args.length) ? args[modePos + 1] : null;
+                break;
+            }
+        }
+        if (mode == null) {
+            System.out.println("ERROR: Please supply --mode");
+            usage();
+            return;
+        }
+
+        String in = optionValue(args, modePos, "--in");
+        String out = optionValue(args, modePos, "--out");
+
+        if (mode.equals("ProbeToGeneConverter")){
+            String annot = optionValue(args, modePos, "--annot");
+            boolean unique = Boolean.parseBoolean(optionValue(args, modePos, "--unique"));
+            // Validate before deriving the output name; the old order
+            // dereferenced a null input first and carried on after the warning.
+            if (missing(in, annot)) {
+                return;
+            }
+            if (out == null) {
+                out = defaultOutput(in, ".genes.txt");
+            }
+            System.out.println("Converting to gene ids: \n\texpression table " + in + "\n\tunique " + unique + "\n\tannotation " + annot);
+            ProbeToGeneConverter converter = new ProbeToGeneConverter(annot);
+            converter.convertProbesToGenesAvg(in, out, unique);
+        }
+        else if (mode.equals("getExpressedInAllSamples")){
+            if (missing(in)) {
+                return;
+            }
+            if (out == null) {
+                out = defaultOutput(in, ".expressedInAllSamples.txt");
+            }
+            System.out.println("\nGetting probes expressed in all samples from " + in);
+            new Subtable().getExpressedInAllSamples(in, out);
+        }
+        else if (mode.equals("getTopExpressed")){
+            if (missing(in)) {
+                return;
+            }
+            int n = 0;
+            String nText = optionValue(args, modePos, "--n");
+            if (nText != null) {
+                try {
+                    n = Integer.parseInt(nText);
+                } catch (NumberFormatException e) {
+                    System.out.println("ERROR: --n must be an integer, got " + nText);
+                    return;
+                }
+            }
+            if (out == null) {
+                out = defaultOutput(in, ".top" + n);
+            }
+            System.out.println("Getting top " + n + " expressed genes/transcripts... from " + in);
+            new Subtable().getMostExpressed(in, out, n);
+        }
+        else if (mode.equals("sort")){
+            String by = optionValue(args, modePos, "--by");
+            if (missing(in)) {
+                return;
+            }
+            // The ".sortedByName"/".sortedByExpr" defaults were unreachable
+            // because this default was always applied first; it is kept as
+            // the effective behaviour.
+            if (out == null) {
+                out = in + ".sorted";
+            }
+            if ("name".equals(by)){
+                System.out.println("Sorting " + in + " by probe name...");
+                new Sorter().sortByProbeName(in, out);
+            }
+            else if ("expression".equals(by)){
+                System.out.println("Sorting " + in + " by average expression...");
+                new Sorter().sortByAvgExpression(in, out);
+            }
+            else {
+                // Also reached when --by is absent.
+                System.out.println("Wrong \"by\" parameter");
+            }
+        }
+        else if (mode.equals("normalize")){
+            if (missing(in)) {
+                return;
+            }
+            System.out.println("Normalizing " + in);
+            new Normalizer().normalize(in);
+        }
+        else{
+            System.out.println("Wrong mode!");
+            usage();
+        }
+    }
+}
diff --git a/src/expressiontable/Joiner.java b/src/expressiontable/Joiner.java
new file mode 100644
index 0000000..f6cd6cb
--- /dev/null
+++ b/src/expressiontable/Joiner.java
@@ -0,0 +1,72 @@
+package expressiontable;
+
+import java.io.IOException;
+import java.util.HashSet;
+import umcg.genetica.io.text.TextFile;
+import umcg.genetica.math.matrix.DoubleMatrixDataset;
+
+/**
+ *
+ * @author dashazhernakova
+ */
+/**
+ * Combines two expression tables that are loaded when the object is created.
+ *
+ * @author dashazhernakova
+ */
+public class Joiner {
+    DoubleMatrixDataset<String, String> table1;
+    DoubleMatrixDataset<String, String> table2;
+
+    /**
+     * Loads both tables.
+     *
+     * @param f1 path of the first expression table
+     * @param f2 path of the second expression table
+     * @throws IOException if either table cannot be read
+     */
+    public Joiner(String f1, String f2) throws IOException{
+        table1 = new DoubleMatrixDataset<String, String>(f1);
+        table2 = new DoubleMatrixDataset<String, String>(f2);
+    }
+
+    /**
+     * Collects the probes of table2 that are absent from table1.
+     * NOTE(review): the collected probes are never written, so this only
+     * creates an empty output file - the method looks unfinished.
+     *
+     * @param outFileName path of the file that is opened for writing
+     * @throws IOException if the output file cannot be opened or closed
+     */
+    public void addNewProbes(String outFileName) throws IOException{
+        TextFile out = new TextFile(outFileName, true);
+        try {
+            // Hash lookup instead of scanning the row list once per probe.
+            HashSet<String> knownProbes = new HashSet<String>(table1.rowObjects);
+            HashSet<String> newProbes = new HashSet<String>();
+
+            //looking for probes from table2 not present in table1
+            for (String probe : table2.rowObjects){
+                if (! knownProbes.contains(probe))
+                    newProbes.add(probe);
+            }
+        } finally {
+            out.close();
+        }
+    }
+
+    /**
+     * Writes the probes present in both tables, with the samples of table1
+     * followed by the samples of table2 on each row. Rows follow the order
+     * of table1.
+     *
+     * @param outFileName path of the joined table to write
+     * @throws IOException if writing fails
+     */
+    public void appendSamples(String outFileName) throws IOException{
+        TextFile out = new TextFile(outFileName, true);
+        try {
+            //header
+            for (String id : table1.colObjects)
+                out.write("\t" + id);
+            for (String id : table2.colObjects)
+                out.write("\t" + id);
+            out.writeln();
+
+            // Hash lookup instead of scanning table2's row list for every row of table1.
+            HashSet<String> probesInTable2 = new HashSet<String>(table2.rowObjects);
+
+            //probes+expression
+            for (String probe : table1.rowObjects){
+                if (probesInTable2.contains(probe)){
+                    out.write(probe);
+                    int lineN1 = table1.hashRows.get(probe);
+                    int lineN2 = table2.hashRows.get(probe);
+                    for (int i = 0; i < table1.nrCols; i++){
+                        out.write("\t" + table1.rawData[lineN1][i]);
+                    }
+                    for (int i = 0; i < table2.nrCols; i++){
+                        out.write("\t" + table2.rawData[lineN2][i]);
+                    }
+                    out.writeln();
+                }
+            }
+        } finally {
+            // Close the writer even when a write fails part-way.
+            out.close();
+        }
+    }
+
+    /** Placeholder; performs no work. */
+    public void merge(){
+
+    }
+
+    public static void main(String[] args) throws IOException {
+        /*Joiner j = new Joiner("/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/lincRNA_Montgomery/expression_table_all.txt.expressedInAllSamples.txt.200genes.sortedByName.txt.QuantileNormalized.Log2Transformed.txt",
+                "/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/lincRNA_Yale+Argonne/expression_table_all_yale+argonne.txt.expressedInAllSamples.txt.genes.txt.QuantileNormalized.Log2Transformed.txt");
+
+        j.appendSamples("/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/lincRNA_Montgomery/Montgomery+Pickrell.genes.txt.QuantileNormalized.Log2Transformed.txt");
+         *
+         */
+        System.out.println("tfd/sdfgs/sdfgs.txt/fsdf.gz".replaceAll("(\\.gz)?(\\.txt)?$", ""));
+    }
+}
diff --git a/src/expressiontable/Normalizer.java b/src/expressiontable/Normalizer.java
new file mode 100644
index 0000000..120b01a
--- /dev/null
+++ b/src/expressiontable/Normalizer.java
@@ -0,0 +1,85 @@
+package expressiontable;
+
+import java.io.IOException;
+import umcg.genetica.io.ExpressionDataset;
+import umcg.genetica.math.matrix.DoubleMatrixDataset;
+import umcg.genetica.math.stats.Descriptives;
+import umcg.genetica.math.stats.Log2Transform;
+import umcg.genetica.math.stats.QuantileNormalization;
+
+/**
+ *
+ * @author dashazhernakova
+ */
+/**
+ * Runs the normalisation chain on an expression table and saves the result
+ * of every stage next to the input file.
+ *
+ * @author dashazhernakova
+ */
+public class Normalizer {
+
+    /**
+     * Applies, in order: quantile normalisation, log2 transformation, probe
+     * (row) centring and sample (column) z-transformation. After each stage
+     * the table is saved as the input name plus the accumulated stage
+     * suffixes and ".txt.gz".
+     *
+     * @param expressionFile path of the expression table to normalise
+     * @throws IOException if the table cannot be read or a stage cannot be saved
+     */
+    public void normalize(String expressionFile) throws IOException{
+        DoubleMatrixDataset<String, String> dataset = new DoubleMatrixDataset<String, String>(expressionFile);
+        double[][] rawData = dataset.getRawData();
+        String fileNamePrefix = expressionFile;
+
+        QuantileNormalization.quantilenormalize(rawData);
+        fileNamePrefix += ".QuantileNormalized";
+        saveStage(dataset, rawData, fileNamePrefix + ".txt.gz");
+
+        Log2Transform.log2transform(rawData);
+        fileNamePrefix += ".Log2Transformed";
+        saveStage(dataset, rawData, fileNamePrefix + ".txt.gz");
+
+        System.out.println("Standardizing probe mean and standard deviation");
+        // Rows are only centred here; the standard deviation that used to be
+        // computed in this loop was never used.
+        for (int p = 0; p < dataset.nrRows; p++) {
+            double mean = Descriptives.mean(rawData[p]);
+            for (int s = 0; s < dataset.nrCols; s++) {
+                rawData[p][s] -= mean;
+            }
+        }
+
+        dataset.setRawData(rawData);
+        fileNamePrefix += ".ProbesCentered";
+        dataset.save(fileNamePrefix + ".txt.gz");
+
+        System.out.println("- Standardizing sample mean and standard deviation");
+        // Transform and save the same matrix, so the final file is certain to
+        // contain the z-scores written below.
+        double[][] data = dataset.getRawData();
+        for (int s = 0; s < dataset.nrCols; s++) {
+            double[] vals = new double[dataset.nrRows];
+            for (int p = 0; p < dataset.nrRows; p++) {
+                vals[p] = data[p][s];
+            }
+            double mean = Descriptives.mean(vals);
+            // The variance must be taken while vals still holds the values
+            // that 'mean' describes. It used to be computed after the mean
+            // had been subtracted, which measured the spread around the
+            // wrong centre whenever the column mean was non-zero.
+            // NOTE(review): assumes Descriptives.variance(v, mean) measures
+            // the spread of v around the supplied mean - confirm.
+            double stdev = Math.sqrt(Descriptives.variance(vals, mean));
+            for (int p = 0; p < dataset.nrRows; p++) {
+                data[p][s] = (vals[p] - mean) / stdev;
+            }
+        }
+
+        fileNamePrefix += ".SamplesZTransformed";
+        saveStage(dataset, data, fileNamePrefix + ".txt.gz");
+    }
+
+    /** Saves {@code data} to {@code fileName} under the row and column labels of {@code source}. */
+    private void saveStage(DoubleMatrixDataset<String, String> source, double[][] data, String fileName) throws IOException {
+        DoubleMatrixDataset<String, String> stage = new DoubleMatrixDataset<String, String>(source.nrRows, source.nrCols);
+        stage.rowObjects = source.rowObjects;
+        stage.colObjects = source.colObjects;
+        stage.setRawData(data);
+        stage.save(fileName);
+    }
+
+    public static void main(String[] args) throws IOException {
+        Normalizer n = new Normalizer();
+        n.normalize("/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/lincRNA_Yale+Argonne/expression_table_all_yale+argonne.txt.expressedInAllSamples.txt.genes.txt");
+    }
+}
diff --git a/src/expressiontable/ProbeToGeneConverter.java b/src/expressiontable/ProbeToGeneConverter.java
new file mode 100644
index 0000000..13980aa
--- /dev/null
+++ b/src/expressiontable/ProbeToGeneConverter.java
@@ -0,0 +1,144 @@
+package expressiontable;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+import umcg.genetica.io.text.TextFile;
+
+/**
+ *
+ * @author dashazhernakova
+ */
+/**
+ * Rewrites the identifiers in the first column of an expression table,
+ * either by renaming them or by collapsing probes into genes.
+ *
+ * @author dashazhernakova
+ */
+public class ProbeToGeneConverter {
+    HashMap<String, String> probe2genes;
+
+    /**
+     * Loads the probe-to-gene mapping from a tab-separated annotation file.
+     * The first line is skipped; on every other line the second column is
+     * used as the probe and the third as the gene.
+     *
+     * @param annotationFile path of the annotation file
+     * @throws IOException if the file cannot be read
+     */
+    public ProbeToGeneConverter(String annotationFile) throws IOException{
+        probe2genes = new HashMap<String, String>();
+        TextFile annotation = new TextFile(annotationFile, false);
+        try {
+            annotation.readLineElems(TextFile.tab); // first line is not part of the mapping
+            String[] els;
+            while ((els = annotation.readLineElems(TextFile.tab)) != null) {
+                // Blank or truncated lines would otherwise fail on els[2].
+                if (els.length < 3) {
+                    continue;
+                }
+                probe2genes.put(els[1], els[2]);
+            }
+        } finally {
+            annotation.close();
+        }
+    }
+
+    /** Creates a converter with an empty probe-to-gene mapping. */
+    public ProbeToGeneConverter(){
+        // An empty map keeps convertProbesToGenesAvg from failing on a null field.
+        probe2genes = new HashMap<String, String>();
+    }
+
+    /*
+     * Converts one gene ids X to gene ids Y
+     * fname - path to the expression table
+     * outFname - path of the converted table; rows without a conversion are dropped
+     * conversionFname - path to the file of the type X \t Y
+     */
+    public void convertGeneIdsToGeneNames(String fname, String outFname, String conversionFname) throws IOException{
+        HashMap<String, String> conversion = new HashMap<String, String>();
+        TextFile conv = new TextFile(conversionFname, false);
+        try {
+            String[] pair;
+            while ((pair = conv.readLineElems(TextFile.tab)) != null){
+                // Skip lines that do not carry both ids.
+                if (pair.length < 2) {
+                    continue;
+                }
+                conversion.put(pair[0], pair[1]);
+            }
+        } finally {
+            conv.close();
+        }
+
+        TextFile table = new TextFile(fname, false);
+        try {
+            TextFile out = new TextFile(outFname, true);
+            try {
+                out.writeln(table.readLine()); // header is copied unchanged
+                String line;
+                int pos = 0, neg = 0;
+                while ((line = table.readLine()) != null){
+                    String[] els = line.split("\t");
+                    String converted = conversion.get(els[0]);
+                    if (conversion.containsKey(els[0])){
+                        els[0] = converted;
+                        out.writelnTabDelimited(els);
+                        pos++;
+                    }
+                    else
+                        neg++;
+                }
+                System.out.println("Successfully converted " + pos + " genes\nNo alternative name found for " + neg + " genes.");
+            } finally {
+                out.close();
+            }
+        } finally {
+            table.close();
+        }
+    }
+
+    /*
+     * writes expression values averaged over all isoforms of a gene
+     * fname - expression table
+     * outFname - path of the gene-level table, genes in sorted order
+     * unique - write only genes with one isoform
+     */
+    public void convertProbesToGenesAvg(String fname, String outFname, boolean unique) throws IOException{
+        TextFile expr = new TextFile(fname, false);
+        try {
+            TextFile out = new TextFile(outFname, true);
+            try {
+                String line = expr.readLine();
+                out.writeln(line); // header is copied unchanged
+
+                TreeMap<String, ArrayList<String>> gene2lines = new TreeMap<String, ArrayList<String>>();
+                System.out.println("Converting only single isoform genes? " + unique);
+                int numProbes = 0;
+                while ((line = expr.readLine()) != null){
+                    String probe = line.split("\t")[0];
+                    numProbes ++;
+                    if (probe2genes.containsKey(probe)){
+                        String gene = probe2genes.get(probe);
+                        // Allocate a list only the first time a gene is seen.
+                        ArrayList<String> lines = gene2lines.get(gene);
+                        if (lines == null) {
+                            lines = new ArrayList<String>();
+                            gene2lines.put(gene, lines);
+                        }
+                        lines.add(line);
+                    }
+                }
+                System.out.println("Overall number of probes processed: " + numProbes);
+                System.out.println("Overall number of resulting genes: " + gene2lines.keySet().size());
+
+                //Averaging and writing to file
+                for (Entry<String, ArrayList<String>> e : gene2lines.entrySet()){
+                    ArrayList<String> lines = e.getValue();
+                    String gene = e.getKey();
+                    int size = lines.size();
+                    if ((size > 1) && (! unique)){ //if more than one isoform for this gene
+                        // Index 0 is the identifier column and stays unused.
+                        float[] sum = new float[lines.get(0).split("\t").length];
+                        for (String s : lines){
+                            String[] splLine = s.split("\t");
+                            for (int i = 1; i < splLine.length; i++)
+                                sum[i]+=Float.parseFloat(splLine[i]); //summing
+                        }
+                        out.write(gene);
+                        for (int i = 1; i < sum.length; i++){
+                            out.write("\t" + sum[i]/size); //averaging over isoform expression values for current sample
+                        }
+                        out.writeln();
+                    }
+                    else if (size == 1){ //if one isoform
+                        out.write(gene);//average gene expression = isoform expression
+                        String[] splLine = lines.get(0).split("\t");
+                        for (int i = 1; i < splLine.length; i++)
+                            out.write("\t" + splLine[i]);
+                        out.writeln();
+                    }
+                    // Genes with several isoforms are skipped when unique is set.
+                }
+            } finally {
+                out.close();
+            }
+        } finally {
+            expr.close();
+        }
+    }
+
+    public static void main(String[] args) throws IOException {
+        /*ProbeToGeneConverter c = new ProbeToGeneConverter();
+
+        c.convertGeneIdsToGeneNames("/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/new/Pickrell/genes/expression_table.Pickrell.genes.txt.gz.QuantileNormalized.Log2Transformed.txt",
+                "/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/new/Pickrell/genes/expression_table.Pickrell.geneNames.txt.gz.QuantileNormalized.Log2Transformed.txt",
+                "/Users/dashazhernakova/Documents/UMCG/hg19/Ids_conversion/Ensembl_v69_geneId2gene.txt");
+        */
+        ProbeToGeneConverter c = new ProbeToGeneConverter("/Users/dashazhernakova/Documents/UMCG/hg19/annotations/annotation_tag_hg19.txt");
+        c.convertProbesToGenesAvg("/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/new/deepSAGE_tag/randomSubsets/45samples/expression_table.deepSAGE_tag.45samples.1.txt.gz.QuantileNormalized.Log2Transformed.txt.gz",
+                "/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/new/deepSAGE_tag/randomSubsets/45samples/expression_table.deepSAGE_tag.45samples.1.txt.gz.QuantileNormalized.Log2Transformed_genes.txt.gz", false);
+    }
+}
diff --git a/src/expressiontable/Sorter.java b/src/expressiontable/Sorter.java
new file mode 100644
index 0000000..3590d02
--- /dev/null
+++ b/src/expressiontable/Sorter.java
@@ -0,0 +1,100 @@
+package expressiontable;
+
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+import umcg.genetica.io.text.TextFile;
+import umcg.genetica.math.matrix.DoubleMatrixDataset;
+
+/**
+ *
+ * @author dashazhernakova
+ */
+public class Sorter {
+ public double calculateAvg(double[] array){
+ double avg = 0;
+ for (int i = 0 ; i < array.length; i++)
+ avg += array[i];
+ avg /= array.length;
+ return avg;
+ }
+
+ public void sortByAvgExpression(String fname, String outFname) throws IOException{
+ TextFile out = new TextFile(outFname, true);
+
+ //reading the expression table
+ DoubleMatrixDataset dataset = new DoubleMatrixDataset(fname);
+ dataset.recalculateHashMaps();
+ HashMap hashProbes = new HashMap(dataset.hashRows);//probes to indices in rawData
+
+ out.write("\t");
+ out.writelnTabDelimited(dataset.colObjects.toArray());
+
+ double[] line = null;
+ int lineNum = 0;
+ HashMap probeNumToAvg = new HashMap(); //probe indices in rawData to avg expresion
+ ValueComparator bvc = new ValueComparator(probeNumToAvg); //to sort by value (avg expression) rather than by key
+ TreeMap sorted_probeNumToAvg = new TreeMap(bvc); //probeNumToAvg sorted by avg expression
+
+ for ( Entry e : hashProbes.entrySet()){
+ lineNum = e.getValue(); //probe index in rawData
+ line = dataset.getRawData()[lineNum]; //probe expression
+ probeNumToAvg.put(lineNum, calculateAvg(line));
+ }
+ sorted_probeNumToAvg.putAll(probeNumToAvg);
+ for (Entry e : sorted_probeNumToAvg.entrySet()){
+ lineNum = e.getKey();
+ line = dataset.getRawData()[lineNum];
+ out.write(dataset.rowObjects.get(lineNum));
+ for (int i = 0; i < line.length; i++)
+ out.write("\t" + line[i]);
+ out.writeln();
+ }
+ out.close();
+ }
+
+ public void sortByProbeName(String fname, String outFname) throws IOException{
+ TextFile in = new TextFile(fname, false);
+ TextFile out = new TextFile(outFname, true);
+
+ String line = in.readLine(), probe;
+ out.write("\t");
+ out.writeln(line);
+
+ TreeMap probe2expr = new TreeMap();
+
+ while ( (line = in.readLine()) != null){
+ probe = line.split("\t")[0];
+ probe2expr.put(probe, line);
+ }
+
+ for (String pr : probe2expr.keySet()){
+ out.writeln(pr + "\t" + probe2expr.get(pr));
+ }
+ in.close();
+ out.close();
+ }
+
+ public class ValueComparator implements Comparator {
+
+ Map base;
+ public ValueComparator(Map base) {
+ this.base = base;
+ }
+
+
+ @Override
+ public int compare(Integer a, Integer b) {
+ return base.get(b).compareTo(base.get(a));
+ }
+ }
+ public static void main(String[] args) throws IOException {
+ Sorter s = new Sorter();
+ s.sortByAvgExpression("/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/lincRNA_Sebo/expression_table_normByGeneLength.txt",
+ "/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/lincRNA_Sebo/expression_table_normByGeneLength_sorted.txt");
+ }
+}
+
diff --git a/src/expressiontable/Subtable.java b/src/expressiontable/Subtable.java
new file mode 100644
index 0000000..eeec7ff
--- /dev/null
+++ b/src/expressiontable/Subtable.java
@@ -0,0 +1,224 @@
+package expressiontable;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+import umcg.genetica.io.ExpressionDataset;
+import umcg.genetica.io.text.TextFile;
+import umcg.genetica.math.matrix.DoubleMatrixDataset;
+
+
+/**
+ *
+ * @author dashazhernakova
+ */
+public class Subtable {
+
+ public double calculateAvg(double[] array){
+ double avg = 0;
+ for (int i = 0 ; i < array.length; i++)
+ avg += array[i];
+ avg /= array.length;
+ return avg;
+ }
+ public boolean isExpressedInAllSamples(double[] array){
+ for (int i = 0 ; i < array.length; i++){
+ if (array[i] == 0)
+ return false;
+ }
+ return true;
+ }
+ public boolean isExpressedInNSamples(double[] array, int N){
+ int n = 0;
+ for (int i = 0 ; i < array.length; i++){
+ if (array[i] > 0)
+ n++;
+ }
+ if (n >= N)
+ return true;
+ return false;
+ }
+ public boolean avgExpressionHigherThanThreshold(double[] array, double threshold){
+ double avg = 0;
+ for (int i = 0 ; i < array.length; i++)
+ avg += array[i];
+ avg /= array.length;
+ if (avg < threshold)
+ return false;
+ return true;
+ }
+
+ public void getRandomSubsetOfSamples(String fname, int n, String outFname) throws IOException{
+ //reading the expression table
+ DoubleMatrixDataset dataset = new DoubleMatrixDataset(fname);
+ dataset.recalculateHashMaps();
+ HashMap hashSamples = new HashMap(dataset.hashCols);//samples to indices in rawData
+
+ List samples = dataset.colObjects;
+ HashSet samplesToInclude = new HashSet(n);
+
+ Collections.shuffle(samples);
+ samplesToInclude.addAll(samples.subList(0, n));
+ for (String s : samplesToInclude)
+ System.out.println(s);
+ dataset = new DoubleMatrixDataset (fname, new HashSet(dataset.rowObjects), samplesToInclude);
+
+ dataset.recalculateHashMaps();
+ dataset.save(outFname);
+
+
+ }
+ public void getAvgExpression(String fname, String outFname) throws IOException{
+ TextFile out = new TextFile(outFname, true);
+
+ //reading the expression table
+ DoubleMatrixDataset dataset = new DoubleMatrixDataset(fname);
+ dataset.recalculateHashMaps();
+ HashMap hashProbes = new HashMap(dataset.hashRows);//probes to indices in rawData
+
+ //out.writeln("gene\tavg");
+ String probe = "";
+ double[] line = null;
+ int lineNum = 0;
+ for ( Entry e : hashProbes.entrySet()){
+ lineNum = e.getValue(); //probe index in rawData
+ probe = dataset.rowObjects.get(lineNum);
+ line = dataset.getRawData()[lineNum]; //probe expression
+ out.writeln(probe + "\t" + calculateAvg(line));
+ }
+ out.close();
+ }
+
+ /**
+ * Gets top N most expressed probes (N specified by numProbes)
+ * @param fname
+ * @param outFname
+ * @param numProbes
+ * @throws IOException
+ */
+ public void getMostExpressed(String fname, String outFname, int numProbes) throws IOException{
+ TextFile out = new TextFile(outFname, true);
+
+ //reading the expression table
+ DoubleMatrixDataset dataset = new DoubleMatrixDataset(fname);
+ dataset.recalculateHashMaps();
+ HashMap hashProbes = new HashMap(dataset.hashRows);//probes to indices in rawData
+
+ out.write("\t");
+ out.writelnTabDelimited(dataset.colObjects.toArray());
+
+ double[] line = null;
+ int lineNum = 0;
+ HashMap probeNumToAvg = new HashMap(); //probe indices in rawData to avg expresion
+ Sorter s = new Sorter();
+ Sorter.ValueComparator bvc = s.new ValueComparator(probeNumToAvg); //to sort by value (avg expression) rather than by key
+ TreeMap sorted_probeNumToAvg = new TreeMap(bvc); //probeNumToAvg sorted by avg expression
+
+ for ( Entry e : hashProbes.entrySet()){
+ lineNum = e.getValue(); //probe index in rawData
+ line = dataset.getRawData()[lineNum]; //probe expression
+ probeNumToAvg.put(lineNum, calculateAvg(line));
+ }
+ sorted_probeNumToAvg.putAll(probeNumToAvg);
+ for (Entry e : sorted_probeNumToAvg.entrySet()){
+ lineNum = e.getKey();
+ if (lineNum < numProbes){
+ line = dataset.getRawData()[lineNum];
+ out.write(dataset.rowObjects.get(lineNum));
+ for (int i = 0; i < line.length; i++)
+ out.write("\t" + line[i]);
+ out.writeln();
+ }
+ }
+ out.close();
+ }
+
+
+ public void getExpressedInAllSamples(String fname, String outFname) throws IOException{
+ TextFile out = new TextFile(outFname, true);
+
+ DoubleMatrixDataset dataset = new DoubleMatrixDataset(fname);
+ dataset.recalculateHashMaps();
+ HashMap hashProbes = new HashMap(dataset.hashRows);//probes to indices in rawData
+
+ out.write("\t");
+ out.writelnTabDelimited(dataset.colObjects.toArray());
+
+ int counter = 0;
+ double[] line = null;
+ for ( Entry e : hashProbes.entrySet()){
+ line = dataset.getRawData()[e.getValue()];
+ if (isExpressedInAllSamples(line)){
+ counter ++;
+ out.write(e.getKey());
+ for (int i = 0; i < line.length; i++)
+ out.write("\t" + line[i]);
+ out.writeln();
+ }
+ }
+ System.out.println("Number of probes expressed in all samples: " + counter);
+ out.close();
+ }
+
+ /**
+ * Writes all probes expressed in at least "percent" % samples
+ * @param fname
+ * @param outFname
+ * @param percent
+ * @throws IOException
+ */
+ public void getExpressedInNSamples(String fname, String outFname, int percent) throws IOException{
+ TextFile out = new TextFile(outFname, true);
+
+ DoubleMatrixDataset dataset = new DoubleMatrixDataset(fname);
+ dataset.recalculateHashMaps();
+ HashMap hashProbes = new HashMap(dataset.hashRows);//probes to indices in rawData
+
+ out.write("\t");
+ out.writelnTabDelimited(dataset.colObjects.toArray());
+
+ int minSamplesExpressed = dataset.nrCols*percent/100;
+ int counter = 0;
+ double[] line = null;
+ for ( Entry e : hashProbes.entrySet()){
+ line = dataset.getRawData()[e.getValue()];
+ if (isExpressedInNSamples(line, minSamplesExpressed)){
+ counter ++;
+ out.write(e.getKey());
+ for (int i = 0; i < line.length; i++)
+ out.write("\t" + line[i]);
+ out.writeln();
+ }
+ }
+ System.out.println("Number of probes expressed in " + percent + " % of samples: " + counter);
+ out.close();
+ }
+
+ public static void main(String[] args) throws IOException {
+ Subtable c = new Subtable();
+ /*c.getExpressedInAllSamples("/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/tmp.txt",
+ "/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/tmp2.txt");
+ Sorter s = new Sorter();
+ s.sortByAvgExpression("/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/tmp.txt",
+ "/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/tmp2.txt");
+ *
+ */
+ //c.getExpressedInAllSamples("/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/Yale+Argonne/yale_argonne_expression_nonnorm_NONZERO.txt",
+ // "/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/Yale+Argonne/yale_argonne_expression_nonnorm_NONZERO.txt.expressedInAllSamples");
+
+ //c.getAvgExpression("/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/new/deepSAGE_tag/tagwise_expression_table_SNP_in_recognition_sequence_tags_excluded.txt",
+ // "/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/new/deepSAGE_tag/avgExpression.txt");
+ c.getRandomSubsetOfSamples("/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/new/deepSAGE_tag/tagwise_expression_table_SNP_in_recognition_sequence_tags_excluded.txt",
+ 40,
+ "/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/tmp.txt");
+ //"/Users/dashazhernakova/Documents/UMCG/GeneticalGenomicsDatasets/new/deepSAGE_tag/randomSubsets/55samples/expression_table.deepSAGE_tag.55samples.4.txt.gz");
+ }
+
+
+}