From 7f0d5bc374c6fb923d2d74c70a9e60634f52b469 Mon Sep 17 00:00:00 2001 From: Bonder-MJ Date: Tue, 24 Feb 2015 10:40:19 +0100 Subject: [PATCH 001/143] removed hints --- eqtl-mapping-pipeline/nb-configuration.xml | 9 --------- genetica-libraries/nb-configuration.xml | 9 --------- 2 files changed, 18 deletions(-) diff --git a/eqtl-mapping-pipeline/nb-configuration.xml b/eqtl-mapping-pipeline/nb-configuration.xml index 4478061d1..5b96d138a 100644 --- a/eqtl-mapping-pipeline/nb-configuration.xml +++ b/eqtl-mapping-pipeline/nb-configuration.xml @@ -6,15 +6,6 @@ The configuration is intended to be shared among all the users of project and therefore it is assumed to be part of version control checkout. Without this configuration present, some functionality in the IDE may be limited or fail altogether. --> - - - JDK_1.7 - diff --git a/genetica-libraries/nb-configuration.xml b/genetica-libraries/nb-configuration.xml index dbb1c5fb6..9d924f817 100644 --- a/genetica-libraries/nb-configuration.xml +++ b/genetica-libraries/nb-configuration.xml @@ -11,13 +11,4 @@ Without this configuration present, some functionality in the IDE may be limited - - - JDK_1.7 - From 225f97f0c37125dae9b2f518faff5739791d4695 Mon Sep 17 00:00:00 2001 From: Marc Jan Bonder Date: Tue, 24 Feb 2015 10:51:34 +0100 Subject: [PATCH 002/143] Different file to annotate --- .../eqtlmappingpipeline/util/QTLAnnotator.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLAnnotator.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLAnnotator.java index fafe55432..c6c9d9b6c 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLAnnotator.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLAnnotator.java @@ -43,16 +43,16 @@ public static void main(String[] args) throws IOException { // "1;1;1;10;1;11;1;4;1;4;1;4", "snp;probe;probe;probe;probe;probe", 
"D:\\UMCG\\ProbeMapping\\Info\\V70\\gencode.v15.annotation.gtf.gz", // "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Cis_Pc22c_meQTLs\\Primary\\eQTLProbesFDR0.05-ProbeLevel_ldDrivenEffectsRemoved-ExtendedInfo.txt"); - addAnnotationToQTLOutput( - "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt", - "D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v5.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt", - "1;8-9-10-11-12-13-14;1;4-5;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4", "snp;probe;snp;snp", null, - "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt-ExtendedInfo5.txt"); -// +// addAnnotationToQTLOutput( +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt", +// "D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v5.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt", +// 
"1;8-9-10-11-12-13-14;1;4-5;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4", "snp;probe;snp;snp", null, +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt-ExtendedInfo5.txt"); + addAnnotationToQTLOutput( "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\QTLCorrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt", - "D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v5.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt", - "1;8-9-10-11-12-13-14;1;4-5;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4", "snp;probe;snp;snp", null, + "D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v5.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;", + 
"1;8-9-10-11-12-13-14;1;4-5;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15", "snp;probe;snp;snp;snp", null, "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\QTLCorrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfo4.txt"); // addAnnotationToQTLOutput( From 6de79c2ce3a42de4e7cf2d960eef85038b2cc3eb Mon Sep 17 00:00:00 2001 From: harmjanwestra Date: Thu, 5 Mar 2015 21:51:50 -0500 Subject: [PATCH 003/143] Fix lavaan, update POM to include new genetica libraries, other edits --- .../InteractionAnalysisConsoleGUI.java | 23 +++++---- .../InteractionAnalysisMultiThreaded.java | 5 +- .../InteractionAnalysisTask.java | 50 +++++++++++++++++-- 3 files changed, 63 insertions(+), 15 deletions(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisConsoleGUI.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisConsoleGUI.java index 772e758d3..c71df26fc 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisConsoleGUI.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisConsoleGUI.java @@ -32,11 +32,12 @@ public InteractionAnalysisConsoleGUI(String[] args) { String snpprobecombofile = null; String covariates = null; String inexp = null; - String cohort = null; + String cohort = null; RUNMODE step = null; boolean binaryoutput = false; boolean robust = false; + boolean sem = false; boolean fullStats = false; boolean matchCovariateNamesToExpressionProbeNames = false; @@ -70,7 +71,10 @@ public InteractionAnalysisConsoleGUI(String[] args) { } 
else if (arg.equals("--robust")) { System.out.println("WARNING: using R connection!! Make sure Rserve and sandwich are installed"); robust = true; - } else if (arg.equals("--fullstats")) { + } else if (arg.equals("--sem")) { + System.out.println("WARNING: using R connection!! Make sure Rserve and lavaan are installed"); + sem = true; + }else if (arg.equals("--fullstats")) { fullStats = true; } else if (arg.equals("--covariates")) { covariates = val; @@ -90,8 +94,8 @@ public InteractionAnalysisConsoleGUI(String[] args) { gte = val; } else if (arg.equals("--snpprobe")) { snpprobecombofile = val; - } else if (arg.equals("--cohort")) { - cohort = val; + } else if (arg.equals("--cohort")) { + cohort = val; } else if (arg.equals("--testMatchingCovariates")) { matchCovariateNamesToExpressionProbeNames = true; } else if (arg.equals("--threads")) { @@ -182,10 +186,10 @@ public InteractionAnalysisConsoleGUI(String[] args) { // System.err.println("Warning: yo please supply --cellcounts"); //kill = true; } - if ((binaryoutput == true) && (cohort == null)) { - System.err.println("Error: please supply --cohort (required in binary output mode)"); - kill = true; - } + if ((binaryoutput == true) && (cohort == null)) { + System.err.println("Error: please supply --cohort (required in binary output mode)"); + kill = true; + } if (kill) { System.err.println(""); @@ -198,7 +202,7 @@ public InteractionAnalysisConsoleGUI(String[] args) { snpprobecombofile, nrThreads, out, - covariateList, robust, fullStats, binaryoutput, cohort); + covariateList, sem, robust, fullStats, binaryoutput, cohort); // qmt.runCelltypeSpecificEQTLMapping(inexppccorrected, inexpraw, in, gte, snpprobecombofile, cellcountfile, nrThreads, out, testAllCovariatesInCovariateData); } @@ -238,6 +242,7 @@ private void printUsage() { + "--threads\t\tInteger\t\tThe number of threads to use for calculations.\n" + "--covariatelist\t\tList of covariates to test\n" + "--robust\t\tUse robust estimates of standard errors (Requires 
Rserve and sandwich packages, and R)\n" + + "--sem\t\tStructural equation modeling (requires RServe and Lavaan)\n" + "--fullstats\t\tOutput extra columns of statistics (SEs and Betas)"); System.out.println(""); diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java index 824edf188..f5a5dedfa 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java @@ -398,7 +398,7 @@ public void prepareDataForCelltypeSpecificEQTLMapping(String inexpraw, String ou public void runInteractionAnalysis(String inExpPCCorrected, String covariateFile, String ingt, String gte, String snpprobecombinationfile, Integer nrThreads, String out, - String covariateList, boolean robustSE, boolean fullStats, boolean binaryOutput, String cohort) throws IOException, Exception { + String covariateList, boolean sem, boolean robustSE, boolean fullStats, boolean binaryOutput, String cohort) throws IOException, Exception { String probeannot = null; double mafthreshold = 0.05; @@ -409,7 +409,7 @@ public void runInteractionAnalysis(String inExpPCCorrected, String covariateFile throw new IllegalArgumentException("ERROR: please provide snpprobe combination file"); } - if (robustSE) { + if (robustSE || sem) { System.out.println("Running tests for robust standard errors. 
Now testing R connection"); try { RConnection rConnection = new RConnection(); @@ -668,6 +668,7 @@ public void runInteractionAnalysis(String inExpPCCorrected, String covariateFile expInds, covariateData, pcCorrectedExpressionData, + sem, robustSE, fullStats ); diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisTask.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisTask.java index ed6fbb9e0..c0ea0063c 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisTask.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisTask.java @@ -20,6 +20,7 @@ import umcg.genetica.io.trityper.util.ChrAnnotation; import umcg.genetica.math.matrix.DoubleMatrixDataset; import umcg.genetica.math.stats.Correlation; +import umcg.genetica.math.stats.Normalization; /** * @@ -39,11 +40,12 @@ public class InteractionAnalysisTask implements Callable NAN_PAIR = new Pair(Double.NaN, Double.NaN); + private final boolean sem; public InteractionAnalysisTask(SNP snpObj, ArrayList> eQTLsForSNP, double[][] pcCorrectedData, int[] wgaId, String[] expInds, DoubleMatrixDataset covariateData, - TriTyperExpressionData expressionData, boolean robustSE, boolean provideFullStats) { + TriTyperExpressionData expressionData, boolean sem, boolean robustSE, boolean provideFullStats) { this.eQTLSNPObj = snpObj; this.eQTLsForSNP = eQTLsForSNP; this.pcCorrectedExpressionData = pcCorrectedData; @@ -53,6 +55,7 @@ public InteractionAnalysisTask(SNP snpObj, ArrayList> eQTLs this.covariateData = covariateData; this.sandwich = robustSE; this.provideFullStats = provideFullStats; + this.sem = sem; } @Override @@ -163,13 +166,17 @@ public InteractionAnalysisResults call() throws Exception { double rsquared = 0; - if (sandwich) { + if (sandwich || sem) { RConnection rConnection = null; // this code is very suboptimal and 
is here for validation purposes only anyway try { rConnection = new RConnection(); // rConnection.voidEval("install.packages('sandwich')"); +if(sandwich){ rConnection.voidEval("library(sandwich)"); + } else { + rConnection.voidEval("library(lavaan)"); + } } catch (RserveException ex) { System.err.println(ex.getMessage()); rConnection = null; @@ -177,8 +184,9 @@ public InteractionAnalysisResults call() throws Exception { if (rConnection == null) { System.err.println("Error: using R connection but none found"); + return null; } - if (rConnection != null) { + try { if (rConnection.isConnected()) { double[] olsY = new double[nrCalled]; //Ordinary least squares: Our gene expression @@ -203,6 +211,7 @@ public InteractionAnalysisResults call() throws Exception { double corr = JSci.maths.ArrayMath.correlation(olsX, olsY); mainZ = Correlation.convertCorrelationToZScore(olsX.length, corr); +if(sandwich){ rConnection.assign("y", olsY); rConnection.assign("x", olsX); rConnection.assign("z", covariateValues); @@ -224,6 +233,39 @@ public InteractionAnalysisResults call() throws Exception { betaCovariate = rConnection.eval("modelsummary$coefficients[3,1]").asDouble(); seCovariate = rConnection.eval("modelsummary$coefficients[3,2]").asDouble(); rsquared = rConnection.eval("modelsummary$r.squared").asDouble(); + } else { + // use structural equation modeling (errors-in-variables compensation) + + // define model + // z-transform, otherwise the used covariances in lavaan may be wrong + double[] olsYZ = Normalization.standardNormalize(valsY); + double[] olsXZ = Normalization.standardNormalize(olsX); + double[] covariatesZ = Normalization.standardNormalize(covariateValues); + double[] interaction = Normalization.standardNormalize(covariateValues); + + for (int i = 0; i < covariatesZ.length; i++) { + interaction[i] = olsXZ[i] * covariatesZ[i]; + } + + rConnection.assign("expression", olsYZ); + rConnection.assign("genotype", olsXZ); + rConnection.assign("covariate", covariatesZ); + 
rConnection.assign("interaction", interaction); + + String model = "'latentExpression ~ latentCovariate + latentInteraction\n" + + "latentExpression =~ expression\n" + + "latentCovariate =~ covariate\n" + + "latentGenotype =~ genotype\n" + + "latentInteraction =~ interaction\n" + + "covariate ~~ genotype\n" + + "covariate ~~ interaction\n" + + "genotype ~~ interaction\n" + + "'"; + + rConnection.voidEval(model); + rConnection.voidEval("fit <- sem(model)"); + rConnection.voidEval("modelsummary <- summary(m)"); + } rConnection.close(); } else { System.err.println("ERROR: R is not connected."); @@ -235,7 +277,7 @@ public InteractionAnalysisResults call() throws Exception { System.err.println(ex.getMessage()); } - } + } else { From 485fdac5e51cacb7ad82262babb077d05c6bdae1 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Fri, 6 Mar 2015 19:29:18 +0100 Subject: [PATCH 004/143] Binary interaction stuff --- .../ReplicateInteractions.java | 57 +++++++++++-------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java index 7f7ae2086..858c2a8c3 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java @@ -9,15 +9,12 @@ import java.io.FileWriter; import java.io.IOException; import java.io.InputStreamReader; -import java.text.DateFormat; +import java.io.Writer; import java.text.NumberFormat; -import java.text.SimpleDateFormat; -import java.util.Date; import java.util.HashSet; import java.util.Iterator; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; import org.apache.commons.cli.OptionBuilder; import 
org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; @@ -138,19 +135,21 @@ public static void main(String[] args) throws FileNotFoundException, IOException System.exit(1); return; } + BufferedWriter logWriter = new BufferedWriter(new FileWriter(outputPrefix + "_Log.txt")); - System.out.println("Input file: " + inputInteractionFile.getAbsolutePath()); - System.out.println("Replication file: " + replicationInteractionFile.getAbsolutePath()); - System.out.println("Output prefix: " + outputPrefix); - System.out.println("Min interaction z-score: " + minAbsInteractionZ); - System.out.println("Min replication interaction z-score: " + minAbsReplicationInteractionZ); + writeAndOut("Input file: " + inputInteractionFile.getAbsolutePath(), logWriter); + writeAndOut("Replication file: " + replicationInteractionFile.getAbsolutePath(), logWriter); + writeAndOut("Output prefix: " + outputPrefix, logWriter); + writeAndOut("Min interaction z-score: " + minAbsInteractionZ, logWriter); + writeAndOut("Min replication interaction z-score: " + minAbsReplicationInteractionZ, logWriter); if (matchOnChrPos) { - System.out.println("Matching variants on chr-pos"); + writeAndOut("Matching variants on chr-pos", logWriter); } if (covariatesToIncludeFile != null) { - System.out.println("Covariates to include: " + covariatesToIncludeFile.getAbsolutePath()); + writeAndOut("Covariates to include: " + covariatesToIncludeFile.getAbsolutePath(), logWriter); } - System.out.println(); + writeAndOut("", logWriter); + final HashSet covariantsToInclude; if (covariatesToIncludeFile != null) { @@ -160,8 +159,8 @@ public static void main(String[] args) throws FileNotFoundException, IOException while ((line = reader.readLine()) != null) { covariantsToInclude.add(line.trim()); } - System.out.println("Covariates included: " + covariantsToInclude.size()); - System.out.println(); + writeAndOut("Covariates included: " + covariantsToInclude.size(), logWriter); + writeAndOut("", logWriter); } 
else { covariantsToInclude = null; } @@ -291,17 +290,19 @@ public static void main(String[] args) throws FileNotFoundException, IOException numberFormat.setMinimumFractionDigits(0); numberFormat.setMaximumFractionDigits(2); - System.out.println(""); - System.out.println("Total number of interactions: " + numberFormat.format(notSignificant + significant)); - System.out.println(" - Not significant: " + numberFormat.format(notSignificant) + " (" + numberFormat.format(notSignificant * 100d / (notSignificant + significant)) + "%)"); - System.out.println(" - Significant: " + numberFormat.format(significant) + " (" + numberFormat.format(significant * 100d / (notSignificant + significant)) + "%)"); - System.out.println(" * Not in replication: " + numberFormat.format(notTestedInReplication) + " (" + numberFormat.format(notTestedInReplication * 100d / significant) + "%)"); - System.out.println(" * Not significant in replication: " + numberFormat.format(notSignificantReplicationSameDirection + notSignificantReplicationOppositeDirection) + " (" + numberFormat.format((notSignificantReplicationSameDirection + notSignificantReplicationOppositeDirection) * 100d / significant) + "%)"); - System.out.println(" # Same direction: " + numberFormat.format(notSignificantReplicationSameDirection) + " (" + numberFormat.format(notSignificantReplicationSameDirection * 100d / (notSignificantReplicationSameDirection + notSignificantReplicationOppositeDirection)) + "%)"); - System.out.println(" # Opposite direction: " + numberFormat.format(notSignificantReplicationOppositeDirection) + " (" + numberFormat.format(notSignificantReplicationOppositeDirection * 100d / (notSignificantReplicationSameDirection + notSignificantReplicationOppositeDirection)) + "%)"); - System.out.println(" * Significant in replication: " + numberFormat.format(significantReplicationSameDirection + significantReplicationOppositeDirection) + " (" + numberFormat.format((significantReplicationSameDirection + 
significantReplicationOppositeDirection) * 100d / significant) + "%)"); - System.out.println(" # Same direction: " + numberFormat.format(significantReplicationSameDirection) + " (" + numberFormat.format(significantReplicationSameDirection * 100d / (significantReplicationSameDirection + significantReplicationOppositeDirection)) + "%)"); - System.out.println(" # Opposite direction: " + numberFormat.format(significantReplicationOppositeDirection) + " (" + numberFormat.format(significantReplicationOppositeDirection * 100d / (significantReplicationSameDirection + significantReplicationOppositeDirection)) + "%)"); + writeAndOut("", logWriter); + writeAndOut("Total number of interactions: " + numberFormat.format(notSignificant + significant), logWriter); + writeAndOut(" - Not significant: " + numberFormat.format(notSignificant) + " (" + numberFormat.format(notSignificant * 100d / (notSignificant + significant)) + "%)", logWriter); + writeAndOut(" - Significant: " + numberFormat.format(significant) + " (" + numberFormat.format(significant * 100d / (notSignificant + significant)) + "%)", logWriter); + writeAndOut(" * Not in replication: " + numberFormat.format(notTestedInReplication) + " (" + numberFormat.format(notTestedInReplication * 100d / significant) + "%)", logWriter); + writeAndOut(" * Not significant in replication: " + numberFormat.format(notSignificantReplicationSameDirection + notSignificantReplicationOppositeDirection) + " (" + numberFormat.format((notSignificantReplicationSameDirection + notSignificantReplicationOppositeDirection) * 100d / significant) + "%)", logWriter); + writeAndOut(" # Same direction: " + numberFormat.format(notSignificantReplicationSameDirection) + " (" + numberFormat.format(notSignificantReplicationSameDirection * 100d / (notSignificantReplicationSameDirection + notSignificantReplicationOppositeDirection)) + "%)", logWriter); + writeAndOut(" # Opposite direction: " + numberFormat.format(notSignificantReplicationOppositeDirection) + " (" 
+ numberFormat.format(notSignificantReplicationOppositeDirection * 100d / (notSignificantReplicationSameDirection + notSignificantReplicationOppositeDirection)) + "%)", logWriter); + writeAndOut(" * Significant in replication: " + numberFormat.format(significantReplicationSameDirection + significantReplicationOppositeDirection) + " (" + numberFormat.format((significantReplicationSameDirection + significantReplicationOppositeDirection) * 100d / significant) + "%)", logWriter); + writeAndOut(" # Same direction: " + numberFormat.format(significantReplicationSameDirection) + " (" + numberFormat.format(significantReplicationSameDirection * 100d / (significantReplicationSameDirection + significantReplicationOppositeDirection)) + "%)", logWriter); + writeAndOut(" # Opposite direction: " + numberFormat.format(significantReplicationOppositeDirection) + " (" + numberFormat.format(significantReplicationOppositeDirection * 100d / (significantReplicationSameDirection + significantReplicationOppositeDirection)) + "%)", logWriter); + + logWriter.close(); } @@ -346,4 +347,10 @@ private static CSVWriter writeHeader(File file, String[] row) throws IOException replicatedSameDirectionWriter.writeNext(row); return replicatedSameDirectionWriter; } + + private static void writeAndOut(String message, Writer writer) throws IOException{ + writer.append(message); + writer.append('\n'); + System.out.println(message); + } } From 65aa309f28c1d435768e8dbcea8a455506f9fe7a Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 12 Mar 2015 21:25:58 +0100 Subject: [PATCH 005/143] Replicate interactions --- eqtl-mapping-pipeline/pom.xml | 2 +- .../InvestigateCovariate.java | 16 ++ .../ReplicateInteractions.java | 240 +++++++++++++++--- 3 files changed, 226 insertions(+), 32 deletions(-) create mode 100644 eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/InvestigateCovariate.java diff --git a/eqtl-mapping-pipeline/pom.xml b/eqtl-mapping-pipeline/pom.xml index 
6b95cd552..67f1f6e6c 100644 --- a/eqtl-mapping-pipeline/pom.xml +++ b/eqtl-mapping-pipeline/pom.xml @@ -7,7 +7,7 @@ 1.0.2-SNAPSHOT eqtl-mapping-pipeline - 1.3.1-SNAPSHOT + 1.3.3-SNAPSHOT jar 4.0.0 diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/InvestigateCovariate.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/InvestigateCovariate.java new file mode 100644 index 000000000..a09e79a53 --- /dev/null +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/InvestigateCovariate.java @@ -0,0 +1,16 @@ +package eqtlmappingpipeline.binaryInteraction; + +/** + * + * @author Patrick Deelen + */ +public class InvestigateCovariate { + + /** + * @param args the command line arguments + */ + public static void main(String[] args) { + // TODO code application logic here + } + +} diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java index 858c2a8c3..3a18f9e76 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java @@ -1,6 +1,7 @@ package eqtlmappingpipeline.binaryInteraction; import au.com.bytecode.opencsv.CSVWriter; +import eqtlmappingpipeline.Main; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; @@ -13,6 +14,8 @@ import java.text.NumberFormat; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.Map; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.OptionBuilder; @@ -74,6 +77,20 @@ public class ReplicateInteractions { OptionBuilder.isRequired(); OPTIONS.addOption(OptionBuilder.create("riz")); + 
OptionBuilder.withArgName("double"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Minimum absolute interaction z-score to count covariate"); + OptionBuilder.withLongOpt("covariateInteractionZ"); + OptionBuilder.isRequired(); + OPTIONS.addOption(OptionBuilder.create("ciz")); + + OptionBuilder.withArgName("double"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Minimum absolute replication interaction z-score to count covariate"); + OptionBuilder.withLongOpt("covariateReplicationInteractionZ"); + OptionBuilder.isRequired(); + OPTIONS.addOption(OptionBuilder.create("criz")); + OptionBuilder.withDescription("If set match variant on chr-pos"); OptionBuilder.withLongOpt("chrPos"); OPTIONS.addOption(OptionBuilder.create("cp")); @@ -92,6 +109,8 @@ public static void main(String[] args) throws FileNotFoundException, IOException final File replicationInteractionFile; final double minAbsInteractionZ; final double minAbsReplicationInteractionZ; + final double minAbsInteractionZCovariateCount; + final double minAbsReplicationInteractionZCovariateCount; final boolean matchOnChrPos; final String outputPrefix; final File covariatesToIncludeFile; @@ -119,6 +138,22 @@ public static void main(String[] args) throws FileNotFoundException, IOException return; } + try { + minAbsInteractionZCovariateCount = Double.parseDouble(commandLine.getOptionValue("ciz")); + } catch (NumberFormatException ex) { + System.out.println("Cannot not parse --covariateInteractionZ as double: " + commandLine.getOptionValue("ciz")); + System.exit(1); + return; + } + + try { + minAbsReplicationInteractionZCovariateCount = Double.parseDouble(commandLine.getOptionValue("criz")); + } catch (NumberFormatException ex) { + System.out.println("Cannot not parse --covariateReplicationInteractionZ as double: " + commandLine.getOptionValue("criz")); + System.exit(1); + return; + } + if (commandLine.hasOption("c")) { covariatesToIncludeFile = new File(commandLine.getOptionValue("c")); } else { @@ 
-137,11 +172,14 @@ public static void main(String[] args) throws FileNotFoundException, IOException } BufferedWriter logWriter = new BufferedWriter(new FileWriter(outputPrefix + "_Log.txt")); + writeAndOut("Software version: " + Main.VERSION, logWriter); writeAndOut("Input file: " + inputInteractionFile.getAbsolutePath(), logWriter); writeAndOut("Replication file: " + replicationInteractionFile.getAbsolutePath(), logWriter); writeAndOut("Output prefix: " + outputPrefix, logWriter); writeAndOut("Min interaction z-score: " + minAbsInteractionZ, logWriter); writeAndOut("Min replication interaction z-score: " + minAbsReplicationInteractionZ, logWriter); + writeAndOut("Min interaction z-score covariate counter: " + minAbsInteractionZCovariateCount, logWriter); + writeAndOut("Min replication interaction z-score covariate counter: " + minAbsReplicationInteractionZCovariateCount, logWriter); if (matchOnChrPos) { writeAndOut("Matching variants on chr-pos", logWriter); } @@ -149,13 +187,13 @@ public static void main(String[] args) throws FileNotFoundException, IOException writeAndOut("Covariates to include: " + covariatesToIncludeFile.getAbsolutePath(), logWriter); } writeAndOut("", logWriter); - + final HashSet covariantsToInclude; if (covariatesToIncludeFile != null) { covariantsToInclude = new HashSet(); BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(covariatesToIncludeFile), "UTF-8")); - String line; + String line; while ((line = reader.readLine()) != null) { covariantsToInclude.add(line.trim()); } @@ -178,6 +216,7 @@ public static void main(String[] args) throws FileNotFoundException, IOException int significant = 0; int notSignificant = 0; int notTestedInReplication = 0; + int nanReplication = 0; int notSignificantReplicationSameDirection = 0; int notSignificantReplicationOppositeDirection = 0; int significantReplicationOppositeDirection = 0; @@ -185,11 +224,17 @@ public static void main(String[] args) throws FileNotFoundException, 
IOException int reporter = 0; + LinkedHashMap covariateCounts = new LinkedHashMap(inputFile.getCovariateCount()); + for (String covariate : inputFile.getCovariates()) { + covariateCounts.put(covariate, new CovariateCount()); + } + for (BinaryInteractionVariant variant : inputFile.getVariants()) { String variantName = variant.getName(); BinaryInteractionVariant replicationVariant; + boolean swap; if (matchOnChrPos) { replicationVariant = replicationFile.getVariant(variant.getChr(), variant.getPos()); @@ -200,6 +245,17 @@ public static void main(String[] args) throws FileNotFoundException, IOException replicationVariant = null; } } + + if (replicationVariant != null) { + if (!(variant.getRefAllele() == replicationVariant.getRefAllele() && variant.getAltAllele() == replicationVariant.getAltAllele()) + && !(variant.getRefAllele() == replicationVariant.getAltAllele() && variant.getAltAllele() == replicationVariant.getRefAllele())) { + System.err.println("Allele mismatch!"); + } + swap = variant.getAltAllele() != replicationVariant.getAltAllele(); + } else { + swap = false; + } + //Do loop anyway to also count not replicated int[] genePointers = inputFile.getVariant(variantName).getGenePointers(); @@ -223,46 +279,42 @@ public static void main(String[] args) throws FileNotFoundException, IOException if (replicationVariant != null && replicationFile.containsInteraction(replicationVariant.getName(), gene.getName(), interation.getCovariateName())) { - if (!(variant.getRefAllele() == replicationVariant.getRefAllele() && variant.getAltAllele() == replicationVariant.getAltAllele()) - && !(variant.getRefAllele() == replicationVariant.getAltAllele() && variant.getAltAllele() == replicationVariant.getRefAllele())) { - System.err.println("Allele mismatch!"); - continue covairates; - } - BinaryInteractionZscores replicationZscores = replicationFile.readInteractionResults(replicationVariant.getName(), gene.getName(), interation.getCovariateName()); double replicationInteractionZscore 
= replicationZscores.getZscoreInteractionMeta(); - boolean swap = variant.getAltAllele() != replicationVariant.getAltAllele(); - BinaryInteractionQtlZscores replicationQtlRes = replicationFile.readQtlResults(replicationVariant.getName(), gene.getName()); - if (swap) { - replicationInteractionZscore *= -1; - } + if (!Double.isNaN(replicationInteractionZscore)) { - if (replicationInteractionZscore <= -minAbsReplicationInteractionZ || replicationInteractionZscore >= minAbsReplicationInteractionZ) { - if (metaInteractionZ * replicationInteractionZscore >= 0) { - ++significantReplicationSameDirection; + if (swap) { + replicationInteractionZscore *= -1; + } + if (replicationInteractionZscore <= -minAbsReplicationInteractionZ || replicationInteractionZscore >= minAbsReplicationInteractionZ) { + if (metaInteractionZ * replicationInteractionZscore >= 0) { + ++significantReplicationSameDirection; - writeInteraction(row, variantName, gene, interation, variant, replicationQtlRes, replicationZscores, swap, replicatedSameDirectionWriter); - } else { - ++significantReplicationOppositeDirection; + writeInteraction(row, variantName, gene, interation, variant, replicationQtlRes, replicationZscores, swap, replicatedSameDirectionWriter); - writeInteraction(row, variantName, gene, interation, variant, replicationQtlRes, replicationZscores, swap, replicatedOppositeDirectionWriter); - } - } else { - if (metaInteractionZ * replicationInteractionZscore >= 0) { - ++notSignificantReplicationSameDirection; - writeInteraction(row, variantName, gene, interation, variant, replicationQtlRes, replicationZscores, swap, notReplicatedSameDirectionWriter); + } else { + ++significantReplicationOppositeDirection; + + writeInteraction(row, variantName, gene, interation, variant, replicationQtlRes, replicationZscores, swap, replicatedOppositeDirectionWriter); + } } else { - ++notSignificantReplicationOppositeDirection; - writeInteraction(row, variantName, gene, interation, variant, replicationQtlRes, 
replicationZscores, swap, notReplicatedOppositeDirectionWriter); + if (metaInteractionZ * replicationInteractionZscore >= 0) { + ++notSignificantReplicationSameDirection; + writeInteraction(row, variantName, gene, interation, variant, replicationQtlRes, replicationZscores, swap, notReplicatedSameDirectionWriter); + } else { + ++notSignificantReplicationOppositeDirection; + writeInteraction(row, variantName, gene, interation, variant, replicationQtlRes, replicationZscores, swap, notReplicatedOppositeDirectionWriter); + } } + } else { + ++nanReplication; } - } else { ++notTestedInReplication; } @@ -270,6 +322,48 @@ public static void main(String[] args) throws FileNotFoundException, IOException } else { ++notSignificant; } + + if (metaInteractionZ >= minAbsInteractionZCovariateCount || metaInteractionZ <= -minAbsInteractionZCovariateCount) { + + CovariateCount thisCovariateCounts = covariateCounts.get(interation.getCovariateName()); + thisCovariateCounts.incrementCovariateSignificant(); + + if (replicationVariant != null && replicationFile.containsInteraction(replicationVariant.getName(), gene.getName(), interation.getCovariateName())) { + + BinaryInteractionZscores replicationZscores = replicationFile.readInteractionResults(replicationVariant.getName(), gene.getName(), interation.getCovariateName()); + double replicationInteractionZscore = replicationZscores.getZscoreInteractionMeta(); + + if (!Double.isNaN(replicationInteractionZscore)) { + + if (swap) { + replicationInteractionZscore *= -1; + } + + if (replicationInteractionZscore <= -minAbsReplicationInteractionZCovariateCount || replicationInteractionZscore >= minAbsReplicationInteractionZCovariateCount) { + if (metaInteractionZ * replicationInteractionZscore >= 0) { + thisCovariateCounts.incrementReplicatedSameDirection(); + + } else { + thisCovariateCounts.incrementReplicatedOppositeDirection(); + } + } else { + if (metaInteractionZ * replicationInteractionZscore >= 0) { + 
thisCovariateCounts.incrementNotReplicatedSameDirection(); + } else { + thisCovariateCounts.incrementNotReplicatedOppositeDirection(); + } + } + + } else { + } + + } else { + } + + } else { + } + + } ++reporter; @@ -286,6 +380,8 @@ public static void main(String[] args) throws FileNotFoundException, IOException notReplicatedSameDirectionWriter.close(); notReplicatedOppositeDirectionWriter.close(); + writeCovaraiteCounts(new File(outputPrefix + "_CovariateCounts.txt"), covariateCounts); + NumberFormat numberFormat = NumberFormat.getInstance(); numberFormat.setMinimumFractionDigits(0); numberFormat.setMaximumFractionDigits(2); @@ -295,13 +391,14 @@ public static void main(String[] args) throws FileNotFoundException, IOException writeAndOut(" - Not significant: " + numberFormat.format(notSignificant) + " (" + numberFormat.format(notSignificant * 100d / (notSignificant + significant)) + "%)", logWriter); writeAndOut(" - Significant: " + numberFormat.format(significant) + " (" + numberFormat.format(significant * 100d / (notSignificant + significant)) + "%)", logWriter); writeAndOut(" * Not in replication: " + numberFormat.format(notTestedInReplication) + " (" + numberFormat.format(notTestedInReplication * 100d / significant) + "%)", logWriter); + writeAndOut(" * NaN in replication: " + numberFormat.format(nanReplication) + " (" + numberFormat.format(notTestedInReplication * 100d / significant) + "%)", logWriter); writeAndOut(" * Not significant in replication: " + numberFormat.format(notSignificantReplicationSameDirection + notSignificantReplicationOppositeDirection) + " (" + numberFormat.format((notSignificantReplicationSameDirection + notSignificantReplicationOppositeDirection) * 100d / significant) + "%)", logWriter); writeAndOut(" # Same direction: " + numberFormat.format(notSignificantReplicationSameDirection) + " (" + numberFormat.format(notSignificantReplicationSameDirection * 100d / (notSignificantReplicationSameDirection + 
notSignificantReplicationOppositeDirection)) + "%)", logWriter); writeAndOut(" # Opposite direction: " + numberFormat.format(notSignificantReplicationOppositeDirection) + " (" + numberFormat.format(notSignificantReplicationOppositeDirection * 100d / (notSignificantReplicationSameDirection + notSignificantReplicationOppositeDirection)) + "%)", logWriter); writeAndOut(" * Significant in replication: " + numberFormat.format(significantReplicationSameDirection + significantReplicationOppositeDirection) + " (" + numberFormat.format((significantReplicationSameDirection + significantReplicationOppositeDirection) * 100d / significant) + "%)", logWriter); writeAndOut(" # Same direction: " + numberFormat.format(significantReplicationSameDirection) + " (" + numberFormat.format(significantReplicationSameDirection * 100d / (significantReplicationSameDirection + significantReplicationOppositeDirection)) + "%)", logWriter); writeAndOut(" # Opposite direction: " + numberFormat.format(significantReplicationOppositeDirection) + " (" + numberFormat.format(significantReplicationOppositeDirection * 100d / (significantReplicationSameDirection + significantReplicationOppositeDirection)) + "%)", logWriter); - + logWriter.close(); } @@ -347,10 +444,91 @@ private static CSVWriter writeHeader(File file, String[] row) throws IOException replicatedSameDirectionWriter.writeNext(row); return replicatedSameDirectionWriter; } - - private static void writeAndOut(String message, Writer writer) throws IOException{ + + private static void writeCovaraiteCounts(File file, LinkedHashMap covariateCounts) throws IOException { + + CSVWriter covariateCountWriter = new CSVWriter(new BufferedWriter(new FileWriter(file)), '\t', '\0', '\0'); + int c = 0; + String[] row2 = new String[6]; + row2[c++] = "Covariate"; + row2[c++] = "Significant"; + row2[c++] = "ReplicatedSameDirection"; + row2[c++] = "ReplicatedOppositeDirection"; + row2[c++] = "NotReplicateSameDirection"; + row2[c++] = 
"NotReplicatedOppositeDirection"; + covariateCountWriter.writeNext(row2); + + for (Map.Entry covariateEntry : covariateCounts.entrySet()) { + + CovariateCount thisCounts = covariateEntry.getValue(); + + c = 0; + row2[c++] = covariateEntry.getKey(); + row2[c++] = String.valueOf(thisCounts.getCovariateSignificant()); + row2[c++] = String.valueOf(thisCounts.getReplicatedSameDirection()); + row2[c++] = String.valueOf(thisCounts.getReplicatedOppositeDirection()); + row2[c++] = String.valueOf(thisCounts.getNotReplicatedSameDirection()); + row2[c++] = String.valueOf(thisCounts.getNotReplicatedOppositeDirection()); + covariateCountWriter.writeNext(row2); + + } + + covariateCountWriter.close(); + + } + + private static void writeAndOut(String message, Writer writer) throws IOException { writer.append(message); writer.append('\n'); System.out.println(message); } + + private static class CovariateCount { + + private int covariateSignificant = 0; + private int replicatedSameDirection = 0; + private int replicatedOppositeDirection = 0; + private int notReplicatedSameDirection = 0; + private int notReplicatedOppositeDirection = 0; + + public int getCovariateSignificant() { + return covariateSignificant; + } + + public int getReplicatedSameDirection() { + return replicatedSameDirection; + } + + public int getReplicatedOppositeDirection() { + return replicatedOppositeDirection; + } + + public int getNotReplicatedSameDirection() { + return notReplicatedSameDirection; + } + + public int getNotReplicatedOppositeDirection() { + return notReplicatedOppositeDirection; + } + + public void incrementCovariateSignificant() { + covariateSignificant++; + } + + public void incrementReplicatedSameDirection() { + replicatedSameDirection++; + } + + public void incrementReplicatedOppositeDirection() { + replicatedOppositeDirection++; + } + + public void incrementNotReplicatedSameDirection() { + notReplicatedSameDirection++; + } + + public void incrementNotReplicatedOppositeDirection() { + 
notReplicatedOppositeDirection++; + } + } } From a5d9f89af79e1ab6912e995c2b2b3900c1ad9036 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Fri, 13 Mar 2015 12:31:08 +0100 Subject: [PATCH 006/143] DoubleMatrixDataset improvement --- .../java/umcg/genetica/math/matrix2/DoubleMatrixDataset.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/genetica-libraries/src/main/java/umcg/genetica/math/matrix2/DoubleMatrixDataset.java b/genetica-libraries/src/main/java/umcg/genetica/math/matrix2/DoubleMatrixDataset.java index b7664b3ed..7156bf8e2 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/math/matrix2/DoubleMatrixDataset.java +++ b/genetica-libraries/src/main/java/umcg/genetica/math/matrix2/DoubleMatrixDataset.java @@ -13,6 +13,7 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.LinkedHashSet; @@ -21,6 +22,7 @@ import java.util.Map; import java.util.Map.Entry; import java.util.NoSuchElementException; +import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Pattern; @@ -71,7 +73,7 @@ public DoubleMatrixDataset(DoubleMatrix2D matrix, LinkedHashMap hash this.matrix = matrix; } - public DoubleMatrixDataset(List rowNames, List colNames) { + public DoubleMatrixDataset(Collection rowNames, Collection colNames) { hashRows = new LinkedHashMap(rowNames.size()); hashCols = new LinkedHashMap(colNames.size()); From ba93aa6ee660fbc7380ff8bd36e2c3ee9d76a974 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Fri, 13 Mar 2015 12:31:24 +0100 Subject: [PATCH 007/143] Typo fix --- .../ReplicateInteractions.java | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java 
b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java index 3a18f9e76..f400c266a 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java @@ -266,20 +266,20 @@ public static void main(String[] args) throws FileNotFoundException, IOException covairates: for (Iterator iterator = inputFile.readVariantGeneResults(variantName, gene.getName()); iterator.hasNext();) { - BinaryInteractionQueryResult interation = iterator.next(); + BinaryInteractionQueryResult interaction = iterator.next(); - if (covariantsToInclude != null && !covariantsToInclude.contains(interation.getCovariateName())) { + if (covariantsToInclude != null && !covariantsToInclude.contains(interaction.getCovariateName())) { continue covairates; } - double metaInteractionZ = interation.getInteractionZscores().getZscoreInteractionMeta(); + double metaInteractionZ = interaction.getInteractionZscores().getZscoreInteractionMeta(); if (metaInteractionZ >= minAbsInteractionZ || metaInteractionZ <= -minAbsInteractionZ) { ++significant; - if (replicationVariant != null && replicationFile.containsInteraction(replicationVariant.getName(), gene.getName(), interation.getCovariateName())) { + if (replicationVariant != null && replicationFile.containsInteraction(replicationVariant.getName(), gene.getName(), interaction.getCovariateName())) { - BinaryInteractionZscores replicationZscores = replicationFile.readInteractionResults(replicationVariant.getName(), gene.getName(), interation.getCovariateName()); + BinaryInteractionZscores replicationZscores = replicationFile.readInteractionResults(replicationVariant.getName(), gene.getName(), interaction.getCovariateName()); double replicationInteractionZscore = replicationZscores.getZscoreInteractionMeta(); BinaryInteractionQtlZscores replicationQtlRes = 
replicationFile.readQtlResults(replicationVariant.getName(), gene.getName()); @@ -295,20 +295,20 @@ public static void main(String[] args) throws FileNotFoundException, IOException ++significantReplicationSameDirection; - writeInteraction(row, variantName, gene, interation, variant, replicationQtlRes, replicationZscores, swap, replicatedSameDirectionWriter); + writeInteraction(row, variantName, gene, interaction, variant, replicationQtlRes, replicationZscores, swap, replicatedSameDirectionWriter); } else { ++significantReplicationOppositeDirection; - writeInteraction(row, variantName, gene, interation, variant, replicationQtlRes, replicationZscores, swap, replicatedOppositeDirectionWriter); + writeInteraction(row, variantName, gene, interaction, variant, replicationQtlRes, replicationZscores, swap, replicatedOppositeDirectionWriter); } } else { if (metaInteractionZ * replicationInteractionZscore >= 0) { ++notSignificantReplicationSameDirection; - writeInteraction(row, variantName, gene, interation, variant, replicationQtlRes, replicationZscores, swap, notReplicatedSameDirectionWriter); + writeInteraction(row, variantName, gene, interaction, variant, replicationQtlRes, replicationZscores, swap, notReplicatedSameDirectionWriter); } else { ++notSignificantReplicationOppositeDirection; - writeInteraction(row, variantName, gene, interation, variant, replicationQtlRes, replicationZscores, swap, notReplicatedOppositeDirectionWriter); + writeInteraction(row, variantName, gene, interaction, variant, replicationQtlRes, replicationZscores, swap, notReplicatedOppositeDirectionWriter); } } } else { @@ -325,12 +325,12 @@ public static void main(String[] args) throws FileNotFoundException, IOException if (metaInteractionZ >= minAbsInteractionZCovariateCount || metaInteractionZ <= -minAbsInteractionZCovariateCount) { - CovariateCount thisCovariateCounts = covariateCounts.get(interation.getCovariateName()); + CovariateCount thisCovariateCounts = 
covariateCounts.get(interaction.getCovariateName()); thisCovariateCounts.incrementCovariateSignificant(); - if (replicationVariant != null && replicationFile.containsInteraction(replicationVariant.getName(), gene.getName(), interation.getCovariateName())) { + if (replicationVariant != null && replicationFile.containsInteraction(replicationVariant.getName(), gene.getName(), interaction.getCovariateName())) { - BinaryInteractionZscores replicationZscores = replicationFile.readInteractionResults(replicationVariant.getName(), gene.getName(), interation.getCovariateName()); + BinaryInteractionZscores replicationZscores = replicationFile.readInteractionResults(replicationVariant.getName(), gene.getName(), interaction.getCovariateName()); double replicationInteractionZscore = replicationZscores.getZscoreInteractionMeta(); if (!Double.isNaN(replicationInteractionZscore)) { From f998450f70fcd2e7a712f7f8da280a4c930785bc Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Sat, 14 Mar 2015 14:08:53 +0100 Subject: [PATCH 008/143] pom fix --- eqtl-mapping-pipeline/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eqtl-mapping-pipeline/pom.xml b/eqtl-mapping-pipeline/pom.xml index 67f1f6e6c..869e1d484 100644 --- a/eqtl-mapping-pipeline/pom.xml +++ b/eqtl-mapping-pipeline/pom.xml @@ -14,7 +14,7 @@ nl.systemsgenetics genetica-libraries - 1.0.5 + 1.0.6-SNAPSHOT log4j From 75749a7f45513bffcc93cfc081aa5a309ded3783 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Sat, 14 Mar 2015 19:22:53 +0100 Subject: [PATCH 009/143] working --- .../InvestigateCovariate.java | 478 +++++++++++++++++- .../gui/EQTLMappingPipelineConsole.java | 4 + 2 files changed, 476 insertions(+), 6 deletions(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/InvestigateCovariate.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/InvestigateCovariate.java index a09e79a53..1065e6f1c 100644 --- 
a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/InvestigateCovariate.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/InvestigateCovariate.java @@ -1,16 +1,482 @@ package eqtlmappingpipeline.binaryInteraction; +import au.com.bytecode.opencsv.CSVWriter; +import eqtlmappingpipeline.Main; +import gnu.trove.map.hash.TObjectIntHashMap; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Writer; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.cli.PosixParser; +import umcg.genetica.io.binInteraction.BinaryInteractionFile; +import umcg.genetica.io.binInteraction.BinaryInteractionFileException; +import umcg.genetica.io.binInteraction.BinaryInteractionQueryResult; +import umcg.genetica.io.binInteraction.BinaryInteractionZscores; +import umcg.genetica.io.binInteraction.gene.BinaryInteractionGene; +import umcg.genetica.io.binInteraction.variant.BinaryInteractionVariant; +import umcg.genetica.math.matrix2.DoubleMatrixDataset; + /** * * @author Patrick Deelen */ public class InvestigateCovariate { - /** - * @param args the command line arguments - */ - public static void main(String[] args) { - // TODO code application logic here - } + private static final Options OPTIONS; + + static { + + OPTIONS = new Options(); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Binary interaction file (must be a meta analysis)"); + OptionBuilder.withLongOpt("input"); + 
OptionBuilder.isRequired(); + OPTIONS.addOption(OptionBuilder.create("i")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Binary interaction file to use as replication (must be a meta analysis)"); + OptionBuilder.withLongOpt("replication"); + OptionBuilder.isRequired(); + OPTIONS.addOption(OptionBuilder.create("r")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Ouput prefix"); + OptionBuilder.withLongOpt("output"); + OptionBuilder.isRequired(); + OPTIONS.addOption(OptionBuilder.create("o")); + + OptionBuilder.withArgName("double"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Minimum absolute interaction z-score"); + OptionBuilder.withLongOpt("interactionZ"); + OptionBuilder.isRequired(); + OPTIONS.addOption(OptionBuilder.create("iz")); + + OptionBuilder.withArgName("double"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Minimum absolute replication interaction z-score"); + OptionBuilder.withLongOpt("replicationInteractionZ"); + OptionBuilder.isRequired(); + OPTIONS.addOption(OptionBuilder.create("riz")); + + OptionBuilder.withArgName("string"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Covariate name"); + OptionBuilder.withLongOpt("queryCovariate"); + OptionBuilder.isRequired(); + OPTIONS.addOption(OptionBuilder.create("qc")); + + OptionBuilder.withDescription("If set match variant on chr-pos"); + OptionBuilder.withLongOpt("chrPos"); + OPTIONS.addOption(OptionBuilder.create("cp")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("File with covariates to include in analysis"); + OptionBuilder.withLongOpt("covariats"); + OPTIONS.addOption(OptionBuilder.create("c")); + + } + + /** + * @param args the command line arguments + */ + public static void main(String[] args) throws IOException, FileNotFoundException, BinaryInteractionFileException { + + final File 
inputInteractionFile; + final File replicationInteractionFile; + final double minAbsInteractionZ; + final double minAbsReplicationInteractionZ; + final boolean matchOnChrPos; + final String outputPrefix; + final String queryCovariateName; + final File covariatesToIncludeFile; + + try { + final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); + + inputInteractionFile = new File(commandLine.getOptionValue("i")); + replicationInteractionFile = new File(commandLine.getOptionValue("r")); + outputPrefix = commandLine.getOptionValue("o"); + queryCovariateName = commandLine.getOptionValue("qc"); + + try { + minAbsInteractionZ = Double.parseDouble(commandLine.getOptionValue("iz")); + } catch (NumberFormatException ex) { + System.out.println("Cannot not parse --interactionZ as double: " + commandLine.getOptionValue("iz")); + System.exit(1); + return; + } + + try { + minAbsReplicationInteractionZ = Double.parseDouble(commandLine.getOptionValue("riz")); + } catch (NumberFormatException ex) { + System.out.println("Cannot not parse --replicationInteractionZ as double: " + commandLine.getOptionValue("riz")); + System.exit(1); + return; + } + + if (commandLine.hasOption("c")) { + covariatesToIncludeFile = new File(commandLine.getOptionValue("c")); + } else { + covariatesToIncludeFile = null; + } + + matchOnChrPos = commandLine.hasOption("cp"); + + } catch (ParseException ex) { + System.err.println("Invalid command line arguments: "); + System.err.println(ex.getMessage()); + System.err.println(); + new HelpFormatter().printHelp(" ", OPTIONS); + System.exit(1); + return; + } + BufferedWriter logWriter = new BufferedWriter(new FileWriter(outputPrefix + "_Log.txt")); + + writeAndOut("Software version: " + Main.VERSION, logWriter); + writeAndOut("Input file: " + inputInteractionFile.getAbsolutePath(), logWriter); + writeAndOut("Replication file: " + replicationInteractionFile.getAbsolutePath(), logWriter); + writeAndOut("Query covariate: " + queryCovariateName, 
logWriter); + writeAndOut("Output prefix: " + outputPrefix, logWriter); + writeAndOut("Min interaction z-score: " + minAbsInteractionZ, logWriter); + writeAndOut("Min replication interaction z-score: " + minAbsReplicationInteractionZ, logWriter); + if (matchOnChrPos) { + writeAndOut("Matching variants on chr-pos", logWriter); + } + if (covariatesToIncludeFile != null) { + writeAndOut("Covariates to include: " + covariatesToIncludeFile.getAbsolutePath(), logWriter); + } + writeAndOut("", logWriter); + + final HashSet covariantsToIncluded; + if (covariatesToIncludeFile != null) { + covariantsToIncluded = new HashSet(); + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(covariatesToIncludeFile), "UTF-8")); + String line; + while ((line = reader.readLine()) != null) { + covariantsToIncluded.add(line.trim()); + } + writeAndOut("Covariates included: " + covariantsToIncluded.size(), logWriter); + writeAndOut("", logWriter); + + if (!covariantsToIncluded.contains(queryCovariateName)) { + System.err.println("Query covariate not in include list"); + System.exit(1); + return; + } + + } else { + covariantsToIncluded = null; + } + + BinaryInteractionFile inputFile = BinaryInteractionFile.load(inputInteractionFile, true); + BinaryInteractionFile replicationFile = BinaryInteractionFile.load(replicationInteractionFile, true); + + LinkedHashSet genesOfInterest = new LinkedHashSet(); + + if (!inputFile.containsCovariant(queryCovariateName)) { + System.err.println("Covariate not found in input data"); + System.exit(1); + return; + } + + if (!replicationFile.containsCovariant(queryCovariateName)) { + System.err.println("Covariate not found in replication data"); + System.exit(1); + return; + } + + variants: + for (final BinaryInteractionVariant variant : inputFile.getVariants()) { + + final String variantName = variant.getName(); + + final BinaryInteractionVariant replicationVariant; + + if (matchOnChrPos) { + replicationVariant = 
replicationFile.getVariant(variant.getChr(), variant.getPos()); + if (replicationVariant == null) { + continue variants; + } + } else { + if (replicationFile.containsVariant(variantName)) { + replicationVariant = replicationFile.getVariant(variantName); + } else { + continue variants; + } + } + + //Only do if replication variant has been found + + if (!(variant.getRefAllele() == replicationVariant.getRefAllele() && variant.getAltAllele() == replicationVariant.getAltAllele()) + && !(variant.getRefAllele() == replicationVariant.getAltAllele() && variant.getAltAllele() == replicationVariant.getRefAllele())) { + System.err.println("Allele mismatch!"); + } + final boolean swap = variant.getAltAllele() != replicationVariant.getAltAllele(); + + final int[] genePointers = inputFile.getVariant(variantName).getGenePointers(); + + genes: + for (int genePointer : genePointers) { + + final BinaryInteractionGene gene = inputFile.getGene(genePointer); + + if (!inputFile.containsInteraction(variantName, gene.getName(), queryCovariateName)) { + continue genes; + } + + if (!replicationFile.containsInteraction(replicationVariant.getName(), gene.getName(), queryCovariateName)) { + continue genes; + } + + if (!replicationFile.containsInteraction(replicationVariant.getName(), gene.getName(), queryCovariateName)) { + continue genes; + } + + final BinaryInteractionZscores inputInteractionResult = inputFile.readInteractionResults(variantName, gene.getName(), queryCovariateName); + final double inputInteractionZ = inputInteractionResult.getZscoreInteractionMeta(); + + if (Double.isNaN(inputInteractionZ)) { + continue genes; + } + + if (!(inputInteractionZ <= -minAbsInteractionZ || inputInteractionZ >= -minAbsInteractionZ)) { + continue genes; + } + + if(!replicationFile.containsInteraction(replicationVariant.getName(), gene.getName(), queryCovariateName)){ + continue genes; + } + + final BinaryInteractionZscores replicationInteractionResult = 
replicationFile.readInteractionResults(replicationVariant.getName(), gene.getName(), queryCovariateName); + double replicationInteractionZ = replicationInteractionResult.getZscoreInteractionMeta(); + + if (Double.isNaN(replicationInteractionZ)) { + continue genes; + } + + if (!(replicationInteractionZ <= -minAbsReplicationInteractionZ || replicationInteractionZ >= -minAbsReplicationInteractionZ)) { + continue genes; + } + + //If here then discovery and replication significant + + if (swap) { + replicationInteractionZ *= -1; + } + + if (inputInteractionZ * replicationInteractionZ >= 0) { + //Same direction + genesOfInterest.add(gene.getName()); + } + + + + } + + } + + System.out.println("Number of genes of interest: " + genesOfInterest.size()); + + TObjectIntHashMap covaraitesOfInterestCount = new TObjectIntHashMap(); + TObjectIntHashMap genesOfInterestCount = new TObjectIntHashMap(); + LinkedHashSet covaraitesOfInterest = new LinkedHashSet(); + + //Here we now know which genes are of interest. 
+ //We are now going to search for other covariates that are significant for any of these genes + for (String geneName : genesOfInterest) { + BinaryInteractionGene gene = inputFile.getGene(geneName); + + variants: + for (int variantPointer : gene.getVariantPointers()) { + BinaryInteractionVariant variant = inputFile.getVariant(variantPointer); + + final String variantName = variant.getName(); + + final BinaryInteractionVariant replicationVariant; + + if (matchOnChrPos) { + replicationVariant = replicationFile.getVariant(variant.getChr(), variant.getPos()); + if (replicationVariant == null) { + continue variants; + } + } else { + if (replicationFile.containsVariant(variantName)) { + replicationVariant = replicationFile.getVariant(variantName); + } else { + continue variants; + } + } + + //Only do if replication variant has been found + + if (!(variant.getRefAllele() == replicationVariant.getRefAllele() && variant.getAltAllele() == replicationVariant.getAltAllele()) + && !(variant.getRefAllele() == replicationVariant.getAltAllele() && variant.getAltAllele() == replicationVariant.getRefAllele())) { + System.err.println("Allele mismatch!"); + } + final boolean swap = variant.getAltAllele() != replicationVariant.getAltAllele(); + + covairates: + for (Iterator iterator = inputFile.readVariantGeneResults(variantName, gene.getName()); iterator.hasNext();) { + + BinaryInteractionQueryResult interaction = iterator.next(); + + if (covariantsToIncluded != null && !covariantsToIncluded.contains(interaction.getCovariateName())) { + continue covairates; + } + + final BinaryInteractionZscores inputInteractionResult = interaction.getInteractionZscores(); + final double inputInteractionZ = inputInteractionResult.getZscoreInteractionMeta(); + + if (Double.isNaN(inputInteractionZ)) { + continue; + } + + if (!(inputInteractionZ <= -minAbsInteractionZ || inputInteractionZ >= -minAbsInteractionZ)) { + continue covairates; + } + + 
if(!replicationFile.containsInteraction(replicationVariant.getName(), gene.getName(), interaction.getCovariateName())){ + continue covairates; + } + + final BinaryInteractionZscores replicationInteractionResult = replicationFile.readInteractionResults(replicationVariant.getName(), gene.getName(), interaction.getCovariateName()); + double replicationInteractionZ = replicationInteractionResult.getZscoreInteractionMeta(); + + if (Double.isNaN(replicationInteractionZ)) { + continue covairates; + } + + if (!(replicationInteractionZ <= -minAbsReplicationInteractionZ || replicationInteractionZ >= -minAbsReplicationInteractionZ)) { + continue covairates; + } + + //If here then discovery and replication significant + + if (swap) { + replicationInteractionZ *= -1; + } + + if (inputInteractionZ * replicationInteractionZ >= 0) { + //Same direction + covaraitesOfInterestCount.adjustOrPutValue(interaction.getCovariateName(), 1, 1); + covaraitesOfInterest.add(interaction.getCovariateName()); + genesOfInterestCount.adjustOrPutValue(geneName, 1, 1); + } + + } + + } + + } + //We now also know which other covariates are of interest + + System.out.println("Number of covariates of interest: " + covaraitesOfInterest.size()); + + + writeCounts(genesOfInterest, genesOfInterestCount, new File(outputPrefix + "_Genes.txt")); + writeCounts(covaraitesOfInterest, covaraitesOfInterestCount, new File(outputPrefix + "_Covariates.txt")); + + DoubleMatrixDataset interactionZscores = new DoubleMatrixDataset(covaraitesOfInterest, genesOfInterest); + + DoubleMatrixDataset replicationInteractionZscores = new DoubleMatrixDataset(covaraitesOfInterest, genesOfInterest); + + for (String geneName : genesOfInterest) { + + BinaryInteractionGene gene = inputFile.getGene(geneName); + + variants: + for (int variantPointer : gene.getVariantPointers()) { + BinaryInteractionVariant variant = inputFile.getVariant(variantPointer); + + final String variantName = variant.getName(); + + final BinaryInteractionVariant 
replicationVariant; + + if (matchOnChrPos) { + replicationVariant = replicationFile.getVariant(variant.getChr(), variant.getPos()); + if (replicationVariant == null) { + continue variants; + } + } else { + if (replicationFile.containsVariant(variantName)) { + replicationVariant = replicationFile.getVariant(variantName); + } else { + continue variants; + } + } + + //Only do if replication variant has been found + + if (!(variant.getRefAllele() == replicationVariant.getRefAllele() && variant.getAltAllele() == replicationVariant.getAltAllele()) + && !(variant.getRefAllele() == replicationVariant.getAltAllele() && variant.getAltAllele() == replicationVariant.getRefAllele())) { + System.err.println("Allele mismatch!"); + } + final boolean swap = variant.getAltAllele() != replicationVariant.getAltAllele(); + + + for (String covariateName : covaraitesOfInterest) { + + final BinaryInteractionZscores inputInteractionResult = inputFile.readInteractionResults(variantName, geneName, covariateName); + + interactionZscores.setElement(covariateName, geneName, inputInteractionResult.getZscoreInteractionMeta()); + + final BinaryInteractionZscores replicationInteractionResult = replicationFile.readInteractionResults(replicationVariant.getName(), gene.getName(), covariateName); + double replicationInteractionZ = replicationInteractionResult.getZscoreInteractionMeta(); + if (swap) { + replicationInteractionZ *= -1; + } + replicationInteractionZscores.setElement(covariateName, geneName, replicationInteractionZ); + + } + + + } + } + + interactionZscores.save(outputPrefix + "_InteractionMatrix.txt"); + replicationInteractionZscores.save(outputPrefix + "_ReplicationInteractionMatrix.txt"); + + logWriter.close(); + + } + + private static void writeCounts(LinkedHashSet elements, TObjectIntHashMap counts, File file) throws IOException { + CSVWriter writer = new CSVWriter(new BufferedWriter(new FileWriter(file)), '\t', '\0', '\0'); + + String[] row = new String[2]; + + for (String elementName 
: elements) { + row[0] = elementName; + row[1] = String.valueOf(counts.get(elementName)); + writer.writeNext(row); + } + + writer.close(); + + } + private static void writeAndOut(String message, Writer writer) throws IOException { + writer.append(message); + writer.append('\n'); + System.out.println(message); + } } diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/gui/EQTLMappingPipelineConsole.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/gui/EQTLMappingPipelineConsole.java index 48494cd1f..ba6e06f2b 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/gui/EQTLMappingPipelineConsole.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/gui/EQTLMappingPipelineConsole.java @@ -11,6 +11,7 @@ import eqtlmappingpipeline.binaryInteraction.BinaryInteractionMetaAnalysis; import eqtlmappingpipeline.binaryInteraction.ConvertTextOutputToBinary; import eqtlmappingpipeline.binaryInteraction.CovariateImportance; +import eqtlmappingpipeline.binaryInteraction.InvestigateCovariate; import eqtlmappingpipeline.binaryInteraction.QueryBinaryInteraction; import eqtlmappingpipeline.binaryInteraction.ReplicateInteractions; import eqtlmappingpipeline.interactionanalysis.InteractionAnalysisConsoleGUI; @@ -131,6 +132,9 @@ public void main(String[] args) throws Exception { } else if (mode.equals("interactionChi2") || mode.equals("ic")) { CovariateImportance.main(Arrays.copyOfRange(args, 2, args.length)); return; + } else if (mode.equals("covariate")) { + InvestigateCovariate.main(Arrays.copyOfRange(args, 2, args.length)); + return; } else if (mode.equals("pileupToVcf")) { PileupToVcf.main(Arrays.copyOfRange(args, 2, args.length)); return; From 79bade3c5571e36ea5171cbad3588bc321d52b8a Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Sat, 14 Mar 2015 20:18:52 +0100 Subject: [PATCH 010/143] Bug double matrix dataset --- .../java/umcg/genetica/math/matrix2/DoubleMatrixDataset.java | 1 + 1 file changed, 1 insertion(+) diff --git 
a/genetica-libraries/src/main/java/umcg/genetica/math/matrix2/DoubleMatrixDataset.java b/genetica-libraries/src/main/java/umcg/genetica/math/matrix2/DoubleMatrixDataset.java index 7156bf8e2..2b227f25b 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/math/matrix2/DoubleMatrixDataset.java +++ b/genetica-libraries/src/main/java/umcg/genetica/math/matrix2/DoubleMatrixDataset.java @@ -87,6 +87,7 @@ public DoubleMatrixDataset(Collection rowNames, Collection colNames) { i = 0; for (C col : colNames) { hashCols.put(col, i); + ++i; } if ((hashRows.size() * (long) hashCols.size()) < (Integer.MAX_VALUE - 2)) { From 826237130e067f2c609fcf98fff69e292def8ced Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Sun, 15 Mar 2015 11:03:28 +0100 Subject: [PATCH 011/143] interaction stuff --- .../binaryInteraction/InvestigateCovariate.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/InvestigateCovariate.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/InvestigateCovariate.java index 1065e6f1c..84a8184c0 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/InvestigateCovariate.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/InvestigateCovariate.java @@ -258,7 +258,7 @@ public static void main(String[] args) throws IOException, FileNotFoundException continue genes; } - if (!(inputInteractionZ <= -minAbsInteractionZ || inputInteractionZ >= -minAbsInteractionZ)) { + if (!(inputInteractionZ <= -minAbsInteractionZ || inputInteractionZ >= minAbsInteractionZ)) { continue genes; } @@ -273,7 +273,7 @@ public static void main(String[] args) throws IOException, FileNotFoundException continue genes; } - if (!(replicationInteractionZ <= -minAbsReplicationInteractionZ || replicationInteractionZ >= -minAbsReplicationInteractionZ)) { + if (!(replicationInteractionZ <= 
-minAbsReplicationInteractionZ || replicationInteractionZ >= minAbsReplicationInteractionZ)) { continue genes; } @@ -350,7 +350,7 @@ public static void main(String[] args) throws IOException, FileNotFoundException continue; } - if (!(inputInteractionZ <= -minAbsInteractionZ || inputInteractionZ >= -minAbsInteractionZ)) { + if (!(inputInteractionZ <= -minAbsInteractionZ || inputInteractionZ >= minAbsInteractionZ)) { continue covairates; } @@ -365,7 +365,7 @@ public static void main(String[] args) throws IOException, FileNotFoundException continue covairates; } - if (!(replicationInteractionZ <= -minAbsReplicationInteractionZ || replicationInteractionZ >= -minAbsReplicationInteractionZ)) { + if (!(replicationInteractionZ <= -minAbsReplicationInteractionZ || replicationInteractionZ >= minAbsReplicationInteractionZ)) { continue covairates; } @@ -437,6 +437,7 @@ public static void main(String[] args) throws IOException, FileNotFoundException final BinaryInteractionZscores inputInteractionResult = inputFile.readInteractionResults(variantName, geneName, covariateName); + //System.out.println(covariateName + "-" + geneName + "-" + inputInteractionResult.getZscoreInteractionMeta()); interactionZscores.setElement(covariateName, geneName, inputInteractionResult.getZscoreInteractionMeta()); final BinaryInteractionZscores replicationInteractionResult = replicationFile.readInteractionResults(replicationVariant.getName(), gene.getName(), covariateName); @@ -451,6 +452,9 @@ public static void main(String[] args) throws IOException, FileNotFoundException } } + + System.out.println("TEST: " + interactionZscores.getElement("ENSG00000001167", "ENSG00000066084")); + System.out.println("TEST: " + interactionZscores.getElement("ENSG00000001167", "ENSG00000183604")); interactionZscores.save(outputPrefix + "_InteractionMatrix.txt"); replicationInteractionZscores.save(outputPrefix + "_ReplicationInteractionMatrix.txt"); From 9b775bcd820c040c357094693c6c51b51d1ad8b3 Mon Sep 17 00:00:00 2001 
From: Patrick Deelen Date: Mon, 23 Mar 2015 13:00:19 +0100 Subject: [PATCH 012/143] interaction stuff --- eqtl-mapping-pipeline/pom.xml | 9 + .../InvestigateCovariate.java | 3 - .../QueryBinaryInteraction.java | 255 ++++++++++++++---- 3 files changed, 217 insertions(+), 50 deletions(-) diff --git a/eqtl-mapping-pipeline/pom.xml b/eqtl-mapping-pipeline/pom.xml index 869e1d484..e15aa90e7 100644 --- a/eqtl-mapping-pipeline/pom.xml +++ b/eqtl-mapping-pipeline/pom.xml @@ -137,6 +137,15 @@ + + org.apache.maven.plugins + maven-compiler-plugin + 2.3.2 + + 1.7 + 1.7 + + \ No newline at end of file diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/InvestigateCovariate.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/InvestigateCovariate.java index 84a8184c0..3074035ce 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/InvestigateCovariate.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/InvestigateCovariate.java @@ -452,9 +452,6 @@ public static void main(String[] args) throws IOException, FileNotFoundException } } - - System.out.println("TEST: " + interactionZscores.getElement("ENSG00000001167", "ENSG00000066084")); - System.out.println("TEST: " + interactionZscores.getElement("ENSG00000001167", "ENSG00000183604")); interactionZscores.save(outputPrefix + "_InteractionMatrix.txt"); replicationInteractionZscores.save(outputPrefix + "_ReplicationInteractionMatrix.txt"); diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/QueryBinaryInteraction.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/QueryBinaryInteraction.java index bfb79934b..0dd18a48d 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/QueryBinaryInteraction.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/QueryBinaryInteraction.java @@ -1,9 
+1,11 @@ package eqtlmappingpipeline.binaryInteraction; +import au.com.bytecode.opencsv.CSVReader; import au.com.bytecode.opencsv.CSVWriter; import eqtlmappingpipeline.Main; import java.io.BufferedWriter; import java.io.File; +import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.OutputStreamWriter; @@ -13,6 +15,7 @@ import java.text.SimpleDateFormat; import java.util.Date; import java.util.Iterator; +import java.util.LinkedHashSet; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; @@ -68,7 +71,7 @@ public class QueryBinaryInteraction { OptionBuilder.withArgName("string"); OptionBuilder.hasArg(); OptionBuilder.withDescription("Covariate name (optional)"); - OptionBuilder.withLongOpt("cocariate"); + OptionBuilder.withLongOpt("covariate"); OPTIONS.addOption(OptionBuilder.create("c")); OptionBuilder.withArgName("string"); @@ -77,6 +80,12 @@ public class QueryBinaryInteraction { OptionBuilder.withLongOpt("variant"); OPTIONS.addOption(OptionBuilder.create("v")); + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("File with queries. Must have header. All columns are optional, options are gene, variant and covariate. 
Any combination of headers is possible (optional)"); + OptionBuilder.withLongOpt("queryFile"); + OPTIONS.addOption(OptionBuilder.create("qf")); + OptionBuilder.withArgName("double"); OptionBuilder.hasArg(); OptionBuilder.withDescription("Minimum absolute interaction z-score (not yet implemented)"); @@ -93,6 +102,7 @@ public static void main(String[] args) throws UnsupportedEncodingException, IOEx final String queryCovariateName; final String queryVariantName; final double queryMinAbsInteractionZ; + final File queryFile; try { final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); @@ -107,11 +117,22 @@ public static void main(String[] args) throws UnsupportedEncodingException, IOEx queryCovariateName = commandLine.getOptionValue("c"); queryVariantName = commandLine.getOptionValue("v"); + if (commandLine.hasOption("qf")) { + queryFile = new File(commandLine.getOptionValue("qf")); + if (queryGeneName != null || queryVariantName != null || queryCovariateName != null) { + System.err.println("Cannot combine query file with commandline query arguments"); + System.exit(1); + return; + } + } else { + queryFile = null; + } + if (commandLine.hasOption("iz")) { try { queryMinAbsInteractionZ = Double.parseDouble(commandLine.getOptionValue("iz")); } catch (NumberFormatException ex) { - System.out.println("Cannot not parse interactionZ as double: " + commandLine.getOptionValue("iz")); + System.err.println("Cannot not parse interactionZ as double: " + commandLine.getOptionValue("iz")); System.exit(1); return; } @@ -165,6 +186,10 @@ public static void main(String[] args) throws UnsupportedEncodingException, IOEx outputWriter.write("# - Query minimum absote interaction z-score: " + queryMinAbsInteractionZ); outputWriter.write('\n'); } + if (queryFile != null) { + outputWriter.write("# - Query file: " + queryFile.getAbsolutePath()); + outputWriter.write('\n'); + } outputWriter.write("#\n"); outputWriter.write("# Interaction file meta data: "); @@ -187,7 
+212,75 @@ public static void main(String[] args) throws UnsupportedEncodingException, IOEx outputWriter.write('\n'); outputWriter.write("#\n"); + final LinkedHashSet interactionQueries; + if (queryFile != null) { + interactionQueries = new LinkedHashSet(); + CSVReader queryReader = new CSVReader(new FileReader(queryFile), '\t', '\0'); + + String[] nextLine = queryReader.readNext(); + + int variantCol = -1; + int geneCol = -1; + int covariateCol = -1; + + //Parse header + for (int i = 0; i < nextLine.length; ++i) { + String headerEntry = nextLine[i].toLowerCase(); + switch (headerEntry) { + case "variant": + if (variantCol != -1) { + System.err.println("Variant column found twice"); + System.exit(1); + return; + } + variantCol = i; + break; + case "gene": + if (geneCol != -1) { + System.err.println("Gene column found twice"); + System.exit(1); + return; + } + geneCol = i; + break; + case "covariate": + if (covariateCol != -1) { + System.err.println("Covariate column found twice"); + System.exit(1); + return; + } + covariateCol = i; + break; + + } + + } + + if (variantCol == -1 && geneCol == -1 && covariateCol == -1) { + System.err.println("Did not detect appropiate header in query file"); + System.exit(1); + return; + } + while ((nextLine = queryReader.readNext()) != null) { + String variant = null; + String gene = null; + String covariate = null; + + if(variantCol != -1){ + variant = nextLine[variantCol]; + } + if(geneCol != -1){ + gene = nextLine[geneCol]; + } + if(covariateCol != -1){ + covariate = nextLine[covariateCol]; + } + interactionQueries.add(new InteractoinQuery(variant, gene, covariate)); + } + } else { + interactionQueries = null; + } CSVWriter tableWriter = new CSVWriter(outputWriter, '\t', '\0', '\0'); @@ -243,6 +336,74 @@ public static void main(String[] args) throws UnsupportedEncodingException, IOEx tableWriter.writeNext(row); + + if (interactionQueries != null) { + for(InteractoinQuery interactionQuery : interactionQueries){ + 
doQuery(interactionQuery.getGene(), interactionQuery.getVariant(), interactionQuery.getCovariate(), inputFile, tableWriter, row); + } + } else { + doQuery(queryGeneName, queryVariantName, queryCovariateName, inputFile, tableWriter, row); + } + + + + tableWriter.close(); + outputWriter.close(); + + } + + @SuppressWarnings({"null", "ConstantConditions"}) + private static void addRow(BinaryInteractionQueryResult queryRestult, BinaryInteractionFile inputFile, CSVWriter tableWriter, String[] row) throws BinaryInteractionFileException, IOException { + int c = 0; + + row[c++] = queryRestult.getVariantName(); + row[c++] = queryRestult.getGeneName(); + row[c++] = queryRestult.getCovariateName(); + + BinaryInteractionVariant variant = inputFile.getVariant(queryRestult.getVariantName()); + row[c++] = variant.getChr(); + row[c++] = String.valueOf(variant.getPos()); + row[c++] = variant.getRefAllele().getAlleleAsString() + '/' + variant.getAltAllele().getAlleleAsString(); + row[c++] = variant.getAltAllele().toString(); + + BinaryInteractionQtlZscores zscroresQtl = queryRestult.getQtlZscores(); + BinaryInteractionZscores zscroresInteraction = queryRestult.getInteractionZscores(); + + for (int cohortIndex = 0; cohortIndex < inputFile.getCohortCount(); ++cohortIndex) { + + if (inputFile.isNormalQtlStored()) { + row[c++] = String.valueOf(zscroresQtl.getSampleCounts()[cohortIndex]); + row[c++] = String.valueOf(zscroresQtl.getZscores()[cohortIndex]); + } + + row[c++] = String.valueOf(zscroresInteraction.getSamplesInteractionCohort()[cohortIndex]); + row[c++] = String.valueOf(zscroresInteraction.getrSquaredCohort()[cohortIndex]); + row[c++] = String.valueOf(zscroresInteraction.getZscoreSnpCohort()[cohortIndex]); + row[c++] = String.valueOf(zscroresInteraction.getZscoreCovariateCohort()[cohortIndex]); + row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionCohort()[cohortIndex]); + + if (inputFile.isFlippedZscoreStored()) { + row[c++] = 
String.valueOf(zscroresInteraction.getZscoreInteractionFlippedCohort()[cohortIndex]); + } + + } + + if (inputFile.isMetaAnalysis()) { + if (inputFile.isNormalQtlStored()) { + row[c++] = String.valueOf(zscroresQtl.getMetaZscore()); + } + row[c++] = String.valueOf(zscroresInteraction.getZscoreSnpMeta()); + row[c++] = String.valueOf(zscroresInteraction.getZscoreCovariateMeta()); + row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionMeta()); + if (inputFile.isFlippedZscoreStored()) { + row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionFlippedMeta()); + } + } + + tableWriter.writeNext(row); + } + + private static void doQuery(final String queryGeneName, final String queryVariantName, final String queryCovariateName, BinaryInteractionFile inputFile, CSVWriter tableWriter, String[] row) throws IOException, BinaryInteractionFileException { if (queryGeneName != null && queryVariantName != null && queryCovariateName != null) { addRow(inputFile.readVariantGeneCovariateResults(queryVariantName, queryGeneName, queryCovariateName), inputFile, tableWriter, row); @@ -296,10 +457,10 @@ public static void main(String[] args) throws UnsupportedEncodingException, IOEx } else { - for (BinaryInteractionVariant variant : inputFile.getVariants()) { - + for (BinaryInteractionVariant variant : inputFile.getVariants()) { + String variantName = variant.getName(); - + int[] genePointers = inputFile.getVariant(variantName).getGenePointers(); for (int genePointer : genePointers) { @@ -323,60 +484,60 @@ public static void main(String[] args) throws UnsupportedEncodingException, IOEx } - - tableWriter.close(); - outputWriter.close(); - } - @SuppressWarnings({"null", "ConstantConditions"}) - private static void addRow(BinaryInteractionQueryResult queryRestult, BinaryInteractionFile inputFile, CSVWriter tableWriter, String[] row) throws BinaryInteractionFileException, IOException { - int c = 0; + private static class InteractoinQuery { - row[c++] = 
queryRestult.getVariantName(); - row[c++] = queryRestult.getGeneName(); - row[c++] = queryRestult.getCovariateName(); + private final String variant; + private final String gene; + private final String covariate; - BinaryInteractionVariant variant = inputFile.getVariant(queryRestult.getVariantName()); - row[c++] = variant.getChr(); - row[c++] = String.valueOf(variant.getPos()); - row[c++] = variant.getRefAllele().getAlleleAsString() + '/' + variant.getAltAllele().getAlleleAsString(); - row[c++] = variant.getAltAllele().toString(); - - BinaryInteractionQtlZscores zscroresQtl = queryRestult.getQtlZscores(); - BinaryInteractionZscores zscroresInteraction = queryRestult.getInteractionZscores(); - - for (int cohortIndex = 0; cohortIndex < inputFile.getCohortCount(); ++cohortIndex) { + public InteractoinQuery(String variant, String gene, String covariate) { + this.variant = variant; + this.gene = gene; + this.covariate = covariate; + } - if (inputFile.isNormalQtlStored()) { - row[c++] = String.valueOf(zscroresQtl.getSampleCounts()[cohortIndex]); - row[c++] = String.valueOf(zscroresQtl.getZscores()[cohortIndex]); - } + public String getVariant() { + return variant; + } - row[c++] = String.valueOf(zscroresInteraction.getSamplesInteractionCohort()[cohortIndex]); - row[c++] = String.valueOf(zscroresInteraction.getrSquaredCohort()[cohortIndex]); - row[c++] = String.valueOf(zscroresInteraction.getZscoreSnpCohort()[cohortIndex]); - row[c++] = String.valueOf(zscroresInteraction.getZscoreCovariateCohort()[cohortIndex]); - row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionCohort()[cohortIndex]); + public String getGene() { + return gene; + } - if (inputFile.isFlippedZscoreStored()) { - row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionFlippedCohort()[cohortIndex]); - } + public String getCovariate() { + return covariate; + } + @Override + public int hashCode() { + int hash = 5; + hash = 67 * hash + (this.variant != null ? 
this.variant.hashCode() : 0); + hash = 67 * hash + (this.gene != null ? this.gene.hashCode() : 0); + hash = 67 * hash + (this.covariate != null ? this.covariate.hashCode() : 0); + return hash; } - if (inputFile.isMetaAnalysis()) { - if (inputFile.isNormalQtlStored()) { - row[c++] = String.valueOf(zscroresQtl.getMetaZscore()); + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; } - row[c++] = String.valueOf(zscroresInteraction.getZscoreSnpMeta()); - row[c++] = String.valueOf(zscroresInteraction.getZscoreCovariateMeta()); - row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionMeta()); - if (inputFile.isFlippedZscoreStored()) { - row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionFlippedMeta()); + if (getClass() != obj.getClass()) { + return false; + } + final InteractoinQuery other = (InteractoinQuery) obj; + if ((this.variant == null) ? (other.variant != null) : !this.variant.equals(other.variant)) { + return false; } + if ((this.gene == null) ? (other.gene != null) : !this.gene.equals(other.gene)) { + return false; + } + if ((this.covariate == null) ? (other.covariate != null) : !this.covariate.equals(other.covariate)) { + return false; + } + return true; } - - tableWriter.writeNext(row); } } From b2eb2e1d7eb9a88e1b1a6a9dbbfb16a8bac0315a Mon Sep 17 00:00:00 2001 From: harmjanwestra Date: Tue, 10 Mar 2015 21:11:23 -0400 Subject: [PATCH 013/143] Some test code for lavaan InteractionPlotter also ouputs data to textfile (for use in e.g. 
R) --- .../InteractionAnalysisTask.java | 215 ++++++++++-------- .../InteractionPlotter.java | 108 ++++----- 2 files changed, 171 insertions(+), 152 deletions(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisTask.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisTask.java index c0ea0063c..6fdf53322 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisTask.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisTask.java @@ -20,6 +20,7 @@ import umcg.genetica.io.trityper.util.ChrAnnotation; import umcg.genetica.math.matrix.DoubleMatrixDataset; import umcg.genetica.math.stats.Correlation; +import umcg.genetica.math.stats.Descriptives; import umcg.genetica.math.stats.Normalization; /** @@ -38,9 +39,9 @@ public class InteractionAnalysisTask implements Callable NAN_PAIR = new Pair(Double.NaN, Double.NaN); - private final boolean sem; + + private final Pair NAN_PAIR = new Pair(Double.NaN, Double.NaN); + private final boolean sem; public InteractionAnalysisTask(SNP snpObj, ArrayList> eQTLsForSNP, double[][] pcCorrectedData, int[] wgaId, @@ -172,10 +173,10 @@ public InteractionAnalysisResults call() throws Exception { try { rConnection = new RConnection(); // rConnection.voidEval("install.packages('sandwich')"); -if(sandwich){ - rConnection.voidEval("library(sandwich)"); + if (sandwich) { + rConnection.voidEval("library(sandwich)"); } else { - rConnection.voidEval("library(lavaan)"); + rConnection.voidEval("library(lavaan)"); } } catch (RserveException ex) { System.err.println(ex.getMessage()); @@ -186,32 +187,32 @@ public InteractionAnalysisResults call() throws Exception { System.err.println("Error: using R connection but none found"); return null; } - - try { - if (rConnection.isConnected()) { - double[] olsY = new double[nrCalled]; //Ordinary least 
squares: Our gene expression - double[] olsX = new double[nrCalled]; - double[] covariateValues = new double[nrCalled]; + + try { + if (rConnection.isConnected()) { + double[] olsY = new double[nrCalled]; //Ordinary least squares: Our gene expression + double[] olsX = new double[nrCalled]; + double[] covariateValues = new double[nrCalled]; //No interaction term, linear model: y ~ a * SNP + b * CellCount + c // double[][] olsXFullWithInteraction = new double[nrCalled][3]; //With interaction term, linear model: y ~ a * SNP + b * CellCount + c + d * SNP * CellCount - int itr = 0; - for (int s = 0; s < valsX.length; s++) { - double genotype = valsX[s]; - if (genotype != -1 && !Double.isNaN(tmpVarCelCount[s])) { - if (signInteractionEffectDirection == -1) { - genotype = 2 - genotype; - } - covariateValues[itr] = tmpVarCelCount[s]; - olsY[itr] = valsY[s]; - olsX[itr] = genotype; - itr++; + int itr = 0; + for (int s = 0; s < valsX.length; s++) { + double genotype = valsX[s]; + if (genotype != -1 && !Double.isNaN(tmpVarCelCount[s])) { + if (signInteractionEffectDirection == -1) { + genotype = 2 - genotype; } + covariateValues[itr] = tmpVarCelCount[s]; + olsY[itr] = valsY[s]; + olsX[itr] = genotype; + itr++; } + } - double corr = JSci.maths.ArrayMath.correlation(olsX, olsY); - mainZ = Correlation.convertCorrelationToZScore(olsX.length, corr); + double corr = JSci.maths.ArrayMath.correlation(olsX, olsY); + mainZ = Correlation.convertCorrelationToZScore(olsX.length, corr); -if(sandwich){ + if (sandwich) { rConnection.assign("y", olsY); rConnection.assign("x", olsX); rConnection.assign("z", covariateValues); @@ -233,51 +234,64 @@ public InteractionAnalysisResults call() throws Exception { betaCovariate = rConnection.eval("modelsummary$coefficients[3,1]").asDouble(); seCovariate = rConnection.eval("modelsummary$coefficients[3,2]").asDouble(); rsquared = rConnection.eval("modelsummary$r.squared").asDouble(); - } else { + } else { // use structural equation modeling 
(errors-in-variables compensation) - // define model - // z-transform, otherwise the used covariances in lavaan may be wrong - double[] olsYZ = Normalization.standardNormalize(valsY); - double[] olsXZ = Normalization.standardNormalize(olsX); - double[] covariatesZ = Normalization.standardNormalize(covariateValues); - double[] interaction = Normalization.standardNormalize(covariateValues); - - for (int i = 0; i < covariatesZ.length; i++) { - interaction[i] = olsXZ[i] * covariatesZ[i]; - } - - rConnection.assign("expression", olsYZ); - rConnection.assign("genotype", olsXZ); - rConnection.assign("covariate", covariatesZ); - rConnection.assign("interaction", interaction); - - String model = "'latentExpression ~ latentCovariate + latentInteraction\n" + - "latentExpression =~ expression\n" + - "latentCovariate =~ covariate\n" + - "latentGenotype =~ genotype\n" + - "latentInteraction =~ interaction\n" + - "covariate ~~ genotype\n" + - "covariate ~~ interaction\n" + - "genotype ~~ interaction\n" + - "'"; - - rConnection.voidEval(model); - rConnection.voidEval("fit <- sem(model)"); - rConnection.voidEval("modelsummary <- summary(m)"); + // define model + // z-transform, otherwise the used covariances in lavaan may be wrong + double[] olsYZ = Normalization.standardNormalize(valsY); + double[] olsXZ = Normalization.standardNormalize(olsX); + double[] covariatesZ = Normalization.standardNormalize(covariateValues); + double[] interactionVals = new double[covariatesZ.length]; + for (int i = 0; i < valsY.length; i++) { + interactionVals[i] = olsX[i] * covariateValues[i]; + } + double[] interactionZ = Normalization.standardNormalize(interactionVals); + + System.out.println("Var Y: " + Descriptives.variance(olsY) + "\t" + Descriptives.mean(olsY)); + System.out.println("Var gen: " + Descriptives.variance(olsX) + "\t" + Descriptives.mean(olsX)); + System.out.println("Var cov: " + Descriptives.variance(covariateValues) + "\t" + Descriptives.mean(covariateValues)); + 
System.out.println("Var int: " + Descriptives.variance(interactionVals) + "\t" + Descriptives.mean(interactionVals)); + + System.out.println(""); + + if (Descriptives.variance(olsY) > 1E-5 && Descriptives.variance(olsX) > 1E-5) { + rConnection.assign("expression", olsY); + rConnection.assign("genotype", olsX); + rConnection.assign("covariate", covariateValues); + rConnection.assign("interaction", interactionVals); + rConnection.voidEval("df <- data.frame(expression, genotype, covariate, interaction)"); + + String model = "model <- 'expression ~ genotype\n" // + latentCovariate + latentInteraction\n" + + // + "latentInteraction =~ interaction\n" + + "expression ~~ genotype\n" + // + "covariate ~~ interaction\n" + // + "genotype ~~ interaction\n" + + "'"; + + rConnection.voidEval(model); + rConnection.voidEval("fit <- sem(model, data=df)"); + rConnection.voidEval("modelsummary <- summary(fit)"); + System.exit(0); } - rConnection.close(); - } else { - System.err.println("ERROR: R is not connected."); - } - } catch (REngineException ex) { - System.err.println(ex.getMessage()); - } catch (REXPMismatchException ex) { - System.err.println(ex.getMessage()); +// String[] output = rConnection.eval("modelsummary").asStrings(); +// for (String s : output) { +// System.out.println(s); +// +// } + } + rConnection.close(); + } else { + System.err.println("ERROR: R is not connected."); } - + } catch (REngineException ex) { + System.err.println(ex.getMessage()); + } catch (REXPMismatchException ex) { + System.err.println(ex.getMessage()); + } } else { @@ -334,38 +348,37 @@ public InteractionAnalysisResults call() throws Exception { // double intersect = regressionParameters[0]; double corr = JSci.maths.ArrayMath.correlation(genotypesCalled, olsY); mainZ = Correlation.convertCorrelationToZScore(genotypesCalled.length, corr); - - - // Get the regression parameters and R-square value and print it. + + // Get the regression parameters and R-square value and print it. 
try { - double[] regressionParameters = regressionFullWithInteraction.estimateRegressionParameters(); - double[] regressionStandardErrors = regressionFullWithInteraction.estimateRegressionParametersStandardErrors(); - - betaInteraction = regressionParameters[3]; - seInteraction = regressionStandardErrors[3]; - - // Get the regression parameters and R-square value and print it. - betaSNP = regressionParameters[1]; - seSNP = regressionStandardErrors[1]; - - betaCovariate = regressionParameters[2]; - seCovariate = regressionStandardErrors[2]; - - rsquared = regressionFullWithInteraction.calculateRSquared(); - - } catch (SingularMatrixException ex) { - betaInteraction = Double.NaN; - seInteraction = Double.NaN; - - // Get the regression parameters and R-square value and print it. - betaSNP = Double.NaN; - seSNP = Double.NaN; - - betaCovariate = Double.NaN; - seCovariate = Double.NaN; - - rsquared = Double.NaN; - } + double[] regressionParameters = regressionFullWithInteraction.estimateRegressionParameters(); + double[] regressionStandardErrors = regressionFullWithInteraction.estimateRegressionParametersStandardErrors(); + + betaInteraction = regressionParameters[3]; + seInteraction = regressionStandardErrors[3]; + + // Get the regression parameters and R-square value and print it. + betaSNP = regressionParameters[1]; + seSNP = regressionStandardErrors[1]; + + betaCovariate = regressionParameters[2]; + seCovariate = regressionStandardErrors[2]; + + rsquared = regressionFullWithInteraction.calculateRSquared(); + + } catch (SingularMatrixException ex) { + betaInteraction = Double.NaN; + seInteraction = Double.NaN; + + // Get the regression parameters and R-square value and print it. 
+ betaSNP = Double.NaN; + seSNP = Double.NaN; + + betaCovariate = Double.NaN; + seCovariate = Double.NaN; + + rsquared = Double.NaN; + } } @@ -438,11 +451,11 @@ public InteractionAnalysisResults call() throws Exception { } private Pair convertBetaToP(double beta, double se, StudentT tDistColt) { - - if(Double.isNaN(beta)){ - return NAN_PAIR; - } - + + if (Double.isNaN(beta)) { + return NAN_PAIR; + } + double t = beta / se; double p = 1; double z = 0; diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionPlotter.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionPlotter.java index 11fe1f87b..955941b52 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionPlotter.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionPlotter.java @@ -171,7 +171,11 @@ public InteractionPlotter(String interactionFile, String genotypeDir, String exp for (int q = startCovariate; q < endCovariate; q++) { System.out.println("Plotting: " + snp + "\t" + covariateData.rowObjects.get(q) + "\t" + probe); - System.out.println("Individual\tAllele1\tAllele2\tGenotype\tGenotypeFlipped\tCovariate\tExpression"); + + + TextFile interactionOut = new TextFile(outdir + snp + "-" + probe + "-" + covariateData.rowObjects.get(q) + ".txt", TextFile.W); + interactionOut.writeln("Individual\tAllele1\tAllele2\tGenotype\tGenotypeFlipped\tCovariate\tExpression"); + byte[] alleles1 = snpObj.getAllele1(); byte[] alleles2 = snpObj.getAllele2(); byte[] genotypes = snpObj.getGenotypes(); @@ -198,7 +202,7 @@ public InteractionPlotter(String interactionFile, String genotypeDir, String exp + "\t" + genotypeflipped + "\t" + covariateData.rawData[q][genotypeToCovariate[i]] + "\t" + expressionData.rawData[probeId][genotypeToExpression[i]]; - System.out.println(output); + interactionOut.writeln(output); genotypeArr.add(genotypes[i]); @@ -208,7 
+212,10 @@ public InteractionPlotter(String interactionFile, String genotypeDir, String exp } } + } + interactionOut.close(); + System.out.println(""); //Fill arrays with data in order to be able to perform the ordinary least squares analysis: double[] olsY = new double[nrCalled]; //Ordinary least squares: Our gene expression @@ -241,55 +248,54 @@ public InteractionPlotter(String interactionFile, String genotypeDir, String exp regressionFullWithInteraction.newSampleData(olsY, olsXFullWithInteraction); - try{ - double rss2 = regressionFullWithInteraction.calculateResidualSumOfSquares(); - double[] regressionParameters = regressionFullWithInteraction.estimateRegressionParameters(); - - double[] regressionStandardErrors = regressionFullWithInteraction.estimateRegressionParametersStandardErrors(); - - - double betaInteraction = regressionParameters[3]; - double seInteraction = regressionStandardErrors[3]; - double tInteraction = betaInteraction / seInteraction; - double pValueInteraction = 1; - double zScoreInteraction = 0; - - if (fDist == null) { - fDist = new org.apache.commons.math3.distribution.FDistribution((int) (3 - 2), (int) (olsY.length - 3)); - randomEngine = new cern.jet.random.tdouble.engine.DRand(); - tDistColt = new cern.jet.random.tdouble.StudentT(olsY.length - 4, randomEngine); - } - - if (tInteraction < 0) { - pValueInteraction = tDistColt.cdf(tInteraction); - if (pValueInteraction < 2.0E-323) { - pValueInteraction = 2.0E-323; - } - zScoreInteraction = cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction); - } else { - pValueInteraction = tDistColt.cdf(-tInteraction); - if (pValueInteraction < 2.0E-323) { - pValueInteraction = 2.0E-323; - } - - zScoreInteraction = -cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction); - } - pValueInteraction *= 2; - String pvalFormatted = ""; - if (pValueInteraction >= 0.001) { - pvalFormatted = decFormat.format(pValueInteraction); - } else { - pvalFormatted = 
decFormatSmall.format(pValueInteraction); - } - ScatterPlot scatterPlot = new ScatterPlot(500, 500, dataCov, dataExp, dataGen, genotypeDescriptions, colorarray, ScatterPlot.OUTPUTFORMAT.PDF, - "Interaction between SNP " + snp + ", probe " + probe + " and covariate " + covariateData.rowObjects.get(q), - "Z: " + decFormat.format(zScoreInteraction) + " Pvalue: " + pvalFormatted + " n: " + nrCalled, - outdir + snp + "-" + probe + "-" + covariateData.rowObjects.get(q) + ".pdf", false); - - } catch(SingularMatrixException ex ){ - ex.printStackTrace(); - System.out.println("\tMatrix is singular, skipping\n"); - } + try { + double rss2 = regressionFullWithInteraction.calculateResidualSumOfSquares(); + double[] regressionParameters = regressionFullWithInteraction.estimateRegressionParameters(); + + double[] regressionStandardErrors = regressionFullWithInteraction.estimateRegressionParametersStandardErrors(); + + double betaInteraction = regressionParameters[3]; + double seInteraction = regressionStandardErrors[3]; + double tInteraction = betaInteraction / seInteraction; + double pValueInteraction = 1; + double zScoreInteraction = 0; + + if (fDist == null) { + fDist = new org.apache.commons.math3.distribution.FDistribution((int) (3 - 2), (int) (olsY.length - 3)); + randomEngine = new cern.jet.random.tdouble.engine.DRand(); + tDistColt = new cern.jet.random.tdouble.StudentT(olsY.length - 4, randomEngine); + } + + if (tInteraction < 0) { + pValueInteraction = tDistColt.cdf(tInteraction); + if (pValueInteraction < 2.0E-323) { + pValueInteraction = 2.0E-323; + } + zScoreInteraction = cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction); + } else { + pValueInteraction = tDistColt.cdf(-tInteraction); + if (pValueInteraction < 2.0E-323) { + pValueInteraction = 2.0E-323; + } + + zScoreInteraction = -cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction); + } + pValueInteraction *= 2; + String pvalFormatted = ""; + if (pValueInteraction >= 0.001) { + 
pvalFormatted = decFormat.format(pValueInteraction); + } else { + pvalFormatted = decFormatSmall.format(pValueInteraction); + } + ScatterPlot scatterPlot = new ScatterPlot(500, 500, dataCov, dataExp, dataGen, genotypeDescriptions, colorarray, ScatterPlot.OUTPUTFORMAT.PDF, + "Interaction between SNP " + snp + ", probe " + probe + " and covariate " + covariateData.rowObjects.get(q), + "Z: " + decFormat.format(zScoreInteraction) + " Pvalue: " + pvalFormatted + " n: " + nrCalled, + outdir + snp + "-" + probe + "-" + covariateData.rowObjects.get(q) + ".pdf", false); + + } catch (SingularMatrixException ex) { + ex.printStackTrace(); + System.out.println("\tMatrix is singular, skipping\n"); + } } snpObj.clearGenotypes(); From d4945f3f69d2581dbe328262b63e8d9276bff041 Mon Sep 17 00:00:00 2001 From: harmjanwestra Date: Tue, 24 Mar 2015 01:02:40 -0400 Subject: [PATCH 014/143] Per reviewers request, output 2 PCs for --step normalize in --mode interaction --- .../interactionanalysis/InteractionAnalysisMultiThreaded.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java index f5a5dedfa..8b9ddd8a6 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java @@ -189,7 +189,7 @@ public void prepareDataForCelltypeSpecificEQTLMapping(String inexpraw, String ou // 4. 
PCA on sample correlation matrix rawExpressionDataset.transposeDataset(); // put samples back on columns // this method returns two DoubleMatrixDatasets: left are the PC scores, right are the Eigenvalues and expects the samples to be on the columns - Pair, DoubleMatrixDataset> PCAResults = n.calculatePCA(rawExpressionDataset, sampleCorrelationMatrix, expressionOutputDirectory + "PCAResults", 1); + Pair, DoubleMatrixDataset> PCAResults = n.calculatePCA(rawExpressionDataset, sampleCorrelationMatrix, expressionOutputDirectory + "PCAResults", 2); // 5. Correlate samples with PC1 - scores (QC step to determine poor RNA samples) // This dataset needs to be transposed if rows are currently PCs, and columns contain samples. From f2472f9a17222fbd1444b0e152a7623e0cf85667 Mon Sep 17 00:00:00 2001 From: Harm-Jan Westra Date: Tue, 24 Mar 2015 01:23:32 -0400 Subject: [PATCH 015/143] Update InteractionAnalysisMultiThreaded.java --- .../interactionanalysis/InteractionAnalysisMultiThreaded.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java index f5a5dedfa..8b9ddd8a6 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java @@ -189,7 +189,7 @@ public void prepareDataForCelltypeSpecificEQTLMapping(String inexpraw, String ou // 4. 
PCA on sample correlation matrix rawExpressionDataset.transposeDataset(); // put samples back on columns // this method returns two DoubleMatrixDatasets: left are the PC scores, right are the Eigenvalues and expects the samples to be on the columns - Pair, DoubleMatrixDataset> PCAResults = n.calculatePCA(rawExpressionDataset, sampleCorrelationMatrix, expressionOutputDirectory + "PCAResults", 1); + Pair, DoubleMatrixDataset> PCAResults = n.calculatePCA(rawExpressionDataset, sampleCorrelationMatrix, expressionOutputDirectory + "PCAResults", 2); // 5. Correlate samples with PC1 - scores (QC step to determine poor RNA samples) // This dataset needs to be transposed if rows are currently PCs, and columns contain samples. From a4554c68797b51ad39d3a9edaef45f0134e7db88 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Tue, 24 Mar 2015 21:49:38 +0100 Subject: [PATCH 016/143] Fix trityper writer --- Genotype-Harmonizer/pom.xml | 2 +- .../genotype/trityper/TriTyperGenotypeWriter.java | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Genotype-Harmonizer/pom.xml b/Genotype-Harmonizer/pom.xml index d5142e475..a2cc8496a 100644 --- a/Genotype-Harmonizer/pom.xml +++ b/Genotype-Harmonizer/pom.xml @@ -14,7 +14,7 @@ nl.systemsgenetics Genotype-IO - 1.0.1 + 1.0.2-SNAPSHOT commons-cli diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/trityper/TriTyperGenotypeWriter.java b/Genotype-IO/src/main/java/org/molgenis/genotype/trityper/TriTyperGenotypeWriter.java index a8fa0905c..46323161f 100644 --- a/Genotype-IO/src/main/java/org/molgenis/genotype/trityper/TriTyperGenotypeWriter.java +++ b/Genotype-IO/src/main/java/org/molgenis/genotype/trityper/TriTyperGenotypeWriter.java @@ -15,6 +15,7 @@ import org.molgenis.genotype.Sample; import org.molgenis.genotype.variant.GeneticVariant; import org.molgenis.genotype.variant.NotASnpException; +import org.molgenis.genotype.variant.id.GeneticVariantId; /** * @@ -70,14 +71,22 @@ private void writeSnps(File 
snpFile, File snpMapFile) throws IOException { // continue; // } - snpFileWriter.append(variant.getPrimaryVariantId()); + final GeneticVariantId snpId = variant.getVariantId(); + final String snpName; + if(snpId.containsId()){ + snpName = snpId.getPrimairyId(); + } else { + snpName = variant.getSequenceName() + ':' + String.valueOf(variant.getStartPos()); + } + + snpFileWriter.append(snpName); snpFileWriter.append('\n'); snpMapFileWriter.append(variant.getSequenceName()); snpMapFileWriter.append('\t'); snpMapFileWriter.append(String.valueOf(variant.getStartPos())); snpMapFileWriter.append('\t'); - snpMapFileWriter.append(variant.getPrimaryVariantId()); + snpMapFileWriter.append(snpName); snpMapFileWriter.append('\n'); } From 97bcff05a39acf9ff6b2b3c62a28fdc3154e1d3f Mon Sep 17 00:00:00 2001 From: Bonder-MJ Date: Tue, 31 Mar 2015 10:01:02 +0200 Subject: [PATCH 017/143] speed up quantile normalize --- .../genetica/math/stats/QuantileNormalization.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/genetica-libraries/src/main/java/umcg/genetica/math/stats/QuantileNormalization.java b/genetica-libraries/src/main/java/umcg/genetica/math/stats/QuantileNormalization.java index da40e6244..396513bad 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/math/stats/QuantileNormalization.java +++ b/genetica-libraries/src/main/java/umcg/genetica/math/stats/QuantileNormalization.java @@ -46,6 +46,12 @@ public static void quantilenormalize(double[][] rawData) { for (int probeID = 0; probeID < probeCount; probeID++) { rankedMean[probeID] /= (double) sampleCount; } + + double[] rankedMeanClasses = new double[probeCount-1]; + + for (int probeID = 0; probeID < (probeCount-1); probeID++) { + rankedMeanClasses[probeID] = ((rankedMean[probeID]+rankedMean[probeID+1])/2); + } RankArray rda = new RankArray(); //Iterate through each sample: @@ -60,7 +66,7 @@ public static void quantilenormalize(double[][] rawData) { for (int p = 0; p < probeCount; p++) { 
if((probesRanked[p]%1)!=0){ - probesQuantileNormalized[p] = ((rankedMean[(int)Math.floor(probesRanked[p])]+rankedMean[(int)Math.ceil(probesRanked[p])])/2); + probesQuantileNormalized[p] = rankedMeanClasses[(int)Math.floor(probesRanked[p])]; rawData[p][s] = probesQuantileNormalized[p]; } else { probesQuantileNormalized[p] = rankedMean[(int) probesRanked[p]]; @@ -69,7 +75,7 @@ public static void quantilenormalize(double[][] rawData) { } // double[] probesRankedAfterQQNorm = rda.rank(probesQuantileNormalized, false); - System.out.println("Normalized sample:\t" + (s+1) + "\tCorrelation original data and ranked data:\t" + JSci.maths.ArrayMath.correlation(probes, probesRanked) + "\tCorrelation original data and quantile normalized data:\t" + JSci.maths.ArrayMath.correlation(probes, probesQuantileNormalized) + "\tSpearman: "+spearman.correlation(probes, probesQuantileNormalized)); + System.out.println("Normalized sample:\t" + (s+1) + "\tPearson correlation original data and ranked data:\t" + JSci.maths.ArrayMath.correlation(probes, probesRanked) + "\ttSpearman correlation original data and quantile normalized data:\t"+spearman.correlation(probes, probesQuantileNormalized)); } } From b3ab543e44731071ef85ebdae375034dc4ba363d Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 2 Apr 2015 10:24:10 +0200 Subject: [PATCH 018/143] Interaction --- .../CovariateImportance.java | 52 ++- .../QueryBinaryInteraction.java | 384 +++++++++++------- 2 files changed, 278 insertions(+), 158 deletions(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/CovariateImportance.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/CovariateImportance.java index 8e1d38cf2..32576274d 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/CovariateImportance.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/CovariateImportance.java @@ -2,11 +2,15 @@ import 
au.com.bytecode.opencsv.CSVWriter; import gnu.trove.map.hash.TObjectDoubleHashMap; +import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.IOException; +import java.io.InputStreamReader; +import java.util.HashSet; import java.util.Iterator; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; @@ -46,6 +50,12 @@ public class CovariateImportance { OptionBuilder.isRequired(); OPTIONS.addOption(OptionBuilder.create("o")); + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("File with covariates to include in analysis"); + OptionBuilder.withLongOpt("covariats"); + OPTIONS.addOption(OptionBuilder.create("c")); + } /** @@ -55,6 +65,7 @@ public static void main(String[] args) throws FileNotFoundException, IOException final File inputInteractionFile; final File outputFile; + final File covariatesToIncludeFile; try { final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); @@ -62,6 +73,12 @@ public static void main(String[] args) throws FileNotFoundException, IOException inputInteractionFile = new File(commandLine.getOptionValue("i")); outputFile = new File(commandLine.getOptionValue("o")); + if (commandLine.hasOption("c")) { + covariatesToIncludeFile = new File(commandLine.getOptionValue("c")); + } else { + covariatesToIncludeFile = null; + } + } catch (ParseException ex) { System.err.println("Invalid command line arguments: "); System.err.println(ex.getMessage()); @@ -73,6 +90,23 @@ public static void main(String[] args) throws FileNotFoundException, IOException System.out.println("Input file: " + inputInteractionFile.getAbsolutePath()); System.out.println("Output file: " + outputFile); + if (covariatesToIncludeFile != null) { + System.out.println("Covariates to include: " + covariatesToIncludeFile.getAbsolutePath()); + } + + final 
HashSet covariantsToInclude; + if (covariatesToIncludeFile != null) { + covariantsToInclude = new HashSet(); + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(covariatesToIncludeFile), "UTF-8")); + String line; + while ((line = reader.readLine()) != null) { + covariantsToInclude.add(line.trim()); + } + System.out.println("Covariates included: " + covariantsToInclude.size()); + System.out.println(); + } else { + covariantsToInclude = null; + } BinaryInteractionFile inputFile = BinaryInteractionFile.load(inputInteractionFile, true); @@ -82,7 +116,7 @@ public static void main(String[] args) throws FileNotFoundException, IOException for (BinaryInteractionVariant variant : inputFile.getVariants()) { String variantName = variant.getName(); - int[] genePointers = inputFile.getVariant(variantName).getGenePointers(); + int[] genePointers = variant.getGenePointers(); for (int genePointer : genePointers) { @@ -92,6 +126,11 @@ public static void main(String[] args) throws FileNotFoundException, IOException for (Iterator iterator = inputFile.readVariantGeneResults(variantName, gene.getName()); iterator.hasNext();) { BinaryInteractionQueryResult interation = iterator.next(); + + if (covariantsToInclude != null && !covariantsToInclude.contains(interation.getCovariateName())) { + continue covariates; + } + double metaZ = interation.getInteractionZscores().getZscoreInteractionMeta(); if (Double.isNaN(metaZ)) { continue covariates; @@ -100,13 +139,16 @@ public static void main(String[] args) throws FileNotFoundException, IOException sumChi2.adjustOrPutValue(interation.getCovariateName(), chi2, chi2); } - } - ++reporter; - if (reporter % 500 == 0) { - System.out.println("Parsed " + reporter + " of " + inputFile.getVariantGeneCombinations() + " variant-gene combinations"); + ++reporter; + if (reporter % 500 == 0) { + System.out.println("Parsed " + reporter + " of " + inputFile.getVariantGeneCombinations() + " variant-gene combinations"); + } + } + 
+ } CSVWriter outputWriter = new CSVWriter(new BufferedWriter(new FileWriter(outputFile)), '\t', '\0', '\0'); diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/QueryBinaryInteraction.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/QueryBinaryInteraction.java index 0dd18a48d..ebe9246e9 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/QueryBinaryInteraction.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/QueryBinaryInteraction.java @@ -5,6 +5,7 @@ import eqtlmappingpipeline.Main; import java.io.BufferedWriter; import java.io.File; +import java.io.FileNotFoundException; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; @@ -14,15 +15,16 @@ import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Date; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashSet; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.cli.PosixParser; +import umcg.genetica.containers.Pair; import umcg.genetica.io.binInteraction.BinaryInteractionCohort; import umcg.genetica.io.binInteraction.BinaryInteractionFile; import umcg.genetica.io.binInteraction.BinaryInteractionFileException; @@ -47,8 +49,6 @@ public class QueryBinaryInteraction { OPTIONS = new Options(); - Option option; - OptionBuilder.withArgName("path"); OptionBuilder.hasArg(); OptionBuilder.withDescription("Binary interaction file"); @@ -92,6 +92,10 @@ public class QueryBinaryInteraction { OptionBuilder.withLongOpt("interactionZ"); OPTIONS.addOption(OptionBuilder.create("iz")); + OptionBuilder.withDescription("Only output meta z-scores"); + OptionBuilder.withLongOpt("onlyMetaZ"); + 
OPTIONS.addOption(OptionBuilder.create("oz")); + } public static void main(String[] args) throws UnsupportedEncodingException, IOException, Exception { @@ -103,6 +107,7 @@ public static void main(String[] args) throws UnsupportedEncodingException, IOEx final String queryVariantName; final double queryMinAbsInteractionZ; final File queryFile; + final boolean onlyOutputMetaZ; try { final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); @@ -140,6 +145,8 @@ public static void main(String[] args) throws UnsupportedEncodingException, IOEx queryMinAbsInteractionZ = -1; } + onlyOutputMetaZ = commandLine.hasOption("oz"); + } catch (ParseException ex) { System.err.println("Invalid command line arguments: "); System.err.println(ex.getMessage()); @@ -152,7 +159,7 @@ public static void main(String[] args) throws UnsupportedEncodingException, IOEx BinaryInteractionFile inputFile = BinaryInteractionFile.load(inputInteractionFile, true); final Writer outputWriter; - if (outputFile != null) { + if (outputFile != null && !onlyOutputMetaZ) { outputWriter = new BufferedWriter(new FileWriter(outputFile)); } else { outputWriter = new OutputStreamWriter(System.out); @@ -190,6 +197,10 @@ public static void main(String[] args) throws UnsupportedEncodingException, IOEx outputWriter.write("# - Query file: " + queryFile.getAbsolutePath()); outputWriter.write('\n'); } + if (onlyOutputMetaZ) { + outputWriter.write("# - Only outputing meta z-scores"); + outputWriter.write('\n'); + } outputWriter.write("#\n"); outputWriter.write("# Interaction file meta data: "); @@ -212,137 +223,107 @@ public static void main(String[] args) throws UnsupportedEncodingException, IOEx outputWriter.write('\n'); outputWriter.write("#\n"); + outputWriter.flush(); + final LinkedHashSet interactionQueries; + final boolean interactionQueriesOnlyCovariates; if (queryFile != null) { - interactionQueries = new LinkedHashSet(); - CSVReader queryReader = new CSVReader(new FileReader(queryFile), '\t', 
'\0'); - - String[] nextLine = queryReader.readNext(); - - int variantCol = -1; - int geneCol = -1; - int covariateCol = -1; - - //Parse header - for (int i = 0; i < nextLine.length; ++i) { - String headerEntry = nextLine[i].toLowerCase(); - switch (headerEntry) { - case "variant": - if (variantCol != -1) { - System.err.println("Variant column found twice"); - System.exit(1); - return; - } - variantCol = i; - break; - case "gene": - if (geneCol != -1) { - System.err.println("Gene column found twice"); - System.exit(1); - return; - } - geneCol = i; - break; - case "covariate": - if (covariateCol != -1) { - System.err.println("Covariate column found twice"); - System.exit(1); - return; - } - covariateCol = i; - break; - - } - - } - - if (variantCol == -1 && geneCol == -1 && covariateCol == -1) { - System.err.println("Did not detect appropiate header in query file"); - System.exit(1); - return; - } + Pair, Boolean> loadRes = loadInteractionQueries(queryFile); + interactionQueries = loadRes.getLeft(); + interactionQueriesOnlyCovariates = loadRes.getRight(); + } else { + interactionQueries = null; + interactionQueriesOnlyCovariates = false; + } - while ((nextLine = queryReader.readNext()) != null) { - String variant = null; - String gene = null; - String covariate = null; - - if(variantCol != -1){ - variant = nextLine[variantCol]; - } - if(geneCol != -1){ - gene = nextLine[geneCol]; - } - if(covariateCol != -1){ - covariate = nextLine[covariateCol]; - } - interactionQueries.add(new InteractoinQuery(variant, gene, covariate)); + CSVWriter tableWriter; + if (onlyOutputMetaZ) { + if (outputFile == null) { + throw new Exception("Use of option --onlyMetaZ only possible in combination with output file"); } + tableWriter = new CSVWriter(new BufferedWriter(new FileWriter(outputFile)), '\t', '\0', '\0'); } else { - interactionQueries = null; + tableWriter = new CSVWriter(outputWriter, '\t', '\0', '\0'); } - CSVWriter tableWriter = new CSVWriter(outputWriter, '\t', '\0', '\0'); + 
if (onlyOutputMetaZ && !inputFile.isMetaAnalysis()) { + throw new Exception("No meta analysis information detected cannot use option: --onlyMetaZ"); + } - int columnCount = - 7 - + ((5 + (inputFile.isNormalQtlStored() ? 2 : 0) + (inputFile.isFlippedZscoreStored() ? 1 : 0)) * inputFile.getCohortCount()) - + (inputFile.isMetaAnalysis() ? (3 + (inputFile.isNormalQtlStored() ? 1 : 0) + (inputFile.isFlippedZscoreStored() ? 1 : 0)) : 0); + final String[] row; + if (onlyOutputMetaZ) { + row = new String[1]; + } else { + final int columnCount = 7 + + ((5 + (inputFile.isNormalQtlStored() ? 2 : 0) + (inputFile.isFlippedZscoreStored() ? 1 : 0)) * inputFile.getCohortCount()) + + (inputFile.isMetaAnalysis() ? (3 + (inputFile.isNormalQtlStored() ? 1 : 0) + (inputFile.isFlippedZscoreStored() ? 1 : 0)) : 0); - String[] row = new String[columnCount]; - int c = 0; - row[c++] = "Variant"; - row[c++] = "Gene"; - row[c++] = "Covariate"; - row[c++] = "Variant_chr"; - row[c++] = "Variant_pos"; - row[c++] = "Variant alleles"; - row[c++] = "Assessed_allele"; - for (BinaryInteractionCohort cohort : inputFile.getCohorts()) { + row = new String[columnCount]; + int c = 0; - String cohortName = cohort.getName(); + row[c++] = "Variant"; + row[c++] = "Gene"; + row[c++] = "Covariate"; + row[c++] = "Variant_chr"; + row[c++] = "Variant_pos"; + row[c++] = "Variant alleles"; + row[c++] = "Assessed_allele"; - if (inputFile.isNormalQtlStored()) { - row[c++] = cohortName + "_QTL_sample_count"; - row[c++] = cohortName + "_QTL_Z-score"; - } + for (BinaryInteractionCohort cohort : inputFile.getCohorts()) { - row[c++] = cohortName + "_interaction_sample_count"; - row[c++] = cohortName + "_interaction_r2"; - row[c++] = cohortName + "_variant_Z-score"; - row[c++] = cohortName + "_covariate_Z-score"; - row[c++] = cohortName + "_interaction_Z-score"; + String cohortName = cohort.getName(); - if (inputFile.isFlippedZscoreStored()) { - row[c++] = cohortName + "_flipped_interaction_Z-score"; - } + if 
(inputFile.isNormalQtlStored()) { + row[c++] = cohortName + "_QTL_sample_count"; + row[c++] = cohortName + "_QTL_Z-score"; + } - } + row[c++] = cohortName + "_interaction_sample_count"; + row[c++] = cohortName + "_interaction_r2"; + row[c++] = cohortName + "_variant_Z-score"; + row[c++] = cohortName + "_covariate_Z-score"; + row[c++] = cohortName + "_interaction_Z-score"; + + if (inputFile.isFlippedZscoreStored()) { + row[c++] = cohortName + "_flipped_interaction_Z-score"; + } - if (inputFile.isMetaAnalysis()) { - if (inputFile.isNormalQtlStored()) { - row[c++] = "Meta_QTL_Z-score"; - } - row[c++] = "Meta_variant_Z-score"; - row[c++] = "Meta_covariate_Z-score"; - row[c++] = "Meta_interaction_Z-score"; - if (inputFile.isFlippedZscoreStored()) { - row[c++] = "Meta_flipped_interaction_Z-score"; } - } - tableWriter.writeNext(row); + if (inputFile.isMetaAnalysis()) { + if (inputFile.isNormalQtlStored()) { + row[c++] = "Meta_QTL_Z-score"; + } + row[c++] = "Meta_variant_Z-score"; + row[c++] = "Meta_covariate_Z-score"; + row[c++] = "Meta_interaction_Z-score"; + if (inputFile.isFlippedZscoreStored()) { + row[c++] = "Meta_flipped_interaction_Z-score"; + } + } + tableWriter.writeNext(row); + } if (interactionQueries != null) { - for(InteractoinQuery interactionQuery : interactionQueries){ - doQuery(interactionQuery.getGene(), interactionQuery.getVariant(), interactionQuery.getCovariate(), inputFile, tableWriter, row); + + if (interactionQueriesOnlyCovariates) { + HashSet covariateNames = new HashSet<>(interactionQueries.size()); + for (InteractoinQuery interactionQuery : interactionQueries) { + covariateNames.add(interactionQuery.getCovariate()); + } + doQueryCovariates(covariateNames, inputFile, tableWriter, row, onlyOutputMetaZ); + } else { + for (InteractoinQuery interactionQuery : interactionQueries) { + doQuery(interactionQuery.getGene(), interactionQuery.getVariant(), interactionQuery.getCovariate(), inputFile, tableWriter, row, onlyOutputMetaZ); + } } + } else { - 
doQuery(queryGeneName, queryVariantName, queryCovariateName, inputFile, tableWriter, row); + doQuery(queryGeneName, queryVariantName, queryCovariateName, inputFile, tableWriter, row, onlyOutputMetaZ); } @@ -353,65 +334,73 @@ public static void main(String[] args) throws UnsupportedEncodingException, IOEx } @SuppressWarnings({"null", "ConstantConditions"}) - private static void addRow(BinaryInteractionQueryResult queryRestult, BinaryInteractionFile inputFile, CSVWriter tableWriter, String[] row) throws BinaryInteractionFileException, IOException { - int c = 0; + private static void addRow(BinaryInteractionQueryResult queryRestult, BinaryInteractionFile inputFile, CSVWriter tableWriter, String[] row, boolean onlyOutputMetaZ) throws BinaryInteractionFileException, IOException { - row[c++] = queryRestult.getVariantName(); - row[c++] = queryRestult.getGeneName(); - row[c++] = queryRestult.getCovariateName(); + if (onlyOutputMetaZ) { + BinaryInteractionZscores zscroresInteraction = queryRestult.getInteractionZscores(); + row[0] = String.valueOf(zscroresInteraction.getZscoreInteractionMeta()); + } else { + int c = 0; - BinaryInteractionVariant variant = inputFile.getVariant(queryRestult.getVariantName()); - row[c++] = variant.getChr(); - row[c++] = String.valueOf(variant.getPos()); - row[c++] = variant.getRefAllele().getAlleleAsString() + '/' + variant.getAltAllele().getAlleleAsString(); - row[c++] = variant.getAltAllele().toString(); + row[c++] = queryRestult.getVariantName(); + row[c++] = queryRestult.getGeneName(); + row[c++] = queryRestult.getCovariateName(); - BinaryInteractionQtlZscores zscroresQtl = queryRestult.getQtlZscores(); - BinaryInteractionZscores zscroresInteraction = queryRestult.getInteractionZscores(); + BinaryInteractionVariant variant = inputFile.getVariant(queryRestult.getVariantName()); + row[c++] = variant.getChr(); + row[c++] = String.valueOf(variant.getPos()); + row[c++] = variant.getRefAllele().getAlleleAsString() + '/' + 
variant.getAltAllele().getAlleleAsString(); + row[c++] = variant.getAltAllele().toString(); - for (int cohortIndex = 0; cohortIndex < inputFile.getCohortCount(); ++cohortIndex) { + BinaryInteractionQtlZscores zscroresQtl = queryRestult.getQtlZscores(); + BinaryInteractionZscores zscroresInteraction = queryRestult.getInteractionZscores(); - if (inputFile.isNormalQtlStored()) { - row[c++] = String.valueOf(zscroresQtl.getSampleCounts()[cohortIndex]); - row[c++] = String.valueOf(zscroresQtl.getZscores()[cohortIndex]); - } + for (int cohortIndex = 0; cohortIndex < inputFile.getCohortCount(); ++cohortIndex) { - row[c++] = String.valueOf(zscroresInteraction.getSamplesInteractionCohort()[cohortIndex]); - row[c++] = String.valueOf(zscroresInteraction.getrSquaredCohort()[cohortIndex]); - row[c++] = String.valueOf(zscroresInteraction.getZscoreSnpCohort()[cohortIndex]); - row[c++] = String.valueOf(zscroresInteraction.getZscoreCovariateCohort()[cohortIndex]); - row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionCohort()[cohortIndex]); + if (inputFile.isNormalQtlStored()) { + row[c++] = String.valueOf(zscroresQtl.getSampleCounts()[cohortIndex]); + row[c++] = String.valueOf(zscroresQtl.getZscores()[cohortIndex]); + } - if (inputFile.isFlippedZscoreStored()) { - row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionFlippedCohort()[cohortIndex]); - } + row[c++] = String.valueOf(zscroresInteraction.getSamplesInteractionCohort()[cohortIndex]); + row[c++] = String.valueOf(zscroresInteraction.getrSquaredCohort()[cohortIndex]); + row[c++] = String.valueOf(zscroresInteraction.getZscoreSnpCohort()[cohortIndex]); + row[c++] = String.valueOf(zscroresInteraction.getZscoreCovariateCohort()[cohortIndex]); + row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionCohort()[cohortIndex]); - } + if (inputFile.isFlippedZscoreStored()) { + row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionFlippedCohort()[cohortIndex]); + } - if 
(inputFile.isMetaAnalysis()) { - if (inputFile.isNormalQtlStored()) { - row[c++] = String.valueOf(zscroresQtl.getMetaZscore()); } - row[c++] = String.valueOf(zscroresInteraction.getZscoreSnpMeta()); - row[c++] = String.valueOf(zscroresInteraction.getZscoreCovariateMeta()); - row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionMeta()); - if (inputFile.isFlippedZscoreStored()) { - row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionFlippedMeta()); + + if (inputFile.isMetaAnalysis()) { + if (inputFile.isNormalQtlStored()) { + row[c++] = String.valueOf(zscroresQtl.getMetaZscore()); + } + row[c++] = String.valueOf(zscroresInteraction.getZscoreSnpMeta()); + row[c++] = String.valueOf(zscroresInteraction.getZscoreCovariateMeta()); + row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionMeta()); + if (inputFile.isFlippedZscoreStored()) { + row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionFlippedMeta()); + } } } + tableWriter.writeNext(row); } - private static void doQuery(final String queryGeneName, final String queryVariantName, final String queryCovariateName, BinaryInteractionFile inputFile, CSVWriter tableWriter, String[] row) throws IOException, BinaryInteractionFileException { + private static void doQuery(final String queryGeneName, final String queryVariantName, final String queryCovariateName, BinaryInteractionFile inputFile, CSVWriter tableWriter, String[] row, boolean onlyOutputMetaZ) throws IOException, BinaryInteractionFileException { + if (queryGeneName != null && queryVariantName != null && queryCovariateName != null) { - addRow(inputFile.readVariantGeneCovariateResults(queryVariantName, queryGeneName, queryCovariateName), inputFile, tableWriter, row); + addRow(inputFile.readVariantGeneCovariateResults(queryVariantName, queryGeneName, queryCovariateName), inputFile, tableWriter, row, onlyOutputMetaZ); } else if (queryGeneName != null && queryVariantName != null) { for (Iterator iterator = 
inputFile.readVariantGeneResults(queryVariantName, queryGeneName); iterator.hasNext();) { - addRow(iterator.next(), inputFile, tableWriter, row); + addRow(iterator.next(), inputFile, tableWriter, row, onlyOutputMetaZ); } } else if (queryVariantName != null) { @@ -423,12 +412,12 @@ private static void doQuery(final String queryGeneName, final String queryVarian if (queryCovariateName != null) { if (inputFile.containsInteraction(queryVariantName, gene.getName(), queryCovariateName)) { - addRow(inputFile.readVariantGeneCovariateResults(queryVariantName, gene.getName(), queryCovariateName), inputFile, tableWriter, row); + addRow(inputFile.readVariantGeneCovariateResults(queryVariantName, gene.getName(), queryCovariateName), inputFile, tableWriter, row, onlyOutputMetaZ); } } else { for (Iterator iterator = inputFile.readVariantGeneResults(queryVariantName, gene.getName()); iterator.hasNext();) { - addRow(iterator.next(), inputFile, tableWriter, row); + addRow(iterator.next(), inputFile, tableWriter, row, onlyOutputMetaZ); } } @@ -443,12 +432,12 @@ private static void doQuery(final String queryGeneName, final String queryVarian if (queryCovariateName != null) { if (inputFile.containsInteraction(variant.getName(), queryGeneName, queryCovariateName)) { - addRow(inputFile.readVariantGeneCovariateResults(variant.getName(), queryGeneName, queryCovariateName), inputFile, tableWriter, row); + addRow(inputFile.readVariantGeneCovariateResults(variant.getName(), queryGeneName, queryCovariateName), inputFile, tableWriter, row, onlyOutputMetaZ); } } else { for (Iterator iterator = inputFile.readVariantGeneResults(variant.getName(), queryGeneName); iterator.hasNext();) { - addRow(iterator.next(), inputFile, tableWriter, row); + addRow(iterator.next(), inputFile, tableWriter, row, onlyOutputMetaZ); } } @@ -468,12 +457,12 @@ private static void doQuery(final String queryGeneName, final String queryVarian if (queryCovariateName != null) { if (inputFile.containsInteraction(variantName, 
gene.getName(), queryCovariateName)) { - addRow(inputFile.readVariantGeneCovariateResults(variantName, gene.getName(), queryCovariateName), inputFile, tableWriter, row); + addRow(inputFile.readVariantGeneCovariateResults(variantName, gene.getName(), queryCovariateName), inputFile, tableWriter, row, onlyOutputMetaZ); } } else { for (Iterator iterator = inputFile.readVariantGeneResults(variantName, gene.getName()); iterator.hasNext();) { - addRow(iterator.next(), inputFile, tableWriter, row); + addRow(iterator.next(), inputFile, tableWriter, row, onlyOutputMetaZ); } } @@ -486,6 +475,95 @@ private static void doQuery(final String queryGeneName, final String queryVarian } } + private static void doQueryCovariates(final HashSet queryCovariateNames, BinaryInteractionFile inputFile, CSVWriter tableWriter, String[] row, boolean onlyOutputMetaZ) throws IOException, BinaryInteractionFileException { + + for (BinaryInteractionVariant variant : inputFile.getVariants()) { + + String variantName = variant.getName(); + + int[] genePointers = inputFile.getVariant(variantName).getGenePointers(); + for (int genePointer : genePointers) { + + BinaryInteractionGene gene = inputFile.getGene(genePointer); + + for (Iterator iterator = inputFile.readVariantGeneResults(variantName, gene.getName()); iterator.hasNext();) { + BinaryInteractionQueryResult next = iterator.next(); + if(queryCovariateNames.contains(next.getCovariateName())){ + addRow(next, inputFile, tableWriter, row, onlyOutputMetaZ); + } + + } + + } + + } + + } + + private static Pair, Boolean> loadInteractionQueries(File queryFile) throws FileNotFoundException, IOException, Exception { + + LinkedHashSet interactionQueries = new LinkedHashSet(); + final CSVReader queryReader = new CSVReader(new FileReader(queryFile), '\t', '\0'); + + String[] nextLine = queryReader.readNext(); + + int variantCol = -1; + int geneCol = -1; + int covariateCol = -1; + + //Parse header + for (int i = 0; i < nextLine.length; ++i) { + String headerEntry 
= nextLine[i].toLowerCase(); + switch (headerEntry) { + case "variant": + if (variantCol != -1) { + throw new Exception("Variant column found twice"); + } + variantCol = i; + break; + case "gene": + if (geneCol != -1) { + throw new Exception("Gene column found twice"); + } + geneCol = i; + break; + case "covariate": + if (covariateCol != -1) { + throw new Exception("Covariate column found twice"); + } + covariateCol = i; + break; + + } + + } + + if (variantCol == -1 && geneCol == -1 && covariateCol == -1) { + throw new Exception("Did not detect appropiate header in query file"); + + } + + while ((nextLine = queryReader.readNext()) != null) { + String variant = null; + String gene = null; + String covariate = null; + + if (variantCol != -1) { + variant = nextLine[variantCol]; + } + if (geneCol != -1) { + gene = nextLine[geneCol]; + } + if (covariateCol != -1) { + covariate = nextLine[covariateCol]; + } + interactionQueries.add(new InteractoinQuery(variant, gene, covariate)); + } + queryReader.close(); + + return new Pair(interactionQueries, variantCol == -1 && geneCol == -1); + } + private static class InteractoinQuery { private final String variant; From dabf53652a068a2462cf9ee21343f123792d0806 Mon Sep 17 00:00:00 2001 From: Marc Jan Bonder Date: Fri, 10 Apr 2015 12:19:00 +0200 Subject: [PATCH 019/143] Small fixes --- .../eqtlmappingpipeline/util/QTLAnnotator.java | 15 +++++++++++---- .../java/umcg/genetica/math/stats/ZScores.java | 14 ++++++++++++++ .../methylation/ConvertBetaAndMvalues.java | 9 +++++++++ 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLAnnotator.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLAnnotator.java index c6c9d9b6c..a2468ec65 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLAnnotator.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLAnnotator.java @@ -49,11 +49,18 @@ public static void 
main(String[] args) throws IOException { // "1;8-9-10-11-12-13-14;1;4-5;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4", "snp;probe;snp;snp", null, // "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt-ExtendedInfo5.txt"); +// addAnnotationToQTLOutput( +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\QTLCorrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt", +// "D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v5.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;", +// "1;8-9-10-11-12-13-14;1;4-5;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15", "snp;probe;snp;snp;snp", null, +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\QTLCorrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfo4.txt"); +// + addAnnotationToQTLOutput( - "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\QTLCorrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt", - 
"D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v5.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;", - "1;8-9-10-11-12-13-14;1;4-5;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15", "snp;probe;snp;snp;snp", null, - "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\QTLCorrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfo4.txt"); + "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs_Exon\\Optimal_PC_and_QTL_Corrected\\eQTLSNPsFDR0.05-SNPLevel.txt", + "D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v5.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;", + "1;8-9-10-11-12-13-14;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15", "snp;snp;snp;snp", null, + 
"D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs_Exon\\Optimal_PC_and_QTL_Corrected\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfo.txt"); // addAnnotationToQTLOutput( // "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Cis_Pc22c_meQTLs\\Comparison_eQTLs_meQTLs.txt", diff --git a/genetica-libraries/src/main/java/umcg/genetica/math/stats/ZScores.java b/genetica-libraries/src/main/java/umcg/genetica/math/stats/ZScores.java index 2aa378967..4175551f9 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/math/stats/ZScores.java +++ b/genetica-libraries/src/main/java/umcg/genetica/math/stats/ZScores.java @@ -148,6 +148,20 @@ public static double pToZ(double p) { return Probability.normalInverse(p); } + + /** + * + * Returns the absolute Z-score for a given p-value using a normal + * distribution. + * + * @param p p-value + * @return absolute Z-score + */ + public static double pToZTwoTailed(double p) { + + p = p/2; + return pToZ(p); + } /** * diff --git a/genetica-libraries/src/main/java/umcg/genetica/methylation/ConvertBetaAndMvalues.java b/genetica-libraries/src/main/java/umcg/genetica/methylation/ConvertBetaAndMvalues.java index 877cd4930..cfdb225ee 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/methylation/ConvertBetaAndMvalues.java +++ b/genetica-libraries/src/main/java/umcg/genetica/methylation/ConvertBetaAndMvalues.java @@ -58,6 +58,15 @@ public static void transformMToBetavalue(DoubleMatrix2D rawData){ } } + public static double[] transformMToBetavalue(double[] rawData){ + double[] betaCopy = new double[rawData.length]; + for (int s=0; s Date: Fri, 10 Apr 2015 11:24:50 -0400 Subject: [PATCH 020/143] - Did a quick hack to the binary interaction meta-analysis to allow for strand issues in genotype data (it is a really dirty hack but it works) - force normal distribution in interaction analysis (forces normal distribution on covariates and gene expression data before fitting interaction model) --- .../BinaryInteractionMetaAnalysis.java | 
91 +- .../meta/MetaAnalysisCalculationThread.java | 880 ++++---- .../meta/MetaAnalysisResultThread.java | 503 ++--- .../binarymeta/meta/MetaAnalyze.java | 30 +- .../binarymeta/meta/cis/BinaryUnzipTask.java | 152 +- .../InteractionAnalysisMultiThreaded.java | 151 +- .../InteractionAnalysisTask.java | 806 ++++---- .../normalization/Normalizer.java | 1822 +++++++++-------- 8 files changed, 2195 insertions(+), 2240 deletions(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/BinaryInteractionMetaAnalysis.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/BinaryInteractionMetaAnalysis.java index 826f03f09..02619ca1a 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/BinaryInteractionMetaAnalysis.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/BinaryInteractionMetaAnalysis.java @@ -1,6 +1,15 @@ package eqtlmappingpipeline.binaryInteraction; import eqtlmappingpipeline.Main; +import org.apache.commons.cli.*; +import org.molgenis.genotype.Allele; +import umcg.genetica.io.binInteraction.*; +import umcg.genetica.io.binInteraction.gene.BinaryInteractionGene; +import umcg.genetica.io.binInteraction.gene.BinaryInteractionGeneCreator; +import umcg.genetica.io.binInteraction.variant.BinaryInteractionVariant; +import umcg.genetica.io.binInteraction.variant.BinaryInteractionVariantCreator; +import umcg.genetica.io.trityper.util.BaseAnnot; + import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; @@ -11,27 +20,8 @@ import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; -import org.apache.commons.cli.PosixParser; -import 
org.molgenis.genotype.Allele; -import umcg.genetica.io.binInteraction.BinaryInteractionCohort; -import umcg.genetica.io.binInteraction.BinaryInteractionFile; -import umcg.genetica.io.binInteraction.BinaryInteractionFileCreator; -import umcg.genetica.io.binInteraction.BinaryInteractionFileException; -import umcg.genetica.io.binInteraction.BinaryInteractionQtlZscores; -import umcg.genetica.io.binInteraction.BinaryInteractionZscores; -import umcg.genetica.io.binInteraction.gene.BinaryInteractionGene; -import umcg.genetica.io.binInteraction.gene.BinaryInteractionGeneCreator; -import umcg.genetica.io.binInteraction.variant.BinaryInteractionVariant; -import umcg.genetica.io.binInteraction.variant.BinaryInteractionVariantCreator; /** - * * @author Patrick Deelen */ public class BinaryInteractionMetaAnalysis { @@ -39,18 +29,18 @@ public class BinaryInteractionMetaAnalysis { private static final String VERSION = Main.VERSION; private static final String HEADER = " /---------------------------------------\\\n" - + " | Binary interaction meta analysis |\n" - + " | |\n" - + " | Patrick Deelen |\n" - + " | patrickdeelen@gmail.com |\n" - + " | |\n" - + " | Dasha Zhernakova, Marc Jan Bonder |\n" - + " | Lude Franke, Morris Swertz |\n" - + " | |\n" - + " | Genomics Coordication Center |\n" - + " | Department of Genetics |\n" - + " | University Medical Center Groningen |\n" - + " \\---------------------------------------/"; + + " | Binary interaction meta analysis |\n" + + " | |\n" + + " | Patrick Deelen |\n" + + " | patrickdeelen@gmail.com |\n" + + " | |\n" + + " | Dasha Zhernakova, Marc Jan Bonder |\n" + + " | Lude Franke, Morris Swertz |\n" + + " | |\n" + + " | Genomics Coordication Center |\n" + + " | Department of Genetics |\n" + + " | University Medical Center Groningen |\n" + + " \\---------------------------------------/"; private static final DateFormat DATE_TIME_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); private static final Date currentDataTime = new Date(); 
private static final Options OPTIONS; @@ -168,13 +158,25 @@ public static void main(String[] args) throws UnsupportedEncodingException, IOEx BinaryInteractionVariantCreator metaVariant = variants.get(variant.getName()); - if (!(metaVariant.getRefAllele() == variant.getRefAllele() && metaVariant.getAltAllele() == variant.getAltAllele()) - && !(metaVariant.getRefAllele() == variant.getAltAllele() && metaVariant.getAltAllele() == variant.getRefAllele())) { + Boolean flipAlleles = BaseAnnot.flipalleles(variant.getRefAllele().getAlleleAsString() + "/" + variant.getAltAllele().getAlleleAsString(), variant.getRefAllele().getAlleleAsString(), + metaVariant.getRefAllele().getAlleleAsString() + "/" + metaVariant.getAltAllele().getAlleleAsString(), metaVariant.getRefAllele().getAlleleAsString()); + + +// if (!(metaVariant.getRefAllele() == variant.getRefAllele() && metaVariant.getAltAllele() == variant.getAltAllele()) +// && !(metaVariant.getRefAllele() == variant.getAltAllele() && metaVariant.getAltAllele() == variant.getRefAllele())) { +// System.err.println("Error: different alleles detected for variant: " + variant.getName()); +// System.exit(1); +// return; +// } + + + if (flipAlleles == null) { System.err.println("Error: different alleles detected for variant: " + variant.getName()); + System.err.println("Expected: " + metaVariant.getRefAllele().getAlleleAsString() + " / " + metaVariant.getAltAllele().getAlleleAsString()); + System.err.println("Found: " + variant.getRefAllele().getAlleleAsString() + " / " + variant.getAltAllele().getAlleleAsString()); System.exit(1); return; } - } for (int geneIndex : variant.getGenePointers()) { variantGenes.add(new VariantGene(variant.getName(), fileGenes.get(geneIndex).getName())); @@ -283,13 +285,18 @@ public static void main(String[] args) throws UnsupportedEncodingException, IOEx if (binaryInteractionFile.containsVariantGene(variantName, geneName)) { - boolean swap = binaryInteractionFile.getVariant(variantName).getAltAllele() != 
assessedAllele; + BinaryInteractionVariant currentVariant = binaryInteractionFile.getVariant(variantName); + // boolean swap = binaryInteractionFile.getVariant(variantName).getAltAllele() != assessedAllele; + + // sorry for the ugly code :| + Boolean flipAlleles = BaseAnnot.flipalleles(variant.getRefAllele().getAlleleAsString() + "/" + variant.getAltAllele().getAlleleAsString(), assessedAllele.getAlleleAsString(), + currentVariant.getRefAllele().getAlleleAsString() + "/" + currentVariant.getAltAllele().getAlleleAsString(), currentVariant.getAltAllele().getAlleleAsString()); BinaryInteractionQtlZscores qtlRes = binaryInteractionFile.readQtlResults(variantName, geneName); for (int j = 0; j < binaryInteractionFile.getCohortCount(); ++j) { sampleCountsQtl[i] = qtlRes.getSampleCounts()[j]; zscoresQtl[i] = qtlRes.getZscores()[j]; - if (swap) { + if (flipAlleles) { zscoresQtl[i] *= -1; } ++i; @@ -341,7 +348,15 @@ public static void main(String[] args) throws UnsupportedEncodingException, IOEx BinaryInteractionZscores interactionRes = binaryInteractionFile.readInteractionResults(variantName, geneName, covariate); - boolean swap = binaryInteractionFile.getVariant(variantName).getAltAllele() != assessedAllele; +// boolean swap = binaryInteractionFile.getVariant(variantName).getAltAllele() != assessedAllele; + + BinaryInteractionVariant currentVariant = binaryInteractionFile.getVariant(variantName); + // boolean swap = binaryInteractionFile.getVariant(variantName).getAltAllele() != assessedAllele; + + // sorry for the ugly code :| + Boolean flipAlleles = BaseAnnot.flipalleles(variant.getRefAllele().getAlleleAsString() + "/" + variant.getAltAllele().getAlleleAsString(), assessedAllele.getAlleleAsString(), + currentVariant.getRefAllele().getAlleleAsString() + "/" + currentVariant.getAltAllele().getAlleleAsString(), currentVariant.getAltAllele().getAlleleAsString()); + for (int j = 0; j < binaryInteractionFile.getCohortCount(); ++j) { sampleCountsInteraction[i] = 
interactionRes.getSamplesInteractionCohort()[j]; @@ -350,7 +365,7 @@ public static void main(String[] args) throws UnsupportedEncodingException, IOEx zscoreInteractionCohort[i] = interactionRes.getZscoreInteractionCohort()[j]; rSquaredCohort[i] = interactionRes.getrSquaredCohort()[j]; zscoreInteractionFlippedCohort[i] = interactionRes.getZscoreInteractionFlippedCohort()[j]; - if (swap) { + if (flipAlleles) { zscoreSnpCohort[i] *= -1; zscoreInteractionCohort[i] *= -1; zscoreInteractionFlippedCohort[i] *= -1; diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalysisCalculationThread.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalysisCalculationThread.java index dce25b347..154e7f17d 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalysisCalculationThread.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalysisCalculationThread.java @@ -4,8 +4,14 @@ */ package eqtlmappingpipeline.binarymeta.meta; -import umcg.genetica.io.trityper.probeannotation.ProbeTranslation; import eqtlmappingpipeline.binarymeta.meta.graphics.ZScorePlot; +import umcg.genetica.io.trityper.EQTL; +import umcg.genetica.io.trityper.bin.BinaryResultDataset; +import umcg.genetica.io.trityper.bin.BinaryResultSNP; +import umcg.genetica.io.trityper.probeannotation.ProbeTranslation; +import umcg.genetica.io.trityper.util.BaseAnnot; +import umcg.genetica.math.stats.Descriptives; + import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.HashSet; @@ -14,175 +20,167 @@ import java.util.logging.Logger; import java.util.zip.DataFormatException; import java.util.zip.Inflater; -import umcg.genetica.io.trityper.bin.BinaryResultSNP; -import umcg.genetica.math.stats.Descriptives; - -import umcg.genetica.io.trityper.EQTL; -import umcg.genetica.io.trityper.bin.BinaryResultDataset; -import umcg.genetica.io.trityper.util.BaseAnnot; /** - * * 
@author harmjan */ public class MetaAnalysisCalculationThread extends Thread { - protected LinkedBlockingQueue m_queue_input; - protected LinkedBlockingQueue m_queue_output; - protected ArrayList probes; - protected ArrayList snps; - protected ArrayList snpChr; - protected ArrayList snpChrPos; - protected BinaryResultDataset[] ds; - protected Integer[][] snpTranslation; - protected Integer[][] probeTranslationLookupTable; - protected ProbeTranslation probeTranslation; - protected MetaSettings m_settings; - protected ZScorePlot zs; - protected Inflater inflater = new Inflater(); - protected PValueThreshold pvaluethreshold; - private int numEffects = 0; - private int numSNPs = 0; - - public MetaAnalysisCalculationThread(LinkedBlockingQueue input, LinkedBlockingQueue output, - ArrayList snps, ArrayList probes, - ArrayList snpChr, ArrayList snpChrPos, - BinaryResultDataset[] ds, - Integer[][] snpTranslation, - Integer[][] probeTranslationLookupTable, ProbeTranslation probeTranslation, - MetaSettings m_settings, - ZScorePlot zs, PValueThreshold p) { - this.probes = probes; - this.snps = snps; - this.snpChr = snpChr; - this.snpChrPos = snpChrPos; - this.snpTranslation = snpTranslation; - this.ds = ds; - this.probeTranslation = probeTranslation; - this.probeTranslationLookupTable = probeTranslationLookupTable; - this.m_settings = m_settings; - this.zs = zs; - this.pvaluethreshold = p; - m_queue_input = input; - m_queue_output = output; - } - - @Override - public void run() { - boolean poison = false; - while (!poison) { - try { - MetaAnalysisWorkPackage pack = m_queue_input.take(); - poison = pack.getPoison(); - if (!poison) { - analyze(pack); + protected LinkedBlockingQueue m_queue_input; + protected LinkedBlockingQueue m_queue_output; + protected ArrayList probes; + protected ArrayList snps; + protected ArrayList snpChr; + protected ArrayList snpChrPos; + protected BinaryResultDataset[] ds; + protected Integer[][] snpTranslation; + protected Integer[][] 
probeTranslationLookupTable; + protected ProbeTranslation probeTranslation; + protected MetaSettings m_settings; + protected ZScorePlot zs; + protected Inflater inflater = new Inflater(); + protected PValueThreshold pvaluethreshold; + private int numEffects = 0; + private int numSNPs = 0; + + public MetaAnalysisCalculationThread(LinkedBlockingQueue input, LinkedBlockingQueue output, + ArrayList snps, ArrayList probes, + ArrayList snpChr, ArrayList snpChrPos, + BinaryResultDataset[] ds, + Integer[][] snpTranslation, + Integer[][] probeTranslationLookupTable, ProbeTranslation probeTranslation, + MetaSettings m_settings, + ZScorePlot zs, PValueThreshold p) { + this.probes = probes; + this.snps = snps; + this.snpChr = snpChr; + this.snpChrPos = snpChrPos; + this.snpTranslation = snpTranslation; + this.ds = ds; + this.probeTranslation = probeTranslation; + this.probeTranslationLookupTable = probeTranslationLookupTable; + this.m_settings = m_settings; + this.zs = zs; + this.pvaluethreshold = p; + m_queue_input = input; + m_queue_output = output; + } + + @Override + public void run() { + boolean poison = false; + while (!poison) { + try { + MetaAnalysisWorkPackage pack = m_queue_input.take(); + poison = pack.getPoison(); + if (!poison) { + analyze(pack); // if(taken % printperiterations == 0){ // System.out.println("Thread "+this.getName()+" calculated "+taken+" workpackages."); // } - } + } - } catch (InterruptedException ex) { - ex.printStackTrace(); - } - } + } catch (InterruptedException ex) { + ex.printStackTrace(); + } + } - System.out.println(this.getName() + " - Poisoned - Num tests passed QC: " + numEffects + "\t" + numSNPs); - } + System.out.println(this.getName() + " - Poisoned - Num tests passed QC: " + numEffects + "\t" + numSNPs); + } - protected void analyze(MetaAnalysisWorkPackage pack) { + protected void analyze(MetaAnalysisWorkPackage pack) { - int s = pack.getSNPNum(); + int s = pack.getSNPNum(); - // DEBUG + // DEBUG // boolean verbose = false; // if 
(snps.get(s).equals("rs6919346")) { // verbose = true; // } - int[] totalNrSamples = new int[probes.size()]; - double[] zSum = new double[probes.size()]; - double[] zSumAbsolute = new double[probes.size()]; - int[] dsPassQC = new int[probes.size()]; - Result r = new Result(); - r.finalzscores = new Double[probes.size()]; - r.finalpvalues = new Double[probes.size()]; - r.numSamples = new Integer[probes.size()][ds.length]; - r.datasetZScores = new Double[probes.size()][ds.length]; - r.dspassingqc = new boolean[probes.size()][ds.length]; - r.snp = s; - r.passesQC = true; - r.datasets = new String[ds.length]; - boolean[] zscoreflipped = new boolean[ds.length]; - EQTL[] result = new EQTL[probes.size()]; - - BinaryResultSNP firstSNPPassingQC = null; - - Byte snpchr = snpChr.get(s); - Integer snpchrpos = snpChrPos.get(s); - boolean snphaspropermapping = true; - if (snpchr == null || snpchrpos == null || snpchr == -1) { - snpchr = -1; - snpchrpos = -1; - snphaspropermapping = false; - } - - StringBuilder zscoretableout = new StringBuilder(); - - int numDSPassingQC = 0; - - HashSet probesTestedHash = new HashSet(); - boolean[] testprobes = new boolean[probes.size()]; - for (int p = 0; p < probes.size(); p++) { - byte probechr = probeTranslation.getProbeChr(p); - int probechrpos = probeTranslation.getProbeChrPos(p); - boolean testprobe = false; - - if (m_settings.isCis() && m_settings.isTrans()) { - testprobe = true; - } else if (m_settings.isCis() && !m_settings.isTrans()) { - if (snpchr < 1 || probechr < 1) { - testprobe = false; - } else if (probechr == snpchr) { - if (Math.abs(snpchrpos - probechrpos) < m_settings.getCisdistance()) { - testprobe = true; - } else { - testprobe = false; - } - } else { - testprobe = false; - } - } else if (!m_settings.isCis() && m_settings.isTrans()) { - if (snpchr < 1 || probechr < 1) { - testprobe = false; - } else if (probechr == snpchr) { - if (Math.abs(snpchrpos - probechrpos) > m_settings.getTransdistance()) { - testprobe = true; - } 
else { - testprobe = false; - } - } else { - testprobe = true; - } - } - testprobes[p] = testprobe; - - if (testprobe) { - probesTestedHash.add(p); - } - } - - - - for (int d = 0; d < ds.length; d++) { - - Integer snpId = snpTranslation[d][s]; - - if (snpId != null) { - - BinaryResultSNP snpObject = pack.getSNPObject(d); // ds[d].getSnps()[snpId]; + int[] totalNrSamples = new int[probes.size()]; + double[] zSum = new double[probes.size()]; + double[] zSumAbsolute = new double[probes.size()]; + int[] dsPassQC = new int[probes.size()]; + Result r = new Result(); + r.finalzscores = new Double[probes.size()]; + r.finalpvalues = new Double[probes.size()]; + r.numSamples = new Integer[probes.size()][ds.length]; + r.datasetZScores = new Double[probes.size()][ds.length]; + r.dspassingqc = new boolean[probes.size()][ds.length]; + r.snp = s; + r.passesQC = true; + r.datasets = new String[ds.length]; + boolean[] zscoreflipped = new boolean[ds.length]; + EQTL[] result = new EQTL[probes.size()]; + + BinaryResultSNP firstSNPPassingQC = null; + + Byte snpchr = snpChr.get(s); + Integer snpchrpos = snpChrPos.get(s); + boolean snphaspropermapping = true; + if (snpchr == null || snpchrpos == null || snpchr == -1) { + snpchr = -1; + snpchrpos = -1; + snphaspropermapping = false; + } + + StringBuilder zscoretableout = new StringBuilder(); + + int numDSPassingQC = 0; + + HashSet probesTestedHash = new HashSet(); + boolean[] testprobes = new boolean[probes.size()]; + for (int p = 0; p < probes.size(); p++) { + byte probechr = probeTranslation.getProbeChr(p); + int probechrpos = probeTranslation.getProbeChrPos(p); + boolean testprobe = false; + + if (m_settings.isCis() && m_settings.isTrans()) { + testprobe = true; + } else if (m_settings.isCis() && !m_settings.isTrans()) { + if (snpchr < 1 || probechr < 1) { + testprobe = false; + } else if (probechr == snpchr) { + if (Math.abs(snpchrpos - probechrpos) < m_settings.getCisdistance()) { + testprobe = true; + } else { + testprobe = false; + 
} + } else { + testprobe = false; + } + } else if (!m_settings.isCis() && m_settings.isTrans()) { + if (snpchr < 1 || probechr < 1) { + testprobe = false; + } else if (probechr == snpchr) { + if (Math.abs(snpchrpos - probechrpos) > m_settings.getTransdistance()) { + testprobe = true; + } else { + testprobe = false; + } + } else { + testprobe = true; + } + } + testprobes[p] = testprobe; + + if (testprobe) { + probesTestedHash.add(p); + } + } + + + for (int d = 0; d < ds.length; d++) { + + Integer snpId = snpTranslation[d][s]; + + if (snpId != null) { + + BinaryResultSNP snpObject = pack.getSNPObject(d); // ds[d].getSnps()[snpId]; // long pointer = snpObject.getzScoreIndex(); // long nextpointer = -1; @@ -192,162 +190,159 @@ protected void analyze(MetaAnalysisWorkPackage pack) { // nextpointer = snpObject2.getzScoreIndex(); // } - byte[] data = pack.getData(d); - Float[] zscores = null; - if (data != null) { - try { - zscores = inflate(data, ds[d].getNumProbes()); // - pack.setData(d, null); - } catch (DataFormatException ex) { - Logger.getLogger(MetaAnalysisCalculationThread.class.getName()).log(Level.SEVERE, null, ex); - } + byte[] data = pack.getData(d); + Float[] zscores = null; + if (data != null) { + try { + zscores = inflate(data, ds[d].getNumProbes()); // + pack.setData(d, null); + } catch (DataFormatException ex) { + Logger.getLogger(MetaAnalysisCalculationThread.class.getName()).log(Level.SEVERE, null, ex); + } - if (zscores != null) { - numDSPassingQC++; - // weight for dataset d - int nrSamples = snpObject.getNumsamples(); - double weight = Descriptives.getSqrt(nrSamples); + if (zscores != null) { + numDSPassingQC++; + // weight for dataset d + int nrSamples = snpObject.getNumsamples(); + double weight = Descriptives.getSqrt(nrSamples); - for (int p = 0; p < probes.size(); p++) { + for (int p = 0; p < probes.size(); p++) { - boolean testprobe = testprobes[p]; - if (testprobe) { - Integer probeId = probeTranslationLookupTable[d][p]; + boolean testprobe = 
testprobes[p]; + if (testprobe) { + Integer probeId = probeTranslationLookupTable[d][p]; - if (!testprobe && probeId != null) { - zscores[probeId] = null; - } else if (probeId != null && testprobe) { - if (zscores[probeId] != null) { + if (!testprobe && probeId != null) { + zscores[probeId] = null; + } else if (probeId != null && testprobe) { + if (zscores[probeId] != null) { - totalNrSamples[p] += nrSamples; - r.dspassingqc[p][d] = true; - r.numSamples[p][d] = nrSamples; + totalNrSamples[p] += nrSamples; + r.dspassingqc[p][d] = true; + r.numSamples[p][d] = nrSamples; - double zscore = zscores[probeId]; + double zscore = zscores[probeId]; - r.datasets[d] = ds[d].getM_name(); - dsPassQC[p]++; + r.datasets[d] = ds[d].getM_name().intern(); + dsPassQC[p]++; - if (firstSNPPassingQC == null) { - firstSNPPassingQC = snpObject; - } else { - Boolean flipalleles = flipalleles(firstSNPPassingQC, snpObject); - if (flipalleles == null) { - System.err.println("ERROR! SNP alleles cannot be matched for snp\t" + snpObject.getName() + "\tin dataset\t" + d); - System.err.println("This SNP will be excluded from further research"); - r.passesQC = false; - } else if (flipalleles) { - zscore = -zscore; - zscoreflipped[d] = true; - } - } + if (firstSNPPassingQC == null) { + firstSNPPassingQC = snpObject; + } else { + Boolean flipalleles = flipalleles(firstSNPPassingQC, snpObject); + if (flipalleles == null) { + System.err.println("ERROR! 
SNP alleles cannot be matched for snp\t" + snpObject.getName() + "\tin dataset\t" + d); + System.err.println("This SNP will be excluded from further research"); + r.passesQC = false; + } else if (flipalleles) { + zscore = -zscore; + zscoreflipped[d] = true; + } + } - r.datasetZScores[p][d] = new Double(zscore); + r.datasetZScores[p][d] = new Double(zscore); // if (verbose) { // System.out.println(d + "\t" + r.datasetZScores[p][d]); // } - zSumAbsolute[p] += Math.abs(zscore * weight); - zSum[p] += (zscore * weight); - } else { - } - } - } - } - for (int i = 0; i < zscores.length; i++) { - zscores[i] = null; - } - } - } - - - - - } - } + zSumAbsolute[p] += Math.abs(zscore * weight); + zSum[p] += (zscore * weight); + } else { + } + } + } + } + for (int i = 0; i < zscores.length; i++) { + zscores[i] = null; + } + } + } + + + } + } // if (verbose) { //// System.exit(0); // } - pack.clearByteData(); - - int numDSThatMinimallyShouldHaveEffect = m_settings.getSnpDatasetPresenceThreshold(); - if (numDSThatMinimallyShouldHaveEffect == 0) { - numDSThatMinimallyShouldHaveEffect = 1; - } - - if (numDSPassingQC >= numDSThatMinimallyShouldHaveEffect) { - pack.setPassedQC(true); - Double[] metaZPerProbe = null; - if (m_settings.isMakezscoretable()) { - metaZPerProbe = new Double[probes.size()]; - } - int probesTested = 0; - numSNPs++; - for (int p = 0; p < probes.size(); p++) { - - if (dsPassQC[p] >= numDSThatMinimallyShouldHaveEffect && totalNrSamples[p] > 0) { - numEffects++; - probesTestedHash.add(p); - probesTested++; - double zSumVal = zSum[p]; - double sqrtSample = Descriptives.getSqrt(totalNrSamples[p]); - double metaZScore = zSumVal / sqrtSample; - double pValueOverall = Descriptives.convertZscoreToPvalue(metaZScore); - - double zSumValAbsolute = zSumAbsolute[p]; - double zScoreAbs = zSumValAbsolute / sqrtSample; - double pValueOverallAbs = Descriptives.convertZscoreToPvalue(zScoreAbs); - - - - boolean outputeqtl = false; - if (m_settings.isMakezscoretable()) { - 
outputeqtl = true; - } else if (pValueOverall <= pvaluethreshold.getPvalue()) { - outputeqtl = true; - } - - if (outputeqtl) { - result[p] = new EQTL(); - EQTL e = result[p]; - e.setRsChr(snpChr.get(s)); - e.setRsChrPos(snpChrPos.get(s)); - e.setProbeChr(probeTranslation.getProbeChr(p)); - e.setProbeChrPos(probeTranslation.getProbeChrPos(p)); - e.setDatasets(r.datasets); - e.setAlleleAssessed(BaseAnnot.toString(firstSNPPassingQC.getAssessedAllele())); - byte[] alleles = firstSNPPassingQC.getAlleles(); - String alleleStr = BaseAnnot.toString(alleles[0]) + "/" + BaseAnnot.toString(alleles[1]); - e.setAlleles(alleleStr); - e.setDatasetZScores(r.datasetZScores[p]); - e.setZscore(metaZScore); - e.setPvalue(pValueOverall); - e.setZscoreAbs(zScoreAbs); - e.setPvalueAbs(pValueOverallAbs); - - if (m_settings.isUseAbsoluteZscore()) { - e.setUseAbsoluteZScore(); - } - - if (pValueOverallAbs < 1) { - for (int d1 = 0; d1 < ds.length; d1++) { - boolean ds1PassesQC = r.dspassingqc[p][d1]; - if (ds1PassesQC) { - double datasetZScore = r.datasetZScores[p][d1]; - if (zscoreflipped[d1]) { - datasetZScore = -datasetZScore; - } - for (int d2 = d1 + 1; d2 < ds.length; d2++) { - if (r.dspassingqc[p][d2]) { - double zscore2 = r.datasetZScores[p][d2]; - if (zscoreflipped[d2]) { - zscore2 = -zscore2; - } - if (zs != null) { + pack.clearByteData(); + + int numDSThatMinimallyShouldHaveEffect = m_settings.getSnpDatasetPresenceThreshold(); + if (numDSThatMinimallyShouldHaveEffect == 0) { + numDSThatMinimallyShouldHaveEffect = 1; + } + + if (numDSPassingQC >= numDSThatMinimallyShouldHaveEffect) { + pack.setPassedQC(true); + Double[] metaZPerProbe = null; + if (m_settings.isMakezscoretable()) { + metaZPerProbe = new Double[probes.size()]; + } + int probesTested = 0; + numSNPs++; + for (int p = 0; p < probes.size(); p++) { + + if (dsPassQC[p] >= numDSThatMinimallyShouldHaveEffect && totalNrSamples[p] > 0) { + numEffects++; + probesTestedHash.add(p); + probesTested++; + double zSumVal = zSum[p]; + 
double sqrtSample = Descriptives.getSqrt(totalNrSamples[p]); + double metaZScore = zSumVal / sqrtSample; + double pValueOverall = Descriptives.convertZscoreToPvalue(metaZScore); + + double zSumValAbsolute = zSumAbsolute[p]; + double zScoreAbs = zSumValAbsolute / sqrtSample; + double pValueOverallAbs = Descriptives.convertZscoreToPvalue(zScoreAbs); + + + boolean outputeqtl = false; + if (m_settings.isMakezscoretable()) { + outputeqtl = true; + } else if (pValueOverall <= pvaluethreshold.getPvalue()) { + outputeqtl = true; + } + + if (outputeqtl) { + result[p] = new EQTL(); + EQTL e = result[p]; + e.setRsChr(snpChr.get(s)); + e.setRsChrPos(snpChrPos.get(s)); + e.setProbeChr(probeTranslation.getProbeChr(p)); + e.setProbeChrPos(probeTranslation.getProbeChrPos(p)); + e.setDatasets(r.datasets); + e.setAlleleAssessed(BaseAnnot.toString(firstSNPPassingQC.getAssessedAllele()).intern()); + byte[] alleles = firstSNPPassingQC.getAlleles(); + String alleleStr = (BaseAnnot.toString(alleles[0]) + "/" + BaseAnnot.toString(alleles[1])).intern(); + e.setAlleles(alleleStr); + e.setDatasetZScores(r.datasetZScores[p]); + e.setZscore(metaZScore); + e.setPvalue(pValueOverall); + e.setZscoreAbs(zScoreAbs); + e.setPvalueAbs(pValueOverallAbs); + + if (m_settings.isUseAbsoluteZscore()) { + e.setUseAbsoluteZScore(); + } + + if (pValueOverallAbs < 1) { + for (int d1 = 0; d1 < ds.length; d1++) { + boolean ds1PassesQC = r.dspassingqc[p][d1]; + if (ds1PassesQC) { + double datasetZScore = r.datasetZScores[p][d1]; + if (zscoreflipped[d1]) { + datasetZScore = -datasetZScore; + } + for (int d2 = d1 + 1; d2 < ds.length; d2++) { + if (r.dspassingqc[p][d2]) { + double zscore2 = r.datasetZScores[p][d2]; + if (zscoreflipped[d2]) { + zscore2 = -zscore2; + } + if (zs != null) { // if ((datasetZScore < -10 && zscore2 > 10) || (datasetZScore > 10 && zscore2 < -10)) { // System.out.println(""); // System.out.println("Opposite effect: "); @@ -393,163 +388,162 @@ protected void analyze(MetaAnalysisWorkPackage 
pack) { // // System.out.println(""); // } - if (pValueOverall < 1E-15) { - zs.draw(new Double(datasetZScore), new Double(zscore2), d1, d2); - } - } - } - } - if (zs != null && pValueOverall < 1E-15) { - zs.draw(new Double(datasetZScore), new Double(metaZScore), d1, ds.length); - } - } - } - } - // - e.setDatasetsSamples(r.numSamples[p]); - e.setProbe(probes.get(p)); - e.setRsName(firstSNPPassingQC.getName()); - e.setProbeHUGO(probeTranslation.getProbeSymbol(p)); - - } - - if (m_settings.isMakezscoretable()) { - metaZPerProbe[p] = metaZScore; - } - } else { - r.finalzscores[p] = null; - } - } - - if (m_settings.isMakezscoretable()) { - - if (firstSNPPassingQC != null) { - zscoretableout.append(snps.get(s)); - zscoretableout.append("\t").append(BaseAnnot.toString(firstSNPPassingQC.getAlleles()[0])).append("/").append(BaseAnnot.toString(firstSNPPassingQC.getAlleles()[1])).append("\t").append(BaseAnnot.toString(firstSNPPassingQC.getAssessedAllele())); - - for (int i = 0; i < metaZPerProbe.length; i++) { - zscoretableout.append("\t").append(metaZPerProbe[i]); - metaZPerProbe[i] = null; - } - metaZPerProbe = null; - pack.setZScoreOut(zscoretableout.toString()); - } - } - r.clearData(); - - - if (numDSPassingQC > 0) { - pack.setProbesTestedHash(probesTestedHash); - } else { - pack.setProbesTestedHash(new HashSet()); - } - pack.setNumOfTestedProbes(probesTested); - pack.setResult(result); - try { - m_queue_output.put(pack); - } catch (InterruptedException ex) { - ex.printStackTrace(); - } - - } - - - - } - - // TODO: AT / GC SNPs?? 
- public Boolean flipalleles(BinaryResultSNP firstSNPPassingQC, BinaryResultSNP snpObject) { - byte[] allelesfirst = firstSNPPassingQC.getAlleles(); - byte allelefirstassessed = firstSNPPassingQC.getAssessedAllele(); - - byte[] allelessecond = snpObject.getAlleles(); - byte allelesecondassessed = snpObject.getAssessedAllele(); - - int nridenticalalleles = 0; - - for (int i = 0; i < allelesfirst.length; i++) { - byte allele1 = allelesfirst[i]; - for (int j = 0; j < allelessecond.length; j++) { - if (allelessecond[j] == allele1) { - nridenticalalleles++; - } - } - } - - if (nridenticalalleles == 2) { - // alleles are identical. check if same allele was assessed... - if (allelefirstassessed == allelesecondassessed) { - return false; - } else { - return true; - } - } else { - // try complement - allelessecond = convertToComplementaryAlleles(allelessecond); - allelesecondassessed = BaseAnnot.getComplement(allelesecondassessed); - nridenticalalleles = 0; - - for (int i = 0; i < allelesfirst.length; i++) { - byte allele1 = allelesfirst[i]; - for (int j = 0; j < allelessecond.length; j++) { - if (allelessecond[j] == allele1) { - nridenticalalleles++; - } - } - } - - if (nridenticalalleles == 2) { - // alleles are identical. check if same allele was assessed... 
- if (allelefirstassessed == allelesecondassessed) { - return false; - } else { - return true; - } - } - } - return null; - } - - public byte[] convertToComplementaryAlleles(byte[] allelesToCompare) { - byte[] allelesComplementary = new byte[2]; - for (int a = 0; a < 2; a++) { - allelesComplementary[a] = BaseAnnot.getComplement(allelesToCompare[a]); - } - return allelesComplementary; - } - - protected Float[] inflate(byte[] buffer, int numElems) throws DataFormatException { - inflater.setInput(buffer); - inflater.finished(); - byte[] decompressed = new byte[numElems * 4]; - inflater.inflate(decompressed); - - long actuallydecompressed = inflater.getBytesWritten(); - if (actuallydecompressed != numElems * 4) { - throw new DataFormatException("IO Error: uncompressed data does not correspond to the size requested\t" + actuallydecompressed + "\t" + numElems * 4); - } - - inflater.reset(); - - ByteBuffer bytebuffer = ByteBuffer.wrap(decompressed); - Float[] output = new Float[numElems]; - int ctr = 0; - for (int i = 0; i < numElems; i++) { - Float f = bytebuffer.getFloat(); - if (f.isNaN()) { - f = null; - } else { - ctr++; - } - output[i] = f; - } - - decompressed = null; - - if (ctr == 0) { - return null; - } else { - return output; - } - } + if (pValueOverall < 1E-15) { + zs.draw(new Double(datasetZScore), new Double(zscore2), d1, d2); + } + } + } + } + if (zs != null && pValueOverall < 1E-15) { + zs.draw(new Double(datasetZScore), new Double(metaZScore), d1, ds.length); + } + } + } + } + // + e.setDatasetsSamples(r.numSamples[p]); + e.setProbe(probes.get(p).intern()); + e.setRsName(firstSNPPassingQC.getName().intern()); + e.setProbeHUGO(probeTranslation.getProbeSymbol(p).intern()); + + } + + if (m_settings.isMakezscoretable()) { + metaZPerProbe[p] = metaZScore; + } + } else { + r.finalzscores[p] = null; + } + } + + if (m_settings.isMakezscoretable()) { + + if (firstSNPPassingQC != null) { + zscoretableout.append(snps.get(s)); + 
zscoretableout.append("\t").append(BaseAnnot.toString(firstSNPPassingQC.getAlleles()[0])).append("/").append(BaseAnnot.toString(firstSNPPassingQC.getAlleles()[1])).append("\t").append(BaseAnnot.toString(firstSNPPassingQC.getAssessedAllele())); + + for (int i = 0; i < metaZPerProbe.length; i++) { + zscoretableout.append("\t").append(metaZPerProbe[i]); + metaZPerProbe[i] = null; + } + metaZPerProbe = null; + pack.setZScoreOut(zscoretableout.toString()); + } + } + r.clearData(); + + + if (numDSPassingQC > 0) { + pack.setProbesTestedHash(probesTestedHash); + } else { + pack.setProbesTestedHash(new HashSet()); + } + pack.setNumOfTestedProbes(probesTested); + pack.setResult(result); + try { + m_queue_output.put(pack); + } catch (InterruptedException ex) { + ex.printStackTrace(); + } + + } + + + } + + // TODO: AT / GC SNPs?? + public Boolean flipalleles(BinaryResultSNP firstSNPPassingQC, BinaryResultSNP snpObject) { + byte[] allelesfirst = firstSNPPassingQC.getAlleles(); + byte allelefirstassessed = firstSNPPassingQC.getAssessedAllele(); + + byte[] allelessecond = snpObject.getAlleles(); + byte allelesecondassessed = snpObject.getAssessedAllele(); + + int nridenticalalleles = 0; + + for (int i = 0; i < allelesfirst.length; i++) { + byte allele1 = allelesfirst[i]; + for (int j = 0; j < allelessecond.length; j++) { + if (allelessecond[j] == allele1) { + nridenticalalleles++; + } + } + } + + if (nridenticalalleles == 2) { + // alleles are identical. check if same allele was assessed... 
+ if (allelefirstassessed == allelesecondassessed) { + return false; + } else { + return true; + } + } else { + // try complement + allelessecond = convertToComplementaryAlleles(allelessecond); + allelesecondassessed = BaseAnnot.getComplement(allelesecondassessed); + nridenticalalleles = 0; + + for (int i = 0; i < allelesfirst.length; i++) { + byte allele1 = allelesfirst[i]; + for (int j = 0; j < allelessecond.length; j++) { + if (allelessecond[j] == allele1) { + nridenticalalleles++; + } + } + } + + if (nridenticalalleles == 2) { + // alleles are identical. check if same allele was assessed... + if (allelefirstassessed == allelesecondassessed) { + return false; + } else { + return true; + } + } + } + return null; + } + + public byte[] convertToComplementaryAlleles(byte[] allelesToCompare) { + byte[] allelesComplementary = new byte[2]; + for (int a = 0; a < 2; a++) { + allelesComplementary[a] = BaseAnnot.getComplement(allelesToCompare[a]); + } + return allelesComplementary; + } + + protected Float[] inflate(byte[] buffer, int numElems) throws DataFormatException { + inflater.setInput(buffer); + inflater.finished(); + byte[] decompressed = new byte[numElems * 4]; + inflater.inflate(decompressed); + + long actuallydecompressed = inflater.getBytesWritten(); + if (actuallydecompressed != numElems * 4) { + throw new DataFormatException("IO Error: uncompressed data does not correspond to the size requested\t" + actuallydecompressed + "\t" + numElems * 4); + } + + inflater.reset(); + + ByteBuffer bytebuffer = ByteBuffer.wrap(decompressed); + Float[] output = new Float[numElems]; + int ctr = 0; + for (int i = 0; i < numElems; i++) { + Float f = bytebuffer.getFloat(); + if (f.isNaN()) { + f = null; + } else { + ctr++; + } + output[i] = f; + } + + decompressed = null; + + if (ctr == 0) { + return null; + } else { + return output; + } + } } diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalysisResultThread.java 
b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalysisResultThread.java index 0c1ccc66d..66dd28a20 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalysisResultThread.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalysisResultThread.java @@ -4,197 +4,199 @@ */ package eqtlmappingpipeline.binarymeta.meta; +import umcg.genetica.io.text.TextFile; +import umcg.genetica.io.trityper.EQTL; + import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.concurrent.LinkedBlockingQueue; -import umcg.genetica.io.text.TextFile; -import umcg.genetica.io.trityper.EQTL; /** - * * @author harmjan */ public class MetaAnalysisResultThread extends Thread { - private final LinkedBlockingQueue m_queue_input; + private final LinkedBlockingQueue m_queue_input; // private double pvaluethreshold = 1; - - - private static String header = "PValue\t" - + "SNPName\t" - + "SNPChr\t" - + "SNPChrPos\t" - + "ProbeName\t" - + "ProbeChr\t" - + "ProbeCenterChrPos\t" - + "CisTrans\t" - + "SNPType\t" - + "AlleleAssessed\t" - + "OverallZScore\t" - + "DatasetsWhereSNPProbePairIsAvailableAndPassesQC\t" - + "DatasetsZScores\t" - + "DatasetsNrSamples\t" - + "IncludedDatasetsMeanProbeExpression\t" - + "IncludedDatasetsProbeExpressionVariance\t" - + "HGNCName\t" - + "IncludedDatasetsCorrelationCoefficient"; - - private int ctr = 0; - private EQTL[] eQTLBuffer = new EQTL[100000]; - private EQTL[] finalEQTLBuffer = new EQTL[0]; - private int nrInFinalBuffer = 0; - private static MetaSettings m_settings; - private int perm; - private String[] datasets; - private final TextFile zscoretable; - private final PValueThreshold pvaluethreshold; - private final ArrayList snps; - private final HashMap> snpProbeSelection; - private final ArrayList probes; - - public MetaAnalysisResultThread(LinkedBlockingQueue input, - 
MetaSettings m_settings, - String[] datasets, - int perm, - TextFile zscoretable, PValueThreshold p, ArrayList snps, HashMap> snpProbeSelection, ArrayList probes) { - this.m_settings = m_settings; - this.datasets = datasets; - this.perm = perm; - this.zscoretable = zscoretable; - this.pvaluethreshold = p; - m_queue_input = input; - this.snps = snps; - this.snpProbeSelection = snpProbeSelection; - this.probes = probes; - } - TextFile snpout = null; - - @Override - public void run() { - boolean poison = false; - try { - snpout = new TextFile(m_settings.getOutput() + "snpsandnreqtls.txt", TextFile.W); - while (!poison) { - try { - MetaAnalysisWorkPackage pack = m_queue_input.take(); - if (!pack.getPoison()) { - Integer snpnum = pack.getSNPNum(); - String snp = snps.get(snpnum); - if (snpProbeSelection == null || snpProbeSelection.containsKey(snp)) { - analyze(pack); - } + + + private static String header = "PValue\t" + + "SNPName\t" + + "SNPChr\t" + + "SNPChrPos\t" + + "ProbeName\t" + + "ProbeChr\t" + + "ProbeCenterChrPos\t" + + "CisTrans\t" + + "SNPType\t" + + "AlleleAssessed\t" + + "OverallZScore\t" + + "DatasetsWhereSNPProbePairIsAvailableAndPassesQC\t" + + "DatasetsZScores\t" + + "DatasetsNrSamples\t" + + "IncludedDatasetsMeanProbeExpression\t" + + "IncludedDatasetsProbeExpressionVariance\t" + + "HGNCName\t" + + "IncludedDatasetsCorrelationCoefficient"; + + private int ctr = 0; + private EQTL[] eQTLBuffer = new EQTL[100000]; + private EQTL[] finalEQTLBuffer = new EQTL[0]; + private int nrInFinalBuffer = 0; + private static MetaSettings m_settings; + private int perm; + private String[] datasets; + private final TextFile zscoretable; + private final PValueThreshold pvaluethreshold; + private final ArrayList snps; + private final HashMap> snpProbeSelection; + private final ArrayList probes; + + public MetaAnalysisResultThread(LinkedBlockingQueue input, + MetaSettings m_settings, + String[] datasets, + int perm, + TextFile zscoretable, PValueThreshold p, ArrayList 
snps, HashMap> snpProbeSelection, ArrayList probes) { + this.m_settings = m_settings; + this.datasets = datasets; + this.perm = perm; + this.zscoretable = zscoretable; + this.pvaluethreshold = p; + m_queue_input = input; + this.snps = snps; + this.snpProbeSelection = snpProbeSelection; + this.probes = probes; + } + + TextFile snpout = null; + + @Override + public void run() { + boolean poison = false; + try { + snpout = new TextFile(m_settings.getOutput() + "snpsandnreqtls.txt", TextFile.W); + while (!poison) { + try { + MetaAnalysisWorkPackage pack = m_queue_input.take(); + if (!pack.getPoison()) { + Integer snpnum = pack.getSNPNum(); + String snp = snps.get(snpnum); + if (snpProbeSelection == null || snpProbeSelection.containsKey(snp)) { + analyze(pack); + } // if(taken % printperiterations == 0){ // System.out.println("Thread "+this.getName()+" calculated "+taken+" workpackages."); // } - } else { - poison = pack.getPoison(); + } else { + poison = pack.getPoison(); // System.out.println("Thread " + m_name + " got killed by a poisonous workpackage, but was bravely able to perform\t" + testsPerformed + "\ttests"); - } - - } catch (InterruptedException ex) { - ex.printStackTrace(); - } - } - - if (ctr > 0) { - mergebuffers(ctr); - } - snpout.close(); - - // write eQTL results.. 
- - writeresults(); - - TextFile out = new TextFile(m_settings.getOutput() + "/NumberOfEQTLSTotal.txt", TextFile.W); - out.writeln("Number of eQTLs in total: " + totalNumberOfEQTLs); - System.out.println("Number of eQTLs in total: " + totalNumberOfEQTLs); - out.writeln("Number of snps in total: " + uniqueSNPs.size()); - out.writeln("Number of snps in total not passing QC: " + uniqueSNPsNotPassingQC.size()); - - - System.out.println("Number of snps in total: " + uniqueSNPs.size()); - TextFile out2 = new TextFile(m_settings.getOutput() + "/TestedSNPs.txt", TextFile.W); - List list = new ArrayList(uniqueSNPs); - out2.writeList(list); - out2.close(); - - out2 = new TextFile(m_settings.getOutput() + "/TestedSNPsNPQC.txt", TextFile.W); - list = new ArrayList(uniqueSNPsNotPassingQC); - out2.writeList(list); - - out.writeln("Number of probes in total: " + uniqueProbes.size()); - System.out.println("Number of probes in total: " + uniqueProbes.size()); - out2 = new TextFile(m_settings.getOutput() + "/TestedProbes.txt", TextFile.W); - List list2 = new ArrayList(uniqueProbes); - ArrayList list2str = new ArrayList(); - for (Integer i : list2) { - list2str.add("" + i); - } - - out2.writeList(list2str); - out.close(); - - } catch (IOException e) { - e.printStackTrace(); - } - } - private HashSet uniqueSNPs = new HashSet(); - private HashSet uniqueSNPsNotPassingQC = new HashSet(); - private HashSet uniqueProbes = new HashSet(); - private int totalNumberOfEQTLs = 0; - - private void analyze(MetaAnalysisWorkPackage pack) { - - Integer snpnum = pack.getSNPNum(); - String snp = snps.get(snpnum); - - HashSet allowedProbes = null; - if (snpProbeSelection != null) { - allowedProbes = snpProbeSelection.get(snp); - } - - Integer[] probeList = pack.getListOfTestedProbes(); - for (int i = 0; i < probeList.length; i++) { - String probe = probes.get(probeList[i]); - if (allowedProbes == null || allowedProbes.contains(probe)) { - totalNumberOfEQTLs++; - uniqueProbes.add(probeList[i]); - } - } - 
- if (pack.getPassedQC()) { - uniqueSNPs.add(snps.get(snpnum)); - } else { - uniqueSNPsNotPassingQC.add(snps.get(snpnum)); - } - if (m_settings.isMakezscoretable() && zscoretable != null) { - try { - String zscoreout = pack.getZScoreOut(); - if (zscoreout != null) { - zscoretable.writeln(zscoreout); - pack.setZScoreOut(null); - } - } catch (IOException e) { - e.printStackTrace(); - } - } - - EQTL[] finalEQTLs = pack.getResult(); - - int nreQTLsForSNP = 0; - for (int p = 0; p < finalEQTLs.length; p++) { + } + + } catch (InterruptedException ex) { + ex.printStackTrace(); + } + } + + if (ctr > 0) { + mergebuffers(ctr); + } + snpout.close(); + + // write eQTL results.. + + writeresults(); + + TextFile out = new TextFile(m_settings.getOutput() + "/NumberOfEQTLSTotal.txt", TextFile.W); + out.writeln("Number of eQTLs in total: " + totalNumberOfEQTLs); + System.out.println("Number of eQTLs in total: " + totalNumberOfEQTLs); + out.writeln("Number of snps in total: " + uniqueSNPs.size()); + out.writeln("Number of snps in total not passing QC: " + uniqueSNPsNotPassingQC.size()); + + + System.out.println("Number of snps in total: " + uniqueSNPs.size()); + TextFile out2 = new TextFile(m_settings.getOutput() + "/TestedSNPs.txt", TextFile.W); + List list = new ArrayList(uniqueSNPs); + out2.writeList(list); + out2.close(); + + out2 = new TextFile(m_settings.getOutput() + "/TestedSNPsNPQC.txt", TextFile.W); + list = new ArrayList(uniqueSNPsNotPassingQC); + out2.writeList(list); + + out.writeln("Number of probes in total: " + uniqueProbes.size()); + System.out.println("Number of probes in total: " + uniqueProbes.size()); + out2 = new TextFile(m_settings.getOutput() + "/TestedProbes.txt", TextFile.W); + List list2 = new ArrayList(uniqueProbes); + ArrayList list2str = new ArrayList(); + for (Integer i : list2) { + list2str.add("" + i); + } + + out2.writeList(list2str); + out.close(); + + } catch (IOException e) { + e.printStackTrace(); + } + } + + private HashSet uniqueSNPs = new 
HashSet(); + private HashSet uniqueSNPsNotPassingQC = new HashSet(); + private HashSet uniqueProbes = new HashSet(); + private int totalNumberOfEQTLs = 0; + + private void analyze(MetaAnalysisWorkPackage pack) { + + Integer snpnum = pack.getSNPNum(); + String snp = snps.get(snpnum).intern(); + + HashSet allowedProbes = null; + if (snpProbeSelection != null) { + allowedProbes = snpProbeSelection.get(snp); + } + + Integer[] probeList = pack.getListOfTestedProbes(); + for (int i = 0; i < probeList.length; i++) { + String probe = probes.get(probeList[i]).intern(); + if (allowedProbes == null || allowedProbes.contains(probe)) { + totalNumberOfEQTLs++; + uniqueProbes.add(probeList[i]); + } + } + + if (pack.getPassedQC()) { + uniqueSNPs.add(snps.get(snpnum).intern()); + } else { + uniqueSNPsNotPassingQC.add(snps.get(snpnum).intern()); + } + if (m_settings.isMakezscoretable() && zscoretable != null) { + try { + String zscoreout = pack.getZScoreOut(); + if (zscoreout != null) { + zscoretable.writeln(zscoreout); + pack.setZScoreOut(null); + } + } catch (IOException e) { + e.printStackTrace(); + } + } + + EQTL[] finalEQTLs = pack.getResult(); + + int nreQTLsForSNP = 0; + for (int p = 0; p < finalEQTLs.length; p++) { // if (finalEQTLs[p] != null) { //// uniqueProbes.add(finalEQTLs[p].getProbe()); // } - if (finalEQTLs[p] != null && finalEQTLs[p].getPvalue() <= pvaluethreshold.getPvalue() && (allowedProbes == null || allowedProbes.contains(finalEQTLs[p].getProbe()))) { - nreQTLsForSNP++; - // check cis / trans constraints ... + if (finalEQTLs[p] != null && finalEQTLs[p].getPvalue() <= pvaluethreshold.getPvalue() && (allowedProbes == null || allowedProbes.contains(finalEQTLs[p].getProbe()))) { + nreQTLsForSNP++; + // check cis / trans constraints ... 
// if(finalEQTLs[p].getProbeChr()) // boolean includeEQTL = true; // if(transAnalysis && !cisAnalysis){ @@ -212,102 +214,101 @@ private void analyze(MetaAnalysisWorkPackage pack) { // } // if (includeEQTL) { - eQTLBuffer[ctr] = finalEQTLs[p]; - ctr++; - if (ctr == eQTLBuffer.length) { - mergebuffers(ctr); - ctr = 0; + eQTLBuffer[ctr] = finalEQTLs[p]; + ctr++; + if (ctr == eQTLBuffer.length) { + mergebuffers(ctr); + ctr = 0; // System.out.println("SNPs tested: "+s+"/"+snps.size()+", threshold: "+pvaluethreshold); - } + } // } - } else { - if (finalEQTLs[p] != null) { - finalEQTLs[p].clearData(); - finalEQTLs[p] = null; - } - } - } - finalEQTLs = null; - try { - snpout.writeln(snps.get(pack.getSNPNum()) + "\t" + nreQTLsForSNP); - } catch (Exception e) { - e.printStackTrace(); - } - pack.clearData(); - pack = null; - } - - protected void mergebuffers(int ctr) { - EQTL[] toMerge = null; - if (ctr < eQTLBuffer.length) { - toMerge = new EQTL[ctr]; - System.arraycopy(eQTLBuffer, 0, toMerge, 0, ctr); - } else { - toMerge = eQTLBuffer; - } - - EQTL[] tmp = new EQTL[finalEQTLBuffer.length + toMerge.length]; - System.arraycopy(toMerge, 0, tmp, 0, toMerge.length); - System.arraycopy(finalEQTLBuffer, 0, tmp, toMerge.length, finalEQTLBuffer.length); - - java.util.Arrays.sort(tmp); - - nrInFinalBuffer += toMerge.length; - if (nrInFinalBuffer < m_settings.getFinalEQTLBufferMaxLength()) { - finalEQTLBuffer = tmp; - } else { - - finalEQTLBuffer = new EQTL[m_settings.getFinalEQTLBufferMaxLength()]; + } else { + if (finalEQTLs[p] != null) { + finalEQTLs[p].clearData(); + finalEQTLs[p] = null; + } + } + } + finalEQTLs = null; + try { + snpout.writeln(snps.get(pack.getSNPNum()) + "\t" + nreQTLsForSNP); + } catch (Exception e) { + e.printStackTrace(); + } + pack.clearData(); + pack = null; + } + + protected void mergebuffers(int ctr) { + EQTL[] toMerge = null; + if (ctr < eQTLBuffer.length) { + toMerge = new EQTL[ctr]; + System.arraycopy(eQTLBuffer, 0, toMerge, 0, ctr); + } else { + 
toMerge = eQTLBuffer; + } + + EQTL[] tmp = new EQTL[finalEQTLBuffer.length + toMerge.length]; + System.arraycopy(toMerge, 0, tmp, 0, toMerge.length); + System.arraycopy(finalEQTLBuffer, 0, tmp, toMerge.length, finalEQTLBuffer.length); + + java.util.Arrays.sort(tmp); + + nrInFinalBuffer += toMerge.length; + if (nrInFinalBuffer < m_settings.getFinalEQTLBufferMaxLength()) { + finalEQTLBuffer = tmp; + } else { + + finalEQTLBuffer = new EQTL[m_settings.getFinalEQTLBufferMaxLength()]; // System.out.println(finalEQTLBuffer.length+"\t"+tmp.length); - System.arraycopy(tmp, 0, finalEQTLBuffer, 0, m_settings.getFinalEQTLBufferMaxLength()); - nrInFinalBuffer = m_settings.getFinalEQTLBufferMaxLength(); - pvaluethreshold.setPvalue(finalEQTLBuffer[nrInFinalBuffer - 1].getPvalue()); - - } - } + System.arraycopy(tmp, 0, finalEQTLBuffer, 0, m_settings.getFinalEQTLBufferMaxLength()); + nrInFinalBuffer = m_settings.getFinalEQTLBufferMaxLength(); + pvaluethreshold.setPvalue(finalEQTLBuffer[nrInFinalBuffer - 1].getPvalue()); - private void writeresults() throws IOException { + } + } + private void writeresults() throws IOException { - TextFile out = null; - if (perm > 0) { - out = new TextFile(m_settings.getOutput() + "PermutedEQTLsPermutationRound" + perm + ".txt.gz", TextFile.W); - } else { - out = new TextFile(m_settings.getOutput() + "eQTLs.txt", TextFile.W); - } + TextFile out = null; + if (perm > 0) { + out = new TextFile(m_settings.getOutput() + "PermutedEQTLsPermutationRound" + perm + ".txt.gz", TextFile.W); + } else { + out = new TextFile(m_settings.getOutput() + "eQTLs.txt", TextFile.W); + } - out.write(header + "\n"); + out.write(header + "\n"); - for (int i = 0; i < finalEQTLBuffer.length; i++) { - finalEQTLBuffer[i].setDatasets(datasets); - out.writeln(finalEQTLBuffer[i].toString()); - } + for (int i = 0; i < finalEQTLBuffer.length; i++) { + finalEQTLBuffer[i].setDatasets(datasets); + out.writeln(finalEQTLBuffer[i].toString()); + } - out.close(); + out.close(); - TextFile 
oppositeEffects = null; - if (perm > 0) { - oppositeEffects = new TextFile(m_settings.getOutput() + "OppositeEffects-PermutedEQTLsPermutationRound" + perm + ".txt.gz", TextFile.W); - } else { - oppositeEffects = new TextFile(m_settings.getOutput() + "OppositeEffects-eQTLs.txt", TextFile.W); - } + TextFile oppositeEffects = null; + if (perm > 0) { + oppositeEffects = new TextFile(m_settings.getOutput() + "OppositeEffects-PermutedEQTLsPermutationRound" + perm + ".txt.gz", TextFile.W); + } else { + oppositeEffects = new TextFile(m_settings.getOutput() + "OppositeEffects-eQTLs.txt", TextFile.W); + } - for (int i = 0; i < finalEQTLBuffer.length; i++) { - String oppositeEffectIndicator = ""; - double pValueOverall = finalEQTLBuffer[i].getPvalue(); - double pValueAbs = finalEQTLBuffer[i].getPvalueAbs(); - if (pValueAbs < pValueOverall) { - oppositeEffectIndicator = "OppositeEffect"; - if (pValueAbs <= pValueOverall / 100000) { - oppositeEffectIndicator = "StrongOppositeEffect"; - } + for (int i = 0; i < finalEQTLBuffer.length; i++) { + String oppositeEffectIndicator = ""; + double pValueOverall = finalEQTLBuffer[i].getPvalue(); + double pValueAbs = finalEQTLBuffer[i].getPvalueAbs(); + if (pValueAbs < pValueOverall) { + oppositeEffectIndicator = "OppositeEffect"; + if (pValueAbs <= pValueOverall / 100000) { + oppositeEffectIndicator = "StrongOppositeEffect"; + } - oppositeEffects.writeln(oppositeEffectIndicator + "\t" + pValueAbs + "\t" + finalEQTLBuffer[i].getZscoreAbs() + "\t" + finalEQTLBuffer[i].toString()); - } - } - oppositeEffects.close(); - } + oppositeEffects.writeln(oppositeEffectIndicator + "\t" + pValueAbs + "\t" + finalEQTLBuffer[i].getZscoreAbs() + "\t" + finalEQTLBuffer[i].toString()); + } + } + oppositeEffects.close(); + } } diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalyze.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalyze.java index a286389bb..1fe9e0e1d 100644 --- 
a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalyze.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalyze.java @@ -6,25 +6,25 @@ // //import eqtlmappingpipeline.gpio.binary.Dataset; -import umcg.genetica.io.trityper.probeannotation.ProbeTranslation; import eqtlmappingpipeline.binarymeta.meta.graphics.ZScorePlot; +import eqtlmappingpipeline.metaqtl3.FDR; +import eqtlmappingpipeline.metaqtl3.graphics.EQTLDotPlot; +import umcg.genetica.io.Gpio; +import umcg.genetica.io.text.TextFile; +import umcg.genetica.io.trityper.EQTL; import umcg.genetica.io.trityper.bin.BinaryResultDataset; import umcg.genetica.io.trityper.bin.BinaryResultProbe; import umcg.genetica.io.trityper.bin.BinaryResultSNP; -import java.util.Arrays; +import umcg.genetica.io.trityper.probeannotation.ProbeTranslation; import umcg.genetica.math.stats.Descriptives; -import eqtlmappingpipeline.metaqtl3.FDR; -import eqtlmappingpipeline.metaqtl3.graphics.EQTLDotPlot; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.concurrent.LinkedBlockingQueue; import java.util.zip.DataFormatException; -import umcg.genetica.io.Gpio; -import umcg.genetica.io.text.TextFile; -import umcg.genetica.io.trityper.EQTL; ///** // * @@ -162,15 +162,15 @@ protected void initdatasets(String[] locations, int perm, int dToUse) throws IOE int ctr = 0; String[] felems = stf.readLineElems(TextFile.tab); while (felems != null) { - String snp = felems[0]; - String probe = felems[1]; + String snp = felems[0].intern(); + String probe = felems[1].intern(); HashSet probesForSNP = selectedSNPProbePairs.get(snp); if (probesForSNP == null) { probesForSNP = new HashSet(); } - probesForSNP.add(probe); - selectedSNPs.add(snp); - selectedSNPProbePairs.put(snp, probesForSNP); + probesForSNP.add(probe.intern()); + selectedSNPs.add(snp.intern()); + 
selectedSNPProbePairs.put(snp.intern(), probesForSNP); ctr++; felems = stf.readLineElems(TextFile.tab); } @@ -219,11 +219,11 @@ protected void initdatasets(String[] locations, int perm, int dToUse) throws IOE } for (BinaryResultSNP s : dsSNPs) { - if (!uniqueSNPs.contains(s.getName()) && (selectedSNPs == null || selectedSNPs.contains(s.getName()))) { - snps.add(s.getName()); + if (!uniqueSNPs.contains(s.getName().intern()) && (selectedSNPs == null || selectedSNPs.contains(s.getName().intern()))) { + snps.add(s.getName().intern()); snpChr.add(s.getChr()); snpChrPos.add(s.getChrpos()); - uniqueSNPs.add(s.getName()); + uniqueSNPs.add(s.getName().intern()); } } diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/cis/BinaryUnzipTask.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/cis/BinaryUnzipTask.java index 623a66e09..f2d3bd2b3 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/cis/BinaryUnzipTask.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/cis/BinaryUnzipTask.java @@ -4,103 +4,103 @@ */ package eqtlmappingpipeline.binarymeta.meta.cis; +import umcg.genetica.containers.Pair; +import umcg.genetica.io.trityper.bin.BinaryResultDataset; +import umcg.genetica.io.trityper.bin.BinaryResultSNP; + import java.nio.ByteBuffer; import java.util.HashMap; import java.util.concurrent.Callable; import java.util.zip.DataFormatException; import java.util.zip.Inflater; -import umcg.genetica.containers.Pair; -import umcg.genetica.io.trityper.bin.BinaryResultDataset; -import umcg.genetica.io.trityper.bin.BinaryResultSNP; /** - * * @author harm-jan */ public class BinaryUnzipTask implements Callable>> { - private final int snp; - private BinaryResultDataset data; - private final int numprobes; - private final Inflater inflater = new Inflater(); - private boolean poison; - - public BinaryUnzipTask(int snp, BinaryResultDataset data, int numprobes) { - 
this.snp = snp; - this.data = data; - this.numprobes = numprobes; - } - - BinaryUnzipTask(int snp, int nrProbes, BinaryResultDataset dataset, BinaryResultSNP[] snps) { - throw new UnsupportedOperationException("Not yet implemented"); - } - - @Override - public Pair> call() throws Exception { - if (snp < 0) { - return new Pair>(-1, null); - } - BinaryResultSNP[] snps = data.getSnps(); - BinaryResultSNP snpObject = snps[snp]; - long pointer = snpObject.getzScoreIndex(); - long nextpointer = -1; - - if (snp + 1 < snps.length) { - BinaryResultSNP snpObject2 = snps[snp + 1]; - nextpointer = snpObject2.getzScoreIndex(); - } - - byte[] bindata = data.getMatrix().readDeflated(pointer, nextpointer, data.getNumProbes()); - HashMap dataUnzipped = inflate(bindata, data.getNumProbes()); - bindata = null; + private final int snp; + private BinaryResultDataset data; + private final int numprobes; + private final Inflater inflater = new Inflater(); + private boolean poison; - return new Pair>(snp, dataUnzipped); - } + public BinaryUnzipTask(int snp, BinaryResultDataset data, int numprobes) { + this.snp = snp; + this.data = data; + this.numprobes = numprobes; + } - private HashMap inflate(byte[] buffer, int numElems) throws DataFormatException { - inflater.setInput(buffer); - inflater.finished(); - byte[] decompressed = new byte[numElems * 4]; - inflater.inflate(decompressed); + BinaryUnzipTask(int snp, int nrProbes, BinaryResultDataset dataset, BinaryResultSNP[] snps) { + throw new UnsupportedOperationException("Not yet implemented"); + } - long actuallydecompressed = inflater.getBytesWritten(); - if (actuallydecompressed != numElems * 4) { - throw new DataFormatException("IO Error: uncompressed data does not correspond to the size requested\t" + actuallydecompressed + "\t" + numElems * 4); + @Override + public Pair> call() throws Exception { + if (snp < 0) { + return new Pair>(-1, null); + } + BinaryResultSNP[] snps = data.getSnps(); + BinaryResultSNP snpObject = snps[snp]; + 
long pointer = snpObject.getzScoreIndex(); + long nextpointer = -1; + + if (snp + 1 < snps.length) { + BinaryResultSNP snpObject2 = snps[snp + 1]; + nextpointer = snpObject2.getzScoreIndex(); + } + + byte[] bindata = data.getMatrix().readDeflated(pointer, nextpointer, data.getNumProbes()); + HashMap dataUnzipped = inflate(bindata, data.getNumProbes()); + bindata = null; + + return new Pair>(snp, dataUnzipped); } - inflater.reset(); - - ByteBuffer bytebuffer = ByteBuffer.wrap(decompressed); - Float[] output = new Float[numElems]; - int ctr = 0; - HashMap results = new HashMap(); - for (int i = 0; i < numElems; i++) { - Float f = bytebuffer.getFloat(); - if (f.isNaN()) { - f = null; - } else { - ctr++; - results.put(i, f); - } + private HashMap inflate(byte[] buffer, int numElems) throws DataFormatException { + inflater.setInput(buffer); + inflater.finished(); + byte[] decompressed = new byte[numElems * 4]; + inflater.inflate(decompressed); + + long actuallydecompressed = inflater.getBytesWritten(); + if (actuallydecompressed != numElems * 4) { + throw new DataFormatException("IO Error: uncompressed data does not correspond to the size requested\t" + actuallydecompressed + "\t" + numElems * 4); + } + + inflater.reset(); + + ByteBuffer bytebuffer = ByteBuffer.wrap(decompressed); + Float[] output = new Float[numElems]; + int ctr = 0; + HashMap results = new HashMap(); + for (int i = 0; i < numElems; i++) { + Float f = bytebuffer.getFloat(); + if (f.isNaN()) { + f = null; + } else { + ctr++; + results.put(i, f); + } // output[i] = f; - } + } - decompressed = null; - buffer = null; + decompressed = null; + buffer = null; - if (ctr == 0) { - return null; - } else { - return results; + if (ctr == 0) { + return null; + } else { + return results; + } } - } - void setIsPoison() { - poison = true; - } + void setIsPoison() { + poison = true; + } - boolean isPoison() { - return poison; - } + boolean isPoison() { + return poison; + } } diff --git 
a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java index 8b9ddd8a6..0d7f93e62 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java @@ -5,36 +5,21 @@ package eqtlmappingpipeline.interactionanalysis; import eqtlmappingpipeline.Main; -import org.molgenis.genotype.Allele; -import umcg.genetica.graphics.ScatterPlot; import eqtlmappingpipeline.normalization.Normalizer; import gnu.trove.map.hash.THashMap; - -import java.io.File; -import java.io.IOException; -import java.util.*; -import java.util.concurrent.CompletionService; -import java.util.concurrent.ExecutorCompletionService; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; import org.apache.commons.math3.stat.correlation.SpearmansCorrelation; -import org.rosuda.REngine.REXP; -import org.rosuda.REngine.RFactor; +import org.molgenis.genotype.Allele; import org.rosuda.REngine.Rserve.RConnection; import org.rosuda.REngine.Rserve.RserveException; import umcg.genetica.console.ProgressBar; import umcg.genetica.containers.Pair; +import umcg.genetica.graphics.ScatterPlot; import umcg.genetica.io.Gpio; import umcg.genetica.io.binInteraction.*; import umcg.genetica.io.binInteraction.gene.BinaryInteractionGeneCreator; import umcg.genetica.io.binInteraction.variant.BinaryInteractionVariantCreator; import umcg.genetica.io.text.TextFile; -import umcg.genetica.io.trityper.SNP; -import umcg.genetica.io.trityper.SNPLoader; -import umcg.genetica.io.trityper.TriTyperExpressionData; -import umcg.genetica.io.trityper.TriTyperGeneticalGenomicsDataset; -import umcg.genetica.io.trityper.TriTyperGeneticalGenomicsDatasetSettings; -import 
umcg.genetica.io.trityper.TriTyperGenotypeData; +import umcg.genetica.io.trityper.*; import umcg.genetica.math.matrix.DoubleMatrixDataset; import umcg.genetica.math.stats.Correlation; import umcg.genetica.math.stats.Descriptives; @@ -42,8 +27,15 @@ import umcg.genetica.math.stats.QuantileNormalization; import umcg.genetica.math.stats.concurrent.ConcurrentCorrelation; +import java.io.File; +import java.io.IOException; +import java.util.*; +import java.util.concurrent.CompletionService; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + /** - * * @author harm-jan Multi-threaded implementation of the OLS model */ public class InteractionAnalysisMultiThreaded { @@ -297,7 +289,7 @@ public void prepareDataForCelltypeSpecificEQTLMapping(String inexpraw, String ou cellTypeSpecificDataset.transposeDataset(); // calculate first Principal Component over the cell type specific probe matrix... - PCAResults = n.calculatePCA(cellTypeSpecificDataset, celltypeSpecificCorrelationMatrix, outdirectory + "CellTypeSpecificProbePCA", 1); + PCAResults = n.calculatePCA(cellTypeSpecificDataset, celltypeSpecificCorrelationMatrix, outdirectory + "CellTypeSpecificProbePCA", cellTypeSpecificProbeDatasetRowNames.size()); // 10. PC1 scores: cell specific proxy -- write to file for future use... 
DoubleMatrixDataset cellSpecificPCScores = PCAResults.getLeft(); @@ -398,7 +390,7 @@ public void prepareDataForCelltypeSpecificEQTLMapping(String inexpraw, String ou public void runInteractionAnalysis(String inExpPCCorrected, String covariateFile, String ingt, String gte, String snpprobecombinationfile, Integer nrThreads, String out, - String covariateList, boolean sem, boolean robustSE, boolean fullStats, boolean binaryOutput, String cohort) throws IOException, Exception { + String covariateList, boolean forceNormalDistribution, boolean robustSE, boolean fullStats, boolean binaryOutput, String cohort) throws IOException, Exception { String probeannot = null; double mafthreshold = 0.05; @@ -409,19 +401,11 @@ public void runInteractionAnalysis(String inExpPCCorrected, String covariateFile throw new IllegalArgumentException("ERROR: please provide snpprobe combination file"); } - if (robustSE || sem) { + if (robustSE) { System.out.println("Running tests for robust standard errors. Now testing R connection"); try { RConnection rConnection = new RConnection(); -// rConnection.voidEval("install.packages('sandwich')"); - System.out.println("R server found: "+rConnection.getServerVersion()); -// REXP result = rConnection.eval("library(sandwich,logical.return=TRUE)"); -// boolean sandwichpresent = result.asBool(); -// if(!sandwichpresent){ -// System.err.println("Library sandwich not installed, which is required for robust SE estimation."); -// } - - + System.out.println("R server found: " + rConnection.getServerVersion()); rConnection.close(); } catch (RserveException ex) { System.err.println(ex.getMessage()); @@ -482,7 +466,6 @@ public void runInteractionAnalysis(String inExpPCCorrected, String covariateFile Set covariateHash = null; if (covariateList != null) { TextFile tfcovariatelist = new TextFile(covariateList, TextFile.R); - covariateHash = tfcovariatelist.readAsSet(0, TextFile.tab); tfcovariatelist.close(); } @@ -514,6 +497,8 @@ public void 
runInteractionAnalysis(String inExpPCCorrected, String covariateFile // since the number of samples has changed, we might need to reperform q-norm and log2 transform... // it may be a good idea to remove these last steps from the normalization step.. + + // investigate which SNPs to run.. LinkedHashSet> snpProbeCombinationsToTest = new LinkedHashSet>(); HashSet snpsPassingQC = new HashSet(); @@ -541,7 +526,7 @@ public void runInteractionAnalysis(String inExpPCCorrected, String covariateFile snpsPassingQC.add(snp); snpProbeCombinationsToTest.add(p); - if (binaryOutput){ + if (binaryOutput) { snpStats.put(snp, snpObj); } } else { @@ -571,36 +556,45 @@ public void runInteractionAnalysis(String inExpPCCorrected, String covariateFile ArrayList rowNames = new ArrayList(); rowNames.addAll(covariateData.rowObjects); -// if (cellcounts != null) { -// rowNames.add("CellTypeSNPZScore"); -// rowNames.add("CellTypeZScore"); -// rowNames.add("CellTypeInteractionZScore"); -// rowNames.add("MainEffectZScore"); -// } + Correlation.correlationToZScore(covariateData.nrCols); -// DoubleMatrixDataset datasetOut = new DoubleMatrixDataset(rowNames.size(), snpProbeCombinationsToTest.size()); System.out.println("Output matrix will be " + snpProbeCombinationsToTest.size() + "(x5) x " + rowNames.size()); -// datasetOut.rowObjects = rowNames; -// ArrayList colNames = new ArrayList(); double[][] expressiondata = pcCorrectedExpressionData.getMatrix(); int[] wgaId = ds.getExpressionToGenotypeIdArray(); + if (forceNormalDistribution) { + System.out.println("Forcing normal distribution on covariate and expression data"); + System.out.println("Warning: normal distribution is forced before covariate samples are matched to genotypes."); + System.out.println("Make sure that the number of samples between samples and covariates are more or less equal"); + System.out.println("Currently: " + pcCorrectedExpressionData.getColNames().length + " for expression and " + covariateData.nrCols + " for 
covariates"); + + Normalizer norm = new Normalizer(); + + for (int row = 0; row < expressiondata.length; row++) { + expressiondata[row] = norm.forceNormal(expressiondata[row]); + } + + double[][] covariates = covariateData.getRawData(); + for (int row = 0; row < expressiondata.length; row++) { + covariates[row] = norm.forceNormal(covariates[row]); + } + covariateData.setRawData(covariates); + System.out.println("Done. And you have been warned."); + } + + TextFile snpFile = new TextFile(out + "SNPSummaryStatistics.txt", TextFile.W); snpFile.writeln("SNP\tChr\tChrPos\tAlleles\tMinorAllele\tMAF\tCallRate\tHWE\tGenotypesCalled"); -// TextFile proxyEffectFile = null; -// if (cellcounts != null) { -// proxyEffectFile = new TextFile(out + "CelltypeSpecificEQTLEffects.txt", TextFile.W); -// proxyEffectFile.writeln("#/#\tSNP\tProbe\tnrCalled\tCorrelation\tanovaFTestP\tbetaInteraction\tseInteraction\ttInteraction\tpValueInteraction\tzScoreInteraction"); -// } String[] snpsPassingQCArr = snpsPassingQC.toArray(new String[0]); int nrSubmitted = 0; if (nrThreads == null) { nrThreads = Runtime.getRuntime().availableProcessors(); } + System.out.println("Running with: " + nrThreads + " threads"); ExecutorService threadPool = Executors.newFixedThreadPool(nrThreads); @@ -611,18 +605,18 @@ public void runInteractionAnalysis(String inExpPCCorrected, String covariateFile BinaryInteractionFile binaryInteractionFile = null; // Write binary output header - if (binaryOutput){ + if (binaryOutput) { File binaryOutFile = new File(out + "InteractionResults.binary.dat"); - String description = "Genotypes: " + ingt + - " Expresion: " + inExpPCCorrected + - " GTE: " + gte + - " Covariates: " + covariateFile + - " Covariates List: " + covariateList + + String description = "Genotypes: " + ingt + + " Expresion: " + inExpPCCorrected + + " GTE: " + gte + + " Covariates: " + covariateFile + + " Covariates List: " + covariateList + " SNP-probes: " + snpprobecombinationfile + " Software version: " + 
Main.VERSION; - binaryInteractionFile = createBinaryOutputHeader(binaryOutFile, snpsPassingQCArr, snpStats, snpProbeCombinationsToTest, covariateData, expressionIndividualsInPCCorrectedData, cohort, description); - } - else{ + binaryInteractionFile = createBinaryOutputHeader(binaryOutFile, snpsPassingQCArr, snpStats, + snpProbeCombinationsToTest, covariateData, expressionIndividualsInPCCorrectedData, cohort, description); + } else { System.out.println("Output will be written to: " + out + "InteractionResults.txt"); outputFile = new TextFile(out + "InteractionResults.txt", TextFile.W); String outputheader = "SNP\tProbe\tCovariate\tZ-SNP\tZ-Cov\tZ-Interaction\tZ-Main\tZ-Interaction-Flipped\tN\tRSquared"; @@ -639,7 +633,9 @@ public void runInteractionAnalysis(String inExpPCCorrected, String covariateFile outputFile.writeln(outputheader); } - ProgressBar pb = new ProgressBar(snpProbeCombinationsToTest.size(), "Now testing available eQTL effects for cell type specificity."); + + + ProgressBar pb = new ProgressBar(snpProbeCombinationsToTest.size(), "Now testing available eQTL effects for interactions."); int maxbuffer = (nrThreads * 8); for (int i = 0; i < snpsPassingQCArr.length; i++) { String snp = snpsPassingQCArr[i]; @@ -668,7 +664,6 @@ public void runInteractionAnalysis(String inExpPCCorrected, String covariateFile expInds, covariateData, pcCorrectedExpressionData, - sem, robustSE, fullStats ); @@ -730,7 +725,7 @@ public void runInteractionAnalysis(String inExpPCCorrected, String covariateFile snpFile.close(); - if (binaryOutput){ + if (binaryOutput) { binaryInteractionFile.finalizeWriting(); System.out.println("Interaction results writer buffer flushed: " + binaryInteractionFile.getInteractionWriteBufferFlushed()); System.out.println("QTL results writer buffer flushed: " + binaryInteractionFile.getQtlWriteBufferFlushed()); @@ -738,14 +733,13 @@ public void runInteractionAnalysis(String inExpPCCorrected, String covariateFile System.out.println("Total number of 
writen interactions: " + binaryInteractionFile.getInteractionZscoresSet()); System.out.println("Number of QTL z-scores: " + binaryInteractionFile.getVariantCount()); binaryInteractionFile.close(); - - if(binaryInteractionFile.getInteractionZscoresSet() != binaryInteractionFile.getTotalNumberInteractions()){ + + if (binaryInteractionFile.getInteractionZscoresSet() != binaryInteractionFile.getTotalNumberInteractions()) { System.out.println("WARNING!!! written and expected interactions not the same"); System.err.println("WARNING!!! written and expected interactions not the same"); } - - } - else{ + + } else { outputFile.close(); } // datasetOut.colObjects = colNames; @@ -799,8 +793,8 @@ private void processResult(InteractionAnalysisResults result, TextFile outputFil double mainZ = maineffectZResultMatrix[e][c]; builder.append(mainZ); - if(mainZ < 0){ - interactionZ*=-1; + if (mainZ < 0) { + interactionZ *= -1; } builder.append("\t"); builder.append(interactionZ); @@ -825,8 +819,8 @@ private void processResult(InteractionAnalysisResults result, TextFile outputFil builder.append("\t"); builder.append(interactionSE[e][c]); - if(mainZ<0){ - interactionB*=-1; + if (mainZ < 0) { + interactionB *= -1; } builder.append("\t"); builder.append(interactionB); @@ -859,7 +853,7 @@ private BinaryInteractionFile processResultWriteBinaryOutput(InteractionAnalysis //main effect z-score double mainZ = maineffectZResultMatrix[e][0]; - BinaryInteractionQtlZscores qtlZscore = new BinaryInteractionQtlZscores(new double[] {mainZ}, new int[] {numSamples}); + BinaryInteractionQtlZscores qtlZscore = new BinaryInteractionQtlZscores(new double[]{mainZ}, new int[]{numSamples}); createdInteractions.setQtlResults(snp, gene, qtlZscore); for (int c = 0; c < SNPZResultMatrix[e].length; c++) { String covariate = covariateData.rowObjects.get(c); @@ -871,8 +865,8 @@ private BinaryInteractionFile processResultWriteBinaryOutput(InteractionAnalysis final double[] zscoreCovariateCohort = 
{covariateZResultMatrix[e][c]}; final double[] zscoreInteractionCohort = {interactionZ}; final double[] rSquaredCohort = {rsquared[e][c]}; - if(mainZ < 0){ - interactionZ*=-1; + if (mainZ < 0) { + interactionZ *= -1; } final double[] zscoreInteractionFlippedCohort = {interactionZ}; @@ -888,7 +882,7 @@ private BinaryInteractionFile processResultWriteBinaryOutput(InteractionAnalysis private BinaryInteractionFile createBinaryOutputHeader(File binaryOutFile, String[] snpsPassingQCArr, HashMap snpStats, LinkedHashSet> snpProbeCombinationsToTest, DoubleMatrixDataset covariateData, HashSet expressionIndividualsInPCCorrectedData, String cohort, String description) throws BinaryInteractionFileException, IOException { LinkedHashSet geneIds = new LinkedHashSet(); System.out.println("snpProbeCombinationsToTest size: " + snpProbeCombinationsToTest.size()); - for (Pair snpProbePair : snpProbeCombinationsToTest){ + for (Pair snpProbePair : snpProbeCombinationsToTest) { String gene = snpProbePair.getRight(); geneIds.add(gene); } @@ -898,7 +892,7 @@ private BinaryInteractionFile createBinaryOutputHeader(File binaryOutFile, Strin //fill variants BinaryInteractionVariantCreator[] variants = new BinaryInteractionVariantCreator[numSNPs]; - for (int snpIdx = 0; snpIdx < numSNPs; snpIdx++ ){ + for (int snpIdx = 0; snpIdx < numSNPs; snpIdx++) { String snpId = snpsPassingQCArr[snpIdx]; SNP snpObj = snpStats.get(snpId); @@ -910,13 +904,14 @@ private BinaryInteractionFile createBinaryOutputHeader(File binaryOutFile, Strin else majorAllele = alleles[0]; - variants[snpIdx] = new BinaryInteractionVariantCreator(snpId, snpObj.getChr() + "", snpObj.getChrPos(), Allele.create((char) majorAllele), Allele.create((char)minorAllele)); + variants[snpIdx] = new BinaryInteractionVariantCreator(snpId, snpObj.getChr() + "", snpObj.getChrPos(), Allele.create((char) majorAllele), Allele.create((char) minorAllele)); + } //fill genes BinaryInteractionGeneCreator[] genes = new 
BinaryInteractionGeneCreator[numGenes]; int geneIdx = 0; - for (String gene : geneIds){ + for (String gene : geneIds) { genes[geneIdx] = new BinaryInteractionGeneCreator(gene); geneIdx++; } @@ -927,7 +922,7 @@ private BinaryInteractionFile createBinaryOutputHeader(File binaryOutFile, Strin //fill cohort int numSamples = 0; - for (String s : expressionIndividualsInPCCorrectedData){ + for (String s : expressionIndividualsInPCCorrectedData) { if (covariateData.hashCols.containsKey(s)) numSamples++; } @@ -936,10 +931,10 @@ private BinaryInteractionFile createBinaryOutputHeader(File binaryOutFile, Strin // initialize BinaryInteractionFileCreator creator = new BinaryInteractionFileCreator(binaryOutFile, variants, genes, cohorts, covariates, true, false, true, true); - + creator.setDescription(description); - for (Pair eqtl : snpProbeCombinationsToTest){ + for (Pair eqtl : snpProbeCombinationsToTest) { creator.addTestedVariantGene(eqtl.getLeft(), eqtl.getRight()); } BinaryInteractionFile createdInteractions = creator.create(); diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisTask.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisTask.java index 6fdf53322..b74ac36d5 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisTask.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisTask.java @@ -5,8 +5,6 @@ package eqtlmappingpipeline.interactionanalysis; import cern.jet.random.tdouble.StudentT; -import java.util.ArrayList; -import java.util.concurrent.Callable; import org.apache.commons.math3.linear.SingularMatrixException; import org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression; import org.rosuda.REngine.REXPMismatchException; @@ -20,308 +18,258 @@ import umcg.genetica.io.trityper.util.ChrAnnotation; import 
umcg.genetica.math.matrix.DoubleMatrixDataset; import umcg.genetica.math.stats.Correlation; -import umcg.genetica.math.stats.Descriptives; -import umcg.genetica.math.stats.Normalization; + +import java.util.ArrayList; +import java.util.concurrent.Callable; /** - * * @author harmjan */ public class InteractionAnalysisTask implements Callable { - private SNP eQTLSNPObj; - private final double[][] pcCorrectedExpressionData; - private final int[] wgaId; - private final String[] expInds; - private final DoubleMatrixDataset covariateData; - private final TriTyperExpressionData expressionData; - private final ArrayList> eQTLsForSNP; - - private final boolean sandwich; - private final boolean provideFullStats; - - private final Pair NAN_PAIR = new Pair(Double.NaN, Double.NaN); - private final boolean sem; - - public InteractionAnalysisTask(SNP snpObj, ArrayList> eQTLsForSNP, double[][] pcCorrectedData, - int[] wgaId, - String[] expInds, DoubleMatrixDataset covariateData, - TriTyperExpressionData expressionData, boolean sem, boolean robustSE, boolean provideFullStats) { - this.eQTLSNPObj = snpObj; - this.eQTLsForSNP = eQTLsForSNP; - this.pcCorrectedExpressionData = pcCorrectedData; - this.wgaId = wgaId; - this.expInds = expInds; - this.expressionData = expressionData; - this.covariateData = covariateData; - this.sandwich = robustSE; - this.provideFullStats = provideFullStats; - this.sem = sem; - } - - @Override - public InteractionAnalysisResults call() throws Exception { - - ArrayList> eQTLsTested = new ArrayList>(); - - int nrTotalCovariates = covariateData.nrRows; - - double[][] interactionZScoreMatrix = new double[eQTLsForSNP.size()][nrTotalCovariates]; - - double[][] SNPZResultMatrix = new double[eQTLsForSNP.size()][nrTotalCovariates]; - double[][] covariateZResultMatrix = new double[eQTLsForSNP.size()][nrTotalCovariates]; - double[][] maineffectZResultMatrix = new double[eQTLsForSNP.size()][nrTotalCovariates]; - double[][] interactionBeta = null; - - double[][] 
interactionSE = null; - double[][] mainBeta = null; - double[][] mainSE = null; - double[][] covariateBeta = null; - double[][] covariateSE = null; - int[][] nMatrix = new int[eQTLsForSNP.size()][nrTotalCovariates]; - double[][] rsquaredMatrix = new double[eQTLsForSNP.size()][nrTotalCovariates]; - if (provideFullStats) { - - interactionBeta = new double[eQTLsForSNP.size()][nrTotalCovariates]; - interactionSE = new double[eQTLsForSNP.size()][nrTotalCovariates]; - mainBeta = new double[eQTLsForSNP.size()][nrTotalCovariates]; - mainSE = new double[eQTLsForSNP.size()][nrTotalCovariates]; - covariateBeta = new double[eQTLsForSNP.size()][nrTotalCovariates]; - covariateSE = new double[eQTLsForSNP.size()][nrTotalCovariates]; - } - - //We are using a coding system that uses the minor allele. - //If allele2 is not the minor allele, change the sign of the results we will output. - double signInteractionEffectDirection = 1; - if (eQTLSNPObj.getAlleles()[1] == eQTLSNPObj.getMinorAllele()) { - signInteractionEffectDirection = -1; - } - - String qcString = null; - Integer nrGenotypesCalled = null; - - org.apache.commons.math3.distribution.FDistribution fDist = null; - cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = null; - cern.jet.random.tdouble.StudentT tDistColt = null; - - OLSMultipleLinearRegression regressionFullWithInteraction = new OLSMultipleLinearRegression(); - - for (int e = 0; e < eQTLsForSNP.size(); e++) { - Pair eqtl = eQTLsForSNP.get(e); - String eQTLProbeName = eqtl.getRight(); - - eQTLsTested.add(eqtl); - - Integer eQTLProbeId = expressionData.getProbeToId().get(eQTLProbeName); - - double[] valsX = eQTLSNPObj.selectGenotypes(wgaId, true, true); // this is sorted on expression ID - double[] valsY = pcCorrectedExpressionData[eQTLProbeId]; //Expression level - - for (int covariate = 0; covariate < nrTotalCovariates; covariate++) { - double[] tmpVarCelCount = new double[valsY.length]; - - for (int i = 0; i < tmpVarCelCount.length; i++) { - String 
sampleName = expInds[i]; - Integer individualIdInCovariateData = covariateData.hashCols.get(sampleName); - if (individualIdInCovariateData != null) { - // presorting greatly speeds this stuff up - tmpVarCelCount[i] = covariateData.rawData[covariate][individualIdInCovariateData]; - } else { - tmpVarCelCount[i] = Double.NaN; - } - } - - //Check whether all the expression samples have a genotype and a cell count... - int nrCalled = 0; - for (int i = 0; i < wgaId.length; i++) { - if (wgaId[i] != -1 && !Double.isNaN(tmpVarCelCount[i]) && valsX[i] != -1) { - nrCalled++; - } - } - - // THIS WILL GIVE ERRONEOUS VALUES WHEN THERE ARE MISSING - // VALUES IN VALSY THE NEXT TIME THIS SNP IS TESTED!! - // this value is required for subsequent meta-analysis.. fix for altering sample sizes (take smallest size / omit missing values) - // in stead use the value for N that is now in the standard output. - double[] genotypesCalled = new double[nrCalled]; - if (qcString == null) { - qcString = eQTLSNPObj.getName() + "\t" + ChrAnnotation.parseByte(eQTLSNPObj.getChr()) + "\t" + eQTLSNPObj.getChrPos() + "\t" + BaseAnnot.toString(eQTLSNPObj.getAlleles()[0]) + "/" + BaseAnnot.toString(eQTLSNPObj.getAlleles()[1]) + "\t" + BaseAnnot.toString(eQTLSNPObj.getMinorAllele()) + "\t" + eQTLSNPObj.getMAF() + "\t" + eQTLSNPObj.getCR() + "\t" + eQTLSNPObj.getHWEP() + "\t" + genotypesCalled.length; - nrGenotypesCalled = genotypesCalled.length; - } else if (genotypesCalled.length != nrGenotypesCalled) { - - System.err.println("ERROR: the number of available values has changed. 
Does your gene expression data or cell count file contain missing values?"); - System.exit(0); - } - - double zScoreInteraction = 0; - double zScoreSNP = 0; - double zScoreCovariate = 0; - double mainZ = 0; - - double betaInteraction = 0; - double seInteraction = 0; - double betaSNP = 0; - double seSNP = 0; - double betaCovariate = 0; - double seCovariate = 0; - - double rsquared = 0; - - if (sandwich || sem) { - RConnection rConnection = null; - // this code is very suboptimal and is here for validation purposes only anyway - try { - rConnection = new RConnection(); -// rConnection.voidEval("install.packages('sandwich')"); - if (sandwich) { - rConnection.voidEval("library(sandwich)"); - } else { - rConnection.voidEval("library(lavaan)"); - } - } catch (RserveException ex) { - System.err.println(ex.getMessage()); - rConnection = null; - } - - if (rConnection == null) { - System.err.println("Error: using R connection but none found"); - return null; - } - - try { - if (rConnection.isConnected()) { - double[] olsY = new double[nrCalled]; //Ordinary least squares: Our gene expression - double[] olsX = new double[nrCalled]; - double[] covariateValues = new double[nrCalled]; + private SNP eQTLSNPObj; + private final double[][] pcCorrectedExpressionData; + private final int[] wgaId; + private final String[] expInds; + private final DoubleMatrixDataset covariateData; + private final TriTyperExpressionData expressionData; + private final ArrayList> eQTLsForSNP; + + private final boolean sandwich; + private final boolean provideFullStats; + + private final Pair NAN_PAIR = new Pair(Double.NaN, Double.NaN); + + + public InteractionAnalysisTask(SNP snpObj, ArrayList> eQTLsForSNP, double[][] pcCorrectedData, + int[] wgaId, + String[] expInds, DoubleMatrixDataset covariateData, + TriTyperExpressionData expressionData, boolean robustSE, boolean provideFullStats) { + this.eQTLSNPObj = snpObj; + this.eQTLsForSNP = eQTLsForSNP; + this.pcCorrectedExpressionData = pcCorrectedData; + 
this.wgaId = wgaId; + this.expInds = expInds; + this.expressionData = expressionData; + this.covariateData = covariateData; + this.sandwich = robustSE; + this.provideFullStats = provideFullStats; + + } + + + @Override + public InteractionAnalysisResults call() throws Exception { + + ArrayList> eQTLsTested = new ArrayList>(); + + int nrTotalCovariates = covariateData.nrRows; + + double[][] interactionZScoreMatrix = new double[eQTLsForSNP.size()][nrTotalCovariates]; + + double[][] SNPZResultMatrix = new double[eQTLsForSNP.size()][nrTotalCovariates]; + double[][] covariateZResultMatrix = new double[eQTLsForSNP.size()][nrTotalCovariates]; + double[][] maineffectZResultMatrix = new double[eQTLsForSNP.size()][nrTotalCovariates]; + double[][] interactionBeta = null; + + double[][] interactionSE = null; + double[][] mainBeta = null; + double[][] mainSE = null; + double[][] covariateBeta = null; + double[][] covariateSE = null; + int[][] nMatrix = new int[eQTLsForSNP.size()][nrTotalCovariates]; + double[][] rsquaredMatrix = new double[eQTLsForSNP.size()][nrTotalCovariates]; + if (provideFullStats) { + + interactionBeta = new double[eQTLsForSNP.size()][nrTotalCovariates]; + interactionSE = new double[eQTLsForSNP.size()][nrTotalCovariates]; + mainBeta = new double[eQTLsForSNP.size()][nrTotalCovariates]; + mainSE = new double[eQTLsForSNP.size()][nrTotalCovariates]; + covariateBeta = new double[eQTLsForSNP.size()][nrTotalCovariates]; + covariateSE = new double[eQTLsForSNP.size()][nrTotalCovariates]; + } + + //We are using a coding system that uses the minor allele. + //If allele2 is not the minor allele, change the sign of the results we will output. 
+ double signInteractionEffectDirection = 1; + if (eQTLSNPObj.getAlleles()[1] == eQTLSNPObj.getMinorAllele()) { + signInteractionEffectDirection = -1; + } + + String qcString = null; + Integer nrGenotypesCalled = null; + + org.apache.commons.math3.distribution.FDistribution fDist = null; + cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = null; + cern.jet.random.tdouble.StudentT tDistColt = null; + + OLSMultipleLinearRegression regressionFullWithInteraction = new OLSMultipleLinearRegression(); + + for (int e = 0; e < eQTLsForSNP.size(); e++) { + Pair eqtl = eQTLsForSNP.get(e); + String eQTLProbeName = eqtl.getRight(); + + eQTLsTested.add(eqtl); + + Integer eQTLProbeId = expressionData.getProbeToId().get(eQTLProbeName); + + double[] valsX = eQTLSNPObj.selectGenotypes(wgaId, true, true); // this is sorted on expression ID + double[] valsY = pcCorrectedExpressionData[eQTLProbeId]; //Expression level + + for (int covariate = 0; covariate < nrTotalCovariates; covariate++) { + double[] tmpVarCelCount = new double[valsY.length]; + + for (int i = 0; i < tmpVarCelCount.length; i++) { + String sampleName = expInds[i]; + Integer individualIdInCovariateData = covariateData.hashCols.get(sampleName); + if (individualIdInCovariateData != null) { + // presorting greatly speeds this stuff up + tmpVarCelCount[i] = covariateData.rawData[covariate][individualIdInCovariateData]; + } else { + tmpVarCelCount[i] = Double.NaN; + } + } + + //Check whether all the expression samples have a genotype and a cell count... + int nrCalled = 0; + for (int i = 0; i < wgaId.length; i++) { + if (wgaId[i] != -1 && !Double.isNaN(tmpVarCelCount[i]) && valsX[i] != -1) { + nrCalled++; + } + } + + // THIS WILL GIVE ERRONEOUS VALUES WHEN THERE ARE MISSING + // VALUES IN VALSY THE NEXT TIME THIS SNP IS TESTED!! + // this value is required for subsequent meta-analysis.. 
fix for altering sample sizes (take smallest size / omit missing values) + // in stead use the value for N that is now in the standard output. + double[] genotypesCalled = new double[nrCalled]; + if (qcString == null) { + qcString = eQTLSNPObj.getName() + "\t" + ChrAnnotation.parseByte(eQTLSNPObj.getChr()) + "\t" + eQTLSNPObj.getChrPos() + "\t" + BaseAnnot.toString(eQTLSNPObj.getAlleles()[0]) + "/" + BaseAnnot.toString(eQTLSNPObj.getAlleles()[1]) + "\t" + BaseAnnot.toString(eQTLSNPObj.getMinorAllele()) + "\t" + eQTLSNPObj.getMAF() + "\t" + eQTLSNPObj.getCR() + "\t" + eQTLSNPObj.getHWEP() + "\t" + genotypesCalled.length; + nrGenotypesCalled = genotypesCalled.length; + } else if (genotypesCalled.length != nrGenotypesCalled) { + + System.err.println("ERROR: the number of available values has changed. Does your gene expression data or cell count file contain missing values?"); + System.exit(0); + } + + double zScoreInteraction = 0; + double zScoreSNP = 0; + double zScoreCovariate = 0; + double mainZ = 0; + + double betaInteraction = 0; + double seInteraction = 0; + double betaSNP = 0; + double seSNP = 0; + double betaCovariate = 0; + double seCovariate = 0; + + double rsquared = 0; + + if (sandwich) { + RConnection rConnection = null; + // this code is very suboptimal and is here for validation purposes only anyway + try { + rConnection = new RConnection(); + rConnection.voidEval("library(sandwich)"); + } catch (RserveException ex) { + System.err.println(ex.getMessage()); + rConnection = null; + } + + if (rConnection == null) { + System.err.println("Error: using R connection but none found"); + return null; + } + + try { + if (rConnection.isConnected()) { + double[] olsY = new double[nrCalled]; //Ordinary least squares: Our gene expression + double[] olsX = new double[nrCalled]; + double[] covariateValues = new double[nrCalled]; //No interaction term, linear model: y ~ a * SNP + b * CellCount + c // double[][] olsXFullWithInteraction = new double[nrCalled][3]; //With 
interaction term, linear model: y ~ a * SNP + b * CellCount + c + d * SNP * CellCount - int itr = 0; - for (int s = 0; s < valsX.length; s++) { - double genotype = valsX[s]; - if (genotype != -1 && !Double.isNaN(tmpVarCelCount[s])) { - if (signInteractionEffectDirection == -1) { - genotype = 2 - genotype; - } - covariateValues[itr] = tmpVarCelCount[s]; - olsY[itr] = valsY[s]; - olsX[itr] = genotype; - itr++; - } - } - - double corr = JSci.maths.ArrayMath.correlation(olsX, olsY); - mainZ = Correlation.convertCorrelationToZScore(olsX.length, corr); - - if (sandwich) { - rConnection.assign("y", olsY); - rConnection.assign("x", olsX); - rConnection.assign("z", covariateValues); - rConnection.voidEval("interaction <- x*z"); - rConnection.voidEval("m <- lm(y ~ x + z + interaction)"); - rConnection.voidEval("modelsummary <- summary(m)"); - - rConnection.voidEval("m2 <- sqrt(diag(vcovHC(m, type = 'HC0')))"); // robust covariance model - - if (tDistColt == null) { - randomEngine = new cern.jet.random.tdouble.engine.DRand(); - tDistColt = new cern.jet.random.tdouble.StudentT(olsY.length - 4, randomEngine); - } - - betaInteraction = rConnection.eval("modelsummary$coefficients[4,1]").asDouble(); - seInteraction = rConnection.eval("as.numeric(m2[4])").asDouble(); - betaSNP = rConnection.eval("modelsummary$coefficients[2,1]").asDouble(); - seSNP = rConnection.eval("modelsummary$coefficients[2,2]").asDouble(); - betaCovariate = rConnection.eval("modelsummary$coefficients[3,1]").asDouble(); - seCovariate = rConnection.eval("modelsummary$coefficients[3,2]").asDouble(); - rsquared = rConnection.eval("modelsummary$r.squared").asDouble(); - } else { - // use structural equation modeling (errors-in-variables compensation) - - // define model - // z-transform, otherwise the used covariances in lavaan may be wrong - double[] olsYZ = Normalization.standardNormalize(valsY); - double[] olsXZ = Normalization.standardNormalize(olsX); - double[] covariatesZ = 
Normalization.standardNormalize(covariateValues); - double[] interactionVals = new double[covariatesZ.length]; - for (int i = 0; i < valsY.length; i++) { - interactionVals[i] = olsX[i] * covariateValues[i]; - } - double[] interactionZ = Normalization.standardNormalize(interactionVals); - - System.out.println("Var Y: " + Descriptives.variance(olsY) + "\t" + Descriptives.mean(olsY)); - System.out.println("Var gen: " + Descriptives.variance(olsX) + "\t" + Descriptives.mean(olsX)); - System.out.println("Var cov: " + Descriptives.variance(covariateValues) + "\t" + Descriptives.mean(covariateValues)); - System.out.println("Var int: " + Descriptives.variance(interactionVals) + "\t" + Descriptives.mean(interactionVals)); - - System.out.println(""); - - if (Descriptives.variance(olsY) > 1E-5 && Descriptives.variance(olsX) > 1E-5) { - rConnection.assign("expression", olsY); - rConnection.assign("genotype", olsX); - rConnection.assign("covariate", covariateValues); - rConnection.assign("interaction", interactionVals); - rConnection.voidEval("df <- data.frame(expression, genotype, covariate, interaction)"); - - String model = "model <- 'expression ~ genotype\n" // + latentCovariate + latentInteraction\n" - - // + "latentInteraction =~ interaction\n" - + "expression ~~ genotype\n" - // + "covariate ~~ interaction\n" - // + "genotype ~~ interaction\n" - + "'"; - - rConnection.voidEval(model); - rConnection.voidEval("fit <- sem(model, data=df)"); - rConnection.voidEval("modelsummary <- summary(fit)"); - System.exit(0); - } - -// String[] output = rConnection.eval("modelsummary").asStrings(); -// for (String s : output) { -// System.out.println(s); -// -// } - } - rConnection.close(); - } else { - System.err.println("ERROR: R is not connected."); - } - - } catch (REngineException ex) { - System.err.println(ex.getMessage()); - } catch (REXPMismatchException ex) { - System.err.println(ex.getMessage()); - } - - } else { - - //Fill arrays with data in order to be able to perform the 
ordinary least squares analysis: - double[] olsY = new double[nrCalled]; //Ordinary least squares: Our gene expression - - double[][] olsX = new double[nrCalled][2]; //No interaction term, linear model: y ~ a * SNP + b * CellCount + c - double[][] olsXFullWithInteraction = new double[nrCalled][3]; //With interaction term, linear model: y ~ a * SNP + b * CellCount + c + d * SNP * CellCount - int itr = 0; - for (int s = 0; s < valsX.length; s++) { - double genotype = valsX[s]; - if (genotype != -1 && !Double.isNaN(tmpVarCelCount[s])) { - if (signInteractionEffectDirection == -1) { - genotype = 2 - genotype; - } - genotypesCalled[itr] = genotype; - olsY[itr] = valsY[s]; - olsX[itr][0] = genotype; - olsXFullWithInteraction[itr][0] = genotype; - olsX[itr][1] = tmpVarCelCount[s]; - olsXFullWithInteraction[itr][1] = tmpVarCelCount[s]; - olsXFullWithInteraction[itr][2] = olsXFullWithInteraction[itr][0] * olsXFullWithInteraction[itr][1]; - itr++; - } - } + int itr = 0; + for (int s = 0; s < valsX.length; s++) { + double genotype = valsX[s]; + if (genotype != -1 && !Double.isNaN(tmpVarCelCount[s])) { + if (signInteractionEffectDirection == -1) { + genotype = 2 - genotype; + } + covariateValues[itr] = tmpVarCelCount[s]; + olsY[itr] = valsY[s]; + olsX[itr] = genotype; + itr++; + } + } + + double corr = JSci.maths.ArrayMath.correlation(olsX, olsY); + mainZ = Correlation.convertCorrelationToZScore(olsX.length, corr); + + + rConnection.assign("y", olsY); + rConnection.assign("x", olsX); + rConnection.assign("z", covariateValues); + rConnection.voidEval("interaction <- x*z"); + rConnection.voidEval("m <- lm(y ~ x + z + interaction)"); + rConnection.voidEval("modelsummary <- summary(m)"); + + rConnection.voidEval("m2 <- sqrt(diag(vcovHC(m, type = 'HC0')))"); // robust covariance model + + if (tDistColt == null) { + randomEngine = new cern.jet.random.tdouble.engine.DRand(); + tDistColt = new cern.jet.random.tdouble.StudentT(olsY.length - 4, randomEngine); + } + + betaInteraction = 
rConnection.eval("modelsummary$coefficients[4,1]").asDouble(); + seInteraction = rConnection.eval("as.numeric(m2[4])").asDouble(); + betaSNP = rConnection.eval("modelsummary$coefficients[2,1]").asDouble(); + seSNP = rConnection.eval("modelsummary$coefficients[2,2]").asDouble(); + betaCovariate = rConnection.eval("modelsummary$coefficients[3,1]").asDouble(); + seCovariate = rConnection.eval("modelsummary$coefficients[3,2]").asDouble(); + rsquared = rConnection.eval("modelsummary$r.squared").asDouble(); + + rConnection.close(); + } else { + System.err.println("ERROR: R is not connected."); + } + + } catch (REngineException ex) { + System.err.println(ex.getMessage()); + } catch (REXPMismatchException ex) { + System.err.println(ex.getMessage()); + } + + } else { + + //Fill arrays with data in order to be able to perform the ordinary least squares analysis: + double[] olsY = new double[nrCalled]; //Ordinary least squares: Our gene expression + + double[][] olsX = new double[nrCalled][2]; //No interaction term, linear model: y ~ a * SNP + b * CellCount + c + double[][] olsXFullWithInteraction = new double[nrCalled][3]; //With interaction term, linear model: y ~ a * SNP + b * CellCount + c + d * SNP * CellCount + int itr = 0; + for (int s = 0; s < valsX.length; s++) { + double genotype = valsX[s]; + if (genotype != -1 && !Double.isNaN(tmpVarCelCount[s])) { + if (signInteractionEffectDirection == -1) { + genotype = 2 - genotype; + } + genotypesCalled[itr] = genotype; + olsY[itr] = valsY[s]; + olsX[itr][0] = genotype; + olsXFullWithInteraction[itr][0] = genotype; + olsX[itr][1] = tmpVarCelCount[s]; + olsXFullWithInteraction[itr][1] = tmpVarCelCount[s]; + olsXFullWithInteraction[itr][2] = olsXFullWithInteraction[itr][0] * olsXFullWithInteraction[itr][1]; + itr++; + } + } + // regression.newSampleData(olsY, olsX); - regressionFullWithInteraction.newSampleData(olsY, olsXFullWithInteraction); + regressionFullWithInteraction.newSampleData(olsY, olsXFullWithInteraction); - // not 
sure if this is needed right now, but I will keep it in for later use. + // not sure if this is needed right now, but I will keep it in for later use. // double rss1 = regression.calculateResidualSumOfSquares(); // double rss2 = regressionFullWithInteraction.calculateResidualSumOfSquares(); // double anovaF = ((rss1 - rss2) / (3 - 2)) / (rss2 / (olsY.length - 3)); @@ -340,141 +288,141 @@ public InteractionAnalysisResults call() throws Exception { // } // } catch (Exception err) { // } - if (tDistColt == null) { - randomEngine = new cern.jet.random.tdouble.engine.DRand(); - tDistColt = new cern.jet.random.tdouble.StudentT(olsY.length - 4, randomEngine); - } - - // double intersect = regressionParameters[0]; - double corr = JSci.maths.ArrayMath.correlation(genotypesCalled, olsY); - mainZ = Correlation.convertCorrelationToZScore(genotypesCalled.length, corr); - - // Get the regression parameters and R-square value and print it. - try { - double[] regressionParameters = regressionFullWithInteraction.estimateRegressionParameters(); - double[] regressionStandardErrors = regressionFullWithInteraction.estimateRegressionParametersStandardErrors(); - - betaInteraction = regressionParameters[3]; - seInteraction = regressionStandardErrors[3]; - - // Get the regression parameters and R-square value and print it. - betaSNP = regressionParameters[1]; - seSNP = regressionStandardErrors[1]; - - betaCovariate = regressionParameters[2]; - seCovariate = regressionStandardErrors[2]; - - rsquared = regressionFullWithInteraction.calculateRSquared(); - - } catch (SingularMatrixException ex) { - betaInteraction = Double.NaN; - seInteraction = Double.NaN; - - // Get the regression parameters and R-square value and print it. 
- betaSNP = Double.NaN; - seSNP = Double.NaN; - - betaCovariate = Double.NaN; - seCovariate = Double.NaN; - - rsquared = Double.NaN; - } - - } - - Pair pair = convertBetaToP(betaInteraction, seInteraction, tDistColt); - double pValueInteraction = pair.getLeft(); - zScoreInteraction = pair.getRight(); - - pair = convertBetaToP(betaSNP, seSNP, tDistColt); - double pValueSNP = pair.getLeft(); - zScoreSNP = pair.getRight(); - - // Get the regression parameters and R-square value and print it. - pair = convertBetaToP(betaCovariate, seCovariate, tDistColt); - double pValueCovariate = pair.getLeft(); - zScoreCovariate = pair.getRight(); - - interactionZScoreMatrix[e][covariate] = zScoreInteraction; - SNPZResultMatrix[e][covariate] = zScoreSNP; - covariateZResultMatrix[e][covariate] = zScoreCovariate; - maineffectZResultMatrix[e][covariate] = mainZ; - nMatrix[e][covariate] = nrCalled; - rsquaredMatrix[e][covariate] = rsquared; - - // flip the covariate effect according to the main effect - if (provideFullStats) { - interactionBeta[e][covariate] = betaInteraction; - interactionSE[e][covariate] = seInteraction; - mainBeta[e][covariate] = betaSNP; - mainSE[e][covariate] = seSNP; - covariateBeta[e][covariate] = betaCovariate; - covariateSE[e][covariate] = seCovariate; - - } - } - } - - eQTLSNPObj.clearGenotypes(); - eQTLSNPObj = null; - - if (provideFullStats) { - - return new InteractionAnalysisResults( - qcString, - eQTLsTested, - interactionZScoreMatrix, - SNPZResultMatrix, - covariateZResultMatrix, - maineffectZResultMatrix, - interactionBeta, - interactionSE, - mainBeta, - mainSE, - covariateBeta, - covariateSE, - nMatrix, - rsquaredMatrix); - } else { - return new InteractionAnalysisResults( - qcString, - eQTLsTested, - interactionZScoreMatrix, - SNPZResultMatrix, - covariateZResultMatrix, - maineffectZResultMatrix, - nMatrix, - rsquaredMatrix); - - } - - } - - private Pair convertBetaToP(double beta, double se, StudentT tDistColt) { - - if (Double.isNaN(beta)) { - 
return NAN_PAIR; - } - - double t = beta / se; - double p = 1; - double z = 0; - if (t < 0) { - p = tDistColt.cdf(t); - if (p < 2.0E-323) { - p = 2.0E-323; - - } - z = cern.jet.stat.Probability.normalInverse(p); - } else { - p = tDistColt.cdf(-t); - if (p < 2.0E-323) { - p = 2.0E-323; - - } - z = -cern.jet.stat.Probability.normalInverse(p); - } - return new Pair(p, z); - } + if (tDistColt == null) { + randomEngine = new cern.jet.random.tdouble.engine.DRand(); + tDistColt = new cern.jet.random.tdouble.StudentT(olsY.length - 4, randomEngine); + } + + // double intersect = regressionParameters[0]; + double corr = JSci.maths.ArrayMath.correlation(genotypesCalled, olsY); + mainZ = Correlation.convertCorrelationToZScore(genotypesCalled.length, corr); + + // Get the regression parameters and R-square value and print it. + try { + double[] regressionParameters = regressionFullWithInteraction.estimateRegressionParameters(); + double[] regressionStandardErrors = regressionFullWithInteraction.estimateRegressionParametersStandardErrors(); + + betaInteraction = regressionParameters[3]; + seInteraction = regressionStandardErrors[3]; + + // Get the regression parameters and R-square value and print it. + betaSNP = regressionParameters[1]; + seSNP = regressionStandardErrors[1]; + + betaCovariate = regressionParameters[2]; + seCovariate = regressionStandardErrors[2]; + + rsquared = regressionFullWithInteraction.calculateRSquared(); + + } catch (SingularMatrixException ex) { + betaInteraction = Double.NaN; + seInteraction = Double.NaN; + + // Get the regression parameters and R-square value and print it. 
+ betaSNP = Double.NaN; + seSNP = Double.NaN; + + betaCovariate = Double.NaN; + seCovariate = Double.NaN; + + rsquared = Double.NaN; + } + + } + + Pair pair = convertBetaToP(betaInteraction, seInteraction, tDistColt); + double pValueInteraction = pair.getLeft(); + zScoreInteraction = pair.getRight(); + + pair = convertBetaToP(betaSNP, seSNP, tDistColt); + double pValueSNP = pair.getLeft(); + zScoreSNP = pair.getRight(); + + // Get the regression parameters and R-square value and print it. + pair = convertBetaToP(betaCovariate, seCovariate, tDistColt); + double pValueCovariate = pair.getLeft(); + zScoreCovariate = pair.getRight(); + + interactionZScoreMatrix[e][covariate] = zScoreInteraction; + SNPZResultMatrix[e][covariate] = zScoreSNP; + covariateZResultMatrix[e][covariate] = zScoreCovariate; + maineffectZResultMatrix[e][covariate] = mainZ; + nMatrix[e][covariate] = nrCalled; + rsquaredMatrix[e][covariate] = rsquared; + + // flip the covariate effect according to the main effect + if (provideFullStats) { + interactionBeta[e][covariate] = betaInteraction; + interactionSE[e][covariate] = seInteraction; + mainBeta[e][covariate] = betaSNP; + mainSE[e][covariate] = seSNP; + covariateBeta[e][covariate] = betaCovariate; + covariateSE[e][covariate] = seCovariate; + + } + } + } + + eQTLSNPObj.clearGenotypes(); + eQTLSNPObj = null; + + if (provideFullStats) { + + return new InteractionAnalysisResults( + qcString, + eQTLsTested, + interactionZScoreMatrix, + SNPZResultMatrix, + covariateZResultMatrix, + maineffectZResultMatrix, + interactionBeta, + interactionSE, + mainBeta, + mainSE, + covariateBeta, + covariateSE, + nMatrix, + rsquaredMatrix); + } else { + return new InteractionAnalysisResults( + qcString, + eQTLsTested, + interactionZScoreMatrix, + SNPZResultMatrix, + covariateZResultMatrix, + maineffectZResultMatrix, + nMatrix, + rsquaredMatrix); + + } + + } + + private Pair convertBetaToP(double beta, double se, StudentT tDistColt) { + + if (Double.isNaN(beta)) { + 
return NAN_PAIR; + } + + double t = beta / se; + double p = 1; + double z = 0; + if (t < 0) { + p = tDistColt.cdf(t); + if (p < 2.0E-323) { + p = 2.0E-323; + + } + z = cern.jet.stat.Probability.normalInverse(p); + } else { + p = tDistColt.cdf(-t); + if (p < 2.0E-323) { + p = 2.0E-323; + + } + z = -cern.jet.stat.Probability.normalInverse(p); + } + return new Pair(p, z); + } } diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/normalization/Normalizer.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/normalization/Normalizer.java index 5ccf49e0b..6243337d4 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/normalization/Normalizer.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/normalization/Normalizer.java @@ -1,11 +1,5 @@ package eqtlmappingpipeline.normalization; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Set; import org.apache.commons.math3.stat.ranking.NaNStrategy; import org.apache.commons.math3.stat.ranking.NaturalRanking; import org.apache.commons.math3.stat.ranking.TiesStrategy; @@ -15,8 +9,8 @@ import umcg.genetica.io.text.TextFile; import umcg.genetica.math.PCA; import umcg.genetica.math.matrix.DoubleMatrixDataset; -import umcg.genetica.math.matrix.MatrixTools; import umcg.genetica.math.matrix.MatrixHandling; +import umcg.genetica.math.matrix.MatrixTools; import umcg.genetica.math.stats.Descriptives; import umcg.genetica.math.stats.Log2Transform; import umcg.genetica.math.stats.QuantileNormalization; @@ -25,340 +19,350 @@ import umcg.genetica.math.stats.concurrent.ConcurrentCovariation; import umcg.genetica.methylation.ConvertBetaAndMvalues; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; + /** - * * @author harmjan */ public class Normalizer { - 
//nrIntermediatePCAsOverSamplesToRemoveToOutput = 5 - //nrPCAsOverSamplesToRemove = 100 - public void normalize(String expressionFile, String probeIncludeList, String sampleIncludeList, int nrPCAsOverSamplesToRemove, int nrIntermediatePCAsOverSamplesToRemoveToOutput, String covariatesToRemove, boolean orthogonalizecovariates, String outdir, - boolean runQQNorm, boolean runLog2Transform, boolean runMTransform, boolean runCenterScale, boolean runPCA, boolean adjustCovariates, boolean forceMissingValues, boolean forceReplacementOfMissingValues, - boolean forceReplacementOfMissingValues2, boolean treatZerosAsNulls, boolean forceNormalDistribution) throws IOException { - - System.out.println("Running normalization."); - if (outdir != null) { - outdir = Gpio.formatAsDirectory(outdir); - Gpio.createDir(outdir); - } else { - if (Gpio.getParentDir(expressionFile) == null) { - //This happens for relative paths in current dir - outdir = ""; - } else { - outdir = Gpio.getParentDir(expressionFile) + Gpio.getFileSeparator(); - } - - } - - String parentDir = Gpio.getParentDir(expressionFile); - String expressionFileName = Gpio.getFileName(expressionFile); - if (parentDir == null) { - parentDir = ""; - } - - if (expressionFileName.contains(".txt.gz")) { - expressionFileName = expressionFileName.replaceAll(".txt.gz", ""); - } else { - expressionFileName = expressionFileName.replaceAll(".txt", ""); - } - - String outputFileNamePrefix = outdir + expressionFileName; - - - Set s = null; - if(sampleIncludeList != null){ - TextFile t = new TextFile(sampleIncludeList, TextFile.R); - s = new HashSet(t.readAsArrayList()); - } - Set p = null; - if(probeIncludeList != null){ - TextFile t = new TextFile(probeIncludeList, TextFile.R); - p = new HashSet(t.readAsArrayList()); - } - DoubleMatrixDataset dataset = null; - - if(s != null || p!=null){ - dataset = new DoubleMatrixDataset(expressionFile, p, s); - //Check if samples are correclty loaded. 
- boolean breakAfterCheck = false; - if(s!=null){ - outputFileNamePrefix = outputFileNamePrefix + ".SampleSelection"; - HashSet tmpNames = new HashSet(); - tmpNames.addAll(dataset.colObjects); - tmpNames.addAll(s); - HashSet missingNames = new HashSet(); - HashSet extraNames = new HashSet(); - for(String colName : tmpNames){ - if(!s.contains(colName)){ - extraNames.add(colName); - } - if(!dataset.colObjects.contains(colName)) { - missingNames.add(colName); - } - } - if(!missingNames.isEmpty()){ - System.err.println("\nMatrix does not contains desired columns, please check filtering list."); - System.err.println(missingNames.toString()+"\n"); - breakAfterCheck = true; - } else if(!extraNames.isEmpty()){ - System.err.println("\nMatrix contains unwanted columns, please check filtering list."); - System.err.println(extraNames.toString()+"\n"); - breakAfterCheck = true; - } - } - //Check if probes are correclty loaded. - if(p!=null){ - outputFileNamePrefix = outputFileNamePrefix + ".ProbeSelection"; - HashSet tmpNames = new HashSet(); - tmpNames.addAll(dataset.rowObjects); - tmpNames.addAll(p); - HashSet missingNames = new HashSet(); - HashSet extraNames = new HashSet(); - for(String rowName : tmpNames){ - if(!p.contains(rowName)){ - extraNames.add(rowName); - } - if(!dataset.rowObjects.contains(rowName)) { - missingNames.add(rowName); - } - } - if(!missingNames.isEmpty()){ - System.err.println("\nMatrix does not contains desired rows, please check filtering list."); - System.err.println(missingNames.toString()+"\n"); - breakAfterCheck = true; - } else if(!extraNames.isEmpty()){ - System.err.println("\nMatrix contains unwanted rows, please check filtering list."); - System.err.println(extraNames.toString()+"\n"); - breakAfterCheck = true; - } - } - + //nrIntermediatePCAsOverSamplesToRemoveToOutput = 5 + //nrPCAsOverSamplesToRemove = 100 + public void normalize(String expressionFile, String probeIncludeList, String sampleIncludeList, int nrPCAsOverSamplesToRemove, int 
nrIntermediatePCAsOverSamplesToRemoveToOutput, String covariatesToRemove, boolean orthogonalizecovariates, String outdir, + boolean runQQNorm, boolean runLog2Transform, boolean runMTransform, boolean runCenterScale, boolean runPCA, boolean adjustCovariates, boolean forceMissingValues, boolean forceReplacementOfMissingValues, + boolean forceReplacementOfMissingValues2, boolean treatZerosAsNulls, boolean forceNormalDistribution) throws IOException { + + System.out.println("Running normalization."); + if (outdir != null) { + outdir = Gpio.formatAsDirectory(outdir); + Gpio.createDir(outdir); + } else { + if (Gpio.getParentDir(expressionFile) == null) { + //This happens for relative paths in current dir + outdir = ""; + } else { + outdir = Gpio.getParentDir(expressionFile) + Gpio.getFileSeparator(); + } + + } + + String parentDir = Gpio.getParentDir(expressionFile); + String expressionFileName = Gpio.getFileName(expressionFile); + if (parentDir == null) { + parentDir = ""; + } + + if (expressionFileName.contains(".txt.gz")) { + expressionFileName = expressionFileName.replaceAll(".txt.gz", ""); + } else { + expressionFileName = expressionFileName.replaceAll(".txt", ""); + } + + String outputFileNamePrefix = outdir + expressionFileName; + + + Set s = null; + if (sampleIncludeList != null) { + TextFile t = new TextFile(sampleIncludeList, TextFile.R); + s = new HashSet(t.readAsArrayList()); + } + Set p = null; + if (probeIncludeList != null) { + TextFile t = new TextFile(probeIncludeList, TextFile.R); + p = new HashSet(t.readAsArrayList()); + } + DoubleMatrixDataset dataset = null; + + if (s != null || p != null) { + dataset = new DoubleMatrixDataset(expressionFile, p, s); + //Check if samples are correclty loaded. 
+ boolean breakAfterCheck = false; + if (s != null) { + outputFileNamePrefix = outputFileNamePrefix + ".SampleSelection"; + HashSet tmpNames = new HashSet(); + tmpNames.addAll(dataset.colObjects); + tmpNames.addAll(s); + HashSet missingNames = new HashSet(); + HashSet extraNames = new HashSet(); + for (String colName : tmpNames) { + if (!s.contains(colName)) { + extraNames.add(colName); + } + if (!dataset.colObjects.contains(colName)) { + missingNames.add(colName); + } + } + if (!missingNames.isEmpty()) { + System.err.println("\nMatrix does not contains desired columns, please check filtering list."); + System.err.println(missingNames.toString() + "\n"); + breakAfterCheck = true; + } else if (!extraNames.isEmpty()) { + System.err.println("\nMatrix contains unwanted columns, please check filtering list."); + System.err.println(extraNames.toString() + "\n"); + breakAfterCheck = true; + } + } + //Check if probes are correclty loaded. + if (p != null) { + outputFileNamePrefix = outputFileNamePrefix + ".ProbeSelection"; + HashSet tmpNames = new HashSet(); + tmpNames.addAll(dataset.rowObjects); + tmpNames.addAll(p); + HashSet missingNames = new HashSet(); + HashSet extraNames = new HashSet(); + for (String rowName : tmpNames) { + if (!p.contains(rowName)) { + extraNames.add(rowName); + } + if (!dataset.rowObjects.contains(rowName)) { + missingNames.add(rowName); + } + } + if (!missingNames.isEmpty()) { + System.err.println("\nMatrix does not contains desired rows, please check filtering list."); + System.err.println(missingNames.toString() + "\n"); + breakAfterCheck = true; + } else if (!extraNames.isEmpty()) { + System.err.println("\nMatrix contains unwanted rows, please check filtering list."); + System.err.println(extraNames.toString() + "\n"); + breakAfterCheck = true; + } + } + // if(breakAfterCheck){ // System.exit(-1); // } - - dataset.save(outputFileNamePrefix + ".txt.gz"); - } else { - dataset = new DoubleMatrixDataset(expressionFile); - } - - - // check for 
probes with zero variance, if there > 3 samples in the dataset - if (dataset.nrCols > 3) { - outputFileNamePrefix = removeProbesWithZeroVariance(dataset, outputFileNamePrefix); - } - - if (runQQNorm) { - outputFileNamePrefix = quantileNormalize(dataset, outputFileNamePrefix, forceMissingValues, forceReplacementOfMissingValues, forceReplacementOfMissingValues2, treatZerosAsNulls); - } - if (runLog2Transform) { - outputFileNamePrefix = log2transform(dataset, outputFileNamePrefix); - } - if (runMTransform) { - outputFileNamePrefix = mValueTransform(dataset, outputFileNamePrefix); - } - if (runCenterScale) { - outputFileNamePrefix = centerAndScale(dataset, outputFileNamePrefix); - } - - if (adjustCovariates && covariatesToRemove != null) { - outputFileNamePrefix = adjustCovariates(dataset, outputFileNamePrefix, covariatesToRemove, orthogonalizecovariates, 1E-10); - } - - if (runPCA) { - ConcurrentCorrelation c = new ConcurrentCorrelation(2); - double[][] correlationMatrix = c.pairwiseCorrelation(dataset.getRawDataTransposed()); - Pair, DoubleMatrixDataset> PCAResults = calculatePCA(dataset, correlationMatrix, outputFileNamePrefix, null); - if(nrPCAsOverSamplesToRemove != 0 || nrIntermediatePCAsOverSamplesToRemoveToOutput != 0){ - correctDataForPCs(dataset, outputFileNamePrefix, nrPCAsOverSamplesToRemove, nrIntermediatePCAsOverSamplesToRemoveToOutput, PCAResults.getLeft(), PCAResults.getRight()); - } - } - - if(forceNormalDistribution){ + + dataset.save(outputFileNamePrefix + ".txt.gz"); + } else { + dataset = new DoubleMatrixDataset(expressionFile); + } + + + // check for probes with zero variance, if there > 3 samples in the dataset + if (dataset.nrCols > 3) { + outputFileNamePrefix = removeProbesWithZeroVariance(dataset, outputFileNamePrefix); + } + + if (runQQNorm) { + outputFileNamePrefix = quantileNormalize(dataset, outputFileNamePrefix, forceMissingValues, forceReplacementOfMissingValues, forceReplacementOfMissingValues2, treatZerosAsNulls); + } + if 
(runLog2Transform) { + outputFileNamePrefix = log2transform(dataset, outputFileNamePrefix); + } + if (runMTransform) { + outputFileNamePrefix = mValueTransform(dataset, outputFileNamePrefix); + } + if (runCenterScale) { + outputFileNamePrefix = centerAndScale(dataset, outputFileNamePrefix); + } + + if (adjustCovariates && covariatesToRemove != null) { + outputFileNamePrefix = adjustCovariates(dataset, outputFileNamePrefix, covariatesToRemove, orthogonalizecovariates, 1E-10); + } + + if (runPCA) { + ConcurrentCorrelation c = new ConcurrentCorrelation(2); + double[][] correlationMatrix = c.pairwiseCorrelation(dataset.getRawDataTransposed()); + Pair, DoubleMatrixDataset> PCAResults = calculatePCA(dataset, correlationMatrix, outputFileNamePrefix, null); + if (nrPCAsOverSamplesToRemove != 0 || nrIntermediatePCAsOverSamplesToRemoveToOutput != 0) { + correctDataForPCs(dataset, outputFileNamePrefix, nrPCAsOverSamplesToRemove, nrIntermediatePCAsOverSamplesToRemoveToOutput, PCAResults.getLeft(), PCAResults.getRight()); + } + } + + if (forceNormalDistribution) { outputFileNamePrefix = forceNormalDistribution(dataset, outputFileNamePrefix); } - } + } + + + NaturalRanking ranking = new NaturalRanking(NaNStrategy.FAILED, TiesStrategy.AVERAGE); + + public double[] forceNormal(double[] data) { + double[] rankedValues = ranking.rank(data); + for (int s = 0; s < data.length; s++) { + //Convert the rank to a proportion, with range <0, 1> + double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); + //Convert the pValue to a Z-Score: + data[s] = cern.jet.stat.Probability.normalInverse(pValue); + } + return data; + } + - public String forceNormalDistribution(DoubleMatrixDataset dataset, String fileNamePrefix) throws IOException{ + public String forceNormalDistribution(DoubleMatrixDataset dataset, String fileNamePrefix) throws IOException { double[][] rawData = dataset.getRawData(); - - NaturalRanking ranking = new NaturalRanking(NaNStrategy.FAILED, 
TiesStrategy.AVERAGE); - for (int p = 0; p < dataset.rowObjects.size(); p++) { - - double[] rankedValues = ranking.rank(rawData[p]); - - for (int s = 0; s < dataset.colObjects.size(); s++) { - //Convert the rank to a proportion, with range <0, 1> - double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); - //Convert the pValue to a Z-Score: - rawData[p][s] = cern.jet.stat.Probability.normalInverse(pValue); - } - } - + rawData[p] = forceNormal(rawData[p]); + } + + DoubleMatrixDataset datasetNormalized = new DoubleMatrixDataset(rawData, dataset.rowObjects, dataset.colObjects); + fileNamePrefix += ".ForcedNormal"; + datasetNormalized.save(fileNamePrefix + ".txt.gz"); + return fileNamePrefix; + + + } + + public String quantileNormalize(DoubleMatrixDataset dataset, String fileNamePrefix, boolean forceMissingValues, boolean forceReplacementOfMissingValues, boolean forceReplacementOfMissingValues2, boolean treatZerosAsNulls) throws IOException { + double[][] rawData = dataset.getRawData(); + + boolean dataContainsNulls = MatrixTools.containsNaNs(rawData); + + if (treatZerosAsNulls && dataContainsNulls) { + System.out.println("Warning: Data already contains nulls before treating zeros as nulls.\n Later on it will not be possible to distinguish between those two!"); + } + if (treatZerosAsNulls) { + MatrixHandling.ReplaceZerosToNull(rawData); + dataContainsNulls = MatrixTools.containsNaNs(rawData); + } + + if (!dataContainsNulls) { + QuantileNormalization.quantilenormalize(rawData); + } else if (forceReplacementOfMissingValues) { + QuantileNormalization.QuantileNormAdressingNaValuesAfterInitialQN(dataset, false, false, false); + } else if (forceReplacementOfMissingValues2) { + QuantileNormalization.QuantileNormAdressingNaValuesAfterInitialQN(dataset, false, true, false); + } else if (forceMissingValues && treatZerosAsNulls) { + QuantileNormalization.QuantileNormAdressingNaValuesAfterInitialQN(dataset, true, false, true); + } else if 
(forceMissingValues) { + QuantileNormalization.QuantileNormAdressingNaValuesAfterInitialQN(dataset, true, false, false); + } else { + System.out.println("Warning: Your data contains missing values and missing value treatment is not selected.\n" + + "If desired please supply additional flag: --forceMissingValues or --forceReplacementOfMissingValues"); + System.exit(0); + } + + if (treatZerosAsNulls) { + MatrixHandling.ReplaceNullToZero(rawData); + } + + DoubleMatrixDataset datasetNormalized = new DoubleMatrixDataset(rawData, dataset.rowObjects, dataset.colObjects); + fileNamePrefix += ".QuantileNormalized"; + datasetNormalized.save(fileNamePrefix + ".txt.gz"); + + return fileNamePrefix; + } + + public String log2transform(DoubleMatrixDataset dataset, String fileNamePrefix) throws IOException { + double[][] rawData = dataset.getRawData(); + Log2Transform.log2transform(rawData); + DoubleMatrixDataset datasetNormalized = new DoubleMatrixDataset(rawData, dataset.rowObjects, dataset.colObjects); + fileNamePrefix += ".Log2Transformed"; + datasetNormalized.save(fileNamePrefix + ".txt.gz"); + return fileNamePrefix; + } + + public String mValueTransform(DoubleMatrixDataset dataset, String fileNamePrefix) throws IOException { + double[][] rawData = dataset.getRawData(); + ConvertBetaAndMvalues.transformToMvalue(rawData); + DoubleMatrixDataset datasetNormalized = new DoubleMatrixDataset(rawData, dataset.rowObjects, dataset.colObjects); + fileNamePrefix += ".MvalueTransformed"; + datasetNormalized.save(fileNamePrefix + ".txt.gz"); + return fileNamePrefix; + } + + public String centerAndScale(DoubleMatrixDataset dataset, String fileNamePrefix) throws IOException { + double[][] rawData = dataset.getRawData(); + System.out.println("Standardizing probe mean"); + for (int p = 0; p < dataset.rowObjects.size(); p++) { + double mean = Descriptives.mean(rawData[p]); + //double stdev = Math.sqrt(Descriptives.variance(rawData[p], mean)); + for (int s = 0; s < dataset.colObjects.size(); 
s++) { + rawData[p][s] -= mean; + } + } + + dataset.setRawData(rawData); + fileNamePrefix += ".ProbesCentered"; + dataset.save(fileNamePrefix + ".txt.gz"); + + System.out.println("- Standardizing sample mean and standard deviation"); + for (int s = 0; s < dataset.colObjects.size(); s++) { + double[] vals = new double[dataset.rowObjects.size()]; + for (int p = 0; p < dataset.rowObjects.size(); p++) { + vals[p] = dataset.getRawData()[p][s]; + } + double mean = Descriptives.mean(vals); + for (int p = 0; p < dataset.rowObjects.size(); p++) { + vals[p] -= mean; + } + double var = Descriptives.variance(vals, mean); + double stdev = Math.sqrt(var); + for (int p = 0; p < dataset.rowObjects.size(); p++) { + dataset.getRawData()[p][s] = (vals[p] / stdev); + } + } + DoubleMatrixDataset datasetNormalized = new DoubleMatrixDataset(rawData, dataset.rowObjects, dataset.colObjects); - fileNamePrefix += ".ForcedNormal"; - datasetNormalized.save(fileNamePrefix + ".txt.gz"); - return fileNamePrefix; - - + fileNamePrefix += ".SamplesZTransformed"; + datasetNormalized.save(fileNamePrefix + ".txt.gz"); + return fileNamePrefix; } - - public String quantileNormalize(DoubleMatrixDataset dataset, String fileNamePrefix, boolean forceMissingValues, boolean forceReplacementOfMissingValues, boolean forceReplacementOfMissingValues2, boolean treatZerosAsNulls) throws IOException { - double[][] rawData = dataset.getRawData(); - - boolean dataContainsNulls = MatrixTools.containsNaNs(rawData); - - if(treatZerosAsNulls && dataContainsNulls){ - System.out.println("Warning: Data already contains nulls before treating zeros as nulls.\n Later on it will not be possible to distinguish between those two!"); - } - if(treatZerosAsNulls){ - MatrixHandling.ReplaceZerosToNull(rawData); - dataContainsNulls = MatrixTools.containsNaNs(rawData); - } - - if (!dataContainsNulls) { - QuantileNormalization.quantilenormalize(rawData); - } else if(forceReplacementOfMissingValues){ - 
QuantileNormalization.QuantileNormAdressingNaValuesAfterInitialQN(dataset, false, false, false); - } else if(forceReplacementOfMissingValues2){ - QuantileNormalization.QuantileNormAdressingNaValuesAfterInitialQN(dataset, false, true, false); - } else if(forceMissingValues && treatZerosAsNulls){ - QuantileNormalization.QuantileNormAdressingNaValuesAfterInitialQN(dataset, true, false, true); - } else if(forceMissingValues){ - QuantileNormalization.QuantileNormAdressingNaValuesAfterInitialQN(dataset, true, false, false); - } else { - System.out.println("Warning: Your data contains missing values and missing value treatment is not selected.\n" - + "If desired please supply additional flag: --forceMissingValues or --forceReplacementOfMissingValues"); - System.exit(0); - } - - if(treatZerosAsNulls){ - MatrixHandling.ReplaceNullToZero(rawData); - } - - DoubleMatrixDataset datasetNormalized = new DoubleMatrixDataset(rawData, dataset.rowObjects, dataset.colObjects); - fileNamePrefix += ".QuantileNormalized"; - datasetNormalized.save(fileNamePrefix + ".txt.gz"); - - return fileNamePrefix; - } - - public String log2transform(DoubleMatrixDataset dataset, String fileNamePrefix) throws IOException { - double[][] rawData = dataset.getRawData(); - Log2Transform.log2transform(rawData); - DoubleMatrixDataset datasetNormalized = new DoubleMatrixDataset(rawData, dataset.rowObjects, dataset.colObjects); - fileNamePrefix += ".Log2Transformed"; - datasetNormalized.save(fileNamePrefix + ".txt.gz"); - return fileNamePrefix; - } - - public String mValueTransform(DoubleMatrixDataset dataset, String fileNamePrefix) throws IOException { - double[][] rawData = dataset.getRawData(); - ConvertBetaAndMvalues.transformToMvalue(rawData); - DoubleMatrixDataset datasetNormalized = new DoubleMatrixDataset(rawData, dataset.rowObjects, dataset.colObjects); - fileNamePrefix += ".MvalueTransformed"; - datasetNormalized.save(fileNamePrefix + ".txt.gz"); - return fileNamePrefix; - } - - public String 
centerAndScale(DoubleMatrixDataset dataset, String fileNamePrefix) throws IOException { - double[][] rawData = dataset.getRawData(); - System.out.println("Standardizing probe mean"); - for (int p = 0; p < dataset.rowObjects.size(); p++) { - double mean = Descriptives.mean(rawData[p]); - //double stdev = Math.sqrt(Descriptives.variance(rawData[p], mean)); - for (int s = 0; s < dataset.colObjects.size(); s++) { - rawData[p][s] -= mean; - } - } - - dataset.setRawData(rawData); - fileNamePrefix += ".ProbesCentered"; - dataset.save(fileNamePrefix + ".txt.gz"); - - System.out.println("- Standardizing sample mean and standard deviation"); - for (int s = 0; s < dataset.colObjects.size(); s++) { - double[] vals = new double[dataset.rowObjects.size()]; - for (int p = 0; p < dataset.rowObjects.size(); p++) { - vals[p] = dataset.getRawData()[p][s]; - } - double mean = Descriptives.mean(vals); - for (int p = 0; p < dataset.rowObjects.size(); p++) { - vals[p] -= mean; - } - double var = Descriptives.variance(vals, mean); - double stdev = Math.sqrt(var); - for (int p = 0; p < dataset.rowObjects.size(); p++) { - dataset.getRawData()[p][s] = (vals[p] / stdev); - } - } - - DoubleMatrixDataset datasetNormalized = new DoubleMatrixDataset(rawData, dataset.rowObjects, dataset.colObjects); - fileNamePrefix += ".SamplesZTransformed"; - datasetNormalized.save(fileNamePrefix + ".txt.gz"); - return fileNamePrefix; - } - - public String adjustCovariates(DoubleMatrixDataset traitData, String fileNamePrefix, String covariatesToRemove, boolean orthogonalizecovariates, double varianceExplainedCutoff) throws IOException { - // load covariate data, and remove samples for which there is missing covariate data. 
- Pair, DoubleMatrixDataset> covariateData = loadCovariateValues(covariatesToRemove, traitData); - DoubleMatrixDataset covariateDataset = covariateData.getLeft(); - DoubleMatrixDataset traitDataUpdated = covariateData.getRight(); - - traitData.rawData = traitDataUpdated.rawData; - traitData.colObjects = traitDataUpdated.colObjects; - traitData.rowObjects = traitDataUpdated.rowObjects; - traitData.recalculateHashMaps(); - - double[][] covariateValues = null; - double[] pcaExpVar = null; - - System.out.println("Covariate data has " + covariateDataset.nrRows + " rows and " + covariateDataset.nrCols + " columns."); - - for (int p = 0; p < covariateDataset.rowObjects.size(); p++) { - double mean = Descriptives.mean(covariateDataset.getRawData()[p]); - double stdev = Math.sqrt(Descriptives.variance(covariateDataset.getRawData()[p], mean)); - for (int s = 0; s < covariateDataset.colObjects.size(); s++) { - covariateDataset.getRawData()[p][s] -= mean; - covariateDataset.getRawData()[p][s] /= stdev; - } - } - - //Covariation on a centered and scaled matrix equals the correlation. - //Covariation is faster to compute. - ConcurrentCovariation c = new ConcurrentCovariation(2); - double[][] correlationMatrix = c.pairwiseCovariation(covariateDataset.getRawData()); - covariateDataset.transposeDataset(); - Pair, DoubleMatrixDataset> PCAResults = calculatePCA(covariateDataset, correlationMatrix, covariatesToRemove, null); - - // replace covariateValues with orthogonal ones... 
- covariateDataset = PCAResults.getLeft(); - - - covariateDataset.transposeDataset(); - covariateValues = covariateDataset.getRawData(); - - System.out.println(covariateDataset.nrRows + " covariates finally loaded."); - - // load the eigenvalues - pcaExpVar = new double[covariateValues.length]; - System.out.println("Loading eigenvalues from: " + covariatesToRemove + ".PCAOverSamplesEigenvalues.txt.gz"); - TextFile tf = new TextFile(covariatesToRemove + ".PCAOverSamplesEigenvalues.txt.gz", TextFile.R); // - // skip header - tf.readLine(); - String[] elems = tf.readLineElems(TextFile.tab); - while (elems != null) { - if (elems.length > 2) { - int pcanr = Integer.parseInt(elems[0]); - double expvar = Double.parseDouble(elems[1]); - pcaExpVar[pcanr - 1] = expvar; - System.out.println(pcanr + "\t" + expvar); - } - elems = tf.readLineElems(TextFile.tab); - } - tf.close(); + + public String adjustCovariates(DoubleMatrixDataset traitData, String fileNamePrefix, String covariatesToRemove, boolean orthogonalizecovariates, double varianceExplainedCutoff) throws IOException { + // load covariate data, and remove samples for which there is missing covariate data. 
+ Pair, DoubleMatrixDataset> covariateData = loadCovariateValues(covariatesToRemove, traitData); + DoubleMatrixDataset covariateDataset = covariateData.getLeft(); + DoubleMatrixDataset traitDataUpdated = covariateData.getRight(); + + traitData.rawData = traitDataUpdated.rawData; + traitData.colObjects = traitDataUpdated.colObjects; + traitData.rowObjects = traitDataUpdated.rowObjects; + traitData.recalculateHashMaps(); + + double[][] covariateValues = null; + double[] pcaExpVar = null; + + System.out.println("Covariate data has " + covariateDataset.nrRows + " rows and " + covariateDataset.nrCols + " columns."); + + for (int p = 0; p < covariateDataset.rowObjects.size(); p++) { + double mean = Descriptives.mean(covariateDataset.getRawData()[p]); + double stdev = Math.sqrt(Descriptives.variance(covariateDataset.getRawData()[p], mean)); + for (int s = 0; s < covariateDataset.colObjects.size(); s++) { + covariateDataset.getRawData()[p][s] -= mean; + covariateDataset.getRawData()[p][s] /= stdev; + } + } + + //Covariation on a centered and scaled matrix equals the correlation. + //Covariation is faster to compute. + ConcurrentCovariation c = new ConcurrentCovariation(2); + double[][] correlationMatrix = c.pairwiseCovariation(covariateDataset.getRawData()); + covariateDataset.transposeDataset(); + Pair, DoubleMatrixDataset> PCAResults = calculatePCA(covariateDataset, correlationMatrix, covariatesToRemove, null); + + // replace covariateValues with orthogonal ones... 
+ covariateDataset = PCAResults.getLeft(); + + + covariateDataset.transposeDataset(); + covariateValues = covariateDataset.getRawData(); + + System.out.println(covariateDataset.nrRows + " covariates finally loaded."); + + // load the eigenvalues + pcaExpVar = new double[covariateValues.length]; + System.out.println("Loading eigenvalues from: " + covariatesToRemove + ".PCAOverSamplesEigenvalues.txt.gz"); + TextFile tf = new TextFile(covariatesToRemove + ".PCAOverSamplesEigenvalues.txt.gz", TextFile.R); // + // skip header + tf.readLine(); + String[] elems = tf.readLineElems(TextFile.tab); + while (elems != null) { + if (elems.length > 2) { + int pcanr = Integer.parseInt(elems[0]); + double expvar = Double.parseDouble(elems[1]); + pcaExpVar[pcanr - 1] = expvar; + System.out.println(pcanr + "\t" + expvar); + } + elems = tf.readLineElems(TextFile.tab); + } + tf.close(); // } else { // // PCA has been performed a-priori. Just check whether the user has supplied proper covariates. // if (covariateValues.length > 1) { @@ -394,247 +398,246 @@ public String adjustCovariates(DoubleMatrixDataset traitData, St // } - double[][] rawdata = traitData.getRawData(); - for (int i = 0; i < covariateValues.length; i++) { - if (pcaExpVar == null || pcaExpVar[i] > varianceExplainedCutoff) { - correctForCovariate(rawdata, covariateValues, i); - } else { - System.out.println("Not regressing covariate: " + i + " because explained variance < " + varianceExplainedCutoff + ": " + pcaExpVar[i]); - } - } + double[][] rawdata = traitData.getRawData(); + for (int i = 0; i < covariateValues.length; i++) { + if (pcaExpVar == null || pcaExpVar[i] > varianceExplainedCutoff) { + correctForCovariate(rawdata, covariateValues, i); + } else { + System.out.println("Not regressing covariate: " + i + " because explained variance < " + varianceExplainedCutoff + ": " + pcaExpVar[i]); + } + } + + DoubleMatrixDataset datasetNormalized = new DoubleMatrixDataset(rawdata, traitData.rowObjects, 
traitData.colObjects); + fileNamePrefix += ".CovariatesRemoved"; + datasetNormalized.save(fileNamePrefix + ".txt.gz"); + + traitData.rawData = rawdata; + + + return fileNamePrefix; + } + + /** + * Calculate correlation over columns in DoubleMatrixDataset. WARNING: this + * method assumes that SD == 1 and mean == 0 (which makes the covariance + * equal to the correlation). + * + * @param dataset + * @return + */ + private double[][] correlateSamples(DoubleMatrixDataset dataset) { + double[][] correlationMatrix = new double[dataset.colObjects.size()][dataset.colObjects.size()]; + double probeCountMinusOne = dataset.rowObjects.size() - 1; + + ProgressBar pb = new ProgressBar(dataset.colObjects.size(), "- Calculating correlations: " + dataset.colObjects.size() + " x " + dataset.colObjects.size()); + + for (int f = 0; f < dataset.colObjects.size(); f++) { + + + for (int g = f; g < dataset.colObjects.size(); g++) { + double covarianceInterim = 0; + for (int p = 0; p < dataset.rowObjects.size(); p++) { + covarianceInterim += dataset.getRawData()[p][f] * dataset.getRawData()[p][g]; + } + double covariance = covarianceInterim / probeCountMinusOne; + correlationMatrix[f][g] = covariance; + correlationMatrix[g][f] = covariance; +// System.out.println(f + "\t" + g + "\t" + covariance); + } + pb.iterate(); + } + pb.close(); + return correlationMatrix; + } + + public double[][] correlateProbes(DoubleMatrixDataset dataset) { + + double[][] correlationMatrix = new double[dataset.rowObjects.size()][dataset.rowObjects.size()]; + double probeCountMinusOne = dataset.rowObjects.size() - 1; + + ProgressBar pb = new ProgressBar(dataset.rowObjects.size(), "- Calculating correlations: " + dataset.rowObjects.size() + " x " + dataset.rowObjects.size()); + for (int f = 0; f < dataset.rowObjects.size(); f++) { + for (int g = f; g < dataset.rowObjects.size(); g++) { + double covarianceInterim = 0; + for (int p = 0; p < dataset.rowObjects.size(); p++) { + covarianceInterim += 
dataset.getRawData()[p][f] * dataset.getRawData()[p][g]; + } + double covariance = covarianceInterim / probeCountMinusOne; + correlationMatrix[f][g] = covariance; + correlationMatrix[g][f] = covariance; + System.out.println(f + "\t" + g + "\t" + covariance); + } + pb.iterate(); + } + pb.close(); + return correlationMatrix; + } + + public Pair, DoubleMatrixDataset> calculatePCA(DoubleMatrixDataset dataset, double[][] correlationMatrix, String fileNamePrefix, Integer nrOfPCsToCalculate) throws IOException { + String expressionFile = fileNamePrefix; + System.out.println("Calculating PCA over file: " + fileNamePrefix); + System.out.println("- Performing PCA over correlation matrix of size: " + correlationMatrix.length + "x" + correlationMatrix.length); + Jama.EigenvalueDecomposition eig = PCA.eigenValueDecomposition(correlationMatrix); - DoubleMatrixDataset datasetNormalized = new DoubleMatrixDataset(rawdata, traitData.rowObjects, traitData.colObjects); - fileNamePrefix += ".CovariatesRemoved"; - datasetNormalized.save(fileNamePrefix + ".txt.gz"); + if (nrOfPCsToCalculate == null || nrOfPCsToCalculate > dataset.colObjects.size()) { + nrOfPCsToCalculate = dataset.colObjects.size(); + } else if (nrOfPCsToCalculate < 1) { + throw new IllegalArgumentException("Number of PCs to calculate should be at least 1"); + } - traitData.rawData = rawdata; + DoubleMatrixDataset datasetEV = new DoubleMatrixDataset(dataset.colObjects.size(), nrOfPCsToCalculate); + datasetEV.rowObjects = dataset.colObjects; + double[] eigenValues = eig.getRealEigenvalues(); + System.out.println("Eigenvalue results:"); + + System.out.println("PCA\tPCANr\tEigenValue\tExplainedVariance\tTotalExplainedVariance"); + + TextFile out = new TextFile(expressionFile + ".PCAOverSamplesEigenvalues.txt.gz", TextFile.W); + double cumExpVarPCA = 0; + + out.writeln("PCA\tPCANr\tEigenValue\tExplainedVariance\tTotalExplainedVariance"); + + for (int pca = 0; pca < nrOfPCsToCalculate; pca++) { + double expVarPCA = 
PCA.getEigenValueVar(eigenValues, pca); + double[] pca1ExpEigenVector = PCA.getEigenVector(eig, eigenValues, pca); + for (int s = 0; s < dataset.colObjects.size(); s++) { + datasetEV.getRawData()[s][pca] = pca1ExpEigenVector[s]; + } + int pcaNr = pca + 1; + cumExpVarPCA += expVarPCA; + out.write(pcaNr + "\t" + eigenValues[eigenValues.length - 1 - pca] + "\t" + expVarPCA + "\t" + cumExpVarPCA + "\n"); + datasetEV.colObjects.set(pca, "Comp" + String.valueOf(pcaNr)); + System.out.println("PCA:\t" + pcaNr + "\t" + eigenValues[eigenValues.length - 1 - pca] + "\t" + expVarPCA + "\t" + cumExpVarPCA); + } + out.close(); + datasetEV.save(expressionFile + ".PCAOverSamplesEigenvectors.txt.gz"); - return fileNamePrefix; - } + datasetEV.transposeDataset(); - /** - * Calculate correlation over columns in DoubleMatrixDataset. WARNING: this - * method assumes that SD == 1 and mean == 0 (which makes the covariance - * equal to the correlation). - * - * @param dataset - * @return - */ - private double[][] correlateSamples(DoubleMatrixDataset dataset) { - double[][] correlationMatrix = new double[dataset.colObjects.size()][dataset.colObjects.size()]; - double probeCountMinusOne = dataset.rowObjects.size() - 1; + datasetEV.save(expressionFile + ".PCAOverSamplesEigenvectorsTransposed.txt.gz"); - ProgressBar pb = new ProgressBar(dataset.colObjects.size(), "- Calculating correlations: " + dataset.colObjects.size() + " x " + dataset.colObjects.size()); + datasetEV.transposeDataset(); + System.out.println("Calculating PCs"); + System.out.println("Initializing PCA matrix"); + DoubleMatrixDataset datasetPCAOverSamplesPCAs = new DoubleMatrixDataset(dataset.rowObjects.size(), nrOfPCsToCalculate); + datasetPCAOverSamplesPCAs.rowObjects = dataset.rowObjects; + for (int s = 0; s < nrOfPCsToCalculate; s++) { + datasetPCAOverSamplesPCAs.colObjects.set(s, "Comp" + String.valueOf(s + 1)); + } + for (int p = 0; p < dataset.rowObjects.size(); p++) { + for (int t = 0; t < nrOfPCsToCalculate; t++) { + 
datasetPCAOverSamplesPCAs.getRawData()[p][t] = 0; + } + } - for (int f = 0; f < dataset.colObjects.size(); f++) { + ProgressBar pb = new ProgressBar(dataset.rowObjects.size(), "Calculating the PCA scores per probe: "); + for (int probe = 0; probe < dataset.rowObjects.size(); probe++) { + for (int sample1 = 0; sample1 < nrOfPCsToCalculate; sample1++) { + for (int sample2 = 0; sample2 < dataset.colObjects.size(); sample2++) { + double probeCoefficient = datasetEV.getRawData()[sample2][sample1]; + datasetPCAOverSamplesPCAs.getRawData()[probe][sample1] += probeCoefficient * dataset.getRawData()[probe][sample2]; + } + } + pb.iterate(); + } + pb.close(); + String outfilename = expressionFile + ".PCAOverSamplesPrincipalComponents.txt.gz"; + System.out.println("Saving PCA scores: " + outfilename); + datasetPCAOverSamplesPCAs.save(outfilename); - for (int g = f; g < dataset.colObjects.size(); g++) { - double covarianceInterim = 0; - for (int p = 0; p < dataset.rowObjects.size(); p++) { - covarianceInterim += dataset.getRawData()[p][f] * dataset.getRawData()[p][g]; - } - double covariance = covarianceInterim / probeCountMinusOne; - correlationMatrix[f][g] = covariance; - correlationMatrix[g][f] = covariance; -// System.out.println(f + "\t" + g + "\t" + covariance); - } - pb.iterate(); - } - pb.close(); - return correlationMatrix; - } - - public double[][] correlateProbes(DoubleMatrixDataset dataset) { - - double[][] correlationMatrix = new double[dataset.rowObjects.size()][dataset.rowObjects.size()]; - double probeCountMinusOne = dataset.rowObjects.size() - 1; - - ProgressBar pb = new ProgressBar(dataset.rowObjects.size(), "- Calculating correlations: " + dataset.rowObjects.size() + " x " + dataset.rowObjects.size()); - for (int f = 0; f < dataset.rowObjects.size(); f++) { - for (int g = f; g < dataset.rowObjects.size(); g++) { - double covarianceInterim = 0; - for (int p = 0; p < dataset.rowObjects.size(); p++) { - covarianceInterim += dataset.getRawData()[p][f] * 
dataset.getRawData()[p][g]; - } - double covariance = covarianceInterim / probeCountMinusOne; - correlationMatrix[f][g] = covariance; - correlationMatrix[g][f] = covariance; - System.out.println(f + "\t" + g + "\t" + covariance); - } - pb.iterate(); - } - pb.close(); - return correlationMatrix; - } - - public Pair, DoubleMatrixDataset> calculatePCA(DoubleMatrixDataset dataset, double[][] correlationMatrix, String fileNamePrefix, Integer nrOfPCsToCalculate) throws IOException { - String expressionFile = fileNamePrefix; - System.out.println("Calculating PCA over file: " + fileNamePrefix); - System.out.println("- Performing PCA over correlation matrix of size: " + correlationMatrix.length + "x" + correlationMatrix.length); - Jama.EigenvalueDecomposition eig = PCA.eigenValueDecomposition(correlationMatrix); - - if (nrOfPCsToCalculate == null || nrOfPCsToCalculate > dataset.colObjects.size()) { - nrOfPCsToCalculate = dataset.colObjects.size(); - } else if (nrOfPCsToCalculate < 1) { - throw new IllegalArgumentException("Number of PCs to calculate should be at least 1"); - } - - DoubleMatrixDataset datasetEV = new DoubleMatrixDataset(dataset.colObjects.size(), nrOfPCsToCalculate); - datasetEV.rowObjects = dataset.colObjects; - double[] eigenValues = eig.getRealEigenvalues(); - System.out.println("Eigenvalue results:"); - - System.out.println("PCA\tPCANr\tEigenValue\tExplainedVariance\tTotalExplainedVariance"); - - TextFile out = new TextFile(expressionFile + ".PCAOverSamplesEigenvalues.txt.gz", TextFile.W); - double cumExpVarPCA = 0; - - out.writeln("PCA\tPCANr\tEigenValue\tExplainedVariance\tTotalExplainedVariance"); - - for (int pca = 0; pca < nrOfPCsToCalculate; pca++) { - double expVarPCA = PCA.getEigenValueVar(eigenValues, pca); - double[] pca1ExpEigenVector = PCA.getEigenVector(eig, eigenValues, pca); - for (int s = 0; s < dataset.colObjects.size(); s++) { - datasetEV.getRawData()[s][pca] = pca1ExpEigenVector[s]; - } - int pcaNr = pca + 1; - cumExpVarPCA += 
expVarPCA; - out.write(pcaNr + "\t" + eigenValues[eigenValues.length - 1 - pca] + "\t" + expVarPCA + "\t" + cumExpVarPCA + "\n"); - datasetEV.colObjects.set(pca, "Comp" + String.valueOf(pcaNr)); - System.out.println("PCA:\t" + pcaNr + "\t" + eigenValues[eigenValues.length - 1 - pca] + "\t" + expVarPCA + "\t" + cumExpVarPCA); - } - out.close(); - - datasetEV.save(expressionFile + ".PCAOverSamplesEigenvectors.txt.gz"); - - datasetEV.transposeDataset(); - - datasetEV.save(expressionFile + ".PCAOverSamplesEigenvectorsTransposed.txt.gz"); - - datasetEV.transposeDataset(); - System.out.println("Calculating PCs"); - System.out.println("Initializing PCA matrix"); - DoubleMatrixDataset datasetPCAOverSamplesPCAs = new DoubleMatrixDataset(dataset.rowObjects.size(), nrOfPCsToCalculate); - datasetPCAOverSamplesPCAs.rowObjects = dataset.rowObjects; - for (int s = 0; s < nrOfPCsToCalculate; s++) { - datasetPCAOverSamplesPCAs.colObjects.set(s, "Comp" + String.valueOf(s + 1)); - } - for (int p = 0; p < dataset.rowObjects.size(); p++) { - for (int t = 0; t < nrOfPCsToCalculate; t++) { - datasetPCAOverSamplesPCAs.getRawData()[p][t] = 0; - } - } - - - ProgressBar pb = new ProgressBar(dataset.rowObjects.size(), "Calculating the PCA scores per probe: "); - for (int probe = 0; probe < dataset.rowObjects.size(); probe++) { - for (int sample1 = 0; sample1 < nrOfPCsToCalculate; sample1++) { - for (int sample2 = 0; sample2 < dataset.colObjects.size(); sample2++) { - double probeCoefficient = datasetEV.getRawData()[sample2][sample1]; - datasetPCAOverSamplesPCAs.getRawData()[probe][sample1] += probeCoefficient * dataset.getRawData()[probe][sample2]; - } - } - pb.iterate(); - } - pb.close(); - - String outfilename = expressionFile + ".PCAOverSamplesPrincipalComponents.txt.gz"; - System.out.println("Saving PCA scores: " + outfilename); - datasetPCAOverSamplesPCAs.save(outfilename); - - return new Pair, DoubleMatrixDataset>(datasetPCAOverSamplesPCAs, datasetEV); - } - - public void 
correctDataForPCs(DoubleMatrixDataset dataset, String fileNamePrefix, int nrPCAsOverSamplesToRemove, int nrIntermediatePCAsOverSamplesToRemoveToOutput, - DoubleMatrixDataset datasetPCAOverSamplesPCAs, DoubleMatrixDataset datasetEV) throws IOException { - String expressionFile = fileNamePrefix; - System.out.println("\nInitializing residual gene expression matrix"); - - if (dataset.colObjects.size() < nrPCAsOverSamplesToRemove) { - int remainder = dataset.colObjects.size() % nrIntermediatePCAsOverSamplesToRemoveToOutput; - nrPCAsOverSamplesToRemove = dataset.colObjects.size() - remainder; - } - - for (int t = 0; t < nrPCAsOverSamplesToRemove; t++) { - for (int p = 0; p < dataset.rowObjects.size(); p++) { - for (int s = 0; s < dataset.colObjects.size(); s++) { - dataset.getRawData()[p][s] -= datasetPCAOverSamplesPCAs.getRawData()[p][t] * datasetEV.getRawData()[s][t]; - } - } - int nrPCAs = t + 1; - if (nrIntermediatePCAsOverSamplesToRemoveToOutput > 0 && nrPCAs % nrIntermediatePCAsOverSamplesToRemoveToOutput == 0) { - dataset.save(expressionFile + "." + nrPCAs + "PCAsOverSamplesRemoved.txt.gz"); - System.out.println("Removed\t" + nrPCAs + "\tPCs. File:\t" + expressionFile + "." + nrPCAs + "PCAsOverSamplesRemoved.txt.gz"); - } - - } - dataset.save(expressionFile + "." 
+ nrPCAsOverSamplesToRemove + "PCAsOverSamplesRemoved.txt.gz"); - - } - - public void repeatPCAOmitCertainPCAs(HashSet pcasNotToRemove, String parentDir, String expressionFile, int nrPCAsOverSamplesToRemove, int nrIntermediatePCAsOverSamplesToRemoveToOutput) throws IOException { - System.out.println("Will write output to: "+parentDir); - String[] files = Gpio.getListOfFiles(parentDir); - String startExpressionFileName = expressionFile; - File st = new File(startExpressionFileName); - - // strip the parent dir name - parentDir += Gpio.getFileSeparator(); - String minimalFilename = st.getName(); - String[] expressionFileNameElems = minimalFilename.split("\\."); - String eigenvectorFile = null; - String principalComponentsFile = null; - - if(minimalFilename.contains("PCAsOverSamplesRemoved")){ - StringBuilder newMinimal = new StringBuilder(); - newMinimal.append(expressionFileNameElems[0]); - for(int i = 1; i, DoubleMatrixDataset>(datasetPCAOverSamplesPCAs, datasetEV); + } + + public void correctDataForPCs(DoubleMatrixDataset dataset, String fileNamePrefix, int nrPCAsOverSamplesToRemove, int nrIntermediatePCAsOverSamplesToRemoveToOutput, + DoubleMatrixDataset datasetPCAOverSamplesPCAs, DoubleMatrixDataset datasetEV) throws IOException { + String expressionFile = fileNamePrefix; + System.out.println("\nInitializing residual gene expression matrix"); + + if (dataset.colObjects.size() < nrPCAsOverSamplesToRemove) { + int remainder = dataset.colObjects.size() % nrIntermediatePCAsOverSamplesToRemoveToOutput; + nrPCAsOverSamplesToRemove = dataset.colObjects.size() - remainder; + } + + for (int t = 0; t < nrPCAsOverSamplesToRemove; t++) { + for (int p = 0; p < dataset.rowObjects.size(); p++) { + for (int s = 0; s < dataset.colObjects.size(); s++) { + dataset.getRawData()[p][s] -= datasetPCAOverSamplesPCAs.getRawData()[p][t] * datasetEV.getRawData()[s][t]; + } + } + int nrPCAs = t + 1; + if (nrIntermediatePCAsOverSamplesToRemoveToOutput > 0 && nrPCAs % 
nrIntermediatePCAsOverSamplesToRemoveToOutput == 0) { + dataset.save(expressionFile + "." + nrPCAs + "PCAsOverSamplesRemoved.txt.gz"); + System.out.println("Removed\t" + nrPCAs + "\tPCs. File:\t" + expressionFile + "." + nrPCAs + "PCAsOverSamplesRemoved.txt.gz"); + } + + } + dataset.save(expressionFile + "." + nrPCAsOverSamplesToRemove + "PCAsOverSamplesRemoved.txt.gz"); + + } + + public void repeatPCAOmitCertainPCAs(HashSet pcasNotToRemove, String parentDir, String expressionFile, int nrPCAsOverSamplesToRemove, int nrIntermediatePCAsOverSamplesToRemoveToOutput) throws IOException { + System.out.println("Will write output to: " + parentDir); + String[] files = Gpio.getListOfFiles(parentDir); + String startExpressionFileName = expressionFile; + File st = new File(startExpressionFileName); + + // strip the parent dir name + parentDir += Gpio.getFileSeparator(); + String minimalFilename = st.getName(); + String[] expressionFileNameElems = minimalFilename.split("\\."); + String eigenvectorFile = null; + String principalComponentsFile = null; + + if (minimalFilename.contains("PCAsOverSamplesRemoved")) { + StringBuilder newMinimal = new StringBuilder(); + newMinimal.append(expressionFileNameElems[0]); + for (int i = 1; i < expressionFileNameElems.length; ++i) { + if (!expressionFileNameElems[i].contains("PCAsOverSamplesRemoved")) { + newMinimal.append(".").append(expressionFileNameElems[i]); + } + } + minimalFilename = newMinimal.toString(); + } + + for (String file : files) { // if (file.length() < minimalFilename.length() && file.contains(expressionFileNameElems[0])) { // minimalFilename = file; // } else - if (file.toLowerCase().contains("pcaoversampleseigenvectors.")) { - eigenvectorFile = parentDir + "" + file; - } else if (file.toLowerCase().contains("pcaoversamplesprincipalcomponents")) { - principalComponentsFile = parentDir + "" + file; - } - } - - boolean fileFound = true; - if (eigenvectorFile == null) { - System.err.println("Could not find file containing 
'PCAOverSamplesEigenvectors' in directory: " + parentDir); - fileFound = false; - } - - if (eigenvectorFile == null) { - System.err.println("Could not find file containing 'PCAOverSamplesPrincipalComponents' in directory: " + parentDir); - fileFound = false; - } - - if (!fileFound) { - System.exit(0); - } - - System.out.println("Detected core file name to be: " + minimalFilename); - - DoubleMatrixDataset expressionDataset = new DoubleMatrixDataset(parentDir+minimalFilename); - DoubleMatrixDataset datasetPCAOverSamplesPCAs = new DoubleMatrixDataset(principalComponentsFile); - DoubleMatrixDataset datasetEV = new DoubleMatrixDataset(eigenvectorFile); - - if (expressionDataset.colObjects.size() < nrPCAsOverSamplesToRemove) { - int remainder = expressionDataset.colObjects.size() % nrIntermediatePCAsOverSamplesToRemoveToOutput; - nrPCAsOverSamplesToRemove = expressionDataset.colObjects.size() - remainder; - } + if (file.toLowerCase().contains("pcaoversampleseigenvectors.")) { + eigenvectorFile = parentDir + "" + file; + } else if (file.toLowerCase().contains("pcaoversamplesprincipalcomponents")) { + principalComponentsFile = parentDir + "" + file; + } + } + + boolean fileFound = true; + if (eigenvectorFile == null) { + System.err.println("Could not find file containing 'PCAOverSamplesEigenvectors' in directory: " + parentDir); + fileFound = false; + } + + if (eigenvectorFile == null) { + System.err.println("Could not find file containing 'PCAOverSamplesPrincipalComponents' in directory: " + parentDir); + fileFound = false; + } + + if (!fileFound) { + System.exit(0); + } + + System.out.println("Detected core file name to be: " + minimalFilename); + + DoubleMatrixDataset expressionDataset = new DoubleMatrixDataset(parentDir + minimalFilename); + DoubleMatrixDataset datasetPCAOverSamplesPCAs = new DoubleMatrixDataset(principalComponentsFile); + DoubleMatrixDataset datasetEV = new DoubleMatrixDataset(eigenvectorFile); + + if (expressionDataset.colObjects.size() < 
nrPCAsOverSamplesToRemove) { + int remainder = expressionDataset.colObjects.size() % nrIntermediatePCAsOverSamplesToRemoveToOutput; + nrPCAsOverSamplesToRemove = expressionDataset.colObjects.size() - remainder; + } // DoubleMatrixDataset datasetResidualExpressionBasedOnPCAOverSamples = new DoubleMatrixDataset(expressionDataset.rowObjects.size(), expressionDataset.colObjects.size()); // datasetResidualExpressionBasedOnPCAOverSamples.rowObjects = expressionDataset.rowObjects; @@ -644,378 +647,377 @@ public void repeatPCAOmitCertainPCAs(HashSet pcasNotToRemove, String pa // System.arraycopy(expressionDataset.getRawData()[p], 0, datasetResidualExpressionBasedOnPCAOverSamples.getRawData()[p], 0, expressionDataset.colObjects.size()); // } - if(minimalFilename.endsWith(".txt")){ - minimalFilename = minimalFilename.substring(0, minimalFilename.length()-4); - } else if(minimalFilename.endsWith(".txt.gz")){ - minimalFilename = minimalFilename.substring(0, minimalFilename.length()-7); - } - - for (int t = 0; t < nrPCAsOverSamplesToRemove; t++) { - if (!pcasNotToRemove.contains(t + 1)) { - - for (int p = 0; p < expressionDataset.rowObjects.size(); p++) { - for (int s = 0; s < expressionDataset.colObjects.size(); s++) { - //datasetResidualExpressionBasedOnPCAOverSamples.rawData[p][s]-= datasetPCAOverSamplesPCAs.rawData[p][t] * datasetEV.rawData[s][t]; - expressionDataset.getRawData()[p][s] -= datasetPCAOverSamplesPCAs.getRawData()[p][t] * datasetEV.getRawData()[s][t]; - } - } - } else { - System.out.println("Omitting PCA: " + (t + 1) + " since this component is under genetic control"); - } - - int nrPCAs = t + 1; - - if (nrIntermediatePCAsOverSamplesToRemoveToOutput > 0 && nrPCAs % nrIntermediatePCAsOverSamplesToRemoveToOutput == 0) { - //datasetResidualExpressionBasedOnPCAOverSamples.save(expressionFile + "." + nrPCAs + "PCAsOverSamplesRemoved.txt"); - expressionDataset.save(parentDir+minimalFilename + "." 
+ nrPCAs + "PCAsOverSamplesRemoved-GeneticVectorsNotRemoved.txt.gz"); - System.out.println("Removed\t" + nrPCAs + "\tPCs. File:\t" + minimalFilename + "." + nrPCAs + "PCAsOverSamplesRemoved-GeneticVectorsNotRemoved.txt.gz"); - } - - } - //datasetResidualExpressionBasedOnPCAOverSamples.save(expressionFile + "." + nrPCAsOverSamplesToRemove + "PCAsOverSamplesRemoved.txt"); - expressionDataset.save(parentDir+minimalFilename + "." + nrPCAsOverSamplesToRemove + "PCAsOverSamplesRemoved-GeneticVectorsNotRemoved.txt.gz"); - - System.out.println("Done\n"); - } - - private void correctForCovariate(double[][] rawdata, double[][] covariateValues, int covariateToCorrect) { - for (int probe = 0; probe < rawdata.length; probe++) { - double[] y = rawdata[probe]; - double meanY = JSci.maths.ArrayMath.mean(y); - double varianceY = JSci.maths.ArrayMath.variance(y); - double[] x = covariateValues[covariateToCorrect]; - - - - double[] rc = Regression.getLinearRegressionCoefficients(x, y); - double correlation = JSci.maths.ArrayMath.correlation(x, y); - double propExplainedVarianceTrait = correlation * correlation - 1.0d / (double) y.length; - - if (propExplainedVarianceTrait < 0) { - propExplainedVarianceTrait = 0; - } + if (minimalFilename.endsWith(".txt")) { + minimalFilename = minimalFilename.substring(0, minimalFilename.length() - 4); + } else if (minimalFilename.endsWith(".txt.gz")) { + minimalFilename = minimalFilename.substring(0, minimalFilename.length() - 7); + } + + for (int t = 0; t < nrPCAsOverSamplesToRemove; t++) { + if (!pcasNotToRemove.contains(t + 1)) { + + for (int p = 0; p < expressionDataset.rowObjects.size(); p++) { + for (int s = 0; s < expressionDataset.colObjects.size(); s++) { + //datasetResidualExpressionBasedOnPCAOverSamples.rawData[p][s]-= datasetPCAOverSamplesPCAs.rawData[p][t] * datasetEV.rawData[s][t]; + expressionDataset.getRawData()[p][s] -= datasetPCAOverSamplesPCAs.getRawData()[p][t] * datasetEV.getRawData()[s][t]; + } + } + } else { + 
System.out.println("Omitting PCA: " + (t + 1) + " since this component is under genetic control"); + } + + int nrPCAs = t + 1; + + if (nrIntermediatePCAsOverSamplesToRemoveToOutput > 0 && nrPCAs % nrIntermediatePCAsOverSamplesToRemoveToOutput == 0) { + //datasetResidualExpressionBasedOnPCAOverSamples.save(expressionFile + "." + nrPCAs + "PCAsOverSamplesRemoved.txt"); + expressionDataset.save(parentDir + minimalFilename + "." + nrPCAs + "PCAsOverSamplesRemoved-GeneticVectorsNotRemoved.txt.gz"); + System.out.println("Removed\t" + nrPCAs + "\tPCs. File:\t" + minimalFilename + "." + nrPCAs + "PCAsOverSamplesRemoved-GeneticVectorsNotRemoved.txt.gz"); + } + + } + //datasetResidualExpressionBasedOnPCAOverSamples.save(expressionFile + "." + nrPCAsOverSamplesToRemove + "PCAsOverSamplesRemoved.txt"); + expressionDataset.save(parentDir + minimalFilename + "." + nrPCAsOverSamplesToRemove + "PCAsOverSamplesRemoved-GeneticVectorsNotRemoved.txt.gz"); + + System.out.println("Done\n"); + } + + private void correctForCovariate(double[][] rawdata, double[][] covariateValues, int covariateToCorrect) { + for (int probe = 0; probe < rawdata.length; probe++) { + double[] y = rawdata[probe]; + double meanY = JSci.maths.ArrayMath.mean(y); + double varianceY = JSci.maths.ArrayMath.variance(y); + double[] x = covariateValues[covariateToCorrect]; + + + double[] rc = Regression.getLinearRegressionCoefficients(x, y); + double correlation = JSci.maths.ArrayMath.correlation(x, y); + double propExplainedVarianceTrait = correlation * correlation - 1.0d / (double) y.length; + + if (propExplainedVarianceTrait < 0) { + propExplainedVarianceTrait = 0; + } // explainedVariancePerEQTLProbe[d][(int) Math.round(propExplainedVarianceTrait * 100d)]++; - double[] rawDataUpdated = new double[x.length]; - for (int s = 0; s < x.length; s++) { - double residual = y[s] - x[s] * rc[0]; - rawDataUpdated[s] = residual; - } - - double meanUpdated = JSci.maths.ArrayMath.mean(rawDataUpdated); - double stdDevRatio = 
JSci.maths.ArrayMath.standardDeviation(rawDataUpdated) / Math.sqrt(varianceY); - for (int s = 0; s < x.length; s++) { - rawDataUpdated[s] -= meanUpdated; - rawDataUpdated[s] /= stdDevRatio; - rawDataUpdated[s] += meanY; - } - System.arraycopy(rawDataUpdated, 0, rawdata[probe], 0, x.length); - } - } - - // NOTE: this new code switches around columns and rows for the covariate matrix - private Pair, DoubleMatrixDataset> loadCovariateValues(String covariatesToRemove, DoubleMatrixDataset dataset) throws IOException { - System.out.println("- Removing covariates as defined in: " + covariatesToRemove); - TextFile covariates = new TextFile(covariatesToRemove, TextFile.R); - int numRows = covariates.countLines() - 1; // minus the header :) - int numCols = covariates.countCols(TextFile.tab) - 1; // minus the header's row identifier (if any) - - if (numRows == 0 || numCols == 0) { - System.err.println("Covariate file is empty, but no covariates found in file! Is your file format correct?"); - System.err.println("The program is expecting the following: tab separated, one covariate per row, one sample per column, with sample identifiers identical to your --in file."); - System.exit(0); - } else { - System.out.println("Covariate file has " + numRows + " rows and " + numCols + " columns"); - } - - - // first hash up which samples are in the dataset - HashMap samplesInDatasetIndex = new HashMap(); - String[] allSamplesInDataset = dataset.colObjects.toArray(new String[0]); - for (int i = 0; i < allSamplesInDataset.length; i++) { - samplesInDatasetIndex.put(allSamplesInDataset[i], i); - } - - // read the column names from the covariate file - // expect the samples on the columns - String[] elems = covariates.readLineElemsReturnReference(TextFile.tab); // header - - int ctr = 0; - boolean[] sampleInDatasetIncludedInCovariates = new boolean[dataset.colObjects.size()]; - ArrayList columnNames = new ArrayList(); - for (int i = 1; i < elems.length; i++) { - Integer index = 
samplesInDatasetIndex.get(elems[i]); - columnNames.add(elems[i]); - if (index != null) { - sampleInDatasetIncludedInCovariates[index] = true; - ctr++; - } - } - - // read the covariate names, expect them to be on the rows - ArrayList rowNames = new ArrayList(); - elems = covariates.readLineElemsReturnReference(TextFile.tab); // first line - while (elems != null) { - rowNames.add(elems[0]); - elems = covariates.readLineElemsReturnReference(TextFile.tab); - } - covariates.close(); - - boolean isTransposed = false; - if (ctr == 0) { - System.err.println("No matching samples detected between covariate file and dataset. Maybe your covariate file needs to be transposed? Will test that for you now:"); - for (String rowName : rowNames) { - Integer index = samplesInDatasetIndex.get(rowName); - if (index != null) { - sampleInDatasetIncludedInCovariates[index] = true; - ctr++; - } - } - - if (ctr == 0) { - System.err.println("Transposing the data does not seem to resolve the issue. Please check your sample identifiers."); - System.exit(0); - } else { - System.out.println("Transposing the covariate file reveals: " + ctr + " samples present."); - isTransposed = true; - - } - - - } + double[] rawDataUpdated = new double[x.length]; + for (int s = 0; s < x.length; s++) { + double residual = y[s] - x[s] * rc[0]; + rawDataUpdated[s] = residual; + } + + double meanUpdated = JSci.maths.ArrayMath.mean(rawDataUpdated); + double stdDevRatio = JSci.maths.ArrayMath.standardDeviation(rawDataUpdated) / Math.sqrt(varianceY); + for (int s = 0; s < x.length; s++) { + rawDataUpdated[s] -= meanUpdated; + rawDataUpdated[s] /= stdDevRatio; + rawDataUpdated[s] += meanY; + } + System.arraycopy(rawDataUpdated, 0, rawdata[probe], 0, x.length); + } + } + + // NOTE: this new code switches around columns and rows for the covariate matrix + private Pair, DoubleMatrixDataset> loadCovariateValues(String covariatesToRemove, DoubleMatrixDataset dataset) throws IOException { + System.out.println("- Removing 
covariates as defined in: " + covariatesToRemove); + TextFile covariates = new TextFile(covariatesToRemove, TextFile.R); + int numRows = covariates.countLines() - 1; // minus the header :) + int numCols = covariates.countCols(TextFile.tab) - 1; // minus the header's row identifier (if any) + + if (numRows == 0 || numCols == 0) { + System.err.println("Covariate file is empty, but no covariates found in file! Is your file format correct?"); + System.err.println("The program is expecting the following: tab separated, one covariate per row, one sample per column, with sample identifiers identical to your --in file."); + System.exit(0); + } else { + System.out.println("Covariate file has " + numRows + " rows and " + numCols + " columns"); + } + + + // first hash up which samples are in the dataset + HashMap samplesInDatasetIndex = new HashMap(); + String[] allSamplesInDataset = dataset.colObjects.toArray(new String[0]); + for (int i = 0; i < allSamplesInDataset.length; i++) { + samplesInDatasetIndex.put(allSamplesInDataset[i], i); + } + + // read the column names from the covariate file + // expect the samples on the columns + String[] elems = covariates.readLineElemsReturnReference(TextFile.tab); // header + + int ctr = 0; + boolean[] sampleInDatasetIncludedInCovariates = new boolean[dataset.colObjects.size()]; + ArrayList columnNames = new ArrayList(); + for (int i = 1; i < elems.length; i++) { + Integer index = samplesInDatasetIndex.get(elems[i]); + columnNames.add(elems[i]); + if (index != null) { + sampleInDatasetIncludedInCovariates[index] = true; + ctr++; + } + } + + // read the covariate names, expect them to be on the rows + ArrayList rowNames = new ArrayList(); + elems = covariates.readLineElemsReturnReference(TextFile.tab); // first line + while (elems != null) { + rowNames.add(elems[0]); + elems = covariates.readLineElemsReturnReference(TextFile.tab); + } + covariates.close(); + + boolean isTransposed = false; + if (ctr == 0) { + System.err.println("No 
matching samples detected between covariate file and dataset. Maybe your covariate file needs to be transposed? Will test that for you now:"); + for (String rowName : rowNames) { + Integer index = samplesInDatasetIndex.get(rowName); + if (index != null) { + sampleInDatasetIncludedInCovariates[index] = true; + ctr++; + } + } + + if (ctr == 0) { + System.err.println("Transposing the data does not seem to resolve the issue. Please check your sample identifiers."); + System.exit(0); + } else { + System.out.println("Transposing the covariate file reveals: " + ctr + " samples present."); + isTransposed = true; + + } + + + } // if (dataset.colObjects.size() != numSamples) { // System.out.println("Covariates loaded from: " + covariatesToRemove + ", but the number of samples does not correspond! " + numSamples + " in covariates file, " + dataset.colObjects.size() + " in dataset..."); // System.out.println("Please note that missing samples will be removed from your eventual corrected --in file."); // } - if (ctr < dataset.colObjects.size()) { - System.err.println("Covariates loaded from: " + covariatesToRemove + ", but not all samples present in covariates file! 
" + ctr + " present in covariates file, out of " + dataset.colObjects.size() + " in dataset..."); - System.out.println("Your dataset will be adjusted accordingly."); - } - int nrCovariates = numRows; - if (isTransposed) { - nrCovariates = numCols; - } - - // make matrix with equal sample size - double[][] covariateValues = new double[nrCovariates][dataset.colObjects.size()]; - for (int row = 0; row < covariateValues.length; row++) { - for (int col = 0; col < covariateValues[row].length; col++) { - covariateValues[row][col] = Double.NaN; - } - } - - int lineCtr = 0; - covariates.open(); - String[] headerElems = covariates.readLineElemsReturnReference(TextFile.tab); // header - elems = covariates.readLineElemsReturnReference(TextFile.tab); - while (elems != null) { - if (isTransposed) { - String sampleName = elems[0]; - Integer sampleIdInDataset = samplesInDatasetIndex.get(sampleName); - if (sampleIdInDataset != null) { - for (int i = 1; i < elems.length; i++) { - try { - covariateValues[i - 1][sampleIdInDataset] = Double.parseDouble(elems[i]); - } catch (NumberFormatException e) { + if (ctr < dataset.colObjects.size()) { + System.err.println("Covariates loaded from: " + covariatesToRemove + ", but not all samples present in covariates file! 
" + ctr + " present in covariates file, out of " + dataset.colObjects.size() + " in dataset..."); + System.out.println("Your dataset will be adjusted accordingly."); + } + int nrCovariates = numRows; + if (isTransposed) { + nrCovariates = numCols; + } + + // make matrix with equal sample size + double[][] covariateValues = new double[nrCovariates][dataset.colObjects.size()]; + for (int row = 0; row < covariateValues.length; row++) { + for (int col = 0; col < covariateValues[row].length; col++) { + covariateValues[row][col] = Double.NaN; + } + } + + int lineCtr = 0; + covariates.open(); + String[] headerElems = covariates.readLineElemsReturnReference(TextFile.tab); // header + elems = covariates.readLineElemsReturnReference(TextFile.tab); + while (elems != null) { + if (isTransposed) { + String sampleName = elems[0]; + Integer sampleIdInDataset = samplesInDatasetIndex.get(sampleName); + if (sampleIdInDataset != null) { + for (int i = 1; i < elems.length; i++) { + try { + covariateValues[i - 1][sampleIdInDataset] = Double.parseDouble(elems[i]); + } catch (NumberFormatException e) { // System.out.println("WARNING: " + elems[i] + " is not a numeric value! 
in " + covariatesToRemove + " at line: " + (lineCtr + 1) + "."); // covariateValues[i - 1][sampleIdInDataset] = Double.NaN; // sampleInDatasetIncludedInCovariates[sampleIdInDataset] = false; - } - } - } - } else { - for (int i = 1; i < elems.length; i++) { - String sampleName = headerElems[i]; - Integer sampleIdInDataset = samplesInDatasetIndex.get(sampleName); - if (sampleIdInDataset != null) { - try { - covariateValues[lineCtr][sampleIdInDataset] = Double.parseDouble(elems[i]); - } catch (NumberFormatException e) { + } + } + } + } else { + for (int i = 1; i < elems.length; i++) { + String sampleName = headerElems[i]; + Integer sampleIdInDataset = samplesInDatasetIndex.get(sampleName); + if (sampleIdInDataset != null) { + try { + covariateValues[lineCtr][sampleIdInDataset] = Double.parseDouble(elems[i]); + } catch (NumberFormatException e) { // System.out.println("WARNING: " + elems[i] + " is not a numeric value at line: " + (lineCtr + 1) + "\tcolumn: " + i); - } - } - } - } - elems = covariates.readLineElemsReturnReference(TextFile.tab); - lineCtr++; - } - covariates.close(); - - // investigate how many covariates there actually is data for. - int covariateCtr = 0; - boolean[] includeCovariate = new boolean[covariateValues.length]; - for (int row = 0; row < covariateValues.length; row++) { - int nrColsFilled = 0; - for (int col = 0; col < covariateValues[row].length; col++) { - if (!Double.isNaN(covariateValues[row][col])) { - nrColsFilled++; - } - } - - if (nrColsFilled == 0) { - // there's no data for this covariate.... - includeCovariate[row] = false; - } else { - includeCovariate[row] = true; - covariateCtr++; - } - } - - if (covariateCtr == 0) { - System.err.println("ERROR: none of your covariates seem to have valid numerical values.. 
Please check your covariate file."); - System.exit(0); - } else { - System.out.println("After removing covariates without data, your dataset will have " + covariateCtr + " covariates (out of: " + covariateValues.length + ") ."); - } - - ArrayList covariateNames = null; - if (isTransposed) { - covariateNames = columnNames; - } else { - covariateNames = rowNames; - } - - if (covariateCtr != covariateValues.length) { - // remove covariates with missing values - System.out.println("Removing covariates that have no data at all."); - double[][] newCovariateData = new double[covariateCtr][dataset.colObjects.size()]; - ArrayList newCovariateNames = new ArrayList(); - int newCovariateCTR = 0; - for (int row = 0; row < covariateValues.length; row++) { - if (includeCovariate[row]) { - newCovariateNames.add(covariateNames.get(row)); - - for (int col = 0; col < covariateValues[row].length; col++) { - newCovariateData[newCovariateCTR][col] = covariateValues[row][col]; - - // check whether we should include all samples, but don't remove yet: sync this with the expression/whatever dastaset - if (Double.isNaN(covariateValues[row][col])) { - sampleInDatasetIncludedInCovariates[col] = false; - } - } - newCovariateCTR++; - } else { - System.out.println(covariateNames.get(row) + " removed."); - } - } - - - nrCovariates = newCovariateCTR; - covariateValues = newCovariateData; - covariateNames = newCovariateNames; - } - System.out.println(""); - System.out.println("Remaining covariates: "); - for (String s : covariateNames) { - System.out.println(s); - } - System.out.println(""); - // investigate how many samples there actually is data for. 
- for (int row = 0; row < covariateValues.length; row++) { - for (int col = 0; col < covariateValues[row].length; col++) { - if (Double.isNaN(covariateValues[row][col])) { - sampleInDatasetIncludedInCovariates[col] = false; - } - } - } - - int sampleCtr = 0; - for (int q = 0; q < sampleInDatasetIncludedInCovariates.length; q++) { - if (sampleInDatasetIncludedInCovariates[q]) { - sampleCtr++; - } - } - - // remove samples that have a missing value for at least one covariate + } + } + } + } + elems = covariates.readLineElemsReturnReference(TextFile.tab); + lineCtr++; + } + covariates.close(); + + // investigate how many covariates there actually is data for. + int covariateCtr = 0; + boolean[] includeCovariate = new boolean[covariateValues.length]; + for (int row = 0; row < covariateValues.length; row++) { + int nrColsFilled = 0; + for (int col = 0; col < covariateValues[row].length; col++) { + if (!Double.isNaN(covariateValues[row][col])) { + nrColsFilled++; + } + } + + if (nrColsFilled == 0) { + // there's no data for this covariate.... + includeCovariate[row] = false; + } else { + includeCovariate[row] = true; + covariateCtr++; + } + } + + if (covariateCtr == 0) { + System.err.println("ERROR: none of your covariates seem to have valid numerical values.. 
Please check your covariate file."); + System.exit(0); + } else { + System.out.println("After removing covariates without data, your dataset will have " + covariateCtr + " covariates (out of: " + covariateValues.length + ") ."); + } + + ArrayList covariateNames = null; + if (isTransposed) { + covariateNames = columnNames; + } else { + covariateNames = rowNames; + } + + if (covariateCtr != covariateValues.length) { + // remove covariates with missing values + System.out.println("Removing covariates that have no data at all."); + double[][] newCovariateData = new double[covariateCtr][dataset.colObjects.size()]; + ArrayList newCovariateNames = new ArrayList(); + int newCovariateCTR = 0; + for (int row = 0; row < covariateValues.length; row++) { + if (includeCovariate[row]) { + newCovariateNames.add(covariateNames.get(row)); + + for (int col = 0; col < covariateValues[row].length; col++) { + newCovariateData[newCovariateCTR][col] = covariateValues[row][col]; + + // check whether we should include all samples, but don't remove yet: sync this with the expression/whatever dastaset + if (Double.isNaN(covariateValues[row][col])) { + sampleInDatasetIncludedInCovariates[col] = false; + } + } + newCovariateCTR++; + } else { + System.out.println(covariateNames.get(row) + " removed."); + } + } + + + nrCovariates = newCovariateCTR; + covariateValues = newCovariateData; + covariateNames = newCovariateNames; + } + System.out.println(""); + System.out.println("Remaining covariates: "); + for (String s : covariateNames) { + System.out.println(s); + } + System.out.println(""); + // investigate how many samples there actually is data for. 
+ for (int row = 0; row < covariateValues.length; row++) { + for (int col = 0; col < covariateValues[row].length; col++) { + if (Double.isNaN(covariateValues[row][col])) { + sampleInDatasetIncludedInCovariates[col] = false; + } + } + } + + int sampleCtr = 0; + for (int q = 0; q < sampleInDatasetIncludedInCovariates.length; q++) { + if (sampleInDatasetIncludedInCovariates[q]) { + sampleCtr++; + } + } + + // remove samples that have a missing value for at least one covariate // if (sampleCtr == sampleInDatasetIncludedInCovariates.length) { // System.out.println("There were no missing values or samples in your covariate file. Sample size will remain unchanged."); // DoubleMatrixDataset covariateDataset = new DoubleMatrixDataset(covariateValues, dataset.rowObjects, covariateNames); // return new Pair, DoubleMatrixDataset>(covariateDataset, dataset); // } else { - System.out.println("Your covariate corrected dataset will have " + sampleCtr + " samples, after removing samples with missing covariate values."); - double[][] rawData = dataset.getRawData(); - double[][] newRawData = new double[rawData.length][sampleCtr]; - double[][] finalCovariateData = new double[nrCovariates][sampleCtr]; - ArrayList newColObjects = new ArrayList(); - - for (int col = 0; col < dataset.colObjects.size(); col++) { - if (sampleInDatasetIncludedInCovariates[col]) { - newColObjects.add(dataset.colObjects.get(col)); - } - } - - for (int row = 0; row < rawData.length; row++) { - int includedSampleCtr = 0; - for (int col = 0; col < dataset.colObjects.size(); col++) { - if (sampleInDatasetIncludedInCovariates[col]) { - // include sample - newRawData[row][includedSampleCtr] = rawData[row][col]; - includedSampleCtr++; - } - } - } - - for (int row = 0; row < covariateValues.length; row++) { - int includedCovariateSampleCtr = 0; - for (int col = 0; col < dataset.colObjects.size(); col++) { - // replace covariate data... 
- if (sampleInDatasetIncludedInCovariates[col]) { - finalCovariateData[row][includedCovariateSampleCtr] = covariateValues[row][col]; - includedCovariateSampleCtr++; - } - } - } - - DoubleMatrixDataset covariateDataset = new DoubleMatrixDataset(finalCovariateData, covariateNames, newColObjects); - covariateDataset.save(covariatesToRemove + "-asLoadedByNormalizer.txt"); - DoubleMatrixDataset newDataset = new DoubleMatrixDataset(newRawData, dataset.rowObjects, newColObjects); - newDataset.save(dataset.fileName + "-SampleSizeCorrectedForCovariates.txt"); - return new Pair, DoubleMatrixDataset>(covariateDataset, newDataset); + System.out.println("Your covariate corrected dataset will have " + sampleCtr + " samples, after removing samples with missing covariate values."); + double[][] rawData = dataset.getRawData(); + double[][] newRawData = new double[rawData.length][sampleCtr]; + double[][] finalCovariateData = new double[nrCovariates][sampleCtr]; + ArrayList newColObjects = new ArrayList(); + + for (int col = 0; col < dataset.colObjects.size(); col++) { + if (sampleInDatasetIncludedInCovariates[col]) { + newColObjects.add(dataset.colObjects.get(col)); + } + } + + for (int row = 0; row < rawData.length; row++) { + int includedSampleCtr = 0; + for (int col = 0; col < dataset.colObjects.size(); col++) { + if (sampleInDatasetIncludedInCovariates[col]) { + // include sample + newRawData[row][includedSampleCtr] = rawData[row][col]; + includedSampleCtr++; + } + } + } + + for (int row = 0; row < covariateValues.length; row++) { + int includedCovariateSampleCtr = 0; + for (int col = 0; col < dataset.colObjects.size(); col++) { + // replace covariate data... 
+ if (sampleInDatasetIncludedInCovariates[col]) { + finalCovariateData[row][includedCovariateSampleCtr] = covariateValues[row][col]; + includedCovariateSampleCtr++; + } + } + } + + DoubleMatrixDataset covariateDataset = new DoubleMatrixDataset(finalCovariateData, covariateNames, newColObjects); + covariateDataset.save(covariatesToRemove + "-asLoadedByNormalizer.txt"); + DoubleMatrixDataset newDataset = new DoubleMatrixDataset(newRawData, dataset.rowObjects, newColObjects); + newDataset.save(dataset.fileName + "-SampleSizeCorrectedForCovariates.txt"); + return new Pair, DoubleMatrixDataset>(covariateDataset, newDataset); // } - } - - private String removeProbesWithZeroVariance(DoubleMatrixDataset dataset, String outputFileNamePrefix) throws IOException { - boolean[] dataHasZeroVariance = new boolean[dataset.nrRows]; - int nrRowsWithZeroVariance = 0; - for (int row = 0; row < dataset.nrRows; row++) { - double[] data = dataset.rawData[row]; - double var = JSci.maths.ArrayMath.variance(data); - if (var == 0d) { - System.out.println("Removing probe with zero variance: " + dataset.rowObjects.get(row) + " on line " + (row + 1)); - nrRowsWithZeroVariance++; - dataHasZeroVariance[row] = true; - } - } - - if (nrRowsWithZeroVariance > 0) { - int newNrRows = dataset.nrRows - nrRowsWithZeroVariance; - if (newNrRows == 0) { - System.err.println("ERROR: all probes have zero variance!"); - System.exit(-1); - } - - - double[][] newData = new double[newNrRows][dataset.nrCols]; - int ctr = 0; - ArrayList newRowHeader = new ArrayList(); - for (int row = 0; row < dataset.nrRows; row++) { - if (!dataHasZeroVariance[row]) { - newData[ctr] = dataset.rawData[row]; - newRowHeader.add(dataset.rowObjects.get(row)); - ctr++; - } - } - - dataset.rawData = newData; - dataset.rowObjects = newRowHeader; - dataset.recalculateHashMaps(); - String outputFileName = outputFileNamePrefix + ".ProbesWithZeroVarianceRemoved"; - dataset.save(outputFileName + ".txt.gz"); - return outputFileName; - } - - 
return outputFileNamePrefix; - } + } + + private String removeProbesWithZeroVariance(DoubleMatrixDataset dataset, String outputFileNamePrefix) throws IOException { + boolean[] dataHasZeroVariance = new boolean[dataset.nrRows]; + int nrRowsWithZeroVariance = 0; + for (int row = 0; row < dataset.nrRows; row++) { + double[] data = dataset.rawData[row]; + double var = JSci.maths.ArrayMath.variance(data); + if (var == 0d) { + System.out.println("Removing probe with zero variance: " + dataset.rowObjects.get(row) + " on line " + (row + 1)); + nrRowsWithZeroVariance++; + dataHasZeroVariance[row] = true; + } + } + + if (nrRowsWithZeroVariance > 0) { + int newNrRows = dataset.nrRows - nrRowsWithZeroVariance; + if (newNrRows == 0) { + System.err.println("ERROR: all probes have zero variance!"); + System.exit(-1); + } + + + double[][] newData = new double[newNrRows][dataset.nrCols]; + int ctr = 0; + ArrayList newRowHeader = new ArrayList(); + for (int row = 0; row < dataset.nrRows; row++) { + if (!dataHasZeroVariance[row]) { + newData[ctr] = dataset.rawData[row]; + newRowHeader.add(dataset.rowObjects.get(row)); + ctr++; + } + } + + dataset.rawData = newData; + dataset.rowObjects = newRowHeader; + dataset.recalculateHashMaps(); + String outputFileName = outputFileNamePrefix + ".ProbesWithZeroVarianceRemoved"; + dataset.save(outputFileName + ".txt.gz"); + return outputFileName; + } + + return outputFileNamePrefix; + } } From f026c7bff214698f5a1eb909b8883efda4958c5c Mon Sep 17 00:00:00 2001 From: harmjanwestra Date: Fri, 10 Apr 2015 11:26:07 -0400 Subject: [PATCH 021/143] - Some edits to the old eQTL binary meta-analyzer tool --- .../io/trityper/bin/BinaryResultDataset.java | 474 +++++++++--------- .../bin/BinaryResultProbeSummary.java | 160 +++--- .../trityper/bin/BinaryResultSNPSummary.java | 2 +- .../probeannotation/ProbeTranslation.java | 13 +- 4 files changed, 320 insertions(+), 329 deletions(-) diff --git 
a/genetica-libraries/src/main/java/umcg/genetica/io/trityper/bin/BinaryResultDataset.java b/genetica-libraries/src/main/java/umcg/genetica/io/trityper/bin/BinaryResultDataset.java index 8bd3599a2..19c9c1dca 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/io/trityper/bin/BinaryResultDataset.java +++ b/genetica-libraries/src/main/java/umcg/genetica/io/trityper/bin/BinaryResultDataset.java @@ -5,191 +5,188 @@ package umcg.genetica.io.trityper.bin; +import umcg.genetica.console.ConsoleGUIElems; +import umcg.genetica.console.ProgressBar; + import java.io.IOException; import java.util.HashMap; import java.util.zip.DataFormatException; -import umcg.genetica.console.ConsoleGUIElems; -import umcg.genetica.console.ProgressBar; /** - * * @author harmjan */ public class BinaryResultDataset { - private String m_name; - private String m_location; - private BinaryResultSNP[] snps; - private HashMap stringToSNP = new HashMap(); - private BinaryResultProbe[] probes; - private HashMap stringToProbe = new HashMap(); - private BinaryGZipFloatMatrix bgfm; - private int maxNrSamples; -// private long[] filepointers; - private float maxfloat = Float.MIN_VALUE; - private float minfloat = Float.MIN_VALUE; - private int numprobes; + private String m_name; + private String m_location; + private BinaryResultSNP[] snps; + private HashMap stringToSNP = new HashMap(); + private BinaryResultProbe[] probes; + private HashMap stringToProbe = new HashMap(); + private BinaryGZipFloatMatrix bgfm; + private int maxNrSamples; + // private long[] filepointers; + private float maxfloat = Float.MIN_VALUE; + private float minfloat = Float.MIN_VALUE; + private int numprobes; + + public BinaryResultDataset(String location, String name, int permutation) throws IOException { + m_location = location; + m_name = name; + System.out.println("Loading " + name + " from " + location); + if (permutation == 0) { + load(m_location + m_name + ".ProbeSummary.dat", m_location + m_name + ".SNPSummary.dat", 
m_location + m_name + ".ZScoreMatrix.dat"); + } else { + load(m_location + m_name + ".ProbeSummary.dat", m_location + m_name + "-PermutationRound-" + permutation + ".SNPSummary.dat", m_location + m_name + "-PermutationRound-" + permutation + ".ZScoreMatrix.dat"); + } + + } - public BinaryResultDataset(String location, String name, int permutation) throws IOException { - m_location = location; - m_name = name; - System.out.println("Loading "+name+" from "+location); - if(permutation == 0){ - load(m_location+m_name+".ProbeSummary.dat",m_location+m_name+".SNPSummary.dat",m_location+m_name+".ZScoreMatrix.dat"); - } else { - load(m_location+m_name+".ProbeSummary.dat",m_location+m_name+"-PermutationRound-"+permutation+".SNPSummary.dat",m_location+m_name+"-PermutationRound-"+permutation+".ZScoreMatrix.dat"); - } + private void load(String probesummaryloc, String snpsummaryloc, String zscoreloc) throws IOException { + System.out.println("Loading files: \n - " + probesummaryloc + "\n - " + snpsummaryloc + "\n - " + zscoreloc); + BinaryResultProbeSummary ps = new BinaryResultProbeSummary(probesummaryloc, BinaryResultProbeSummary.R); + BinaryResultSNPSummary ss = new BinaryResultSNPSummary(snpsummaryloc, BinaryResultSNPSummary.R); - } + snps = ss.readAllSNPs(); - private void load(String probesummaryloc, String snpsummaryloc, String zscoreloc) throws IOException { - System.out.println("Loading files: \n - "+probesummaryloc+"\n - "+snpsummaryloc +"\n - "+zscoreloc); - BinaryResultProbeSummary ps = new BinaryResultProbeSummary(probesummaryloc, BinaryResultProbeSummary.R); - BinaryResultSNPSummary ss = new BinaryResultSNPSummary(snpsummaryloc, BinaryResultSNPSummary.R); + probes = ps.readAllProbes(); + for (BinaryResultSNP s : snps) { + stringToSNP.put(s.getName().intern(), s); + } + for (BinaryResultProbe p : probes) { + stringToProbe.put(p.getName().intern(), p); + } + System.out.print("Dataset\t" + m_name + "\n" + ConsoleGUIElems.LINE); + System.out.println(snps.length + 
"\t\tSNPs read."); + System.out.println(probes.length + "\t\tProbes read."); + System.out.println(ss.getMaxNrSamples() + " samples."); + this.maxNrSamples = ss.getMaxNrSamples(); - snps = ss.readAllSNPs(); - probes = ps.readAllProbes(); + ps.close(); + ss.close(); - for(BinaryResultSNP s: snps){ - stringToSNP.put(s.getName(), s); - } - for(BinaryResultProbe p: probes){ - stringToProbe.put(p.getName(), p); - } - System.out.print("Dataset\t"+m_name+"\n"+ConsoleGUIElems.LINE); - System.out.println(snps.length+"\t\tSNPs read."); - System.out.println(probes.length+"\t\tProbes read."); - System.out.println(ss.getMaxNrSamples() +" samples."); - this.maxNrSamples = ss.getMaxNrSamples(); + bgfm = new BinaryGZipFloatMatrix(zscoreloc, BinaryGZipFloatMatrix.R); +// checkMatrix(); + numprobes = probes.length; + System.out.println(ConsoleGUIElems.LINE); + } + public void closeMatrix() throws IOException { + if (bgfm != null) { + bgfm.close(); + bgfm = null; + } + } - ps.close(); - ss.close(); + public void openMatrix(int permutation) throws IOException { + closeMatrix(); + } + /** + * @return the m_name + */ + public String getM_name() { + return m_name; + } - bgfm = new BinaryGZipFloatMatrix(zscoreloc, BinaryGZipFloatMatrix.R); -// checkMatrix(); - numprobes = probes.length; - System.out.println(ConsoleGUIElems.LINE); - } - - public void closeMatrix() throws IOException { - if(bgfm != null){ - bgfm.close(); - bgfm = null; - } - } - - - public void openMatrix(int permutation) throws IOException { - closeMatrix(); - } - - /** - * @return the m_name - */ - public String getM_name() { - return m_name; - } - - /** - * @param m_name the m_name to set - */ - public void setM_name(String m_name) { - this.m_name = m_name; - } - - /** - * @return the m_location - */ - public String getM_location() { - return m_location; - } - - /** - * @param m_location the m_location to set - */ - public void setM_location(String m_location) { - this.m_location = m_location; - } - - /** - * @return the 
snps - */ - public BinaryResultSNP[] getSnps() { - return snps; - } - - /** - * @param snps the snps to set - */ - public void setSnps(BinaryResultSNP[] snps) { - this.snps = snps; - } - - /** - * @return the stringToSNP - */ - public HashMap getStringToSNP() { - return stringToSNP; - } - - /** - * @param stringToSNP the stringToSNP to set - */ - public void setStringToSNP(HashMap stringToSNP) { - this.stringToSNP = stringToSNP; - } - - /** - * @return the probes - */ - public BinaryResultProbe[] getProbes() { - return probes; - } - - /** - * @param probes the probes to set - */ - public void setProbes(BinaryResultProbe[] probes) { - this.probes = probes; - } - - /** - * @return the stringToProbe - */ - public HashMap getStringToProbe() { - return stringToProbe; - } - - /** - * @param stringToProbe the stringToProbe to set - */ - public void setStringToProbe(HashMap stringToProbe) { - this.stringToProbe = stringToProbe; - } - - private void checkMatrix() throws IOException { - System.out.println("Detecting whether binary matrix corresponds to SNP and Probe definition."); - long expectedsize = (long) snps.length*probes.length; - System.out.println("Expected matrix size:\t"+expectedsize+" Z-scores"); - System.out.println("Checking matrix: "); - ProgressBar pb = new ProgressBar(snps.length); - long count = 0; - for(int i=0; i getStringToSNP() { + return stringToSNP; + } + + /** + * @param stringToSNP the stringToSNP to set + */ + public void setStringToSNP(HashMap stringToSNP) { + this.stringToSNP = stringToSNP; + } + + /** + * @return the probes + */ + public BinaryResultProbe[] getProbes() { + return probes; + } + + /** + * @param probes the probes to set + */ + public void setProbes(BinaryResultProbe[] probes) { + this.probes = probes; + } + + /** + * @return the stringToProbe + */ + public HashMap getStringToProbe() { + return stringToProbe; + } + + /** + * @param stringToProbe the stringToProbe to set + */ + public void setStringToProbe(HashMap stringToProbe) { + 
this.stringToProbe = stringToProbe; + } + + private void checkMatrix() throws IOException { + System.out.println("Detecting whether binary matrix corresponds to SNP and Probe definition."); + long expectedsize = (long) snps.length * probes.length; + System.out.println("Expected matrix size:\t" + expectedsize + " Z-scores"); + System.out.println("Checking matrix: "); + ProgressBar pb = new ProgressBar(snps.length); + long count = 0; + for (int i = 0; i < snps.length; i++) { + long index = snps[i].getzScoreIndex(); + long next = -1; + if (i + 1 < snps.length) { + next = snps[i + 1].getzScoreIndex(); + } + + try { + bgfm.read(index, next, probes.length); // for(int f=0; f probes = new ArrayList(); - BinaryResultProbe probe = readNextProbe(); + public BinaryResultProbe[] readAllProbes() throws IOException { + ArrayList probes = new ArrayList(); + BinaryResultProbe probe = readNextProbe(); - int ct = 0; - while (probe != null) { - probes.add(probe); - probe = readNextProbe(); - } + int ct = 0; + while (probe != null) { + probes.add(probe); + probe = readNextProbe(); + } - BinaryResultProbe[] probelist = new BinaryResultProbe[probes.size()]; - for (int p = 0; p < probelist.length; p++) { - probelist[p] = probes.get(p); + BinaryResultProbe[] probelist = new BinaryResultProbe[probes.size()]; + for (int p = 0; p < probelist.length; p++) { + probelist[p] = probes.get(p); + } + return probelist; } - return probelist; - } - - public BinaryResultProbe readNextProbe() throws IOException { - BinaryResultProbe p = null; - try { - p = new BinaryResultProbe(); - p.setId(in.readInt()); - p.setName(in.readUTF()); - p.setChr(in.readByte()); - p.setMidpoint(in.readInt()); - p.setAnnotation(in.readUTF()); - } catch (EOFException e) { - return null; + + public BinaryResultProbe readNextProbe() throws IOException { + BinaryResultProbe p = null; + try { + p = new BinaryResultProbe(); + p.setId(in.readInt()); + p.setName(in.readUTF().intern()); + p.setChr(in.readByte()); + 
p.setMidpoint(in.readInt()); + p.setAnnotation(in.readUTF().intern()); + } catch (EOFException e) { + return null; + } + return p; } - return p; - } } diff --git a/genetica-libraries/src/main/java/umcg/genetica/io/trityper/bin/BinaryResultSNPSummary.java b/genetica-libraries/src/main/java/umcg/genetica/io/trityper/bin/BinaryResultSNPSummary.java index 36c80a663..01bf43e3c 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/io/trityper/bin/BinaryResultSNPSummary.java +++ b/genetica-libraries/src/main/java/umcg/genetica/io/trityper/bin/BinaryResultSNPSummary.java @@ -92,7 +92,7 @@ public BinaryResultSNP readNextSNP() throws IOException { byte[] alleles = new byte[2]; s = new BinaryResultSNP(); s.setId(in.readInt()); - s.setName(in.readUTF()); + s.setName(in.readUTF().intern()); s.setChr(in.readByte()); s.setChrpos(in.readInt()); s.setHwe(in.readDouble()); diff --git a/genetica-libraries/src/main/java/umcg/genetica/io/trityper/probeannotation/ProbeTranslation.java b/genetica-libraries/src/main/java/umcg/genetica/io/trityper/probeannotation/ProbeTranslation.java index abfe6da5f..252f1ee89 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/io/trityper/probeannotation/ProbeTranslation.java +++ b/genetica-libraries/src/main/java/umcg/genetica/io/trityper/probeannotation/ProbeTranslation.java @@ -4,11 +4,12 @@ */ package umcg.genetica.io.trityper.probeannotation; -import java.io.IOException; -import java.util.HashMap; import umcg.genetica.io.text.TextFile; import umcg.genetica.io.trityper.util.ChrAnnotation; +import java.io.IOException; +import java.util.HashMap; + /** * * @author harmjan @@ -64,7 +65,7 @@ public void load(String probeAnnotation) throws IOException { String symbol = elems[4]; num = 0; - probeName[probeNum] = elems[0]; + probeName[probeNum] = elems[0].intern(); byte bchr = -1; try { @@ -107,7 +108,7 @@ public void load(String probeAnnotation) throws IOException { actualMappingPosition.put(probeNum, chrpos); probeChr[probeNum] = bchr; 
probeChrPos[probeNum] = bchrpos; - probeSymbol[probeNum] = symbol; + probeSymbol[probeNum] = symbol.intern(); for (int i = 5; i < elems.length; i++) { @@ -116,9 +117,9 @@ public void load(String probeAnnotation) throws IOException { try { String[] addresselems = arrayaddress.split(","); for (int q = 0; q < addresselems.length; q++) { - String address = addresselems[q]; + String address = addresselems[q].intern(); - oldToNewProbeAddress.put(annotationname[i - 5] + address, probeNum); + oldToNewProbeAddress.put(annotationname[i - 5] + address.intern(), probeNum); } From 994a49cc40c3b07b03d146da85cafed4711ae2d7 Mon Sep 17 00:00:00 2001 From: harmjanwestra Date: Sat, 11 Apr 2015 15:36:49 -0400 Subject: [PATCH 022/143] - Some edits to the eQTL interaction tool --- .../InteractionAnalysisConsoleGUI.java | 458 +++++++++--------- .../InteractionAnalysisMultiThreaded.java | 2 +- 2 files changed, 233 insertions(+), 227 deletions(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisConsoleGUI.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisConsoleGUI.java index c71df26fc..c742c3153 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisConsoleGUI.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisConsoleGUI.java @@ -4,258 +4,264 @@ */ package eqtlmappingpipeline.interactionanalysis; -import java.io.IOException; import umcg.genetica.console.ConsoleGUIElems; +import java.io.IOException; + /** - * * @author harm-jan */ public class InteractionAnalysisConsoleGUI { - enum RUNMODE { + enum RUNMODE { - NORMALIZE, CELLTYPESPECIFICEQTLMAPPING, PLOT - }; + NORMALIZE, CELLTYPESPECIFICEQTLMAPPING, PLOT + } - /** - * @param args the command line arguments - */ - public InteractionAnalysisConsoleGUI(String[] args) { - String inexpraw = null; - String out = null; - String 
celltypespecificprobefile = null; - String mdscomponents = null; - String cellcountfile = null; - String in = null; - String gte = null; - String snpprobecombofile = null; - String covariates = null; - String inexp = null; - String cohort = null; - RUNMODE step = null; - boolean binaryoutput = false; + ; - boolean robust = false; - boolean sem = false; - boolean fullStats = false; + /** + * @param args the command line arguments + */ + public InteractionAnalysisConsoleGUI(String[] args) { + String inexpraw = null; + String out = null; + String celltypespecificprobefile = null; + String mdscomponents = null; + String cellcountfile = null; + String in = null; + String gte = null; + String snpprobecombofile = null; + String covariates = null; + String inexp = null; + String cohort = null; + RUNMODE step = null; + boolean binaryoutput = false; - boolean matchCovariateNamesToExpressionProbeNames = false; - Integer nrThreads = null; - String covariateList = null; + boolean robust = false; + boolean forceNormal = false; + boolean fullStats = false; - for (int i = 0; i < args.length; i++) { - String arg = args[i]; - String val = null; + boolean matchCovariateNamesToExpressionProbeNames = false; + Integer nrThreads = null; + String covariateList = null; - if (i + 1 < args.length) { - val = args[i + 1]; - } + for (int i = 0; i < args.length; i++) { + String arg = args[i]; + String val = null; - if (arg.equals("--step")) { - if (val == null) { + if (i + 1 < args.length) { + val = args[i + 1]; + } - } else if (val.equals("normalize")) { - step = RUNMODE.NORMALIZE; - } else if (val.equals("mapeqtls")) { - step = RUNMODE.CELLTYPESPECIFICEQTLMAPPING; - } else if (val.equals("plot")) { - step = RUNMODE.PLOT; - } - } else if (arg.equals("--inexpraw")) { - inexpraw = val; - } else if (arg.equals("--covariatelist")) { - covariateList = val; - } else if (arg.equals("--binary")) { - binaryoutput = true; - } else if (arg.equals("--robust")) { - System.out.println("WARNING: using R 
connection!! Make sure Rserve and sandwich are installed"); - robust = true; - } else if (arg.equals("--sem")) { - System.out.println("WARNING: using R connection!! Make sure Rserve and lavaan are installed"); - sem = true; - }else if (arg.equals("--fullstats")) { - fullStats = true; - } else if (arg.equals("--covariates")) { - covariates = val; - } else if (arg.equals("--inexp")) { - inexp = val; - } else if (arg.equals("--out")) { - out = val; - } else if (arg.equals("--in")) { - in = val; - } else if (arg.equals("--celltypespecificprobes")) { - celltypespecificprobefile = val; - } else if (arg.equals("--mdscomponents")) { - mdscomponents = val; - } else if (arg.equals("--cellcounts")) { - cellcountfile = val; - } else if (arg.equals("--gte")) { - gte = val; - } else if (arg.equals("--snpprobe")) { - snpprobecombofile = val; - } else if (arg.equals("--cohort")) { - cohort = val; - } else if (arg.equals("--testMatchingCovariates")) { - matchCovariateNamesToExpressionProbeNames = true; - } else if (arg.equals("--threads")) { - try { - nrThreads = Integer.parseInt(val); - } catch (NumberFormatException e) { - System.err.println("ERROR: value supplied for --threads is not a numerical value."); - System.exit(-1); - } - if (nrThreads != null && nrThreads < 1) { - System.err.println("ERROR: value supplied for --threads is smaller than 1."); - System.exit(-1); - } + if (arg.equals("--step")) { + if (val == null) { - } - } + } else if (val.equals("normalize")) { + step = RUNMODE.NORMALIZE; + } else if (val.equals("mapeqtls")) { + step = RUNMODE.CELLTYPESPECIFICEQTLMAPPING; + } else if (val.equals("plot")) { + step = RUNMODE.PLOT; + } + } else if (arg.equals("--inexpraw")) { + inexpraw = val; + } else if (arg.equals("--covariatelist")) { + covariateList = val; + } else if (arg.equals("--binary")) { + binaryoutput = true; + } else if (arg.equals("--robust")) { + System.out.println("WARNING: using R connection!! 
Make sure Rserve and sandwich are installed"); + robust = true; + } else if (arg.equals("--forceNormal")) { + forceNormal = true; + } else if (arg.equals("--fullstats")) { + fullStats = true; + } else if (arg.equals("--covariates")) { + covariates = val; + } else if (arg.equals("--inexp")) { + inexp = val; + } else if (arg.equals("--out")) { + out = val; + } else if (arg.equals("--in")) { + in = val; + } else if (arg.equals("--celltypespecificprobes")) { + celltypespecificprobefile = val; + } else if (arg.equals("--mdscomponents")) { + mdscomponents = val; + } else if (arg.equals("--cellcounts")) { + cellcountfile = val; + } else if (arg.equals("--gte")) { + gte = val; + } else if (arg.equals("--snpprobe")) { + snpprobecombofile = val; + } else if (arg.equals("--cohort")) { + cohort = val; + } else if (arg.equals("--testMatchingCovariates")) { + matchCovariateNamesToExpressionProbeNames = true; + } else if (arg.equals("--threads")) { + try { + nrThreads = Integer.parseInt(val); + } catch (NumberFormatException e) { + System.err.println("ERROR: value supplied for --threads is not a numerical value."); + System.exit(-1); + } + if (nrThreads != null && nrThreads < 1) { + System.err.println("ERROR: value supplied for --threads is smaller than 1."); + System.exit(-1); + } - if (step == null) { - System.err.println("ERROR: please select the step to run."); - printUsage(); - } + } + } - try { - if (step == RUNMODE.PLOT) { - System.out.println("Interaction plotter"); - boolean kill = false; - if (covariates == null) { - System.err.println("Error: please supply --covariates"); - kill = true; - } - if (in == null) { - System.err.println("Error: please supply --in"); - kill = true; - } - if (inexp == null) { - System.err.println("Error: please supply --inexp"); - kill = true; - } - if (out == null) { - System.err.println("Error: please supply --out"); - kill = true; - } - if (kill) { - System.err.println(""); - printUsage(); - } else { - InteractionPlotter plotter = new 
InteractionPlotter(snpprobecombofile, in, inexp, covariates, gte, out); - } - } else { - InteractionAnalysisMultiThreaded qmt = new InteractionAnalysisMultiThreaded(); - if (step == RUNMODE.NORMALIZE) { - System.out.println("Cell type specific cis-eQTL normalization"); - boolean kill = false; - if (inexpraw == null) { - System.err.println("Error: please supply --inexpraw"); - kill = true; - } - if (out == null) { - System.err.println("Error: please supply --out"); - kill = true; - } - if (celltypespecificprobefile == null) { - System.err.println("Error: please supply --celltypespecificprobes"); - kill = true; - } - if (kill) { - System.err.println(""); - printUsage(); - } else { - qmt.prepareDataForCelltypeSpecificEQTLMapping(inexpraw, out, null, celltypespecificprobefile, mdscomponents, cellcountfile, gte, nrThreads); - } - } else if (step == RUNMODE.CELLTYPESPECIFICEQTLMAPPING) { - System.out.println("Cell type specific cis-eQTL mapping"); - boolean kill = false; - if (covariates == null) { - System.err.println("Error: please supply --covariates"); - kill = true; - } - if (inexp == null) { - System.err.println("Error: please supply --inexp"); - kill = true; - } - if (out == null) { - System.err.println("Error: please supply --out"); - kill = true; - } - if (cellcountfile == null) { + if (step == null) { + System.err.println("ERROR: please select the step to run."); + printUsage(); + } + + try { + if (step == RUNMODE.PLOT) { + System.out.println("Interaction plotter"); + boolean kill = false; + if (covariates == null) { + System.err.println("Error: please supply --covariates"); + kill = true; + } + if (in == null) { + System.err.println("Error: please supply --in"); + kill = true; + } + if (inexp == null) { + System.err.println("Error: please supply --inexp"); + kill = true; + } + if (out == null) { + System.err.println("Error: please supply --out"); + kill = true; + } + if (kill) { + System.err.println(""); + printUsage(); + } else { + InteractionPlotter plotter 
= new InteractionPlotter(snpprobecombofile, in, inexp, covariates, gte, out); + } + } else { + InteractionAnalysisMultiThreaded qmt = new InteractionAnalysisMultiThreaded(); + if (step == RUNMODE.NORMALIZE) { + System.out.println("Cell type specific cis-eQTL normalization"); + boolean kill = false; + if (inexpraw == null) { + System.err.println("Error: please supply --inexpraw"); + kill = true; + } + if (out == null) { + System.err.println("Error: please supply --out"); + kill = true; + } + if (celltypespecificprobefile == null) { + System.err.println("Error: please supply --celltypespecificprobes"); + kill = true; + } + if (kill) { + System.err.println(""); + printUsage(); + } else { + qmt.prepareDataForCelltypeSpecificEQTLMapping(inexpraw, out, null, celltypespecificprobefile, mdscomponents, cellcountfile, gte, nrThreads); + } + } else if (step == RUNMODE.CELLTYPESPECIFICEQTLMAPPING) { + System.out.println("Cell type specific cis-eQTL mapping"); + boolean kill = false; + if (covariates == null) { + System.err.println("Error: please supply --covariates"); + kill = true; + } + if (inexp == null) { + System.err.println("Error: please supply --inexp"); + kill = true; + } + if (out == null) { + System.err.println("Error: please supply --out"); + kill = true; + } + if (cellcountfile == null) { // System.err.println("Warning: yo please supply --cellcounts"); - //kill = true; - } - if ((binaryoutput == true) && (cohort == null)) { - System.err.println("Error: please supply --cohort (required in binary output mode)"); - kill = true; - } + //kill = true; + } + if ((binaryoutput == true) && (cohort == null)) { + System.err.println("Error: please supply --cohort (required in binary output mode)"); + kill = true; + } + + if (kill) { + System.err.println(""); + printUsage(); + } else { + qmt.runInteractionAnalysis(inexp, + covariates, + in, + gte, + snpprobecombofile, + nrThreads, + out, + covariateList, + forceNormal, + robust, + fullStats, + binaryoutput, + cohort); - if 
(kill) { - System.err.println(""); - printUsage(); - } else { - qmt.runInteractionAnalysis(inexp, - covariates, - in, - gte, - snpprobecombofile, - nrThreads, - out, - covariateList, sem, robust, fullStats, binaryoutput, cohort); -// qmt.runCelltypeSpecificEQTLMapping(inexppccorrected, inexpraw, in, gte, snpprobecombofile, cellcountfile, nrThreads, out, testAllCovariatesInCovariateData); - } + } - } - } - } catch (IOException e) { - e.printStackTrace(); - } catch (Exception e) { - e.printStackTrace(); - } - } + } + } + } catch (IOException e) { + e.printStackTrace(); + } catch (Exception e) { + e.printStackTrace(); + } + } - private void printUsage() { - System.out.print("\nCell type specific eQTL Mapping\n" + ConsoleGUIElems.LINE); - System.out.println("This program uses an OLS model to test eQTLs for cell type specificity."); + private void printUsage() { + System.out.print("\nCell type specific eQTL Mapping\n" + ConsoleGUIElems.LINE); + System.out.println("This program uses an OLS model to test eQTLs for cell type specificity."); - System.out.println(""); - System.out.print("Step 1: Normalization\n" + ConsoleGUIElems.LINE); - System.out.println("--step normalize\t\t\t\tTell the program to run normalization.\n" - + "--inexpraw\t\t\tdir\t\tLocation of the gene expression data\n" - + "--out\t\t\t\tdir\t\tLocation where the output should be stored\n" - + "--celltypespecificprobes\tString\t\tLocation of the file containing list of cell-type specific probes\n" - + "--mdscomponents\t\t\tString\t\tLocation of the file containing MDS components (optional)\n" - + "--gte\t\t\t\tString\t\tLocation of the genotype to expression coupling file (optional)\n" - + "--cellcounts\t\t\tString\t\tLocation of the cell count file (optional)\n"); + System.out.println(""); + System.out.print("Step 1: Normalization\n" + ConsoleGUIElems.LINE); + System.out.println("--step normalize\t\t\t\tTell the program to run normalization.\n" + + "--inexpraw\t\t\tdir\t\tLocation of the gene expression 
data\n" + + "--out\t\t\t\tdir\t\tLocation where the output should be stored\n" + + "--celltypespecificprobes\tString\t\tLocation of the file containing list of cell-type specific probes\n" + + "--mdscomponents\t\t\tString\t\tLocation of the file containing MDS components (optional)\n" + + "--gte\t\t\t\tString\t\tLocation of the genotype to expression coupling file (optional)\n" + + "--cellcounts\t\t\tString\t\tLocation of the cell count file (optional)\n"); - System.out.println(""); + System.out.println(""); - System.out.print("Step 2: Mapping eQTLs with interaction model\n" + ConsoleGUIElems.LINE); - System.out.println("--step mapeqtls\t\t\t\tTell the program to map eQTLs.\n" - + "--inexp\tdir\t\tLocation of the dependent dataset\n" - + "--covariates\t\tdir\t\tLocation of covariate file (may contain one or more covariates)\n" - + "--gte\t\t\tString\t\tLocation of the genotype to expression coupling file\n" - + "--in\t\t\tdir\t\tLocation of the genotype data\n" - + "--out\t\t\tdir\t\tLocation where the output should be stored\n" - + "--snpprobe\t\tString\t\tLocation of the SNP-Probe combination file\n" - + "--threads\t\tInteger\t\tThe number of threads to use for calculations.\n" - + "--covariatelist\t\tList of covariates to test\n" - + "--robust\t\tUse robust estimates of standard errors (Requires Rserve and sandwich packages, and R)\n" - + "--sem\t\tStructural equation modeling (requires RServe and Lavaan)\n" - + "--fullstats\t\tOutput extra columns of statistics (SEs and Betas)"); + System.out.print("Step 2: Mapping eQTLs with interaction model\n" + ConsoleGUIElems.LINE); + System.out.println("--step mapeqtls\t\t\t\tTell the program to map eQTLs.\n" + + "--inexp\tdir\t\tLocation of the dependent dataset\n" + + "--covariates\t\tdir\t\tLocation of covariate file (may contain one or more covariates)\n" + + "--gte\t\t\tString\t\tLocation of the genotype to expression coupling file\n" + + "--in\t\t\tdir\t\tLocation of the genotype data\n" + + 
"--out\t\t\tdir\t\tLocation where the output should be stored\n" + + "--snpprobe\t\tString\t\tLocation of the SNP-Probe combination file\n" + + "--threads\t\tInteger\t\tThe number of threads to use for calculations.\n" + + "--covariatelist\t\tList of covariates to test\n" + + "--robust\t\tUse robust estimates of standard errors (Requires Rserve and sandwich packages, and R)\n" + + "--forceNormal\t\tForce a normal distribution on the covariate and gene expression data.\n" + + "--fullstats\t\tOutput extra columns of statistics (SEs and Betas)"); - System.out.println(""); + System.out.println(""); - System.out.print("Step 3: Plot effects\n" + ConsoleGUIElems.LINE); - System.out.println("--step plot\t\t\t\tTell the program to plot interaction effects.\n" - + "--inexp\tdir\t\tLocation of the dependent dataset\n" - + "--covariates\t\tdir\t\tLocation of covariate file (the raw gene expression data or the matrix containing the covariates to analyze)\n" - + "--gte\t\t\tString\t\tLocation of the genotype to expression coupling file\n" - + "--in\t\t\tdir\t\tLocation of the genotype data\n" - + "--out\t\t\tdir\t\tLocation where the output should be stored\n" - + "--snpprobe\t\tString\t\tLocation of the SNP-Covariate-Probe combination file\n" - ); + System.out.print("Step 3: Plot effects\n" + ConsoleGUIElems.LINE); + System.out.println("--step plot\t\t\t\tTell the program to plot interaction effects.\n" + + "--inexp\tdir\t\tLocation of the dependent dataset\n" + + "--covariates\t\tdir\t\tLocation of covariate file (the raw gene expression data or the matrix containing the covariates to analyze)\n" + + "--gte\t\t\tString\t\tLocation of the genotype to expression coupling file\n" + + "--in\t\t\tdir\t\tLocation of the genotype data\n" + + "--out\t\t\tdir\t\tLocation where the output should be stored\n" + + "--snpprobe\t\tString\t\tLocation of the SNP-Covariate-Probe combination file\n" + ); - } + } } diff --git 
a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java index 0d7f93e62..87caf1e66 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisMultiThreaded.java @@ -577,7 +577,7 @@ public void runInteractionAnalysis(String inExpPCCorrected, String covariateFile } double[][] covariates = covariateData.getRawData(); - for (int row = 0; row < expressiondata.length; row++) { + for (int row = 0; row < covariates.length; row++) { covariates[row] = norm.forceNormal(covariates[row]); } covariateData.setRawData(covariates); From 18e5f9c5657c13bf6a1a2490d05e036c7a20d3cb Mon Sep 17 00:00:00 2001 From: Marc Jan Bonder Date: Thu, 16 Apr 2015 16:44:47 +0200 Subject: [PATCH 023/143] Change to new hashmap type --- .../main/java/umcg/genetica/io/probemapping/reading.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/genetica-libraries/src/main/java/umcg/genetica/io/probemapping/reading.java b/genetica-libraries/src/main/java/umcg/genetica/io/probemapping/reading.java index 25050ca48..4d6638f01 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/io/probemapping/reading.java +++ b/genetica-libraries/src/main/java/umcg/genetica/io/probemapping/reading.java @@ -4,6 +4,7 @@ */ package umcg.genetica.io.probemapping; +import gnu.trove.map.hash.THashMap; import java.awt.TextField; import java.io.BufferedReader; import java.io.File; @@ -398,8 +399,8 @@ private static int getNrNs(String string) { * @param sizeMap * @return */ - public static HashMap> readAnnotationFile(String annotationFile, int storingId, int sizeMap) { - HashMap> probeInfo = new HashMap>((int) Math.ceil(sizeMap / 0.75)); + public static THashMap> 
readAnnotationFile(String annotationFile, int storingId, int sizeMap) { + THashMap> probeInfo = new THashMap>((int) Math.ceil(sizeMap / 0.75)); int entryId = 0; try { TextFile in = new TextFile(annotationFile, TextFile.R); @@ -411,7 +412,7 @@ public static HashMap> readAnnotationFile(String while ((str = in.readLine()) != null) { String[] strParts = SPLIT_ON_TAB.split(str); - HashMap t = new HashMap((int) Math.ceil(header.length / 0.75)); + THashMap t = new THashMap((int) Math.ceil(header.length / 0.75)); for (int i = 0; i < strParts.length; ++i) { if (i != storingId) { t.put(header[i], strParts[i]); From 59a7ac4f5b32be6ebd62c88d5825745ca134b233 Mon Sep 17 00:00:00 2001 From: Marc Jan Bonder Date: Thu, 16 Apr 2015 16:45:35 +0200 Subject: [PATCH 024/143] Fix bug in the binarymeta-analysis the results buffer was not cleared --- .../microbialmetanalysis/BinaryMicrobePcaAnalysis.java | 2 +- .../westrah/binarymetaanalyzer/BinaryMetaAnalysis.java | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/BinaryMetaAnalyzer/src/main/java/nl/umcg/bondermj/microbialmetanalysis/BinaryMicrobePcaAnalysis.java b/BinaryMetaAnalyzer/src/main/java/nl/umcg/bondermj/microbialmetanalysis/BinaryMicrobePcaAnalysis.java index c75dbcec7..4c0fd1d3b 100644 --- a/BinaryMetaAnalyzer/src/main/java/nl/umcg/bondermj/microbialmetanalysis/BinaryMicrobePcaAnalysis.java +++ b/BinaryMetaAnalyzer/src/main/java/nl/umcg/bondermj/microbialmetanalysis/BinaryMicrobePcaAnalysis.java @@ -105,7 +105,7 @@ public void run(int bufferSize) throws IOException { loadProbeAnnotation(); for (int permutation = 0; permutation < settings.getNrPermutations() + 1; permutation++) { - finalEQTLs = new QTL[bufferSize]; + Arrays.fill(finalEQTLs, null); locationToStoreResult = 0; bufferHasOverFlown = false; maxSavedPvalue = -Double.MAX_VALUE; diff --git a/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java 
b/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java index 03c109bcf..83e452ab9 100644 --- a/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java +++ b/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java @@ -46,7 +46,7 @@ public static void main(String[] args) { settingsFile = args[0]; } else { - System.out.println("Usage: settings.xml replacetext replacetextwith"); + System.out.println("Usage of the binary meta-analysis: settings.xml replacetext replacetextwith"); System.exit(-1); } @@ -106,6 +106,7 @@ public void run() throws IOException { loadProbeAnnotation(); for (int permutation = 0; permutation < settings.getNrPermutations() + 1; permutation++) { + Arrays.fill(finalEQTLs, null); // create dataset objects System.out.println("Running permutation " + permutation); datasets = new BinaryMetaAnalysisDataset[settings.getDatasetlocations().size()]; @@ -245,9 +246,10 @@ public void run() throws IOException { double metaZ = ZScores.getWeightedZ(finalZScores[probe], sampleSizes); double p = Descriptives.convertZscoreToPvalue(metaZ); - if (!Double.isNaN(p)) { + if (!Double.isNaN(p) && !Double.isNaN(metaZ)) { // create output object QTL q = new QTL(p, t, snp, BaseAnnot.toByte(alleleAssessed), metaZ, BaseAnnot.toByteArray(alleles), finalZScores[probe], sampleSizes); // sort buffer if needed. 
+ System.out.println(q.getSNPId()+"\t"+q.getMetaTrait().getMetaTraitName()+"\t"+q.toString()); addEQTL(q); } else { // if (!printed) { @@ -307,8 +309,9 @@ public void run() throws IOException { double metaAnalysisP = Descriptives.convertZscoreToPvalue(metaAnalysisZ); // create output object - if (!Double.isNaN(metaAnalysisP)) { + if (!Double.isNaN(metaAnalysisP) && !Double.isNaN(metaAnalysisZ)) { QTL q = new QTL(metaAnalysisP, t, snp, BaseAnnot.toByte(alleleAssessed), metaAnalysisZ, BaseAnnot.toByteArray(alleles), finalZScores[probe], sampleSizes); // sort buffer if needed. + System.out.println(q.getSNPId()+"\t"+q.getMetaTrait().getMetaTraitName()+"\t"+q.toString()); addEQTL(q); } } From e2c630a760039d9381eb76e9ef7830f31e9c71ce Mon Sep 17 00:00:00 2001 From: Marc Jan Bonder Date: Thu, 16 Apr 2015 16:46:13 +0200 Subject: [PATCH 025/143] Version bump --- BinaryMetaAnalyzer/pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/BinaryMetaAnalyzer/pom.xml b/BinaryMetaAnalyzer/pom.xml index 09d582d05..3eda61fff 100644 --- a/BinaryMetaAnalyzer/pom.xml +++ b/BinaryMetaAnalyzer/pom.xml @@ -7,13 +7,13 @@ 1.0.2-SNAPSHOT BinaryMetaAnalyzer - 1.0.4-SNAPSHOT + 1.0.5-SNAPSHOT jar ${project.groupId} genetica-libraries - 1.0.5 + 1.0.6 BinaryMetaAnalyzer From 9ebc397a849edf4f29a25e2b608aada18fb88654 Mon Sep 17 00:00:00 2001 From: Marc Jan Bonder Date: Thu, 16 Apr 2015 22:31:08 +0200 Subject: [PATCH 026/143] removed debug souts and added snapshot --- BinaryMetaAnalyzer/pom.xml | 4 ++-- .../umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/BinaryMetaAnalyzer/pom.xml b/BinaryMetaAnalyzer/pom.xml index 3eda61fff..d8010dbd1 100644 --- a/BinaryMetaAnalyzer/pom.xml +++ b/BinaryMetaAnalyzer/pom.xml @@ -7,13 +7,13 @@ 1.0.2-SNAPSHOT BinaryMetaAnalyzer - 1.0.5-SNAPSHOT + 1.0.6-SNAPSHOT jar ${project.groupId} genetica-libraries - 1.0.6 + 1.0.6-SNAPSHOT BinaryMetaAnalyzer diff --git 
a/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java b/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java index 83e452ab9..5a66ec78e 100644 --- a/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java +++ b/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java @@ -249,7 +249,7 @@ public void run() throws IOException { if (!Double.isNaN(p) && !Double.isNaN(metaZ)) { // create output object QTL q = new QTL(p, t, snp, BaseAnnot.toByte(alleleAssessed), metaZ, BaseAnnot.toByteArray(alleles), finalZScores[probe], sampleSizes); // sort buffer if needed. - System.out.println(q.getSNPId()+"\t"+q.getMetaTrait().getMetaTraitName()+"\t"+q.toString()); +// System.out.println(q.getSNPId()+"\t"+q.getMetaTrait().getMetaTraitName()+"\t"+q.toString()); addEQTL(q); } else { // if (!printed) { @@ -311,7 +311,7 @@ public void run() throws IOException { // create output object if (!Double.isNaN(metaAnalysisP) && !Double.isNaN(metaAnalysisZ)) { QTL q = new QTL(metaAnalysisP, t, snp, BaseAnnot.toByte(alleleAssessed), metaAnalysisZ, BaseAnnot.toByteArray(alleles), finalZScores[probe], sampleSizes); // sort buffer if needed. 
- System.out.println(q.getSNPId()+"\t"+q.getMetaTrait().getMetaTraitName()+"\t"+q.toString()); +// System.out.println(q.getSNPId()+"\t"+q.getMetaTrait().getMetaTraitName()+"\t"+q.toString()); addEQTL(q); } } From a0a4b4ef25b2ef40b7c59af5d417603f363c8897 Mon Sep 17 00:00:00 2001 From: Bonder-MJ Date: Fri, 17 Apr 2015 09:22:45 +0200 Subject: [PATCH 027/143] fix results buffer binaryMeta --- .../BinaryMicrobePcaAnalysis.java | 10 +- .../BinaryMetaAnalysis.java | 9 +- .../metaqtl3/ResultProcessorThread.java | 782 +++++++++--------- 3 files changed, 403 insertions(+), 398 deletions(-) diff --git a/BinaryMetaAnalyzer/src/main/java/nl/umcg/bondermj/microbialmetanalysis/BinaryMicrobePcaAnalysis.java b/BinaryMetaAnalyzer/src/main/java/nl/umcg/bondermj/microbialmetanalysis/BinaryMicrobePcaAnalysis.java index 4c0fd1d3b..8279566c0 100644 --- a/BinaryMetaAnalyzer/src/main/java/nl/umcg/bondermj/microbialmetanalysis/BinaryMicrobePcaAnalysis.java +++ b/BinaryMetaAnalyzer/src/main/java/nl/umcg/bondermj/microbialmetanalysis/BinaryMicrobePcaAnalysis.java @@ -105,9 +105,7 @@ public void run(int bufferSize) throws IOException { loadProbeAnnotation(); for (int permutation = 0; permutation < settings.getNrPermutations() + 1; permutation++) { - Arrays.fill(finalEQTLs, null); - locationToStoreResult = 0; - bufferHasOverFlown = false; + clearResultsBuffer(); maxSavedPvalue = -Double.MAX_VALUE; // create dataset objects System.out.println("Running permutation " + permutation); @@ -546,4 +544,10 @@ private void writeBuffer(String outdir, int permutation) throws IOException { System.out.println( "Done."); } + + private void clearResultsBuffer() { + Arrays.fill(finalEQTLs, null); + bufferHasOverFlown = false; + locationToStoreResult=0; + } } diff --git a/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java b/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java index 5a66ec78e..85d5e8f91 100644 --- 
a/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java +++ b/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java @@ -106,7 +106,8 @@ public void run() throws IOException { loadProbeAnnotation(); for (int permutation = 0; permutation < settings.getNrPermutations() + 1; permutation++) { - Arrays.fill(finalEQTLs, null); + clearResultsBuffer(); + // create dataset objects System.out.println("Running permutation " + permutation); datasets = new BinaryMetaAnalysisDataset[settings.getDatasetlocations().size()]; @@ -615,4 +616,10 @@ private void writeBuffer(String outdir, int permutation) throws IOException { System.out.println( "Done."); } + + private void clearResultsBuffer() { + Arrays.fill(finalEQTLs, null); + bufferHasOverFlown = false; + locationToStoreResult=0; + } } diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/ResultProcessorThread.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/ResultProcessorThread.java index 332439da2..65b15aada 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/ResultProcessorThread.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/ResultProcessorThread.java @@ -47,435 +47,431 @@ public class ResultProcessorThread extends Thread { // private TextFile[] zScoreBinaryFile; // private TextFile zScoreMetaAnalysisFile; // private int m_numdatasets = 0; - long nrZ = 0; - private boolean m_createBinaryFiles = false; - private TriTyperGeneticalGenomicsDataset[] m_gg = null; - private boolean m_cisOnly; - private IntMatrix2D m_probeTranslation; - private int m_midpointprobedist; - private final String m_outputdir; - private final boolean m_permuting; - private final int m_permutationround; - private final boolean m_createTEXTFiles; - private final String[] m_probeList; - private final LinkedBlockingQueue m_queue; - private final WorkPackage[] m_availableWorkPackages; - 
private long nrTestsPerformed = 0; - private QTL[] finalEQTLs; - private double maxSavedPvalue = -Double.MAX_VALUE; - private int locationToStoreResult = 0; - private boolean bufferHasOverFlown = false; - private boolean sorted = false; - private int m_maxResults = 0; - public double highestP = Double.MAX_VALUE; - private int nrSNPsTested = 0; - private final boolean m_useAbsoluteZScore; - private BinaryFile[] zScoreBinaryFile; - private BinaryFile zScoreMetaAnalysisFile; - private TextFile zScoreMetaAnalysisRowNamesFile; - private TextFile[] zScoreRowNamesFile; - - public ResultProcessorThread(int nrThreads, LinkedBlockingQueue queue, boolean chargeOutput, - TriTyperGeneticalGenomicsDataset[] gg, Settings settings, IntMatrix2D pprobeTranslation, - boolean permuting, int round, String[] snplist, String[] probelist, WorkPackage[] allPackages) { - m_availableWorkPackages = allPackages; - m_createBinaryFiles = settings.createBinaryOutputFiles; - m_createTEXTFiles = settings.createTEXTOutputFiles; - m_useAbsoluteZScore = settings.useAbsoluteZScorePValue; - m_queue = queue; - m_outputdir = settings.outputReportsDir; - m_permuting = permuting; - m_permutationround = round; - m_probeTranslation = pprobeTranslation; - m_gg = gg; - m_midpointprobedist = settings.ciseQTLAnalysMaxSNPProbeMidPointDistance; - m_cisOnly = (settings.cisAnalysis && !settings.transAnalysis); - - m_probeList = probelist; - m_maxResults = settings.maxNrMostSignificantEQTLs; - - int tmpbuffersize = (m_maxResults / 10); - - if (tmpbuffersize == 0) { - tmpbuffersize = 10; - } else if (tmpbuffersize > 250000) { - tmpbuffersize = 250000; - } + long nrZ = 0; + private boolean m_createBinaryFiles = false; + private TriTyperGeneticalGenomicsDataset[] m_gg = null; + private boolean m_cisOnly; + private IntMatrix2D m_probeTranslation; + private int m_midpointprobedist; + private final String m_outputdir; + private final boolean m_permuting; + private final int m_permutationround; + private final boolean 
m_createTEXTFiles; + private final String[] m_probeList; + private final LinkedBlockingQueue m_queue; + private final WorkPackage[] m_availableWorkPackages; + private long nrTestsPerformed = 0; + private QTL[] finalEQTLs; + private double maxSavedPvalue = -Double.MAX_VALUE; + private int locationToStoreResult = 0; + private boolean bufferHasOverFlown = false; + private boolean sorted = false; + private int m_maxResults = 0; + public double highestP = Double.MAX_VALUE; + private int nrSNPsTested = 0; + private final boolean m_useAbsoluteZScore; + private BinaryFile[] zScoreBinaryFile; + private BinaryFile zScoreMetaAnalysisFile; + private TextFile zScoreMetaAnalysisRowNamesFile; + private TextFile[] zScoreRowNamesFile; + + public ResultProcessorThread(int nrThreads, LinkedBlockingQueue queue, boolean chargeOutput, + TriTyperGeneticalGenomicsDataset[] gg, Settings settings, IntMatrix2D pprobeTranslation, + boolean permuting, int round, String[] snplist, String[] probelist, WorkPackage[] allPackages) { + m_availableWorkPackages = allPackages; + m_createBinaryFiles = settings.createBinaryOutputFiles; + m_createTEXTFiles = settings.createTEXTOutputFiles; + m_useAbsoluteZScore = settings.useAbsoluteZScorePValue; + m_queue = queue; + m_outputdir = settings.outputReportsDir; + m_permuting = permuting; + m_permutationround = round; + m_probeTranslation = pprobeTranslation; + m_gg = gg; + m_midpointprobedist = settings.ciseQTLAnalysMaxSNPProbeMidPointDistance; + m_cisOnly = (settings.cisAnalysis && !settings.transAnalysis); + + m_probeList = probelist; + m_maxResults = settings.maxNrMostSignificantEQTLs; + + int tmpbuffersize = (m_maxResults / 10); + + if (tmpbuffersize == 0) { + tmpbuffersize = 10; + } else if (tmpbuffersize > 250000) { + tmpbuffersize = 250000; + } // m_totalNumberOfProbes = probelist.length; // m_pvaluePlotThreshold = settings.plotOutputPValueCutOff; // tmpEQTLBuffer = new QTL[tmpbuffersize]; // m_result_counter = 0; // m_numdatasets = m_gg.length; - 
finalEQTLs = new QTL[(m_maxResults + tmpbuffersize)]; - nrSNPsTested = 0; - } + finalEQTLs = new QTL[(m_maxResults + tmpbuffersize)]; + nrSNPsTested = 0; + } - @Override - public void run() { + @Override + public void run() { // nrProcessed = 0; - try { - if (m_createBinaryFiles) { - zScoreBinaryFile = new BinaryFile[m_gg.length]; - zScoreRowNamesFile = new TextFile[m_gg.length]; - if (m_gg.length > 1) { - String metaAnalysisFileName = m_outputdir + "MetaAnalysis"; - if (m_permuting) { - metaAnalysisFileName += "-PermutationRound-" + m_permutationround; - } - zScoreMetaAnalysisFile = new BinaryFile(metaAnalysisFileName + ".dat", BinaryFile.W); - // write magic number - if (m_cisOnly) { - zScoreMetaAnalysisFile.writeInt(1); - } else { - zScoreMetaAnalysisFile.writeInt(0); - } - - zScoreMetaAnalysisRowNamesFile = new TextFile(metaAnalysisFileName + "-RowNames.txt.gz", TextFile.W); - zScoreMetaAnalysisRowNamesFile.writeln("SNP\tAlleles\tMinorAllele\tAlleleAssessed\tNrCalled"); - TextFile tf = new TextFile(metaAnalysisFileName + "-ColNames.txt.gz", TextFile.W); - tf.writeList(Arrays.asList(m_probeList)); - tf.close(); - } - for (int d = 0; d < m_gg.length; d++) { - String fileName = m_outputdir + m_gg[d].getSettings().name; - if (m_permuting) { - fileName += "-PermutationRound-" + m_permutationround; - } - zScoreBinaryFile[d] = new BinaryFile(fileName + ".dat", BinaryFile.W); - // write magic number - if (m_cisOnly) { - zScoreBinaryFile[d].writeInt(1); - } else { - zScoreBinaryFile[d].writeInt(0); - } - - TextFile tf = new TextFile(fileName + "-ColNames.txt.gz", TextFile.W); - tf.writeList(Arrays.asList(m_probeList)); - tf.close(); - zScoreRowNamesFile[d] = new TextFile(fileName + "-RowNames.txt.gz", TextFile.W); - zScoreRowNamesFile[d].writeln("SNP\tAlleles\tMinorAllele\tAlleleAssessed\tNrCalled\tMaf\tHWE\tCallRate"); - } - } - - ProgressBar progressbar = new ProgressBar(m_availableWorkPackages.length); - boolean poison = false; - - while (!poison) { - WorkPackage wp 
= m_queue.take(); - Result r = wp.results; - if (wp.getHasResults()) { - nrSNPsTested++; - } - - if (r.poison) { - poison = true; - } else if (r.pvalues != null) { - - nrTestsPerformed += wp.getNumTested(); - - double[] pvalues = r.pvalues; - - //Is this working? - if (m_createBinaryFiles && !poison) { - writeBinaryResult(r); - } - - if (m_createTEXTFiles && !poison) { - // classic textual output. - - for (int p = 0; p < pvalues.length; p++) { - double pval = pvalues[p]; - - if (!Double.isNaN(pval) && pval <= highestP) { - double[][] corr = r.correlations; - double[] correlations = new double[corr.length]; - double[] zscores = new double[corr.length]; - int[] samples = new int[corr.length]; - - double[] fc = new double[corr.length]; - double[] beta = new double[corr.length]; - double[] betase = new double[corr.length]; - - for (int d = 0; d < correlations.length; d++) { - if (Double.isNaN(corr[d][p])) { - correlations[d] = Double.NaN; - zscores[d] = Double.NaN; - samples[d] = -9; - fc[d] = Double.NaN; - beta[d] = Double.NaN; - betase[d] = Double.NaN; - } else { - correlations[d] = corr[d][p]; - if (m_useAbsoluteZScore) { - zscores[d] = Math.abs(r.zscores[d][p]); - } else { - zscores[d] = r.zscores[d][p]; - } - - samples[d] = r.numSamples[d]; - fc[d] = r.fc[d][p]; - beta[d] = r.beta[d][p]; - betase[d] = r.se[d][p]; - } - } + try { + if (m_createBinaryFiles) { + zScoreBinaryFile = new BinaryFile[m_gg.length]; + zScoreRowNamesFile = new TextFile[m_gg.length]; + if (m_gg.length > 1) { + String metaAnalysisFileName = m_outputdir + "MetaAnalysis"; + if (m_permuting) { + metaAnalysisFileName += "-PermutationRound-" + m_permutationround; + } + zScoreMetaAnalysisFile = new BinaryFile(metaAnalysisFileName + ".dat", BinaryFile.W); + // write magic number + if (m_cisOnly) { + zScoreMetaAnalysisFile.writeInt(1); + } else { + zScoreMetaAnalysisFile.writeInt(0); + } + + zScoreMetaAnalysisRowNamesFile = new TextFile(metaAnalysisFileName + "-RowNames.txt.gz", TextFile.W); + 
zScoreMetaAnalysisRowNamesFile.writeln("SNP\tAlleles\tMinorAllele\tAlleleAssessed\tNrCalled"); + TextFile tf = new TextFile(metaAnalysisFileName + "-ColNames.txt.gz", TextFile.W); + tf.writeList(Arrays.asList(m_probeList)); + tf.close(); + } + for (int d = 0; d < m_gg.length; d++) { + String fileName = m_outputdir + m_gg[d].getSettings().name; + if (m_permuting) { + fileName += "-PermutationRound-" + m_permutationround; + } + zScoreBinaryFile[d] = new BinaryFile(fileName + ".dat", BinaryFile.W); + // write magic number + if (m_cisOnly) { + zScoreBinaryFile[d].writeInt(1); + } else { + zScoreBinaryFile[d].writeInt(0); + } + + TextFile tf = new TextFile(fileName + "-ColNames.txt.gz", TextFile.W); + tf.writeList(Arrays.asList(m_probeList)); + tf.close(); + zScoreRowNamesFile[d] = new TextFile(fileName + "-RowNames.txt.gz", TextFile.W); + zScoreRowNamesFile[d].writeln("SNP\tAlleles\tMinorAllele\tAlleleAssessed\tNrCalled\tMaf\tHWE\tCallRate"); + } + } + + ProgressBar progressbar = new ProgressBar(m_availableWorkPackages.length); + boolean poison = false; + + while (!poison) { + WorkPackage wp = m_queue.take(); + Result r = wp.results; + if (wp.getHasResults()) { + nrSNPsTested++; + } + + if (r.poison) { + poison = true; + } else if (r.pvalues != null) { + + nrTestsPerformed += wp.getNumTested(); + + double[] pvalues = r.pvalues; + + //Is this working? + if (m_createBinaryFiles && !poison) { + writeBinaryResult(r); + } + + if (m_createTEXTFiles && !poison) { + // classic textual output. 
+ + for (int p = 0; p < pvalues.length; p++) { + double pval = pvalues[p]; + + if (!Double.isNaN(pval) && pval <= highestP) { + double[][] corr = r.correlations; + double[] correlations = new double[corr.length]; + double[] zscores = new double[corr.length]; + int[] samples = new int[corr.length]; + + double[] fc = new double[corr.length]; + double[] beta = new double[corr.length]; + double[] betase = new double[corr.length]; + + for (int d = 0; d < correlations.length; d++) { + if (Double.isNaN(corr[d][p])) { + correlations[d] = Double.NaN; + zscores[d] = Double.NaN; + samples[d] = -9; + fc[d] = Double.NaN; + beta[d] = Double.NaN; + betase[d] = Double.NaN; + } else { + correlations[d] = corr[d][p]; + if (m_useAbsoluteZScore) { + zscores[d] = Math.abs(r.zscores[d][p]); + } else { + zscores[d] = r.zscores[d][p]; + } + + samples[d] = r.numSamples[d]; + fc[d] = r.fc[d][p]; + beta[d] = r.beta[d][p]; + betase[d] = r.se[d][p]; + } + } // - byte allele = -1; - byte[] alleles = null; - SNP[] snps = wp.getSnps(); - for (int d = 0; d < snps.length; d++) { - if (snps[d] != null) { - allele = snps[d].getMinorAllele(); - alleles = snps[d].getAlleles(); - break; - } - } + byte allele = -1; + byte[] alleles = null; + SNP[] snps = wp.getSnps(); + for (int d = 0; d < snps.length; d++) { + if (snps[d] != null) { + allele = snps[d].getMinorAllele(); + alleles = snps[d].getAlleles(); + break; + } + } + + if (alleles == null) { + System.err.println("SNP has null alleles: "); + for (int d = 0; d < snps.length; d++) { + + if (snps[d] != null) { + + allele = snps[d].getMinorAllele(); + System.err.println(allele); + alleles = snps[d].getAlleles(); + System.err.println(alleles); + break; + } + } + } + + double Zfinal = r.finalZScore[p]; + double finalbeta = r.finalBeta[p]; + double finalbetase = r.finalBetaSe[p]; + int pid; + if (m_cisOnly) { + pid = wp.getProbes()[p]; + } else { + pid = p; + } + + addEQTL(pid, wp.getId(), pval, Zfinal, correlations, zscores, samples, alleles, allele, fc, 
beta, betase, finalbeta, finalbetase); + + } + } + } - if (alleles == null) { - System.err.println("SNP has null alleles: "); - for (int d = 0; d < snps.length; d++) { + } - if (snps[d] != null) { + if (wp.results != null) { + wp.clearResults(); - allele = snps[d].getMinorAllele(); - System.err.println(allele); - alleles = snps[d].getAlleles(); - System.err.println(alleles); - break; - } - } - } + } - double Zfinal = r.finalZScore[p]; - double finalbeta = r.finalBeta[p]; - double finalbetase = r.finalBetaSe[p]; - int pid; - if (m_cisOnly) { - pid = wp.getProbes()[p]; - } else { - pid = p; - } + progressbar.iterate(); + } - addEQTL(pid, wp.getId(), pval, Zfinal, correlations, zscores, samples, alleles, allele, fc, beta, betase, finalbeta, finalbetase); + progressbar.close(); - } - } - } + //Is this working? + if (m_createBinaryFiles) { - } + String fileName = "check"; + if (m_permuting) { + fileName += "-PermutationRound-" + m_permutationround; + } + fileName += ".md5"; - if (wp.results != null) { - wp.clearResults(); + HexBinaryAdapter md5Parser = new HexBinaryAdapter(); - } + BufferedWriter md5writer = new BufferedWriter(new FileWriter(m_outputdir + fileName)); - progressbar.iterate(); - } + for (int d = 0; d < m_gg.length; d++) { + zScoreBinaryFile[d].close(); - progressbar.close(); + fileName = m_gg[d].getSettings().name; + if (m_permuting) { + fileName += "-PermutationRound-" + m_permutationround; + } + fileName += ".dat"; + md5writer.write(md5Parser.marshal(zScoreBinaryFile[d].getWrittenHash()) + " " + fileName + '\n'); + zScoreRowNamesFile[d].close(); + } + if (m_gg.length > 1) { + zScoreMetaAnalysisFile.close(); + fileName = "MetaAnalysis"; + if (m_permuting) { + fileName += "-PermutationRound-" + m_permutationround; + } + fileName += ".dat"; + md5writer.write(md5Parser.marshal(zScoreMetaAnalysisFile.getWrittenHash()) + " " + fileName + '\n'); + zScoreMetaAnalysisRowNamesFile.close(); + } - //Is this working? 
- if (m_createBinaryFiles) { + md5writer.close(); - String fileName = "check"; - if (m_permuting) { - fileName += "-PermutationRound-" + m_permutationround; - } - fileName += ".md5"; - - HexBinaryAdapter md5Parser = new HexBinaryAdapter(); + } - BufferedWriter md5writer = new BufferedWriter(new FileWriter(m_outputdir + fileName)); + if (m_createTEXTFiles) { + if (!sorted) { + if (locationToStoreResult != 0) { - for (int d = 0; d < m_gg.length; d++) { - zScoreBinaryFile[d].close(); + Arrays.sort(finalEQTLs, 0, locationToStoreResult); +// SmoothSort.sort(finalEQTLs, 0, locationToStoreResult); +// inplaceArrayQuickSort.sort(finalEQTLs, 0, locationToStoreResult); - fileName = m_gg[d].getSettings().name; - if (m_permuting) { - fileName += "-PermutationRound-" + m_permutationround; - } - fileName += ".dat"; - md5writer.write(md5Parser.marshal(zScoreBinaryFile[d].getWrittenHash()) + " " + fileName + '\n'); + } + } + writeTextResults(); + } - zScoreRowNamesFile[d].close(); - } - if (m_gg.length > 1) { - zScoreMetaAnalysisFile.close(); + } catch (IOException e1) { + e1.printStackTrace(); + } catch (InterruptedException e2) { + e2.printStackTrace(); + } + } - fileName = "MetaAnalysis"; - if (m_permuting) { - fileName += "-PermutationRound-" + m_permutationround; - } - fileName += ".dat"; - md5writer.write(md5Parser.marshal(zScoreMetaAnalysisFile.getWrittenHash()) + " " + fileName + '\n'); + private void writeBinaryResult(Result r) throws IOException { - zScoreMetaAnalysisRowNamesFile.close(); - } + if (r != null) { + int[] numSamples = null; + try { + numSamples = r.numSamples; + } catch (NullPointerException e) { + System.out.println("ERROR: null result?"); + } - md5writer.close(); + int wpId = r.wpid; + WorkPackage currentWP = m_availableWorkPackages[wpId]; + double[][] zscores = r.zscores; + + if (zscores != null) { + SNP[] snps = currentWP.getSnps(); + int numDatasets = zscores.length; + double[] finalZscores = r.finalZScore; + String snpoutput = null; + + // if we're 
doing a meta-analysis, write the meta-analysis Z to a separate binaryFile + if (m_gg.length > 1) { + int totalSampleNr = 0; + String snpname = null; + for (int d = 0; d < numDatasets; d++) { + if (snps[d] != null) { + snpname = snps[d].getName(); + + byte[] alleles = snps[d].getAlleles(); + byte minorAllele = snps[d].getMinorAllele(); + byte alleleassessed = alleles[1]; + + if (currentWP.getFlipSNPAlleles()[d]) { + alleleassessed = alleles[0]; + } + if (snpoutput == null) { + snpoutput = snpname + "\t" + BaseAnnot.getAllelesDescription(alleles) + "\t" + BaseAnnot.toString(minorAllele) + "\t" + BaseAnnot.toString(alleleassessed); + } + totalSampleNr += r.numSamples[d]; + } + } + + StringBuilder sb = null; + for (int p = 0; p < finalZscores.length; p++) { + float z = (float) finalZscores[p]; + if (m_cisOnly) { + int[] probes = currentWP.getProbes(); + int probeId = probes[p]; + String probeName = m_probeList[probeId]; + if (sb == null) { + sb = new StringBuilder(); + } else { + sb.append("\t"); + } + sb.append(probeName); + + zScoreMetaAnalysisFile.writeFloat(z); + } else { + zScoreMetaAnalysisFile.writeFloat(z); + } + } + + if (sb != null) { + zScoreMetaAnalysisRowNamesFile.writeln(snpoutput + "\t" + totalSampleNr + "\t-\t-\t-\t" + finalZscores.length + "\t" + sb.toString()); + } else { + zScoreMetaAnalysisRowNamesFile.writeln(snpoutput + "\t" + totalSampleNr + "\t-\t-\t-\t" + finalZscores.length + "\t-"); + } + } + for (int d = 0; d < numDatasets; d++) { + double[] datasetZScores = zscores[d]; + SNP datasetSNP = snps[d]; + if (datasetSNP != null) { + BinaryFile outfile = zScoreBinaryFile[d]; + + String snpname = datasetSNP.getName(); + + byte[] alleles = datasetSNP.getAlleles(); + byte minorAllele = datasetSNP.getMinorAllele(); + byte alleleassessed = alleles[1]; + double hwe = datasetSNP.getHWEP(); + double cr = datasetSNP.getCR(); + double maf = datasetSNP.getMAF(); + + if (currentWP.getFlipSNPAlleles()[d]) { + alleleassessed = alleles[0]; + } + TextFile snpfile 
= zScoreRowNamesFile[d]; + StringBuilder sb = null; + for (int p = 0; p < datasetZScores.length; p++) { + float z = (float) datasetZScores[p]; + if (currentWP.getFlipSNPAlleles()[d]) { + z *= -1; + } + // System.out.println(p + "\t" + alleleassessed + "\t" + m_probeList[p] + "\t" + z + "\t" + currentWP.getFlipSNPAlleles()[d]); + if (m_cisOnly) { + // take into account that not all probes have been tested.. + int[] probes = currentWP.getProbes(); + int probeId = probes[p]; + String probeName = m_probeList[probeId]; + outfile.writeFloat(z); + if (sb == null) { + sb = new StringBuilder(); + } else { + sb.append("\t"); + } + sb.append(probeName); + } else { + outfile.writeFloat(z); + } + } + + if (sb != null) { + snpfile.writeln(snpname + "\t" + BaseAnnot.getAllelesDescription(alleles) + "\t" + BaseAnnot.toString(minorAllele) + "\t" + BaseAnnot.toString(alleleassessed) + "\t" + datasetSNP.getNrCalled() + "\t" + maf + "\t" + hwe + "\t" + cr + "\t" + datasetZScores.length + "\t" + sb.toString()); + } else { + snpfile.writeln(snpname + "\t" + BaseAnnot.getAllelesDescription(alleles) + "\t" + BaseAnnot.toString(minorAllele) + "\t" + BaseAnnot.toString(alleleassessed) + "\t" + datasetSNP.getNrCalled() + "\t" + maf + "\t" + hwe + "\t" + cr + "\t" + datasetZScores.length + "\t-"); + } + + } + } + } + } + } - } + private void addEQTL(int pid, int sid, double pval, double zscore, double[] correlations, double[] zscores, int[] numSamples, byte[] alleles, byte assessedAllele, double[] fc, double[] beta, double[] betase, double finalbeta, double finalbetase) { + if (bufferHasOverFlown) { + if (pval <= maxSavedPvalue) { - if (m_createTEXTFiles) { - if (!sorted) { - if (locationToStoreResult != 0) { + sorted = false; - Arrays.sort(finalEQTLs, 0, locationToStoreResult); -// SmoothSort.sort(finalEQTLs, 0, locationToStoreResult); -// inplaceArrayQuickSort.sort(finalEQTLs, 0, locationToStoreResult); + finalEQTLs[locationToStoreResult] = new QTL(pval, pid, sid, assessedAllele, zscore, 
alleles, zscores, numSamples, correlations, fc, beta, betase, finalbeta, finalbetase); + locationToStoreResult++; + + if (locationToStoreResult == finalEQTLs.length) { - } - } - writeTextResults(); - } - - } catch (IOException e1) { - e1.printStackTrace(); - } catch (InterruptedException e2) { - e2.printStackTrace(); - } - } - - private void writeBinaryResult(Result r) throws IOException { - - if (r != null) { - int[] numSamples = null; - try { - numSamples = r.numSamples; - } catch (NullPointerException e) { - System.out.println("ERROR: null result?"); - } - - int wpId = r.wpid; - WorkPackage currentWP = m_availableWorkPackages[wpId]; - double[][] zscores = r.zscores; - - if (zscores != null) { - SNP[] snps = currentWP.getSnps(); - int numDatasets = zscores.length; - double[] finalZscores = r.finalZScore; - String snpoutput = null; - - // if we're doing a meta-analysis, write the meta-analysis Z to a separate binaryFile - if (m_gg.length > 1) { - int totalSampleNr = 0; - String snpname = null; - for (int d = 0; d < numDatasets; d++) { - if (snps[d] != null) { - snpname = snps[d].getName(); - - byte[] alleles = snps[d].getAlleles(); - byte minorAllele = snps[d].getMinorAllele(); - byte alleleassessed = alleles[1]; - - if (currentWP.getFlipSNPAlleles()[d]) { - alleleassessed = alleles[0]; - } - if (snpoutput == null) { - snpoutput = snpname + "\t" + BaseAnnot.getAllelesDescription(alleles) + "\t" + BaseAnnot.toString(minorAllele) + "\t" + BaseAnnot.toString(alleleassessed); - } - totalSampleNr += r.numSamples[d]; - } - } - - StringBuilder sb = null; - for (int p = 0; p < finalZscores.length; p++) { - float z = (float) finalZscores[p]; - if (m_cisOnly) { - int[] probes = currentWP.getProbes(); - int probeId = probes[p]; - String probeName = m_probeList[probeId]; - if (sb == null) { - sb = new StringBuilder(); - } else { - sb.append("\t"); - } - sb.append(probeName); - - zScoreMetaAnalysisFile.writeFloat(z); - } else { - zScoreMetaAnalysisFile.writeFloat(z); - } - } - 
- if (sb != null) { - zScoreMetaAnalysisRowNamesFile.writeln(snpoutput + "\t" + totalSampleNr + "\t-\t-\t-\t" + finalZscores.length + "\t" + sb.toString()); - } else { - zScoreMetaAnalysisRowNamesFile.writeln(snpoutput + "\t" + totalSampleNr + "\t-\t-\t-\t" + finalZscores.length + "\t-"); - } - } - for (int d = 0; d < numDatasets; d++) { - double[] datasetZScores = zscores[d]; - SNP datasetSNP = snps[d]; - if (datasetSNP != null) { - BinaryFile outfile = zScoreBinaryFile[d]; - - String snpname = datasetSNP.getName(); - - byte[] alleles = datasetSNP.getAlleles(); - byte minorAllele = datasetSNP.getMinorAllele(); - byte alleleassessed = alleles[1]; - double hwe = datasetSNP.getHWEP(); - double cr = datasetSNP.getCR(); - double maf = datasetSNP.getMAF(); - - if (currentWP.getFlipSNPAlleles()[d]) { - alleleassessed = alleles[0]; - } - TextFile snpfile = zScoreRowNamesFile[d]; - StringBuilder sb = null; - for (int p = 0; p < datasetZScores.length; p++) { - float z = (float) datasetZScores[p]; - if (currentWP.getFlipSNPAlleles()[d]) { - z *= -1; - } - // System.out.println(p + "\t" + alleleassessed + "\t" + m_probeList[p] + "\t" + z + "\t" + currentWP.getFlipSNPAlleles()[d]); - if (m_cisOnly) { - // take into account that not all probes have been tested.. 
- int[] probes = currentWP.getProbes(); - int probeId = probes[p]; - String probeName = m_probeList[probeId]; - outfile.writeFloat(z); - if (sb == null) { - sb = new StringBuilder(); - } else { - sb.append("\t"); - } - sb.append(probeName); - } else { - outfile.writeFloat(z); - } - } - - if (sb != null) { - snpfile.writeln(snpname + "\t" + BaseAnnot.getAllelesDescription(alleles) + "\t" + BaseAnnot.toString(minorAllele) + "\t" + BaseAnnot.toString(alleleassessed) + "\t" + datasetSNP.getNrCalled() + "\t" + maf + "\t" + hwe + "\t" + cr + "\t" + datasetZScores.length + "\t" + sb.toString()); - } else { - snpfile.writeln(snpname + "\t" + BaseAnnot.getAllelesDescription(alleles) + "\t" + BaseAnnot.toString(minorAllele) + "\t" + BaseAnnot.toString(alleleassessed) + "\t" + datasetSNP.getNrCalled() + "\t" + maf + "\t" + hwe + "\t" + cr + "\t" + datasetZScores.length + "\t-"); - } - - } - } - } - } - } - - private void addEQTL(int pid, int sid, double pval, double zscore, double[] correlations, double[] zscores, int[] numSamples, byte[] alleles, byte assessedAllele, double[] fc, double[] beta, double[] betase, double finalbeta, double finalbetase) { - - if (bufferHasOverFlown) { - if (pval <= maxSavedPvalue) { - - sorted = false; - - finalEQTLs[locationToStoreResult] = new QTL(pval, pid, sid, assessedAllele, zscore, alleles, zscores, numSamples, correlations, fc, beta, betase, finalbeta, finalbetase); - locationToStoreResult++; - - if (locationToStoreResult == finalEQTLs.length) { - - Arrays.sort(finalEQTLs); + Arrays.sort(finalEQTLs); // SmoothSort.sort(finalEQTLs); // inplaceArrayQuickSort.sort(finalEQTLs); sorted = true; @@ -507,8 +503,6 @@ private void writeTextResults() throws IOException { System.out.println("Writing " + nrOfEntriesToWrite + " results out of " + nrTestsPerformed + " tests performed. 
" + nrSNPsTested + " SNPs finally tested."); - - if (m_permuting) { TextFile gz = new TextFile((m_outputdir + "PermutedEQTLsPermutationRound" + m_permutationround + ".txt.gz"), TextFile.W); gz.writeln("PValue\tSNP\tProbe\tGene\tAlleles\tAlleleAssessed\tZScore"); From e1b5f7213495d1faa5c6824c4d82f76948b4fc5b Mon Sep 17 00:00:00 2001 From: harmjanwestra Date: Mon, 4 May 2015 14:13:12 -0400 Subject: [PATCH 028/143] edits --- .../BinaryMetaAnalysis.java | 6 + .../eqtlmappingpipeline/binarymeta/Main.java | 4 +- .../meta/MetaAnalysisResultThread.java | 10 +- .../binarymeta/meta/MetaAnalyze.java | 813 ++++++++------- .../binarymeta/meta/MetaSettings.java | 975 +++++++++--------- .../main/java/umcg/genetica/text/Strings.java | 7 +- 6 files changed, 917 insertions(+), 898 deletions(-) diff --git a/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java b/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java index 5a66ec78e..8d5b54bbe 100644 --- a/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java +++ b/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java @@ -106,7 +106,13 @@ public void run() throws IOException { loadProbeAnnotation(); for (int permutation = 0; permutation < settings.getNrPermutations() + 1; permutation++) { + + // reinitialize buffer Arrays.fill(finalEQTLs, null); + locationToStoreResult = 0; + maxSavedPvalue = -Double.MAX_VALUE; + bufferHasOverFlown = false; + // create dataset objects System.out.println("Running permutation " + permutation); datasets = new BinaryMetaAnalysisDataset[settings.getDatasetlocations().size()]; diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/Main.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/Main.java index dddf09892..0587461d3 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/Main.java 
+++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/Main.java @@ -82,8 +82,8 @@ public static void main(String[] args) { try { MetaAnalyze m2 = new MetaAnalyze(); - m2.init(settings, texttoreplace, replacetextwith); - m2.analyze(); + m2.init(settings, texttoreplace, replacetextwith); + m2.analyze(); // System.gc(); // System.gc(); diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalysisResultThread.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalysisResultThread.java index 66dd28a20..5bde34cc7 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalysisResultThread.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalysisResultThread.java @@ -107,6 +107,8 @@ public void run() { } snpout.close(); + java.util.Arrays.sort(finalEQTLBuffer); + // write eQTL results.. writeresults(); @@ -162,8 +164,8 @@ private void analyze(MetaAnalysisWorkPackage pack) { Integer[] probeList = pack.getListOfTestedProbes(); for (int i = 0; i < probeList.length; i++) { - String probe = probes.get(probeList[i]).intern(); - if (allowedProbes == null || allowedProbes.contains(probe)) { + String probe = probes.get(probeList[i]); + if (probe != null && (allowedProbes == null || allowedProbes.contains(probe))) { totalNumberOfEQTLs++; uniqueProbes.add(probeList[i]); } @@ -252,13 +254,13 @@ protected void mergebuffers(int ctr) { System.arraycopy(toMerge, 0, tmp, 0, toMerge.length); System.arraycopy(finalEQTLBuffer, 0, tmp, toMerge.length, finalEQTLBuffer.length); - java.util.Arrays.sort(tmp); nrInFinalBuffer += toMerge.length; if (nrInFinalBuffer < m_settings.getFinalEQTLBufferMaxLength()) { finalEQTLBuffer = tmp; } else { + java.util.Arrays.sort(tmp); finalEQTLBuffer = new EQTL[m_settings.getFinalEQTLBufferMaxLength()]; // System.out.println(finalEQTLBuffer.length+"\t"+tmp.length); System.arraycopy(tmp, 0, finalEQTLBuffer, 
0, m_settings.getFinalEQTLBufferMaxLength()); @@ -275,7 +277,7 @@ private void writeresults() throws IOException { if (perm > 0) { out = new TextFile(m_settings.getOutput() + "PermutedEQTLsPermutationRound" + perm + ".txt.gz", TextFile.W); } else { - out = new TextFile(m_settings.getOutput() + "eQTLs.txt", TextFile.W); + out = new TextFile(m_settings.getOutput() + "eQTLs.txt.gz", TextFile.W); } diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalyze.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalyze.java index 1fe9e0e1d..54b3e5719 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalyze.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaAnalyze.java @@ -32,117 +32,119 @@ // */ public class MetaAnalyze { - protected static MetaSettings m_settings; - protected BinaryResultDataset[] ds; - protected ArrayList probes; - protected ArrayList snps; - protected Integer[][] snpTranslation; - protected int[] pvaluedistribution; - protected EQTL[] eQTLBuffer; - protected EQTL[] finalEQTLBuffer; - protected int nrInFinalBuffer = 0; - protected double pvaluethreshold; - protected ArrayList snpChr; - protected ArrayList snpChrPos; - protected ProbeTranslation probeTranslation; - protected Integer[][] probeTranslationLookupTable; - public static String header = "PValue\t" - + "SNPName\t" - + "SNPChr\t" - + "SNPChrPos\t" - + "ProbeName\t" - + "ProbeChr\t" - + "ProbeCenterChrPos\t" - + "CisTrans\t" - + "SNPType\t" - + "AlleleAssessed\t" - + "OverallZScore\t" - + "DatasetsWhereSNPProbePairIsAvailableAndPassesQC\t" - + "DatasetsZScores\t" - + "DatasetsNrSamples\t" - + "IncludedDatasetsMeanProbeExpression\t" - + "IncludedDatasetsProbeExpressionVariance\t" - + "HGNCName\t" - + "IncludedDatasetsCorrelationCoefficient"; - protected double[] zsumPerSNP; - protected int[] zsumSNPsNumberOfProbes; - protected double[] zsumPerProbe; - 
protected int[] zsumProbesNumberOfSNPs; - protected ZScorePlot zs; - protected TextFile zscoretable; - protected HashSet uniqueProbes; - protected HashSet uniqueSNPs; - protected int nrTotalSamples; - protected int numSNPs; - protected int numProbes; - private HashSet probeListToAnalyze; - - public void init(String settingsFile, String texttoreplace, String replacetextwith) throws IOException { - m_settings = new MetaSettings(); - m_settings.parse(settingsFile, texttoreplace, replacetextwith); - probeTranslation = new ProbeTranslation(); - probeTranslation.load(m_settings.getProbetranslationfile()); - - } - - public void analyze() throws IOException, DataFormatException, Exception { - System.out.println(""); - System.out.println("Starting analysis!"); - - String[] datasets = new String[m_settings.getDatasetnames().size()]; - for (int i = 0; i < m_settings.getDatasetnames().size(); i++) { - datasets[i] = m_settings.getDatasetnames().get(i); - } - - if (!m_settings.getOutput().endsWith("/")) { - m_settings.setOutput(m_settings.getOutput() + "/MetaAnalysis/"); - } - - if (!Gpio.exists(m_settings.getOutput())) { - Gpio.createDir(m_settings.getOutput()); - } - m_settings.save(); - - String[] locations = new String[m_settings.getDatasetnames().size()]; - for (int i = 0; i < locations.length; i++) { - locations[i] = m_settings.getDatasetlocations().get(i); - } - - int permstart = 0; - int permstop = m_settings.getNrPermutations() + 1; - - if (m_settings.getRunonlypermutation() > -1) { - permstart = m_settings.getRunonlypermutation(); - permstop = m_settings.getRunonlypermutation() + m_settings.getNrPermutations(); - } - - for (int perm = permstart; perm < permstop; perm++) { - ds = new BinaryResultDataset[m_settings.getDatasetlocations().size()]; - runCalculationRound(perm, locations, datasets, -1); - } - - if (m_settings.getRunonlypermutation() == -1) { - - if (m_settings.getNrPermutations() > 0) { - FDR.calculateFDR(m_settings.getOutput(), 
m_settings.getNrPermutations(), m_settings.getFinalEQTLBufferMaxLength(), m_settings.getFdrthreshold(), true, null, null, FDR.FDRMethod.ALL, true); - EQTLDotPlot edp = new EQTLDotPlot(); - edp.draw(m_settings.getOutput() + "/eQTLsFDR" + m_settings.getFdrthreshold() + ".txt", m_settings.getOutput() + "/DotPlot-FDR" + m_settings.getFdrthreshold() + ".pdf", EQTLDotPlot.Output.PDF); // "/eQTLsFDR" + fdrCutOff + ".txt", outputReportsDir + "/eQTLsFDR" + fdrCutOff + "DotPlot.png" - edp = null; - } - } - - } - - protected void initdatasets(String[] locations, int perm, int dToUse) throws IOException { - - int numProbes = probeTranslation.getNumProbes(); - System.out.println(numProbes + " probes found in translation table. Now matching probes across datasets.."); - probeTranslationLookupTable = new Integer[ds.length][numProbes]; - HashSet probesPresentInDatasets = new HashSet(); + protected static MetaSettings m_settings; + protected BinaryResultDataset[] ds; + protected ArrayList probes; + protected ArrayList snps; + protected Integer[][] snpTranslation; + protected int[] pvaluedistribution; + protected EQTL[] eQTLBuffer; + protected EQTL[] finalEQTLBuffer; + protected int nrInFinalBuffer = 0; + protected double pvaluethreshold; + protected ArrayList snpChr; + protected ArrayList snpChrPos; + protected ProbeTranslation probeTranslation; + protected Integer[][] probeTranslationLookupTable; + public static String header = "PValue\t" + + "SNPName\t" + + "SNPChr\t" + + "SNPChrPos\t" + + "ProbeName\t" + + "ProbeChr\t" + + "ProbeCenterChrPos\t" + + "CisTrans\t" + + "SNPType\t" + + "AlleleAssessed\t" + + "OverallZScore\t" + + "DatasetsWhereSNPProbePairIsAvailableAndPassesQC\t" + + "DatasetsZScores\t" + + "DatasetsNrSamples\t" + + "IncludedDatasetsMeanProbeExpression\t" + + "IncludedDatasetsProbeExpressionVariance\t" + + "HGNCName\t" + + "IncludedDatasetsCorrelationCoefficient"; + protected double[] zsumPerSNP; + protected int[] zsumSNPsNumberOfProbes; + protected double[] 
zsumPerProbe; + protected int[] zsumProbesNumberOfSNPs; + protected ZScorePlot zs; + protected TextFile zscoretable; + protected HashSet uniqueProbes; + protected HashSet uniqueSNPs; + protected int nrTotalSamples; + protected int numSNPs; + protected int numProbes; + private HashSet probeListToAnalyze; + + public void init(String settingsFile, String texttoreplace, String replacetextwith) throws IOException { + m_settings = new MetaSettings(); + m_settings.parse(settingsFile, texttoreplace, replacetextwith); + probeTranslation = new ProbeTranslation(); + probeTranslation.load(m_settings.getProbetranslationfile()); + + } + + public void analyze() throws IOException, DataFormatException, Exception { + System.out.println(""); + System.out.println("Starting analysis!"); + + String[] datasets = new String[m_settings.getDatasetnames().size()]; + for (int i = 0; i < m_settings.getDatasetnames().size(); i++) { + datasets[i] = m_settings.getDatasetnames().get(i); + } + + if (!m_settings.getOutput().endsWith("/")) { + m_settings.setOutput(m_settings.getOutput() + "/MetaAnalysis/"); + } + + if (!Gpio.exists(m_settings.getOutput())) { + Gpio.createDir(m_settings.getOutput()); + } + m_settings.save(); + + String[] locations = new String[m_settings.getDatasetnames().size()]; + for (int i = 0; i < locations.length; i++) { + locations[i] = m_settings.getDatasetlocations().get(i); + } + + int permstart = 0; + int permstop = m_settings.getNrPermutations() + 1; + + if (m_settings.getRunonlypermutation() > -1) { + permstart = m_settings.getRunonlypermutation(); + permstop = m_settings.getRunonlypermutation() + m_settings.getNrPermutations(); + } + + System.out.println(permstart + " - " + permstop); + + for (int perm = permstart; perm < permstop; perm++) { + ds = new BinaryResultDataset[m_settings.getDatasetlocations().size()]; + runCalculationRound(perm, locations, datasets, -1); + } + + if (m_settings.getRunonlypermutation() == -1) { + + if (m_settings.getNrPermutations() > 0) { + 
FDR.calculateFDR(m_settings.getOutput(), m_settings.getNrPermutations(), m_settings.getFinalEQTLBufferMaxLength(), m_settings.getFdrthreshold(), true, null, null, FDR.FDRMethod.ALL, true); + EQTLDotPlot edp = new EQTLDotPlot(); + edp.draw(m_settings.getOutput() + "/eQTLsFDR" + m_settings.getFdrthreshold() + ".txt", m_settings.getOutput() + "/DotPlot-FDR" + m_settings.getFdrthreshold() + ".pdf", EQTLDotPlot.Output.PDF); // "/eQTLsFDR" + fdrCutOff + ".txt", outputReportsDir + "/eQTLsFDR" + fdrCutOff + "DotPlot.png" + edp = null; + } + } + + } + + protected void initdatasets(String[] locations, int perm, int dToUse) throws IOException { + + int numProbes = probeTranslation.getNumProbes(); + System.out.println(numProbes + " probes found in translation table. Now matching probes across datasets.."); + probeTranslationLookupTable = new Integer[ds.length][numProbes]; + HashSet probesPresentInDatasets = new HashSet(); // m_settings.getSNPSelection(); - HashSet selectedSNPs = null; + HashSet selectedSNPs = null; // if (m_settings.getSNPSelection() != null) { // System.out.println("Selecting SNPs from: " + m_settings.getSNPSelection()); @@ -153,93 +155,93 @@ protected void initdatasets(String[] locations, int perm, int dToUse) throws IOE // System.out.println("Selected " + selectedSNPs.size() + " unique SNPs from file."); // } - HashMap> selectedSNPProbePairs = null; - if (m_settings.getSNPProbeSelection() != null) { - System.out.println("Selecting SNP-probe pairs from: " + m_settings.getSNPProbeSelection()); - selectedSNPProbePairs = new HashMap>(); - selectedSNPs = new HashSet(); - TextFile stf = new TextFile(m_settings.getSNPProbeSelection(), TextFile.R); - int ctr = 0; - String[] felems = stf.readLineElems(TextFile.tab); - while (felems != null) { - String snp = felems[0].intern(); - String probe = felems[1].intern(); - HashSet probesForSNP = selectedSNPProbePairs.get(snp); - if (probesForSNP == null) { - probesForSNP = new HashSet(); - } - 
probesForSNP.add(probe.intern()); - selectedSNPs.add(snp.intern()); - selectedSNPProbePairs.put(snp.intern(), probesForSNP); - ctr++; - felems = stf.readLineElems(TextFile.tab); - } - - stf.close(); - System.out.println("Selected " + ctr + " unique SNPs from file."); - } - - HashSet probesToInclude = null; - - if (m_settings.getProbeselection() != null) { - TextFile tf = new TextFile(m_settings.getProbeselection(), TextFile.R); - - ArrayList probesSelected = tf.readAsArrayList(); - - probesToInclude = new HashSet(); - probesToInclude.addAll(probesSelected); - System.out.println(probesSelected.size() +" probes selected from file: "+m_settings.getProbeselection()); - tf.close(); - } - - for (int d = 0; d < ds.length; d++) { - - int probeAnnotationToUse = d; - if (dToUse != -1) { - probeAnnotationToUse = dToUse; - } - - ds[d] = new BinaryResultDataset(locations[d], m_settings.getDatasetPrefix().get(probeAnnotationToUse), perm); - BinaryResultProbe[] dsProbes = ds[d].getProbes(); - BinaryResultSNP[] dsSNPs = ds[d].getSnps(); - nrTotalSamples += ds[d].getMaxNrSamples(); - - for (BinaryResultProbe p : dsProbes) { - Integer newProbeId = probeTranslation.getProbeId(m_settings.getDatasetannotations().get(probeAnnotationToUse) + p.getName()); - if (newProbeId == null) { - System.out.println(m_settings.getDatasetannotations().get(probeAnnotationToUse) + "\t" + p.getName() + " probe not present in annotationfile...?"); - System.exit(0); - } - if (probesToInclude == null || probesToInclude.contains("" + newProbeId)) { - probesPresentInDatasets.add(newProbeId); - probeTranslationLookupTable[d][newProbeId] = p.getId(); - } else { - probeTranslationLookupTable[d][newProbeId] = null; - } - } - - for (BinaryResultSNP s : dsSNPs) { - if (!uniqueSNPs.contains(s.getName().intern()) && (selectedSNPs == null || selectedSNPs.contains(s.getName().intern()))) { - snps.add(s.getName().intern()); - snpChr.add(s.getChr()); - snpChrPos.add(s.getChrpos()); - uniqueSNPs.add(s.getName().intern()); 
- } - } + HashMap> selectedSNPProbePairs = null; + if (m_settings.getSNPProbeSelection() != null) { + System.out.println("Selecting SNP-probe pairs from: " + m_settings.getSNPProbeSelection()); + selectedSNPProbePairs = new HashMap>(); + selectedSNPs = new HashSet(); + TextFile stf = new TextFile(m_settings.getSNPProbeSelection(), TextFile.R); + int ctr = 0; + String[] felems = stf.readLineElems(TextFile.tab); + while (felems != null) { + String snp = felems[0].intern(); + String probe = felems[1].intern(); + HashSet probesForSNP = selectedSNPProbePairs.get(snp); + if (probesForSNP == null) { + probesForSNP = new HashSet(); + } + probesForSNP.add(probe.intern()); + selectedSNPs.add(snp.intern()); + selectedSNPProbePairs.put(snp.intern(), probesForSNP); + ctr++; + felems = stf.readLineElems(TextFile.tab); + } + + stf.close(); + System.out.println("Selected " + ctr + " unique SNPs from file."); + } + + HashSet probesToInclude = null; + + if (m_settings.getProbeselection() != null) { + TextFile tf = new TextFile(m_settings.getProbeselection(), TextFile.R); + + ArrayList probesSelected = tf.readAsArrayList(); + + probesToInclude = new HashSet(); + probesToInclude.addAll(probesSelected); + System.out.println(probesSelected.size() + " probes selected from file: " + m_settings.getProbeselection()); + tf.close(); + } + + for (int d = 0; d < ds.length; d++) { + + int probeAnnotationToUse = d; + if (dToUse != -1) { + probeAnnotationToUse = dToUse; + } + + ds[d] = new BinaryResultDataset(locations[d], m_settings.getDatasetPrefix().get(probeAnnotationToUse), perm); + BinaryResultProbe[] dsProbes = ds[d].getProbes(); + BinaryResultSNP[] dsSNPs = ds[d].getSnps(); + nrTotalSamples += ds[d].getMaxNrSamples(); + + for (BinaryResultProbe p : dsProbes) { + Integer newProbeId = probeTranslation.getProbeId(m_settings.getDatasetannotations().get(probeAnnotationToUse) + p.getName()); + if (newProbeId == null) { + 
System.out.println(m_settings.getDatasetannotations().get(probeAnnotationToUse) + "\t" + p.getName() + " probe not present in annotationfile...?"); + System.exit(0); + } + if (probesToInclude == null || probesToInclude.contains("" + newProbeId)) { + probesPresentInDatasets.add(newProbeId); + probeTranslationLookupTable[d][newProbeId] = p.getId(); + } else { + probeTranslationLookupTable[d][newProbeId] = null; + } + } + + for (BinaryResultSNP s : dsSNPs) { + if (!uniqueSNPs.contains(s.getName().intern()) && (selectedSNPs == null || selectedSNPs.contains(s.getName().intern()))) { + snps.add(s.getName().intern()); + snpChr.add(s.getChr()); + snpChrPos.add(s.getChrpos()); + uniqueSNPs.add(s.getName().intern()); + } + } // ds[d].clearProbeObjects(); - } + } - TextFile probesPresentFile = new TextFile(m_settings.getOutput() + "ProbesPresentInAtLeastOneDataset.txt", TextFile.W); + TextFile probesPresentFile = new TextFile(m_settings.getOutput() + "ProbesPresentInAtLeastOneDataset.txt", TextFile.W); - System.out.println(probesPresentInDatasets.size() + "\tunique probes present in all datasets."); - Integer[] presentNrs = probesPresentInDatasets.toArray(new Integer[0]); - for (Integer i : presentNrs) { - probesPresentFile.writeln("" + i); - } - probesPresentFile.close(); + System.out.println(probesPresentInDatasets.size() + "\tunique probes present in all datasets."); + Integer[] presentNrs = probesPresentInDatasets.toArray(new Integer[0]); + for (Integer i : presentNrs) { + probesPresentFile.writeln("" + i); + } + probesPresentFile.close(); - int selectedprobes = 0; + int selectedprobes = 0; // if (m_settings.getProbeselection() != null) { @@ -280,258 +282,255 @@ protected void initdatasets(String[] locations, int perm, int dToUse) throws IOE // System.out.println(probePresenceCounter + "\tprobes selected."); // // } else { - for (int q = 0; q < probeTranslationLookupTable[0].length; q++) { - int probePresenceCounter = 0; - if (probeListToAnalyze != null) { - if 
(!probeListToAnalyze.contains("" + q)) { - for (int d = 0; d < ds.length; d++) { - probeTranslationLookupTable[d][q] = null; - } - } - } - - for (int i = 0; i < ds.length; i++) { - if (probeTranslationLookupTable[i][q] != null && ds[i].getMaxNrSamples() > m_settings.getProbeAndSNPPresenceFilterSampleThreshold()) { - probePresenceCounter++; - } - } - - - - if (m_settings.getProbeDatasetPresenceThreshold() > 0 && probePresenceCounter < m_settings.getProbeDatasetPresenceThreshold()) { - for (int d = 0; d < ds.length; d++) { - probeTranslationLookupTable[d][q] = null; - } - } else if (probePresenceCounter > 0) { - selectedprobes++; - } - } - System.out.println("Selected " + selectedprobes + " probes from at least " + m_settings.getProbeDatasetPresenceThreshold() + " datasets of at least " + m_settings.getProbeAndSNPPresenceFilterSampleThreshold() + " samples."); + for (int q = 0; q < probeTranslationLookupTable[0].length; q++) { + int probePresenceCounter = 0; + if (probeListToAnalyze != null) { + if (!probeListToAnalyze.contains("" + q)) { + for (int d = 0; d < ds.length; d++) { + probeTranslationLookupTable[d][q] = null; + } + } + } + + for (int i = 0; i < ds.length; i++) { + if (probeTranslationLookupTable[i][q] != null && ds[i].getMaxNrSamples() > m_settings.getProbeAndSNPPresenceFilterSampleThreshold()) { + probePresenceCounter++; + } + } + + + if (m_settings.getProbeDatasetPresenceThreshold() > 0 && probePresenceCounter < m_settings.getProbeDatasetPresenceThreshold()) { + for (int d = 0; d < ds.length; d++) { + probeTranslationLookupTable[d][q] = null; + } + } else if (probePresenceCounter > 0) { + selectedprobes++; + } + } + System.out.println("Selected " + selectedprobes + " probes from at least " + m_settings.getProbeDatasetPresenceThreshold() + " datasets of at least " + m_settings.getProbeAndSNPPresenceFilterSampleThreshold() + " samples."); // } // numProbes = uniqueProbes.size(); - numSNPs = uniqueSNPs.size(); + numSNPs = uniqueSNPs.size(); - 
initSNPTranslation(); - } + initSNPTranslation(); + } - protected void initSNPTranslation() throws IOException { - snpTranslation = new Integer[ds.length][numSNPs]; + protected void initSNPTranslation() throws IOException { + snpTranslation = new Integer[ds.length][numSNPs]; - for (int d = 0; d < ds.length; d++) { - BinaryResultProbe[] dsProbes = ds[d].getProbes(); - BinaryResultSNP[] dsSNPs = ds[d].getSnps(); + for (int d = 0; d < ds.length; d++) { + BinaryResultProbe[] dsProbes = ds[d].getProbes(); + BinaryResultSNP[] dsSNPs = ds[d].getSnps(); - for (int i = 0; i < snps.size(); i++) { - BinaryResultSNP s = ds[d].getStringToSNP().get(snps.get(i)); - if (s != null) { - snpTranslation[d][i] = s.getId(); - } else { - snpTranslation[d][i] = null; - } - } - } + for (int i = 0; i < snps.size(); i++) { + BinaryResultSNP s = ds[d].getStringToSNP().get(snps.get(i)); + if (s != null) { + snpTranslation[d][i] = s.getId(); + } else { + snpTranslation[d][i] = null; + } + } + } - int selectedsnps = 0; + int selectedsnps = 0; - HashSet selectedSNPs = null; - if (m_settings.getSNPSelection() != null) { - System.out.println("Selecting SNPs from: " + m_settings.getSNPSelection()); - selectedSNPs = new HashSet(); - TextFile stf = new TextFile(m_settings.getSNPSelection(), TextFile.R); - selectedSNPs.addAll(stf.readAsArrayList()); - stf.close(); - System.out.println("Selected " + selectedSNPs.size() + " unique SNPs from file."); - } + HashSet selectedSNPs = null; + if (m_settings.getSNPSelection() != null) { + System.out.println("Selecting SNPs from: " + m_settings.getSNPSelection()); + selectedSNPs = new HashSet(); + TextFile stf = new TextFile(m_settings.getSNPSelection(), TextFile.R); + selectedSNPs.addAll(stf.readAsArrayList()); + stf.close(); + System.out.println("Selected " + selectedSNPs.size() + " unique SNPs from file."); + } - TextFile selectedSNPFile = new TextFile(m_settings.getOutput() + "/SelectedSNPs.txt", TextFile.W); - for (int s = 0; s < numSNPs; s++) { + TextFile 
selectedSNPFile = new TextFile(m_settings.getOutput() + "/SelectedSNPs.txt", TextFile.W); + for (int s = 0; s < numSNPs; s++) { - String snpName = snps.get(s); + String snpName = snps.get(s); - int snppresencecounter = 0; - for (int d = 0; d < ds.length; d++) { - if (snpTranslation[d][s] != null && ds[d].getMaxNrSamples() >= m_settings.getProbeAndSNPPresenceFilterSampleThreshold()) { - snppresencecounter++; - } - } + int snppresencecounter = 0; + for (int d = 0; d < ds.length; d++) { + if (snpTranslation[d][s] != null && ds[d].getMaxNrSamples() >= m_settings.getProbeAndSNPPresenceFilterSampleThreshold()) { + snppresencecounter++; + } + } - if (m_settings.getSnpDatasetPresenceThreshold() > 0 && snppresencecounter < m_settings.getSnpDatasetPresenceThreshold() || (selectedSNPs != null && !selectedSNPs.contains(snpName))) { - for (int d = 0; d < ds.length; d++) { - snpTranslation[d][s] = null; - } - } else if (snppresencecounter > 0) { - selectedSNPFile.writeln(snps.get(s)); - selectedsnps++; - } + if (m_settings.getSnpDatasetPresenceThreshold() > 0 && snppresencecounter < m_settings.getSnpDatasetPresenceThreshold() || (selectedSNPs != null && !selectedSNPs.contains(snpName))) { + for (int d = 0; d < ds.length; d++) { + snpTranslation[d][s] = null; + } + } else if (snppresencecounter > 0) { + selectedSNPFile.writeln(snps.get(s)); + selectedsnps++; + } - } + } - selectedSNPFile.close(); + selectedSNPFile.close(); - System.out.println("Selected " + selectedsnps + " snps from at least " + m_settings.getSnpDatasetPresenceThreshold() + " datasets of at least " + m_settings.getProbeAndSNPPresenceFilterSampleThreshold() + " samples."); - } + System.out.println("Selected " + selectedsnps + " snps from at least " + m_settings.getSnpDatasetPresenceThreshold() + " datasets of at least " + m_settings.getProbeAndSNPPresenceFilterSampleThreshold() + " samples."); + } - protected void runCalculationRound(int perm, String[] locations, String[] datasets, int dToUse) throws IOException, 
Exception { - pvaluedistribution = null; - eQTLBuffer = null; - finalEQTLBuffer = null; - nrInFinalBuffer = 0; + protected void runCalculationRound(int perm, String[] locations, String[] datasets, int dToUse) throws IOException, Exception { + pvaluedistribution = null; + eQTLBuffer = null; + finalEQTLBuffer = null; + nrInFinalBuffer = 0; - uniqueProbes = new HashSet(); - uniqueSNPs = new HashSet(); + uniqueProbes = new HashSet(); + uniqueSNPs = new HashSet(); - int numDatasets = ds.length; - probes = new ArrayList(); + int numDatasets = ds.length; + probes = new ArrayList(); - snps = new ArrayList(); - snpChr = new ArrayList(); - snpChrPos = new ArrayList(); + snps = new ArrayList(); + snpChr = new ArrayList(); + snpChrPos = new ArrayList(); - nrTotalSamples = 0; + nrTotalSamples = 0; - String[] probeName = probeTranslation.getProbes(); - probes.addAll(Arrays.asList(probeName)); + String[] probeName = probeTranslation.getProbes(); + probes.addAll(Arrays.asList(probeName)); - initdatasets(locations, perm, dToUse); + initdatasets(locations, perm, dToUse); - String zsName = null; - if (m_settings.isMakezscoreplot()) { - zs = new ZScorePlot(); - String[] datasets2 = new String[datasets.length + 1]; - System.arraycopy(datasets, 0, datasets2, 0, datasets.length); - datasets2[datasets2.length - 1] = "Meta-Analysis"; + String zsName = null; + if (m_settings.isMakezscoreplot()) { + zs = new ZScorePlot(); + String[] datasets2 = new String[datasets.length + 1]; + System.arraycopy(datasets, 0, datasets2, 0, datasets.length); + datasets2[datasets2.length - 1] = "Meta-Analysis"; - if (perm > 0) { - zsName = m_settings.getOutput() + "ZScoreComparison-PermutationRound" + perm; - } else { - zsName = m_settings.getOutput() + "ZScoreComparison"; - } - zs.init(numDatasets + 1, datasets2, true, zsName); - } + if (perm > 0) { + zsName = m_settings.getOutput() + "ZScoreComparison-PermutationRound" + perm; + } else { + zsName = m_settings.getOutput() + "ZScoreComparison"; + } + 
zs.init(numDatasets + 1, datasets2, true, zsName); + } - Descriptives.lookupSqrt(nrTotalSamples); - pvaluedistribution = new int[m_settings.getNrOfBins()]; + Descriptives.lookupSqrt(nrTotalSamples); + pvaluedistribution = new int[m_settings.getNrOfBins()]; - eQTLBuffer = new EQTL[10000]; - finalEQTLBuffer = new EQTL[0]; + eQTLBuffer = new EQTL[10000]; + finalEQTLBuffer = new EQTL[0]; - pvaluethreshold = Double.MAX_VALUE; + pvaluethreshold = Double.MAX_VALUE; - zsumPerSNP = new double[snps.size()]; - zsumSNPsNumberOfProbes = new int[snps.size()]; - zsumPerProbe = new double[probes.size()]; - zsumProbesNumberOfSNPs = new int[probes.size()]; + zsumPerSNP = new double[snps.size()]; + zsumSNPsNumberOfProbes = new int[snps.size()]; + zsumPerProbe = new double[probes.size()]; + zsumProbesNumberOfSNPs = new int[probes.size()]; - System.out.println("Performing the meta-analysis now: "); + System.out.println("Performing the meta-analysis now: "); // System.out.println(snps.size() + "\t unique SNPs present in at least " + m_settings.snpDatasetPresenceThreshold + " datasets"); // System.out.println(probes.size() + "\t unique Probespresent in at least " + m_settings.probeDatasetPresenceThreshold + " datasets"); - System.out.println(nrTotalSamples + "\t total samples"); - - if (m_settings.isMakezscoretable()) { - if (perm == 0) { - zscoretable = new TextFile(m_settings.getOutput() + "metazscoretable.txt.gz", TextFile.W, (10 * 1048576)); - } else { - zscoretable = new TextFile(m_settings.getOutput() + "metazscoretable-Permutation" + perm + ".txt.gz", TextFile.W, (10 * 1048576)); - } - StringBuilder zscoreout = new StringBuilder(); - zscoreout.append("SNP\tAlleleCoding\tAssessedAllele"); - for (int i = 0; i < probes.size(); i++) { - zscoreout.append("\t").append(probes.get(i)); - } - zscoretable.writeln(zscoreout.toString()); - - } - - HashMap> selectedSNPProbePairs = null; - if (m_settings.getSNPProbeSelection() != null) { - System.out.println("Selecting SNP-probe pairs from: " + 
m_settings.getSNPProbeSelection()); - selectedSNPProbePairs = new HashMap>(); - - TextFile stf = new TextFile(m_settings.getSNPProbeSelection(), TextFile.R); - int ctr = 0; - String[] felems = stf.readLineElems(TextFile.tab); - while (felems != null) { - String snp = felems[0]; - String probe = felems[1]; - HashSet probesForSNP = selectedSNPProbePairs.get(snp); - if (probesForSNP == null) { - probesForSNP = new HashSet(); - } - probesForSNP.add(probe); - selectedSNPProbePairs.put(snp, probesForSNP); - ctr++; - felems = stf.readLineElems(TextFile.tab); - } - - stf.close(); - System.out.println("Selected " + ctr + " unique SNPs from file."); - } - - /// init calculation pool, - - int nrProcs = Runtime.getRuntime().availableProcessors(); - if (m_settings.getNrThresds() > 0) { - if (m_settings.getNrThresds() > nrProcs) { - m_settings.setNrThresds(nrProcs); - } - nrProcs = m_settings.getNrThresds(); - } - System.out.println("Using " + nrProcs + " threads :)"); - MetaAnalysisCalculationThread[] calcPool = new MetaAnalysisCalculationThread[nrProcs]; - LinkedBlockingQueue loaderQueue = new LinkedBlockingQueue(nrProcs); - MetaAnalysisLoaderThread loaderThread = new MetaAnalysisLoaderThread(loaderQueue, snpTranslation, snps, ds); - loaderThread.setName("Loader"); - loaderThread.start(); - - PValueThreshold p = new PValueThreshold(); - LinkedBlockingQueue resultQueue = new LinkedBlockingQueue(nrProcs); - MetaAnalysisResultThread resultThread = new MetaAnalysisResultThread(resultQueue, m_settings, datasets, perm, zscoretable, p, snps, selectedSNPProbePairs, probes); - resultThread.setName("Result"); - resultThread.start(); - - for (int i = 0; i < nrProcs; i++) { - calcPool[i] = new MetaAnalysisCalculationThread(loaderQueue, resultQueue, snps, probes, snpChr, snpChrPos, ds, snpTranslation, probeTranslationLookupTable, probeTranslation, m_settings, zs, p); - calcPool[i].setName("MetaCalc-" + i); - calcPool[i].start(); - } - - // kill the threads - try { - loaderThread.join(); - 
MetaAnalysisWorkPackage poison = new MetaAnalysisWorkPackage(0, 0); - poison.poisonTheWell(); - - for (int threadNum = 0; threadNum < calcPool.length; threadNum++) { - try { - loaderQueue.put(poison); - } catch (InterruptedException ex) { - ex.printStackTrace(); - } - } - for (int threadNum = 0; threadNum < calcPool.length; threadNum++) { - calcPool[threadNum].join(); - } - - resultQueue.put(poison); - resultThread.join(); - - } catch (InterruptedException e) { - System.err.println("Exception: Thread main interrupted."); - } - - if (m_settings.isMakezscoretable()) { + System.out.println(nrTotalSamples + "\t total samples"); + + if (m_settings.isMakezscoretable()) { + if (perm == 0) { + zscoretable = new TextFile(m_settings.getOutput() + "metazscoretable.txt.gz", TextFile.W, (10 * 1048576)); + } else { + zscoretable = new TextFile(m_settings.getOutput() + "metazscoretable-Permutation" + perm + ".txt.gz", TextFile.W, (10 * 1048576)); + } + StringBuilder zscoreout = new StringBuilder(); + zscoreout.append("SNP\tAlleleCoding\tAssessedAllele"); + for (int i = 0; i < probes.size(); i++) { + zscoreout.append("\t").append(probes.get(i)); + } + zscoretable.writeln(zscoreout.toString()); + + } + + HashMap> selectedSNPProbePairs = null; + if (m_settings.getSNPProbeSelection() != null) { + System.out.println("Selecting SNP-probe pairs from: " + m_settings.getSNPProbeSelection()); + selectedSNPProbePairs = new HashMap>(); + + TextFile stf = new TextFile(m_settings.getSNPProbeSelection(), TextFile.R); + int ctr = 0; + String[] felems = stf.readLineElems(TextFile.tab); + while (felems != null) { + String snp = felems[0]; + String probe = felems[1]; + HashSet probesForSNP = selectedSNPProbePairs.get(snp); + if (probesForSNP == null) { + probesForSNP = new HashSet(); + } + probesForSNP.add(probe); + selectedSNPProbePairs.put(snp, probesForSNP); + ctr++; + felems = stf.readLineElems(TextFile.tab); + } + + stf.close(); + System.out.println("Selected " + ctr + " unique SNPs from 
file."); + } + + /// init calculation pool, + + int nrProcs = Runtime.getRuntime().availableProcessors(); + if (m_settings.getNrThresds() > 0) { + if (m_settings.getNrThresds() > nrProcs) { + m_settings.setNrThresds(nrProcs); + } + nrProcs = m_settings.getNrThresds(); + } + System.out.println("Using " + nrProcs + " threads :)"); + MetaAnalysisCalculationThread[] calcPool = new MetaAnalysisCalculationThread[nrProcs]; + LinkedBlockingQueue loaderQueue = new LinkedBlockingQueue(nrProcs); + MetaAnalysisLoaderThread loaderThread = new MetaAnalysisLoaderThread(loaderQueue, snpTranslation, snps, ds); + loaderThread.setName("Loader"); + loaderThread.start(); + + PValueThreshold p = new PValueThreshold(); + LinkedBlockingQueue resultQueue = new LinkedBlockingQueue(nrProcs); + MetaAnalysisResultThread resultThread = new MetaAnalysisResultThread(resultQueue, m_settings, datasets, perm, zscoretable, p, snps, selectedSNPProbePairs, probes); + resultThread.setName("Result"); + resultThread.start(); + + for (int i = 0; i < nrProcs; i++) { + calcPool[i] = new MetaAnalysisCalculationThread(loaderQueue, resultQueue, snps, probes, snpChr, snpChrPos, ds, snpTranslation, probeTranslationLookupTable, probeTranslation, m_settings, zs, p); + calcPool[i].setName("MetaCalc-" + i); + calcPool[i].start(); + } + + // kill the threads + try { + loaderThread.join(); + MetaAnalysisWorkPackage poison = new MetaAnalysisWorkPackage(0, 0); + poison.poisonTheWell(); + + for (int threadNum = 0; threadNum < calcPool.length; threadNum++) { + try { + loaderQueue.put(poison); + } catch (InterruptedException ex) { + ex.printStackTrace(); + } + } + for (int threadNum = 0; threadNum < calcPool.length; threadNum++) { + calcPool[threadNum].join(); + } + + resultQueue.put(poison); + resultThread.join(); + + } catch (InterruptedException e) { + System.err.println("Exception: Thread main interrupted."); + } + + if (m_settings.isMakezscoretable()) { // if (perm == 0) { - zscoretable.close(); + zscoretable.close(); 
// } - } - if (zs != null) { - zs.write(zsName); - } + } + if (zs != null) { + zs.write(zsName); + } - - - } + } } \ No newline at end of file diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaSettings.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaSettings.java index b59e72955..1e553f7aa 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaSettings.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/MetaSettings.java @@ -4,497 +4,508 @@ */ package eqtlmappingpipeline.binarymeta.meta; +import org.apache.commons.configuration.ConfigurationException; +import org.apache.commons.configuration.XMLConfiguration; + import java.util.ArrayList; import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.commons.configuration.ConfigurationException; -import org.apache.commons.configuration.XMLConfiguration; /** - * * @author harm-jan */ public class MetaSettings { - private int nrPermutations = 10; - private boolean useAbsoluteZscore = false; - private int finalEQTLBufferMaxLength = 1000000; - private int nrOfBins = 100; - private double fdrthreshold = 0.05; - private boolean includeSNPsWithoutProperMapping = true; - private boolean includeProbesWithoutProperMapping = true; - private boolean cis = true; - private boolean trans = true; - private int cisdistance = 250000; - private int transdistance = 5000000; - private boolean makezscoreplot = true; - private String probetranslationfile; - private ArrayList datasetnames; - private ArrayList datasetPrefix; - private ArrayList datasetlocations; - private ArrayList datasetannotations; - private ArrayList selectedProbes; - private String output; - private boolean makezscoretable = false; - private int probeDatasetPresenceThreshold = 0; - private int snpDatasetPresenceThreshold = 0; - private int probeAndSNPPresenceFilterSampleThreshold = 0; - private int 
runonlypermutation; - private int nrThresds; - private String probeselection; - private String snpselection; - private XMLConfiguration config; - private String snpprobeselection; - - public void parse(String settings, String texttoreplace, String replacetextwith) { - try { - config = new XMLConfiguration(settings); - - nrPermutations = config.getInt("defaults.permutations", 0); - - useAbsoluteZscore = config.getBoolean("defaults.absolutezscore", false); - finalEQTLBufferMaxLength = config.getInt("defaults.finalnreqtls", 100000); - fdrthreshold = config.getDouble("defaults.fdrthreshold", 0.05); - cisdistance = config.getInt("defaults.cisprobedistance", 250000); - transdistance = config.getInt("defaults.transprobedistance", 5000000); - includeProbesWithoutProperMapping = config.getBoolean("defaults.includeprobeswithoutmapping", true); - includeSNPsWithoutProperMapping = config.getBoolean("defaults.includesnpswithoutmapping", true); - makezscoreplot = config.getBoolean("defaults.makezscoreplot", true); - makezscoretable = config.getBoolean("defaults.makezscoretable", false); - probetranslationfile = config.getString("defaults.probetranslationfile"); - output = config.getString("defaults.output"); - - - - - probeDatasetPresenceThreshold = config.getInt("defaults.minimalnumberofdatasetsthatcontainprobe", 0); - snpDatasetPresenceThreshold = config.getInt("defaults.minimalnumberofdatasetsthatcontainsnp", 0); - probeAndSNPPresenceFilterSampleThreshold = config.getInt("defaults.snpprobeselectsamplesizethreshold", -1); - - runonlypermutation = config.getInt("defaults.runonlypermutation", -1); - nrThresds = config.getInt("defaults.threads", 0); - cis = config.getBoolean("defaults.cis", false); - trans = config.getBoolean("defaults.trans", false); - - probeselection = config.getString("defaults.probeselection"); - - if (probeselection != null && probeselection.trim().length() == 0) { - probeselection = null; - } - snpselection = config.getString("defaults.snpselection"); - - 
if (snpselection != null && snpselection.trim().length() == 0) { - snpselection = null; - } - - snpprobeselection = config.getString("defaults.snpprobeselection"); - - if (snpprobeselection != null && snpprobeselection.trim().length() == 0) { - snpprobeselection = null; - } else { - System.out.println("SNP PROBE SELECTION: "+snpprobeselection); - } - - - - int i = 0; - - String dataset = ""; - datasetnames = new ArrayList(); - datasetlocations = new ArrayList(); - datasetannotations = new ArrayList(); - datasetPrefix = new ArrayList(); - - while (dataset != null) { - dataset = config.getString("datasets.dataset(" + i + ").name"); // see if a dataset is defined - if (dataset != null) { - - datasetnames.add(dataset); - String prefix = config.getString("datasets.dataset(" + i + ").prefix"); // see if a dataset is defined - - if (prefix == null) { - prefix = "Dataset"; - } - datasetPrefix.add(prefix); - String datasetlocation = config.getString("datasets.dataset(" + i + ").location"); // see if a dataset is defined - if (texttoreplace != null && replacetextwith != null && datasetlocation.contains(texttoreplace)) { - datasetlocation = datasetlocation.replace(texttoreplace, replacetextwith); - } - String datasetannotation = config.getString("datasets.dataset(" + i + ").expressionplatform"); // see if a dataset is defined - - datasetlocations.add(datasetlocation); - datasetannotations.add(datasetannotation); - } - i++; - } - - - // parse datasets - } catch (ConfigurationException e) { - e.printStackTrace(); - } - } - - /** - * @return the nrPermutations - */ - public int getNrPermutations() { - return nrPermutations; - } - - /** - * @param nrPermutations the nrPermutations to set - */ - public void setNrPermutations(int nrPermutations) { - this.nrPermutations = nrPermutations; - } - - /** - * @return the useAbsoluteZscore - */ - public boolean isUseAbsoluteZscore() { - return useAbsoluteZscore; - } - - /** - * @param useAbsoluteZscore the useAbsoluteZscore to set - */ - 
public void setUseAbsoluteZscore(boolean useAbsoluteZscore) { - this.useAbsoluteZscore = useAbsoluteZscore; - } - - /** - * @return the finalEQTLBufferMaxLength - */ - public int getFinalEQTLBufferMaxLength() { - return finalEQTLBufferMaxLength; - } - - /** - * @param finalEQTLBufferMaxLength the finalEQTLBufferMaxLength to set - */ - public void setFinalEQTLBufferMaxLength(int finalEQTLBufferMaxLength) { - this.finalEQTLBufferMaxLength = finalEQTLBufferMaxLength; - } - - /** - * @return the nrOfBins - */ - public int getNrOfBins() { - return nrOfBins; - } - - /** - * @param nrOfBins the nrOfBins to set - */ - public void setNrOfBins(int nrOfBins) { - this.nrOfBins = nrOfBins; - } - - /** - * @return the fdrthreshold - */ - public double getFdrthreshold() { - return fdrthreshold; - } - - /** - * @param fdrthreshold the fdrthreshold to set - */ - public void setFdrthreshold(double fdrthreshold) { - this.fdrthreshold = fdrthreshold; - } - - /** - * @return the includeSNPsWithoutProperMapping - */ - public boolean isIncludeSNPsWithoutProperMapping() { - return includeSNPsWithoutProperMapping; - } - - /** - * @param includeSNPsWithoutProperMapping the - * includeSNPsWithoutProperMapping to set - */ - public void setIncludeSNPsWithoutProperMapping(boolean includeSNPsWithoutProperMapping) { - this.includeSNPsWithoutProperMapping = includeSNPsWithoutProperMapping; - } - - /** - * @return the includeProbesWithoutProperMapping - */ - public boolean isIncludeProbesWithoutProperMapping() { - return includeProbesWithoutProperMapping; - } - - /** - * @param includeProbesWithoutProperMapping the - * includeProbesWithoutProperMapping to set - */ - public void setIncludeProbesWithoutProperMapping(boolean includeProbesWithoutProperMapping) { - this.includeProbesWithoutProperMapping = includeProbesWithoutProperMapping; - } - - /** - * @return the cis - */ - public boolean isCis() { - return cis; - } - - /** - * @param cis the cis to set - */ - public void setCis(boolean cis) { - 
this.cis = cis; - } - - /** - * @return the trans - */ - public boolean isTrans() { - return trans; - } - - /** - * @param trans the trans to set - */ - public void setTrans(boolean trans) { - this.trans = trans; - } - - /** - * @return the cisdistance - */ - public int getCisdistance() { - return cisdistance; - } - - /** - * @param cisdistance the cisdistance to set - */ - public void setCisdistance(int cisdistance) { - this.cisdistance = cisdistance; - } - - /** - * @return the transdistance - */ - public int getTransdistance() { - return transdistance; - } - - /** - * @param transdistance the transdistance to set - */ - public void setTransdistance(int transdistance) { - this.transdistance = transdistance; - } - - /** - * @return the makezscoreplot - */ - public boolean isMakezscoreplot() { - return makezscoreplot; - } - - /** - * @param makezscoreplot the makezscoreplot to set - */ - public void setMakezscoreplot(boolean makezscoreplot) { - this.makezscoreplot = makezscoreplot; - } - - /** - * @return the probetranslationfile - */ - public String getProbetranslationfile() { - return probetranslationfile; - } - - /** - * @param probetranslationfile the probetranslationfile to set - */ - public void setProbetranslationfile(String probetranslationfile) { - this.probetranslationfile = probetranslationfile; - } - - /** - * @return the datasetnames - */ - public ArrayList getDatasetnames() { - return datasetnames; - } - - /** - * @param datasetnames the datasetnames to set - */ - public void setDatasetnames(ArrayList datasetnames) { - this.datasetnames = datasetnames; - } - - /** - * @return the datasetlocations - */ - public ArrayList getDatasetlocations() { - return datasetlocations; - } - - /** - * @param datasetlocations the datasetlocations to set - */ - public void setDatasetlocations(ArrayList datasetlocations) { - this.datasetlocations = datasetlocations; - } - - /** - * @return the datasetannotations - */ - public ArrayList getDatasetannotations() { - return 
datasetannotations; - } - - /** - * @param datasetannotations the datasetannotations to set - */ - public void setDatasetannotations(ArrayList datasetannotations) { - this.datasetannotations = datasetannotations; - } - - /** - * @return the output - */ - public String getOutput() { - return output; - } - - /** - * @param output the output to set - */ - public void setOutput(String output) { - this.output = output; - } - - /** - * @return the makezscoretable - */ - public boolean isMakezscoretable() { - return makezscoretable; - } - - /** - * @param makezscoretable the makezscoretable to set - */ - public void setMakezscoretable(boolean makezscoretable) { - this.makezscoretable = makezscoretable; - } - - /** - * @return the probeDatasetPresenceThreshold - */ - public int getProbeDatasetPresenceThreshold() { - return probeDatasetPresenceThreshold; - } - - /** - * @param probeDatasetPresenceThreshold the probeDatasetPresenceThreshold to - * set - */ - public void setProbeDatasetPresenceThreshold(int probeDatasetPresenceThreshold) { - this.probeDatasetPresenceThreshold = probeDatasetPresenceThreshold; - } - - /** - * @return the snpDatasetPresenceThreshold - */ - public int getSnpDatasetPresenceThreshold() { - return snpDatasetPresenceThreshold; - } - - /** - * @param snpDatasetPresenceThreshold the snpDatasetPresenceThreshold to set - */ - public void setSnpDatasetPresenceThreshold(int snpDatasetPresenceThreshold) { - this.snpDatasetPresenceThreshold = snpDatasetPresenceThreshold; - } - - /** - * @return the probeAndSNPPresenceFilterSampleThreshold - */ - public int getProbeAndSNPPresenceFilterSampleThreshold() { - return probeAndSNPPresenceFilterSampleThreshold; - } - - /** - * @param probeAndSNPPresenceFilterSampleThreshold the - * probeAndSNPPresenceFilterSampleThreshold to set - */ - public void setProbeAndSNPPresenceFilterSampleThreshold(int probeAndSNPPresenceFilterSampleThreshold) { - this.probeAndSNPPresenceFilterSampleThreshold = 
probeAndSNPPresenceFilterSampleThreshold; - } - - /** - * @return the runonlypermutation - */ - public int getRunonlypermutation() { - return runonlypermutation; - } - - /** - * @param runonlypermutation the runonlypermutation to set - */ - public void setRunonlypermutation(int runonlypermutation) { - this.runonlypermutation = runonlypermutation; - } - - /** - * @return the nrThresds - */ - public int getNrThresds() { - return nrThresds; - } - - /** - * @param nrThresds the nrThresds to set - */ - public void setNrThresds(int nrThresds) { - this.nrThresds = nrThresds; - } - - ArrayList getDatasetPrefix() { - return datasetPrefix; - } - - /** - * @return the probeselection - */ - public String getProbeselection() { - return probeselection; - } - - /** - * @param probeselection the probeselection to set - */ - public void setProbeselection(String probeselection) { - this.probeselection = probeselection; - } - - public String getSNPSelection() { - return snpselection; - } - - public String getSNPProbeSelection() { - return snpprobeselection; - } - - void save() { - try { - config.save(output + "metasettings.xml"); - } catch (ConfigurationException ex) { - Logger.getLogger(MetaSettings.class.getName()).log(Level.SEVERE, null, ex); - } - - } + private int nrPermutations = 10; + private boolean useAbsoluteZscore = false; + private int finalEQTLBufferMaxLength = 1000000; + private int nrOfBins = 100; + private double fdrthreshold = 0.05; + private boolean includeSNPsWithoutProperMapping = true; + private boolean includeProbesWithoutProperMapping = true; + private boolean cis = true; + private boolean trans = true; + private int cisdistance = 250000; + private int transdistance = 5000000; + private boolean makezscoreplot = true; + private String probetranslationfile; + private ArrayList datasetnames; + private ArrayList datasetPrefix; + private ArrayList datasetlocations; + private ArrayList datasetannotations; + private ArrayList selectedProbes; + private String output; + 
private boolean makezscoretable = false; + private int probeDatasetPresenceThreshold = 0; + private int snpDatasetPresenceThreshold = 0; + private int probeAndSNPPresenceFilterSampleThreshold = 0; + private int runonlypermutation; + private int nrThresds; + private String probeselection; + private String snpselection; + private XMLConfiguration config; + private String snpprobeselection; + + public void parse(String settings, String texttoreplace, String replacetextwith) { + try { + config = new XMLConfiguration(settings); + + nrPermutations = config.getInt("defaults.permutations", 0); + + useAbsoluteZscore = config.getBoolean("defaults.absolutezscore", false); + finalEQTLBufferMaxLength = config.getInt("defaults.finalnreqtls", 100000); + fdrthreshold = config.getDouble("defaults.fdrthreshold", 0.05); + cisdistance = config.getInt("defaults.cisprobedistance", 250000); + transdistance = config.getInt("defaults.transprobedistance", 5000000); + includeProbesWithoutProperMapping = config.getBoolean("defaults.includeprobeswithoutmapping", true); + includeSNPsWithoutProperMapping = config.getBoolean("defaults.includesnpswithoutmapping", true); + makezscoreplot = config.getBoolean("defaults.makezscoreplot", true); + makezscoretable = config.getBoolean("defaults.makezscoretable", false); + probetranslationfile = config.getString("defaults.probetranslationfile"); + String outputStr = config.getString("defaults.output"); + + System.out.println("outputstr: " + outputStr); + + if (texttoreplace != null && replacetextwith != null && outputStr.contains(texttoreplace)) { + outputStr = outputStr.replaceAll(texttoreplace, replacetextwith); + System.out.println("outputstr: " + outputStr); + } + output = outputStr; + System.out.println("outputstr: " + outputStr); +// System.exit(-1); + + + probeDatasetPresenceThreshold = config.getInt("defaults.minimalnumberofdatasetsthatcontainprobe", 0); + snpDatasetPresenceThreshold = config.getInt("defaults.minimalnumberofdatasetsthatcontainsnp", 
0); + probeAndSNPPresenceFilterSampleThreshold = config.getInt("defaults.snpprobeselectsamplesizethreshold", -1); + + runonlypermutation = config.getInt("defaults.runonlypermutation", -1); + nrThresds = config.getInt("defaults.threads", 0); + cis = config.getBoolean("defaults.cis", false); + trans = config.getBoolean("defaults.trans", false); + + probeselection = config.getString("defaults.probeselection"); + + if (probeselection != null && probeselection.trim().length() == 0) { + probeselection = null; + } + snpselection = config.getString("defaults.snpselection"); + + if (snpselection != null && snpselection.trim().length() == 0) { + snpselection = null; + } + + if (texttoreplace != null && replacetextwith != null && snpselection.contains(texttoreplace)) { + snpselection = snpselection.replaceAll(texttoreplace, replacetextwith); + } + + snpprobeselection = config.getString("defaults.snpprobeselection"); + + if (snpprobeselection != null && snpprobeselection.trim().length() == 0) { + snpprobeselection = null; + } else { + System.out.println("SNP PROBE SELECTION: " + snpprobeselection); + } + + + int i = 0; + + String dataset = ""; + datasetnames = new ArrayList(); + datasetlocations = new ArrayList(); + datasetannotations = new ArrayList(); + datasetPrefix = new ArrayList(); + + while (dataset != null) { + dataset = config.getString("datasets.dataset(" + i + ").name"); // see if a dataset is defined + if (dataset != null) { + + datasetnames.add(dataset); + String prefix = config.getString("datasets.dataset(" + i + ").prefix"); // see if a dataset is defined + + if (prefix == null) { + prefix = "Dataset"; + } + datasetPrefix.add(prefix); + String datasetlocation = config.getString("datasets.dataset(" + i + ").location"); // see if a dataset is defined + if (texttoreplace != null && replacetextwith != null && datasetlocation.contains(texttoreplace)) { + datasetlocation = datasetlocation.replace(texttoreplace, replacetextwith); + } + String datasetannotation = 
config.getString("datasets.dataset(" + i + ").expressionplatform"); // see if a dataset is defined + + datasetlocations.add(datasetlocation); + datasetannotations.add(datasetannotation); + } + i++; + } + + + // parse datasets + } catch (ConfigurationException e) { + e.printStackTrace(); + } + } + + /** + * @return the nrPermutations + */ + public int getNrPermutations() { + return nrPermutations; + } + + /** + * @param nrPermutations the nrPermutations to set + */ + public void setNrPermutations(int nrPermutations) { + this.nrPermutations = nrPermutations; + } + + /** + * @return the useAbsoluteZscore + */ + public boolean isUseAbsoluteZscore() { + return useAbsoluteZscore; + } + + /** + * @param useAbsoluteZscore the useAbsoluteZscore to set + */ + public void setUseAbsoluteZscore(boolean useAbsoluteZscore) { + this.useAbsoluteZscore = useAbsoluteZscore; + } + + /** + * @return the finalEQTLBufferMaxLength + */ + public int getFinalEQTLBufferMaxLength() { + return finalEQTLBufferMaxLength; + } + + /** + * @param finalEQTLBufferMaxLength the finalEQTLBufferMaxLength to set + */ + public void setFinalEQTLBufferMaxLength(int finalEQTLBufferMaxLength) { + this.finalEQTLBufferMaxLength = finalEQTLBufferMaxLength; + } + + /** + * @return the nrOfBins + */ + public int getNrOfBins() { + return nrOfBins; + } + + /** + * @param nrOfBins the nrOfBins to set + */ + public void setNrOfBins(int nrOfBins) { + this.nrOfBins = nrOfBins; + } + + /** + * @return the fdrthreshold + */ + public double getFdrthreshold() { + return fdrthreshold; + } + + /** + * @param fdrthreshold the fdrthreshold to set + */ + public void setFdrthreshold(double fdrthreshold) { + this.fdrthreshold = fdrthreshold; + } + + /** + * @return the includeSNPsWithoutProperMapping + */ + public boolean isIncludeSNPsWithoutProperMapping() { + return includeSNPsWithoutProperMapping; + } + + /** + * @param includeSNPsWithoutProperMapping the + * includeSNPsWithoutProperMapping to set + */ + public void 
setIncludeSNPsWithoutProperMapping(boolean includeSNPsWithoutProperMapping) { + this.includeSNPsWithoutProperMapping = includeSNPsWithoutProperMapping; + } + + /** + * @return the includeProbesWithoutProperMapping + */ + public boolean isIncludeProbesWithoutProperMapping() { + return includeProbesWithoutProperMapping; + } + + /** + * @param includeProbesWithoutProperMapping the + * includeProbesWithoutProperMapping to set + */ + public void setIncludeProbesWithoutProperMapping(boolean includeProbesWithoutProperMapping) { + this.includeProbesWithoutProperMapping = includeProbesWithoutProperMapping; + } + + /** + * @return the cis + */ + public boolean isCis() { + return cis; + } + + /** + * @param cis the cis to set + */ + public void setCis(boolean cis) { + this.cis = cis; + } + + /** + * @return the trans + */ + public boolean isTrans() { + return trans; + } + + /** + * @param trans the trans to set + */ + public void setTrans(boolean trans) { + this.trans = trans; + } + + /** + * @return the cisdistance + */ + public int getCisdistance() { + return cisdistance; + } + + /** + * @param cisdistance the cisdistance to set + */ + public void setCisdistance(int cisdistance) { + this.cisdistance = cisdistance; + } + + /** + * @return the transdistance + */ + public int getTransdistance() { + return transdistance; + } + + /** + * @param transdistance the transdistance to set + */ + public void setTransdistance(int transdistance) { + this.transdistance = transdistance; + } + + /** + * @return the makezscoreplot + */ + public boolean isMakezscoreplot() { + return makezscoreplot; + } + + /** + * @param makezscoreplot the makezscoreplot to set + */ + public void setMakezscoreplot(boolean makezscoreplot) { + this.makezscoreplot = makezscoreplot; + } + + /** + * @return the probetranslationfile + */ + public String getProbetranslationfile() { + return probetranslationfile; + } + + /** + * @param probetranslationfile the probetranslationfile to set + */ + public void 
setProbetranslationfile(String probetranslationfile) { + this.probetranslationfile = probetranslationfile; + } + + /** + * @return the datasetnames + */ + public ArrayList getDatasetnames() { + return datasetnames; + } + + /** + * @param datasetnames the datasetnames to set + */ + public void setDatasetnames(ArrayList datasetnames) { + this.datasetnames = datasetnames; + } + + /** + * @return the datasetlocations + */ + public ArrayList getDatasetlocations() { + return datasetlocations; + } + + /** + * @param datasetlocations the datasetlocations to set + */ + public void setDatasetlocations(ArrayList datasetlocations) { + this.datasetlocations = datasetlocations; + } + + /** + * @return the datasetannotations + */ + public ArrayList getDatasetannotations() { + return datasetannotations; + } + + /** + * @param datasetannotations the datasetannotations to set + */ + public void setDatasetannotations(ArrayList datasetannotations) { + this.datasetannotations = datasetannotations; + } + + /** + * @return the output + */ + public String getOutput() { + return output; + } + + /** + * @param output the output to set + */ + public void setOutput(String output) { + this.output = output; + } + + /** + * @return the makezscoretable + */ + public boolean isMakezscoretable() { + return makezscoretable; + } + + /** + * @param makezscoretable the makezscoretable to set + */ + public void setMakezscoretable(boolean makezscoretable) { + this.makezscoretable = makezscoretable; + } + + /** + * @return the probeDatasetPresenceThreshold + */ + public int getProbeDatasetPresenceThreshold() { + return probeDatasetPresenceThreshold; + } + + /** + * @param probeDatasetPresenceThreshold the probeDatasetPresenceThreshold to + * set + */ + public void setProbeDatasetPresenceThreshold(int probeDatasetPresenceThreshold) { + this.probeDatasetPresenceThreshold = probeDatasetPresenceThreshold; + } + + /** + * @return the snpDatasetPresenceThreshold + */ + public int 
getSnpDatasetPresenceThreshold() { + return snpDatasetPresenceThreshold; + } + + /** + * @param snpDatasetPresenceThreshold the snpDatasetPresenceThreshold to set + */ + public void setSnpDatasetPresenceThreshold(int snpDatasetPresenceThreshold) { + this.snpDatasetPresenceThreshold = snpDatasetPresenceThreshold; + } + + /** + * @return the probeAndSNPPresenceFilterSampleThreshold + */ + public int getProbeAndSNPPresenceFilterSampleThreshold() { + return probeAndSNPPresenceFilterSampleThreshold; + } + + /** + * @param probeAndSNPPresenceFilterSampleThreshold the + * probeAndSNPPresenceFilterSampleThreshold to set + */ + public void setProbeAndSNPPresenceFilterSampleThreshold(int probeAndSNPPresenceFilterSampleThreshold) { + this.probeAndSNPPresenceFilterSampleThreshold = probeAndSNPPresenceFilterSampleThreshold; + } + + /** + * @return the runonlypermutation + */ + public int getRunonlypermutation() { + return runonlypermutation; + } + + /** + * @param runonlypermutation the runonlypermutation to set + */ + public void setRunonlypermutation(int runonlypermutation) { + this.runonlypermutation = runonlypermutation; + } + + /** + * @return the nrThresds + */ + public int getNrThresds() { + return nrThresds; + } + + /** + * @param nrThresds the nrThresds to set + */ + public void setNrThresds(int nrThresds) { + this.nrThresds = nrThresds; + } + + ArrayList getDatasetPrefix() { + return datasetPrefix; + } + + /** + * @return the probeselection + */ + public String getProbeselection() { + return probeselection; + } + + /** + * @param probeselection the probeselection to set + */ + public void setProbeselection(String probeselection) { + this.probeselection = probeselection; + } + + public String getSNPSelection() { + return snpselection; + } + + public String getSNPProbeSelection() { + return snpprobeselection; + } + + void save() { + try { + config.save(output + "metasettings.xml"); + } catch (ConfigurationException ex) { + 
Logger.getLogger(MetaSettings.class.getName()).log(Level.SEVERE, null, ex); + } + + } } /* diff --git a/genetica-libraries/src/main/java/umcg/genetica/text/Strings.java b/genetica-libraries/src/main/java/umcg/genetica/text/Strings.java index 7eb516724..a9c93db44 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/text/Strings.java +++ b/genetica-libraries/src/main/java/umcg/genetica/text/Strings.java @@ -65,7 +65,7 @@ public static String concat(double[] s, Pattern t) { } return output.toString(); } - + public static String concat(double[] s, DecimalFormat f, Pattern t) { StringBuilder output = new StringBuilder(); @@ -78,13 +78,15 @@ public static String concat(double[] s, DecimalFormat f, Pattern t) { } return output.toString(); } - + public static String concat(float[] s, DecimalFormat f, Pattern t) { StringBuilder output = new StringBuilder(); for (int i = 0; i < s.length; i++) { if (i == 0) { output.append(f.format(s[i])); + } else if (Float.isNaN(i)) { + output.append("NaN"); } else { output.append(t.toString()).append(f.format(s[i])); } @@ -135,7 +137,6 @@ public static String[] split(String in) { i++; } - return list.toArray(new String[0]); } From 64f516abfadcae81bfd5aa42e2c134722fc93d37 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Tue, 5 May 2015 13:33:53 +0200 Subject: [PATCH 029/143] Expand replicate interaction script --- eqtl-mapping-pipeline/pom.xml | 2 +- .../ReplicateInteractions.java | 48 +++++++++++++++++-- 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/eqtl-mapping-pipeline/pom.xml b/eqtl-mapping-pipeline/pom.xml index e15aa90e7..e83281b89 100644 --- a/eqtl-mapping-pipeline/pom.xml +++ b/eqtl-mapping-pipeline/pom.xml @@ -7,7 +7,7 @@ 1.0.2-SNAPSHOT eqtl-mapping-pipeline - 1.3.3-SNAPSHOT + 1.3.4-SNAPSHOT jar 4.0.0 diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java 
b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java index f400c266a..4950ba392 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java @@ -101,6 +101,12 @@ public class ReplicateInteractions { OptionBuilder.withLongOpt("covariats"); OPTIONS.addOption(OptionBuilder.create("c")); + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("File with eQTL genes to include in analysis"); + OptionBuilder.withLongOpt("genes"); + OPTIONS.addOption(OptionBuilder.create("g")); + } public static void main(String[] args) throws FileNotFoundException, IOException, BinaryInteractionFileException { @@ -114,6 +120,7 @@ public static void main(String[] args) throws FileNotFoundException, IOException final boolean matchOnChrPos; final String outputPrefix; final File covariatesToIncludeFile; + final File genesToIncludeFile; try { final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); @@ -160,6 +167,12 @@ public static void main(String[] args) throws FileNotFoundException, IOException covariatesToIncludeFile = null; } + if (commandLine.hasOption("g")) { + genesToIncludeFile = new File(commandLine.getOptionValue("g")); + } else { + genesToIncludeFile = null; + } + matchOnChrPos = commandLine.hasOption("cp"); } catch (ParseException ex) { @@ -186,6 +199,9 @@ public static void main(String[] args) throws FileNotFoundException, IOException if (covariatesToIncludeFile != null) { writeAndOut("Covariates to include: " + covariatesToIncludeFile.getAbsolutePath(), logWriter); } + if (genesToIncludeFile != null) { + writeAndOut("eQTL genes to include: " + genesToIncludeFile.getAbsolutePath(), logWriter); + } writeAndOut("", logWriter); @@ -203,6 +219,20 @@ public static void main(String[] args) throws FileNotFoundException, 
IOException covariantsToInclude = null; } + final HashSet genesToInclude; + if (genesToIncludeFile != null) { + genesToInclude = new HashSet(); + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(genesToIncludeFile), "UTF-8")); + String line; + while ((line = reader.readLine()) != null) { + genesToInclude.add(line.trim()); + } + writeAndOut("eQTL genes included: " + genesToInclude.size(), logWriter); + writeAndOut("", logWriter); + } else { + genesToInclude = null; + } + BinaryInteractionFile inputFile = BinaryInteractionFile.load(inputInteractionFile, true); BinaryInteractionFile replicationFile = BinaryInteractionFile.load(replicationInteractionFile, true); @@ -212,6 +242,7 @@ public static void main(String[] args) throws FileNotFoundException, IOException CSVWriter replicatedOppositeDirectionWriter = writeHeader(new File(outputPrefix + "_ReplicatedOppositeDirection.txt"), row); CSVWriter notReplicatedSameDirectionWriter = writeHeader(new File(outputPrefix + "_NotReplicatedSameDirection.txt"), row); CSVWriter notReplicatedOppositeDirectionWriter = writeHeader(new File(outputPrefix + "_NotReplicatedOppositeDirection.txt"), row); + CSVWriter notInReplicationWriter = writeHeader(new File(outputPrefix + "_NotInReplication.txt"), row); int significant = 0; int notSignificant = 0; @@ -259,9 +290,13 @@ public static void main(String[] args) throws FileNotFoundException, IOException //Do loop anyway to also count not replicated int[] genePointers = inputFile.getVariant(variantName).getGenePointers(); + genes: for (int genePointer : genePointers) { BinaryInteractionGene gene = inputFile.getGene(genePointer); + if (genesToInclude != null && !genesToInclude.contains(gene.getName())) { + continue genes; + } covairates: for (Iterator iterator = inputFile.readVariantGeneResults(variantName, gene.getName()); iterator.hasNext();) { @@ -312,10 +347,12 @@ public static void main(String[] args) throws FileNotFoundException, IOException } } } else 
{ + writeInteraction(row, variantName, gene, interaction, variant, replicationQtlRes, replicationZscores, swap, notInReplicationWriter); ++nanReplication; } } else { + writeInteraction(row, variantName, gene, interaction, variant, null, null, swap, notInReplicationWriter); ++notTestedInReplication; } @@ -356,7 +393,7 @@ public static void main(String[] args) throws FileNotFoundException, IOException } else { } - + } else { } @@ -379,6 +416,7 @@ public static void main(String[] args) throws FileNotFoundException, IOException replicatedOppositeDirectionWriter.close(); notReplicatedSameDirectionWriter.close(); notReplicatedOppositeDirectionWriter.close(); + notInReplicationWriter.close(); writeCovaraiteCounts(new File(outputPrefix + "_CovariateCounts.txt"), covariateCounts); @@ -416,10 +454,10 @@ private static void writeInteraction(String[] row, String variantName, BinaryInt row[c++] = String.valueOf(interation.getInteractionZscores().getZscoreSnpMeta()); row[c++] = String.valueOf(interation.getInteractionZscores().getZscoreCovariateMeta()); row[c++] = String.valueOf(interation.getInteractionZscores().getZscoreInteractionMeta()); - row[c++] = String.valueOf(replicationQtlRes.getMetaZscore() * (swap ? -1 : 1)); - row[c++] = String.valueOf(replicationZscores.getZscoreSnpMeta() * (swap ? -1 : 1)); - row[c++] = String.valueOf(replicationZscores.getZscoreCovariateMeta()); - row[c++] = String.valueOf(replicationZscores.getZscoreInteractionMeta() * (swap ? -1 : 1)); + row[c++] = replicationQtlRes == null ? "NaN" : String.valueOf(replicationQtlRes.getMetaZscore() * (swap ? -1 : 1)); + row[c++] = replicationZscores == null ? "NaN" : String.valueOf(replicationZscores.getZscoreSnpMeta() * (swap ? -1 : 1)); + row[c++] = replicationZscores == null ? "NaN" : String.valueOf(replicationZscores.getZscoreCovariateMeta()); + row[c++] = replicationZscores == null ? "NaN" : String.valueOf(replicationZscores.getZscoreInteractionMeta() * (swap ? 
-1 : 1)); interactionWriter.writeNext(row); } From f5e229e31b7ec3471400011444398f6ef07642bf Mon Sep 17 00:00:00 2001 From: harmjanwestra Date: Tue, 5 May 2015 17:38:31 -0400 Subject: [PATCH 030/143] Fixes for binary meta-analysis --- .../BinaryMicrobePcaAnalysis.java | 15 +- .../BinaryMetaAnalysis.java | 111 ++++- .../BinaryMetaAnalysisDataset.java | 15 +- .../MetaQTL4TraitAnnotation.java | 12 +- .../metaqtl3/CalculationThread.java | 8 +- .../metaqtl3/MetaQTL3.java | 2 +- .../metaqtl3/ResultProcessorThread.java | 53 ++- .../java/umcg/genetica/io/bin/BinaryFile.java | 380 +++++++++--------- .../main/java/umcg/genetica/text/Strings.java | 12 +- 9 files changed, 375 insertions(+), 233 deletions(-) diff --git a/BinaryMetaAnalyzer/src/main/java/nl/umcg/bondermj/microbialmetanalysis/BinaryMicrobePcaAnalysis.java b/BinaryMetaAnalyzer/src/main/java/nl/umcg/bondermj/microbialmetanalysis/BinaryMicrobePcaAnalysis.java index 8279566c0..61f7602dc 100644 --- a/BinaryMetaAnalyzer/src/main/java/nl/umcg/bondermj/microbialmetanalysis/BinaryMicrobePcaAnalysis.java +++ b/BinaryMetaAnalyzer/src/main/java/nl/umcg/bondermj/microbialmetanalysis/BinaryMicrobePcaAnalysis.java @@ -113,7 +113,7 @@ public void run(int bufferSize) throws IOException { System.out.println("Loading datasets"); for (int d = 0; d < datasets.length; d++) { - datasets[d] = new BinaryMetaAnalysisDataset(settings.getDatasetlocations().get(d), settings.getDatasetPrefix().get(d), permutation, settings.getDatasetannotations().get(d), probeAnnotation); + datasets[d] = new BinaryMetaAnalysisDataset(settings.getDatasetlocations().get(d), settings.getDatasetnames().get(d), settings.getDatasetPrefix().get(d), permutation, settings.getDatasetannotations().get(d), probeAnnotation); } System.out.println("Loaded " + datasets.length + " datasets"); @@ -222,7 +222,7 @@ public void run(int bufferSize) throws IOException { for (int probe = 0; probe < traitList.length; probe++) { double metaAnalysisZ = 
ZScores.getWeightedZ(finalZScores[probe], sampleSizes); double tScore = ZScores.zScoreToCorrelation(metaAnalysisZ, totalSampleSize); - summedRsquare += tScore*tScore; + summedRsquare += tScore * tScore; } double newMetaZ = Correlation.convertCorrelationToZScore(totalSampleSize, Math.sqrt(summedRsquare)); double newMetaAnalysisP = Descriptives.convertZscoreToPvalue(newMetaZ); @@ -242,10 +242,10 @@ public void run(int bufferSize) throws IOException { double metaAnalysisZ = ZScores.getWeightedZ(finalZScores[probe], sampleSizes); for (int i = 0; i < finalZScores[probe].length; i++) { double tScore = ZScores.zScoreToCorrelation(finalZScores[probe][i], sampleSizes[i]); - summedPerDataSet[i] += tScore*tScore; + summedPerDataSet[i] += tScore * tScore; } double tScore = ZScores.zScoreToCorrelation(metaAnalysisZ, totalSampleSize); - summedRsquare += tScore*tScore; + summedRsquare += tScore * tScore; } for (int i = 0; i < summedPerDataSet.length; i++) { @@ -259,7 +259,7 @@ public void run(int bufferSize) throws IOException { MetaQTL4MetaTrait t = new MetaQTL4MetaTrait(21, "Microbe_Components", "-", -1, -1, "", traitList[0].getPlatformIds()); QTL q = new QTL(newMetaAnalysisP, t, snp, BaseAnnot.toByte(alleleAssessed), newMetaZ, BaseAnnot.toByteArray(alleles), summedPerDataSet, sampleSizes); // sort buffer if needed. 
addEQTL(q); - } else { + } else { System.out.println("Error in procedure."); } } @@ -544,10 +544,11 @@ private void writeBuffer(String outdir, int permutation) throws IOException { System.out.println( "Done."); } - + private void clearResultsBuffer() { Arrays.fill(finalEQTLs, null); bufferHasOverFlown = false; - locationToStoreResult=0; + locationToStoreResult = 0; + maxSavedPvalue = -Double.MAX_VALUE; } } diff --git a/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java b/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java index 85d5e8f91..2c9167723 100644 --- a/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java +++ b/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysis.java @@ -54,7 +54,7 @@ public static void main(String[] args) { System.exit(0); } - + private MetaQTL4TraitAnnotation probeAnnotation; private BinaryMetaAnalysisDataset[] datasets = new BinaryMetaAnalysisDataset[0]; private int[][] snpIndex; @@ -105,26 +105,36 @@ public void run() throws IOException { System.out.println("Loading probe annotation from: " + settings.getProbetranslationfile()); loadProbeAnnotation(); + if (traitList.length == 0) { + System.err.println("Error: no annotation loaded."); + System.exit(-1); + } + for (int permutation = 0; permutation < settings.getNrPermutations() + 1; permutation++) { clearResultsBuffer(); - + // create dataset objects System.out.println("Running permutation " + permutation); datasets = new BinaryMetaAnalysisDataset[settings.getDatasetlocations().size()]; System.out.println("Loading datasets"); for (int d = 0; d < datasets.length; d++) { - datasets[d] = new BinaryMetaAnalysisDataset(settings.getDatasetlocations().get(d), settings.getDatasetPrefix().get(d), permutation, settings.getDatasetannotations().get(d), probeAnnotation); + datasets[d] = new 
BinaryMetaAnalysisDataset(settings.getDatasetlocations().get(d), + settings.getDatasetnames().get(d), + settings.getDatasetPrefix().get(d), + permutation, + settings.getDatasetannotations().get(d), + probeAnnotation); } System.out.println("Loaded " + datasets.length + " datasets"); // create meta-analysis SNP index. have to recreate this every permutation, // since the order of SNPs is generated at random. System.out.println("Creating SNP index"); - createSNPIndex(); + createSNPIndex(outdir); System.out.println("Total of " + snpIndex.length + " SNPs"); System.out.println("Creating probe index"); - createProbeIndex(); + createProbeIndex(outdir); System.out.println("Total of " + probeIndex.length + " probes"); if (snpChr == null) { @@ -336,7 +346,7 @@ public void run() throws IOException { */ } - private void createSNPIndex() throws IOException { + private void createSNPIndex(String outdir) throws IOException { HashSet confineToTheseSNPs = null; if (settings.getSNPSelection() != null) { @@ -388,6 +398,21 @@ private void createSNPIndex() throws IOException { } } } + + TextFile tf = new TextFile(outdir + "snpindex.txt", TextFile.W); + String header = "metaID"; + for (int d = 0; d < datasets.length; d++) { + header += "\t" + datasets[d].getName() + "-sid"; + } + tf.writeln(header); + for (int s = 0; s < snpList.length; s++) { + String ln = snpList[s]; + for (int d = 0; d < datasets.length; d++) { + ln += "\t" + snpIndex[s][d]; + } + tf.writeln(ln); + } + tf.close(); } private void loadProbeAnnotation() throws IOException { @@ -436,31 +461,73 @@ private void loadSNPAnnotation() throws IOException { } // index the probes - private void createProbeIndex() throws IOException { - + private void createProbeIndex(String outdir) throws IOException { + HashSet confineToTheseProbes = null; - if (settings.getProbeselection()!= null) { + if (settings.getProbeselection() != null) { System.out.println("Selecting Probes from file: " + settings.getProbeselection()); 
confineToTheseProbes = new HashSet(); TextFile tf = new TextFile(settings.getProbeselection(), TextFile.R); confineToTheseProbes.addAll(tf.readAsArrayList()); tf.close(); - System.out.println(confineToTheseProbes.size() + " Probes loaded."); } - + + System.out.println(""); probeIndex = new Integer[traitList.length][datasets.length]; + for (int d = 0; d < datasets.length; d++) { String[] probes = datasets[d].getProbeList(); - int platformId = probeAnnotation.getPlatformId(settings.getDatasetannotations().get(d)); + int platformId = probeAnnotation.getPlatformId(datasets[d].getPlatform()); + + HashMap traitHashForPlatform = probeAnnotation.getTraitHashForPlatform(platformId); + System.out.println(probeAnnotation.getTraitHashPerPlatform().size()); + + System.out.println(datasets[d].getName() + "\t" + platformId + "\t" + datasets[d].getPlatform() + "\t" + traitHashForPlatform.size()); for (int p = 0; p < probes.length; p++) { + + MetaQTL4MetaTrait t = traitHashForPlatform.get(probes[p]); + int index = traitMap.get(t); + + if (probes[p].equals("60437")) { + if (t != null) { + System.out.println(t.getMetaTraitId()); + } else { + System.out.println("not found"); + } + } + if (confineToTheseProbes == null || confineToTheseProbes.contains(probes[p])) { - MetaQTL4MetaTrait t = probeAnnotation.getTraitForPlatformId(platformId, probes[p]); - int index = traitMap.get(t); probeIndex[index][d] = p; } } } + + System.out.println(""); + + TextFile out = new TextFile(outdir + "probeindex.txt", TextFile.W); + + String header = "metaID"; + for (int d = 0; d < datasets.length; d++) { + header += "\t" + datasets[d].getName() + "-pid\t" + datasets[d].getName() + "-probename"; + } + out.writeln(header); + for (int p = 0; p < probeIndex.length; p++) { + + String lnout = "" + traitList[p].getMetaTraitId(); + for (int d = 0; d < datasets.length; d++) { + Integer pid = probeIndex[p][d]; + String probeName = null; + if (pid != null) { + probeName = datasets[d].getProbeList()[pid]; + } + lnout 
+= "\t" + pid + "\t" + probeName; + } + + out.writeln(lnout); + } + + out.close(); } private void addEQTL(QTL q) { @@ -533,10 +600,10 @@ private void writeBuffer(String outdir, int permutation) throws IOException { + "Meta-Beta (SE)\t" + "Beta (SE)\t" + "FoldChange"; - + output.writeln(header); // PValue SNPName SNPChr SNPChrPos ProbeName ProbeChr ProbeCenterChrPos CisTrans SNPType AlleleAssessed OverallZScore DatasetsWhereSNPProbePairIsAvailableAndPassesQC DatasetsZScores DatasetsNrSamples IncludedDatasetsMeanProbeExpression IncludedDatasetsProbeExpressionVariance HGNCName IncludedDatasetsCorrelationCoefficient Meta-Beta (SE) Beta (SE) FoldChange FDR - + DecimalFormat format = new DecimalFormat("###.#######", new DecimalFormatSymbols(Locale.US)); DecimalFormat smallFormat = new DecimalFormat("0.#####E0", new DecimalFormatSymbols(Locale.US)); for (int i = 0; i < settings.getFinalEQTLBufferMaxLength(); i++) { @@ -584,13 +651,20 @@ private void writeBuffer(String outdir, int permutation) throws IOException { float[] datasetZScores = q.getDatasetZScores(); String[] dsBuilder = new String[datasets.length]; String[] dsNBuilder = new String[datasets.length]; + String[] dsZBuilder = new String[datasets.length]; + for (int d = 0; d < datasetZScores.length; d++) { + if (!Float.isNaN(datasetZScores[d])) { + String str = format.format(datasetZScores[d]); + dsBuilder[d] = settings.getDatasetnames().get(d); dsNBuilder[d] = "" + q.getDatasetSampleSizes()[d]; + dsZBuilder[d] = str; } else { dsBuilder[d] = "-"; dsNBuilder[d] = "-"; + dsZBuilder[d] = "-"; } } @@ -598,7 +672,7 @@ private void writeBuffer(String outdir, int permutation) throws IOException { sb.append(Strings.concat(dsBuilder, Strings.semicolon)); sb.append("\t"); - sb.append(Strings.concat(datasetZScores, format, Strings.semicolon)); + sb.append(Strings.concat(dsZBuilder, Strings.semicolon)); sb.append("\t"); sb.append(Strings.concat(dsNBuilder, Strings.semicolon)); @@ -620,6 +694,7 @@ private void writeBuffer(String 
outdir, int permutation) throws IOException { private void clearResultsBuffer() { Arrays.fill(finalEQTLs, null); bufferHasOverFlown = false; - locationToStoreResult=0; + locationToStoreResult = 0; + maxSavedPvalue = -Double.MAX_VALUE; } } diff --git a/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysisDataset.java b/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysisDataset.java index 7b9657a3b..4af125e99 100644 --- a/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysisDataset.java +++ b/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/BinaryMetaAnalysisDataset.java @@ -38,11 +38,16 @@ public class BinaryMetaAnalysisDataset { private final int platformId; private RandomAccessFile raf; - public BinaryMetaAnalysisDataset(String dir, String prefix, int permutation, String platform, MetaQTL4TraitAnnotation probeAnnotation) throws IOException { + private String name = null; + private String platform = null; + + public BinaryMetaAnalysisDataset(String dir, String name, String prefix, int permutation, String platform, MetaQTL4TraitAnnotation probeAnnotation) throws IOException { dir = Gpio.formatAsDirectory(dir); String matrix = dir; String probeFile = dir; String snpFile = dir; + this.platform = platform; + this.name = name; this.probeAnnotation = probeAnnotation; this.platformId = probeAnnotation.getPlatformId(platform); String pref = "Dataset"; @@ -265,4 +270,12 @@ public void close() throws IOException { raf.close(); } + public String getName() { + return name; + } + + public String getPlatform() { + return platform; + } + } diff --git a/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/MetaQTL4TraitAnnotation.java b/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/MetaQTL4TraitAnnotation.java index 8737b2175..e00e376da 100644 --- 
a/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/MetaQTL4TraitAnnotation.java +++ b/BinaryMetaAnalyzer/src/main/java/nl/umcg/westrah/binarymetaanalyzer/MetaQTL4TraitAnnotation.java @@ -67,8 +67,9 @@ public MetaQTL4TraitAnnotation(File probeAnnotationFile, Set platformsTo } } - int probeCounter = 0; + + // parse lines for (String[] elems : tf.readLineElemsIterable(TextFile.tab)) { String metaTraitName = elems[0]; @@ -89,12 +90,14 @@ public MetaQTL4TraitAnnotation(File probeAnnotationFile, Set platformsTo } String hugo = elems[4]; + String[] platformIds = new String[nrPlatforms]; // int metaTraitId, String metaTraitName, String chr, int chrStart, int chrEnd, String annotation, String[] platformIds + MetaQTL4MetaTrait metaTraitObj = new MetaQTL4MetaTrait(probeCounter, metaTraitName, chr, chrstartpos, chrendpos, hugo, platformIds); + platformNr = 0; for (int i = 5; i < elems.length; i++) { - platformNr = 0; if (colsToInclude[i]) { platformIds[platformNr] = elems[i]; HashMap probeToId = traitHashPerPlatform.get(platformNr); @@ -102,6 +105,7 @@ public MetaQTL4TraitAnnotation(File probeAnnotationFile, Set platformsTo platformNr++; } } + probeCounter++; metatraits.add(metaTraitObj); metaTraitNameToObj.put(metaTraitName, metaTraitObj); @@ -118,6 +122,10 @@ public MetaQTL4MetaTrait getTraitForPlatformId(Integer platformId, String platfo return traitHashPerPlatform.get(platformId).get(platformTrait); } + public HashMap getTraitHashForPlatform(Integer platformId) { + return traitHashPerPlatform.get(platformId); + } + public String[] getPlatforms() { return platforms; } diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/CalculationThread.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/CalculationThread.java index cf8a551eb..98becd2b8 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/CalculationThread.java +++ 
b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/CalculationThread.java @@ -333,7 +333,7 @@ private void analyze(WorkPackage wp) { // now push the results in the queue.. try { wp.setNumTested(testsPerformed); - m_result_queue.put(wp); + throwResult(wp); } catch (InterruptedException e) { e.printStackTrace(); } @@ -341,6 +341,8 @@ private void analyze(WorkPackage wp) { // System.out.println("Analyze: "+t1.getTimeDesc()); } + + protected static void test(int d, int p, Integer probeId, double[] x, double[] originalGenotypes, double varianceX, double varianceY, double meanY, boolean[] includeExpressionSample, int sampleCount, double[][] rawData, double[][] covariateRawData, Result r, WorkPackage wp, boolean metaAnalyseModelCorrelationYHat, boolean metaAnalyseInteractionTerms, boolean determinefoldchange) { final double[] y; double[][] covariates = covariateRawData; @@ -826,4 +828,8 @@ private void ploteQTL(WorkPackage wp, int p) { // } // randomNumberGenerator.deflatedZScores = inflatedZScores; // } + + private void throwResult(WorkPackage wp) throws InterruptedException { + m_result_queue.put(wp); + } } diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/MetaQTL3.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/MetaQTL3.java index 500d2aaf9..6770c0be7 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/MetaQTL3.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/MetaQTL3.java @@ -807,7 +807,7 @@ public void mapEQTLs() throws IOException { expressionToGenotypeIds[d] = m_gg[d].getExpressionToGenotypeIdArray(); } - LinkedBlockingQueue resultQueue = new LinkedBlockingQueue(100); + LinkedBlockingQueue resultQueue = new LinkedBlockingQueue(250); ResultProcessorThread resultthread = new ResultProcessorThread(m_settings.nrThreads, resultQueue, m_settings.createBinaryOutputFiles, m_gg, m_settings, m_probeTranslationTable, permuting, permutationRound, m_snpList, 
m_probeList, m_workPackages); resultthread.setName("ResultProcessorThread"); diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/ResultProcessorThread.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/ResultProcessorThread.java index 65b15aada..4e7562ded 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/ResultProcessorThread.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/ResultProcessorThread.java @@ -354,7 +354,7 @@ private void writeBinaryResult(Result r) throws IOException { SNP[] snps = currentWP.getSnps(); int numDatasets = zscores.length; double[] finalZscores = r.finalZScore; - String snpoutput = null; + StringBuilder snpoutput = null; // if we're doing a meta-analysis, write the meta-analysis Z to a separate binaryFile if (m_gg.length > 1) { @@ -372,7 +372,14 @@ private void writeBinaryResult(Result r) throws IOException { alleleassessed = alleles[0]; } if (snpoutput == null) { - snpoutput = snpname + "\t" + BaseAnnot.getAllelesDescription(alleles) + "\t" + BaseAnnot.toString(minorAllele) + "\t" + BaseAnnot.toString(alleleassessed); + snpoutput = new StringBuilder(); + snpoutput.append(snpname); + snpoutput.append("\t"); + snpoutput.append(BaseAnnot.getAllelesDescription(alleles)); + snpoutput.append("\t"); + snpoutput.append(BaseAnnot.toString(minorAllele)); + snpoutput.append("\t"); + snpoutput.append(BaseAnnot.toString(alleleassessed)); } totalSampleNr += r.numSamples[d]; } @@ -398,12 +405,21 @@ private void writeBinaryResult(Result r) throws IOException { } } - if (sb != null) { - zScoreMetaAnalysisRowNamesFile.writeln(snpoutput + "\t" + totalSampleNr + "\t-\t-\t-\t" + finalZscores.length + "\t" + sb.toString()); - } else { - zScoreMetaAnalysisRowNamesFile.writeln(snpoutput + "\t" + totalSampleNr + "\t-\t-\t-\t" + finalZscores.length + "\t-"); + if (snpoutput != null) { + snpoutput.append("\t"); + snpoutput.append(totalSampleNr); + 
snpoutput.append("\t-\t-\t-\t"); + snpoutput.append(finalZscores.length); + snpoutput.append("\t"); + if (sb != null) { + snpoutput.append(sb.toString()); + } else { + snpoutput.append("-"); + } + zScoreMetaAnalysisRowNamesFile.writeln(snpoutput.toString()); } } + for (int d = 0; d < numDatasets; d++) { double[] datasetZScores = zscores[d]; SNP datasetSNP = snps[d]; @@ -447,12 +463,33 @@ private void writeBinaryResult(Result r) throws IOException { } } + StringBuilder buffer = new StringBuilder(); + buffer.append(snpname) + .append("\t") + .append(BaseAnnot.getAllelesDescription(alleles)) + .append("\t") + .append(BaseAnnot.toString(minorAllele)) + .append("\t") + .append(BaseAnnot.toString(alleleassessed)) + .append("\t") + .append(datasetSNP.getNrCalled()) + .append("\t") + .append(maf) + .append("\t") + .append(hwe) + .append("\t") + .append(cr) + .append("\t") + .append(datasetZScores.length) + .append("\t"); if (sb != null) { - snpfile.writeln(snpname + "\t" + BaseAnnot.getAllelesDescription(alleles) + "\t" + BaseAnnot.toString(minorAllele) + "\t" + BaseAnnot.toString(alleleassessed) + "\t" + datasetSNP.getNrCalled() + "\t" + maf + "\t" + hwe + "\t" + cr + "\t" + datasetZScores.length + "\t" + sb.toString()); + buffer.append(sb.toString()); } else { - snpfile.writeln(snpname + "\t" + BaseAnnot.getAllelesDescription(alleles) + "\t" + BaseAnnot.toString(minorAllele) + "\t" + BaseAnnot.toString(alleleassessed) + "\t" + datasetSNP.getNrCalled() + "\t" + maf + "\t" + hwe + "\t" + cr + "\t" + datasetZScores.length + "\t-"); + buffer.append("-"); } + snpfile.writeln(buffer.toString()); + } } } diff --git a/genetica-libraries/src/main/java/umcg/genetica/io/bin/BinaryFile.java b/genetica-libraries/src/main/java/umcg/genetica/io/bin/BinaryFile.java index e7685de19..bae48877d 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/io/bin/BinaryFile.java +++ b/genetica-libraries/src/main/java/umcg/genetica/io/bin/BinaryFile.java @@ -7,8 +7,6 @@ import 
com.mastfrog.util.streams.HashingOutputStream; import java.io.*; import java.security.NoSuchAlgorithmException; -import java.util.logging.Level; -import java.util.logging.Logger; /** * @@ -16,193 +14,193 @@ */ public class BinaryFile { - public static final boolean W = true; - public static final boolean R = false; - protected final DataOutputStream os; - protected final DataInputStream is; - protected final String loc; - protected final boolean writeable; - private final HashingOutputStream osh; - - public BinaryFile(String loc, boolean mode) throws IOException { - if (loc.trim().length() == 0) { - throw new IOException("Could not find file: no file specified"); - } - this.writeable = mode; - this.loc = loc; - - if (writeable) { - try { - is = null; - osh = new HashingOutputStream("md5", new FileOutputStream(loc)); - os = new DataOutputStream(new BufferedOutputStream(osh)); - } catch (NoSuchAlgorithmException ex) { - throw new RuntimeException(ex); - } - } else { - is = new DataInputStream(new BufferedInputStream(new FileInputStream(loc))); - os = null; - osh = null; - } - } - - public BinaryFile(String loc, boolean mode, int buffersize) throws IOException { - if (loc.trim().length() == 0) { - throw new IOException("Could not find file: no file specified"); - } - this.writeable = mode; - this.loc = loc; - - if (writeable) { - try { - is = null; - osh = new HashingOutputStream("md5", new FileOutputStream(loc)); - os = new DataOutputStream(new BufferedOutputStream(osh, buffersize)); - } catch (NoSuchAlgorithmException ex) { - throw new RuntimeException(ex); - } - } else { - is = new DataInputStream(new BufferedInputStream(new FileInputStream(loc), buffersize)); - os = null; - osh = null; - } - } - - public void writeBytes(byte[] v) throws IOException { - if (writeable) { - os.write(v); - } else { - throw new IOException("File is read only."); - } - } - - public void writeInt(int v) throws IOException { - if (writeable) { - os.writeInt(v); - } else { - throw new 
IOException("File is read only."); - } - } - - public void writeString(String s) throws IOException { - if (writeable) { - os.writeChars(s); - } else { - throw new IOException("File is read only."); - } - } - - public void writeBool(boolean b) throws IOException { - if (writeable) { - os.writeBoolean(b); - } else { - throw new IOException("File is read only."); - } - } - - public void writeFloat(float f) throws IOException { - if (writeable) { - os.writeFloat(f); - } else { - throw new IOException("File is read only."); - } - } - - public void writeDouble(double d) throws IOException { - if (writeable) { - os.writeDouble(d); - } else { - throw new IOException("File is read only."); - } - } - - public void writeLong(long l) throws IOException { - if (writeable) { - os.writeLong(l); - } else { - throw new IOException("File is read only."); - } - } - - // read functions - public int readInt() throws IOException, EOFException { - if (writeable) { - throw new IOException("File is write only."); - } else { - return is.readInt(); - } - } - - public boolean readBool() throws IOException, EOFException { - if (writeable) { - throw new IOException("File is write only."); - } else { - return is.readBoolean(); - } - } - - public String readString() throws IOException, EOFException { - if (writeable) { - throw new IOException("File is write only."); - } else { - return is.readUTF(); - } - } - - public float readFloat() throws IOException, EOFException { - if (writeable) { - throw new IOException("File is write only."); - } else { - return is.readFloat(); - - } - } - - public double readDouble() throws IOException, EOFException { - if (writeable) { - throw new IOException("File is write only."); - } else { - return is.readDouble(); - } - } - - public long readLong() throws IOException, EOFException { - if (writeable) { - throw new IOException("File is write only."); - } else { - return is.readLong(); - } - } - - public void close() throws IOException { - if (writeable) { - 
os.close(); - } else { - is.close(); - } - } - - public void writeByte(byte b) throws IOException { - if (writeable) { - os.writeByte(b); - } else { - throw new IOException("File is read only."); - } - } - - public int read() throws IOException { - return is.read(); - } - - public void write(int b) throws IOException { - os.write(b); - } - - public byte[] getWrittenHash() throws IOException { - if (writeable) { - return osh.getDigest(); - } else { - return null; - } - } + public static final boolean W = true; + public static final boolean R = false; + protected final DataOutputStream os; + protected final DataInputStream is; + protected final String loc; + protected final boolean writeable; + private final HashingOutputStream osh; + + public BinaryFile(String loc, boolean mode) throws IOException { + if (loc.trim().length() == 0) { + throw new IOException("Could not find file: no file specified"); + } + this.writeable = mode; + this.loc = loc; + + if (writeable) { + try { + is = null; + osh = new HashingOutputStream("md5", new FileOutputStream(loc)); + os = new DataOutputStream(new BufferedOutputStream(osh, 32 * 1024)); + } catch (NoSuchAlgorithmException ex) { + throw new RuntimeException(ex); + } + } else { + is = new DataInputStream(new BufferedInputStream(new FileInputStream(loc))); + os = null; + osh = null; + } + } + + public BinaryFile(String loc, boolean mode, int buffersize) throws IOException { + if (loc.trim().length() == 0) { + throw new IOException("Could not find file: no file specified"); + } + this.writeable = mode; + this.loc = loc; + + if (writeable) { + try { + is = null; + osh = new HashingOutputStream("md5", new FileOutputStream(loc)); + os = new DataOutputStream(new BufferedOutputStream(osh, buffersize)); + } catch (NoSuchAlgorithmException ex) { + throw new RuntimeException(ex); + } + } else { + is = new DataInputStream(new BufferedInputStream(new FileInputStream(loc), buffersize)); + os = null; + osh = null; + } + } + + public void 
writeBytes(byte[] v) throws IOException { + if (writeable) { + os.write(v); + } else { + throw new IOException("File is read only."); + } + } + + public void writeInt(int v) throws IOException { + if (writeable) { + os.writeInt(v); + } else { + throw new IOException("File is read only."); + } + } + + public void writeString(String s) throws IOException { + if (writeable) { + os.writeChars(s); + } else { + throw new IOException("File is read only."); + } + } + + public void writeBool(boolean b) throws IOException { + if (writeable) { + os.writeBoolean(b); + } else { + throw new IOException("File is read only."); + } + } + + public void writeFloat(float f) throws IOException { + if (writeable) { + os.writeFloat(f); + } else { + throw new IOException("File is read only."); + } + } + + public void writeDouble(double d) throws IOException { + if (writeable) { + os.writeDouble(d); + } else { + throw new IOException("File is read only."); + } + } + + public void writeLong(long l) throws IOException { + if (writeable) { + os.writeLong(l); + } else { + throw new IOException("File is read only."); + } + } + + // read functions + public int readInt() throws IOException, EOFException { + if (writeable) { + throw new IOException("File is write only."); + } else { + return is.readInt(); + } + } + + public boolean readBool() throws IOException, EOFException { + if (writeable) { + throw new IOException("File is write only."); + } else { + return is.readBoolean(); + } + } + + public String readString() throws IOException, EOFException { + if (writeable) { + throw new IOException("File is write only."); + } else { + return is.readUTF(); + } + } + + public float readFloat() throws IOException, EOFException { + if (writeable) { + throw new IOException("File is write only."); + } else { + return is.readFloat(); + + } + } + + public double readDouble() throws IOException, EOFException { + if (writeable) { + throw new IOException("File is write only."); + } else { + return 
is.readDouble(); + } + } + + public long readLong() throws IOException, EOFException { + if (writeable) { + throw new IOException("File is write only."); + } else { + return is.readLong(); + } + } + + public void close() throws IOException { + if (writeable) { + os.close(); + } else { + is.close(); + } + } + + public void writeByte(byte b) throws IOException { + if (writeable) { + os.writeByte(b); + } else { + throw new IOException("File is read only."); + } + } + + public int read() throws IOException { + return is.read(); + } + + public void write(int b) throws IOException { + os.write(b); + } + + public byte[] getWrittenHash() throws IOException { + if (writeable) { + return osh.getDigest(); + } else { + return null; + } + } } diff --git a/genetica-libraries/src/main/java/umcg/genetica/text/Strings.java b/genetica-libraries/src/main/java/umcg/genetica/text/Strings.java index a9c93db44..2a8ce20cf 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/text/Strings.java +++ b/genetica-libraries/src/main/java/umcg/genetica/text/Strings.java @@ -83,12 +83,16 @@ public static String concat(float[] s, DecimalFormat f, Pattern t) { StringBuilder output = new StringBuilder(); for (int i = 0; i < s.length; i++) { + float floatVal = s[i]; + String str = f.format(floatVal); + if (Float.isNaN(floatVal)) { + str = "NaN"; + } + if (i == 0) { - output.append(f.format(s[i])); - } else if (Float.isNaN(i)) { - output.append("NaN"); + output.append(str); } else { - output.append(t.toString()).append(f.format(s[i])); + output.append(t.toString()).append(str); } } return output.toString(); From d44b1e5836d148668ddfb65cc90afed3b8b64ef6 Mon Sep 17 00:00:00 2001 From: Marc Jan Bonder Date: Wed, 6 May 2015 08:48:52 +0200 Subject: [PATCH 031/143] heterogenity calculation fix --- .../src/main/java/umcg/genetica/math/stats/Heterogeneity.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genetica-libraries/src/main/java/umcg/genetica/math/stats/Heterogeneity.java 
b/genetica-libraries/src/main/java/umcg/genetica/math/stats/Heterogeneity.java index 5fb796f7c..015b2de06 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/math/stats/Heterogeneity.java +++ b/genetica-libraries/src/main/java/umcg/genetica/math/stats/Heterogeneity.java @@ -88,7 +88,7 @@ public static Pair getISq(double[] datasetZ, int[] datasetWeight double hetSum = 0; int hetDf = 0; for (int d = 0; d < datasetZ.length; d++) { - if (Double.isNaN(datasetZ[d])) { + if (!Double.isNaN(datasetZ[d])) { double expectedZ = Math.sqrt(datasetWeights[d]) * weightedZ / totalSample; hetSum += (datasetZ[d] - expectedZ) * (datasetZ[d] - expectedZ); From a8ff46d42bd8cffd8d1ad842b993883d7f7e49cd Mon Sep 17 00:00:00 2001 From: Marc Jan Bonder Date: Wed, 6 May 2015 08:49:05 +0200 Subject: [PATCH 032/143] Annotation update --- .../util/QTLAnnotator.java | 55 +++++++++++++------ 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLAnnotator.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLAnnotator.java index a2468ec65..3fe8c4b0f 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLAnnotator.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLAnnotator.java @@ -49,30 +49,53 @@ public static void main(String[] args) throws IOException { // "1;8-9-10-11-12-13-14;1;4-5;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4", "snp;probe;snp;snp", null, // "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt-ExtendedInfo5.txt"); +// addAnnotationToQTLOutput( +// 
"D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt", +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v6.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\statisticsTMM_exprssion.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSurounding_RP3Genes.txt", +// "0;209-210-211-212-213-214-215-216-217-218-219-220-221-222-223-224-225-226-227-228-229-230-231-232-233-234-235-236-237-238-239-240-241-242-243;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15", "snp;snp;probe;probe", null, +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfo3.txt"); + +// addAnnotationToQTLOutput( +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt", +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v8.txt.gz", +// "0;264-265-266-267-268-269-270-271-272-273-274-275-276-277-278-279-280-281-282-283-284-285-286-287-288-289-290-291-292-293-294-295", "snp", null, +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfo4.txt"); + +// addAnnotationToQTLOutput( +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLsFDR0.05-SNPLevel.txt", +// 
"D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v5.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt", +// "1;8-9-10-11-12-13-14;1;4-5;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4", "snp;probe;snp;snp", null, +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLsFDR0.05-SNPLevel.txt-ExtendedInfo.txt"); + +// addAnnotationToQTLOutput( +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLsFDR0.05-SNPLevel.txt", +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v6.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt", +// "0;209-210-211-212-213-214-215-216-217-218-219-220-221-222-223-224-225-226-227-228-229-230-231-232-233-234-235-236-237-238-239-240-241-242-243;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15", "snp;snp", null, +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLsFDR0.05-SNPLevel.txt_ExtendedInfo2.txt"); +// + addAnnotationToQTLOutput( + "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt", + 
"D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v10_13BM.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\statisticsTMM_exprssion.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSurounding_RP3Genes.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt", + "0;8-9-10-11-12-13-14-15-16-17-18-175-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-192-193-194-195-196-197-198-199-200-201-202-203-204-205-206-207-208-291-292-293-294-295-296-297-298-299-300-301-302-303-304-305-306-307-308-309-310-311-312-313-314-315-316-317-318-319-320-321-322-323-324-325-326-327-328-329-330-331-332-333-334-335-336-337-338-339-340-341-342-343-344-345-346;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;1;4-5;0;1-2-3-4", "snp;snp;probe;probe;probe;snp", null, + "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfo13BM.txt"); + + // addAnnotationToQTLOutput( // "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\QTLCorrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt", // "D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v5.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;", // 
"1;8-9-10-11-12-13-14;1;4-5;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15", "snp;probe;snp;snp;snp", null, // "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\QTLCorrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfo4.txt"); -// - - addAnnotationToQTLOutput( - "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs_Exon\\Optimal_PC_and_QTL_Corrected\\eQTLSNPsFDR0.05-SNPLevel.txt", - "D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v5.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;", - "1;8-9-10-11-12-13-14;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15", "snp;snp;snp;snp", null, - "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs_Exon\\Optimal_PC_and_QTL_Corrected\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfo.txt"); // addAnnotationToQTLOutput( -// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Cis_Pc22c_meQTLs\\Comparison_eQTLs_meQTLs.txt", -// 
"D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v5.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation_InterestCis_MJ_v1.txt.gz", -// "1;8-9-10-11-12-13-14;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4;0;3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18-19-20-21-22-23-24-25-26-27-28-29-30-31-32-33-34-35-36-37", "probe;probe;probe;snp", null, -// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Cis_Pc22c_meQTLs\\Comparison_eQTLs_meQTLs3.txt"); +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs_Exon\\Optimal_PC_and_QTL_Corrected\\eQTLSNPsFDR0.05-SNPLevel.txt", +// "D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v5.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;", +// "1;8-9-10-11-12-13-14;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15", "snp;snp;snp;snp", null, +// 
"D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs_Exon\\Optimal_PC_and_QTL_Corrected\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfo2.txt"); // -// addAnnotationToQTLOutput( -// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Comparison_QTLs_eQTMs\\Comparison_forAnnot.txt", -// "D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v5.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation_InterestCis_MJ_v1.txt.gz", -// "1;8-9-10-11-12-13-14;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4;0;3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18-19-20-21-22-23-24-25-26-27-28-29-30-31-32-33-34-35-36-37", "probe;probe;probe;snp", null, -// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Comparison_QTLs_eQTMs\\Comparison_forAnnot_out.txt"); + + + + } static void addAnnotationToQTLOutput(String in, String sources, String keyValuePairs, String idsToAnnotate, String reannotateGene, String out) throws IOException { From 649fd7e528d508f75450d58cb3d56bc7cfc1d0f5 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 6 May 2015 12:41:50 +0200 Subject: [PATCH 033/143] Interaction replication script --- .../ReplicateInteractions.java | 63 ++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java index 4950ba392..e092c64b8 100644 --- 
a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java @@ -106,6 +106,18 @@ public class ReplicateInteractions { OptionBuilder.withDescription("File with eQTL genes to include in analysis"); OptionBuilder.withLongOpt("genes"); OPTIONS.addOption(OptionBuilder.create("g")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("File with covariates to test for replication"); + OptionBuilder.withLongOpt("covariatsReplication"); + OPTIONS.addOption(OptionBuilder.create("cr")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("File with eQTL genes to test for replication"); + OptionBuilder.withLongOpt("genesReplication"); + OPTIONS.addOption(OptionBuilder.create("gr")); } @@ -121,6 +133,8 @@ public static void main(String[] args) throws FileNotFoundException, IOException final String outputPrefix; final File covariatesToIncludeFile; final File genesToIncludeFile; + final File covariatesReplicationToIncludeFile; + final File genesReplicationToIncludeFile; try { final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); @@ -172,6 +186,18 @@ public static void main(String[] args) throws FileNotFoundException, IOException } else { genesToIncludeFile = null; } + + if (commandLine.hasOption("cr")) { + covariatesReplicationToIncludeFile = new File(commandLine.getOptionValue("cr")); + } else { + covariatesReplicationToIncludeFile = null; + } + + if (commandLine.hasOption("gr")) { + genesReplicationToIncludeFile = new File(commandLine.getOptionValue("gr")); + } else { + genesReplicationToIncludeFile = null; + } matchOnChrPos = commandLine.hasOption("cp"); @@ -202,6 +228,13 @@ public static void main(String[] args) throws FileNotFoundException, IOException if (genesToIncludeFile != null) { writeAndOut("eQTL genes to 
include: " + genesToIncludeFile.getAbsolutePath(), logWriter); } + if (covariatesReplicationToIncludeFile != null) { + writeAndOut("Covariates replication to include: " + covariatesReplicationToIncludeFile.getAbsolutePath(), logWriter); + } + if (genesReplicationToIncludeFile != null) { + writeAndOut("eQTL genes replication to include: " + genesReplicationToIncludeFile.getAbsolutePath(), logWriter); + } + writeAndOut("", logWriter); @@ -232,6 +265,34 @@ public static void main(String[] args) throws FileNotFoundException, IOException } else { genesToInclude = null; } + + final HashSet covariantsReplicationToInclude; + if (covariatesReplicationToIncludeFile != null) { + covariantsReplicationToInclude = new HashSet(); + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(covariatesReplicationToIncludeFile), "UTF-8")); + String line; + while ((line = reader.readLine()) != null) { + covariantsReplicationToInclude.add(line.trim()); + } + writeAndOut("Covariates replication included: " + covariantsReplicationToInclude.size(), logWriter); + writeAndOut("", logWriter); + } else { + covariantsReplicationToInclude = null; + } + + final HashSet genesReplicationToInclude; + if (genesReplicationToIncludeFile != null) { + genesReplicationToInclude = new HashSet(); + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(genesReplicationToIncludeFile), "UTF-8")); + String line; + while ((line = reader.readLine()) != null) { + genesReplicationToInclude.add(line.trim()); + } + writeAndOut("eQTL genes replication included: " + genesReplicationToInclude.size(), logWriter); + writeAndOut("", logWriter); + } else { + genesReplicationToInclude = null; + } BinaryInteractionFile inputFile = BinaryInteractionFile.load(inputInteractionFile, true); BinaryInteractionFile replicationFile = BinaryInteractionFile.load(replicationInteractionFile, true); @@ -312,7 +373,7 @@ public static void main(String[] args) throws 
FileNotFoundException, IOException if (metaInteractionZ >= minAbsInteractionZ || metaInteractionZ <= -minAbsInteractionZ) { ++significant; - if (replicationVariant != null && replicationFile.containsInteraction(replicationVariant.getName(), gene.getName(), interaction.getCovariateName())) { + if (replicationVariant != null && replicationFile.containsInteraction(replicationVariant.getName(), gene.getName(), interaction.getCovariateName()) && (genesReplicationToInclude == null || genesReplicationToInclude.contains(interaction.getGeneName()) && (covariantsReplicationToInclude == null || covariantsToInclude.contains(interaction.getCovariateName())) )) { BinaryInteractionZscores replicationZscores = replicationFile.readInteractionResults(replicationVariant.getName(), gene.getName(), interaction.getCovariateName()); double replicationInteractionZscore = replicationZscores.getZscoreInteractionMeta(); From c7413edfd5b561cfb6d12dcaad73e60601d93b6c Mon Sep 17 00:00:00 2001 From: Marc Jan Bonder Date: Thu, 7 May 2015 12:34:18 +0200 Subject: [PATCH 034/143] Bump meta-analyzer --- BinaryMetaAnalyzer/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BinaryMetaAnalyzer/pom.xml b/BinaryMetaAnalyzer/pom.xml index d8010dbd1..a01baf7c4 100644 --- a/BinaryMetaAnalyzer/pom.xml +++ b/BinaryMetaAnalyzer/pom.xml @@ -7,7 +7,7 @@ 1.0.2-SNAPSHOT BinaryMetaAnalyzer - 1.0.6-SNAPSHOT + 1.0.7-SNAPSHOT jar From 937c6c44f20c2eb603f0fa9eff08ebb969d3130e Mon Sep 17 00:00:00 2001 From: Marc Jan Bonder Date: Thu, 7 May 2015 12:34:29 +0200 Subject: [PATCH 035/143] annotator updates --- .../util/HiCTransQTLAnnotator.java | 457 ++++++++++++++++++ .../util/QTLAnnotator.java | 42 +- 2 files changed, 472 insertions(+), 27 deletions(-) create mode 100644 eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HiCTransQTLAnnotator.java diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HiCTransQTLAnnotator.java 
b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HiCTransQTLAnnotator.java new file mode 100644 index 000000000..f044c389e --- /dev/null +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HiCTransQTLAnnotator.java @@ -0,0 +1,457 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ +package eqtlmappingpipeline.util; + +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.regex.Pattern; +import org.apache.commons.lang3.StringUtils; +import umcg.genetica.containers.Pair; +import umcg.genetica.io.text.TextFile; +import umcg.genetica.io.trityper.EQTL; +import umcg.genetica.io.trityper.QTLTextFile; + +/** + * + * @author MarcJan + */ +class HiCTransQTLAnnotator { + + private static final Pattern SPLIT_TAB = Pattern.compile("\t"); + + public static void main(String[] args) throws IOException { + //"D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\eQTLsFDR0.05-ProbeLevel_BsFiltered_And_Filtered.txt" + + String QTLfile = "D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\eQTLsFDR0.05-ProbeLevel_BsFiltered&Filtered.txt"; + String proxyfile = "D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\proxiesMeQTLSnps.txt"; + String QTLoutfile = "D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\eQTLsFDR0.05-ProbeLevel_BsFiltered&Filtered_HiC_LD_annotated.txt"; + String folderHighC = "F:\\Contacts\\GM12878_combined_interchromosomal\\"; + String resolution = "1kb"; + String qualityCutOff = "E30"; //0 or E30 + String normMethod = null; 
//null / KRnorm / SQRTVCnorm / VCnorm + double minValueQuality = 0; + + boolean lowMemMode = true; + + if (!lowMemMode) { + addAnnotationToQTLOutput( + QTLfile, + proxyfile, + folderHighC, + resolution, + qualityCutOff, + normMethod, + minValueQuality, + QTLoutfile); + } else { + addAnnotationToQTLOutputLowMem( + QTLfile, + proxyfile, + folderHighC, + resolution, + qualityCutOff, + normMethod, + minValueQuality, + QTLoutfile); + } + } + + static void addAnnotationToQTLOutput(String in, String inProxies, String folderHighC, String resolution, String qualityCutOff, String normMethod, double minValue, String out) throws IOException { + QTLTextFile eqtlTextFile = new QTLTextFile(in, QTLTextFile.R); + + ArrayList qtls = eqtlTextFile.readList(); + + if (inProxies != null) { + qtls = includeProxyInfo(qtls, inProxies); + } + + HashMap>> contactBuffer = new HashMap>>(); + //Here we need to make a new Type to store the potentialy inflated files. + TextFile outWriter = new TextFile(out, TextFile.W); + for (EQTL eqtl : qtls) { + String chrProbe = String.valueOf(eqtl.getProbeChr()); + String chrSnp = String.valueOf(eqtl.getRsChr()); + +// System.out.println(chrProbe+"\t"+chrSnp); + if (chrProbe.equals(chrSnp)) { + //Here we need to check how to normalize and treat intra-chromosomal data. 
+ continue; + } + + int posChrSmaller; + int posChrLarger; + + LinkedHashSet> interestRegions = null; + if (Integer.parseInt(chrProbe) < Integer.parseInt(chrSnp)) { + posChrSmaller = eqtl.getProbeChrPos(); + posChrLarger = eqtl.getRsChrPos(); + if (contactBuffer.containsKey("chr" + chrProbe + "_chr" + chrSnp)) { + interestRegions = contactBuffer.get("chr" + chrProbe + "_chr" + chrSnp); + } else { + String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrProbe + "_chr" + chrSnp + "\\MAPQG" + qualityCutOff; + String fileToReads = baseName + "\\chr" + chrProbe + "_" + chrSnp + "_1kb.RAWobserved"; +// System.out.println("Reading: " + fileToReads); + if (normMethod == null) { + interestRegions = readRawInterContactInformation(fileToReads, minValue); + } else { + interestRegions = readNormalizedInterContactInformation(fileToReads, baseName, normMethod, chrProbe, chrSnp, resolution, minValue); + } + contactBuffer.put("chr" + chrProbe + "_chr" + chrSnp, interestRegions); + } + } else { + posChrSmaller = eqtl.getRsChrPos(); + posChrLarger = eqtl.getProbeChrPos(); + if (contactBuffer.containsKey("chr" + chrSnp + "_chr" + chrProbe)) { + interestRegions = contactBuffer.get("chr" + chrSnp + "_chr" + chrProbe); + } else { + String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrSnp + "_chr" + chrProbe + "\\MAPQG" + qualityCutOff; + String fileToReads = baseName + "\\chr" + chrSnp + "_" + chrProbe + "_1kb.RAWobserved"; +// System.out.println("Reading: " + fileToReads); + if (normMethod == null) { + interestRegions = readRawInterContactInformation(fileToReads, minValue); + } else { + interestRegions = readNormalizedInterContactInformation(fileToReads, baseName, normMethod, chrSnp, chrProbe, resolution, minValue); + } + contactBuffer.put("chr" + chrSnp + "_chr" + chrProbe, interestRegions); + } + } + + if (determineContact(posChrSmaller, posChrLarger, interestRegions, getNumericResolution(resolution))) { + 
outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); + } else { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); + } + } + outWriter.close(); + } + + static void addAnnotationToQTLOutputLowMem(String in, String inProxies, String folderHighC, String resolution, String qualityCutOff, String normMethod, double minValue, String out) throws IOException { + QTLTextFile eqtlTextFile = new QTLTextFile(in, QTLTextFile.R); + + ArrayList qtls = eqtlTextFile.readList(); + + if (inProxies != null) { + qtls = includeProxyInfo(qtls, inProxies); + } + + //Here we need to make a new Type to store the potentialy inflated files. + TextFile outWriter = new TextFile(out, TextFile.W); + for (EQTL eqtl : qtls) { + String chrProbe = String.valueOf(eqtl.getProbeChr()); + String chrSnp = String.valueOf(eqtl.getRsChr()); + +// System.out.println(chrProbe+"\t"+chrSnp); + if (chrProbe.equals(chrSnp)) { + //Here we need to check how to normalize and treat intra-chromosomal data. 
+ continue; + } + + int posChrSmaller; + int posChrLarger; + + if (Integer.parseInt(chrProbe) < Integer.parseInt(chrSnp)) { + posChrSmaller = eqtl.getProbeChrPos(); + posChrLarger = eqtl.getRsChrPos(); + String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrProbe + "_chr" + chrSnp + "\\MAPQG" + qualityCutOff; + String fileToReads = baseName + "\\chr" + chrProbe + "_" + chrSnp + "_1kb.RAWobserved"; +// System.out.println("Reading: " + fileToReads); + if (normMethod == null) { + if (readRawInterContactInformationLowMem(fileToReads, minValue, posChrSmaller, posChrLarger, resolution)) { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); + } else { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); + } + } else { + if (readNormalizedInterContactInformationLowMem(fileToReads, baseName, normMethod, chrProbe, chrSnp, posChrSmaller, posChrLarger, resolution, minValue)) { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); + } else { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); + } + } + + } else { + posChrSmaller = eqtl.getRsChrPos(); + posChrLarger = eqtl.getProbeChrPos(); + + String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrSnp + "_chr" + chrProbe + "\\MAPQG" + qualityCutOff; + String fileToReads = baseName + "\\chr" + chrSnp + "_" + chrProbe + "_1kb.RAWobserved"; +// System.out.println("Reading: " + fileToReads); + if (normMethod == null) { + if (readRawInterContactInformationLowMem(fileToReads, minValue, posChrSmaller, posChrLarger, resolution)) { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); + } else { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); + } + } else { + if (readNormalizedInterContactInformationLowMem(fileToReads, baseName, normMethod, chrSnp, chrProbe, posChrSmaller, posChrLarger, resolution, minValue)) { + 
outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); + } else { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); + } + } + } + } + outWriter.close(); + } + + private static ArrayList includeProxyInfo(ArrayList qtls, String inProxies) throws IOException { + ArrayList newQtlList = new ArrayList(); + + TextFile readProxies = new TextFile(inProxies, TextFile.R); + + String line = readProxies.readLine(); +// System.out.println(line); + while ((line = readProxies.readLine()) != null) { +// System.out.println(line); + String[] lineParts = SPLIT_TAB.split(line); + String chr = lineParts[4]; + int chrPos = Integer.parseInt(lineParts[5]); + int chrNewPos = Integer.parseInt(lineParts[8]); + for (EQTL e : qtls) { + if (String.valueOf(e.getRsChr()).equals(chr) && e.getRsChrPos() == chrPos) { + EQTL newQtl = new EQTL(); + newQtl.setProbe(e.getProbe()); + newQtl.setProbeChr(e.getProbeChr()); + newQtl.setProbeChrPos(e.getProbeChrPos()); + + newQtl.setRsName(e.getRsName() + "-" + lineParts[1]); + newQtl.setRsChr(e.getRsChr()); + newQtl.setRsChrPos(chrNewPos); + newQtlList.add(newQtl); + } + } + } + + for (EQTL e : qtls) { + newQtlList.add(e); + } + + return newQtlList; + } + + //For example, here is a line from the 5kb chr1 MAPQGE30 raw observed contact matrix (GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.RAWobserved): +//40000000 40100000 59.0 + private static LinkedHashSet> readRawInterContactInformation(String fileToReads, double minContactValue) throws IOException { + LinkedHashSet> chrContactInfo = new LinkedHashSet>(); + + BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToReads), "UTF-8")); + + String row; + + while ((row = input.readLine()) != null) { + String[] parts = StringUtils.split(row, '\t'); + + int posChr1 = Integer.parseInt(parts[0]); + int posChr2 = Integer.parseInt(parts[1]); + double contact = Double.parseDouble(parts[2]); + if (contact >= 
minContactValue) { + chrContactInfo.add(new Pair(posChr1, posChr2)); + } + } + input.close(); + return chrContactInfo; + + } + + //For example, here is a line from the 5kb chr1 MAPQGE30 raw observed contact matrix (GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.RAWobserved): + //40000000 40100000 59.0 + //To normalize this entry using the KR normalization vector, one would divide 59.0 by the 8001st line ((40000000/5000)+1=8001) and the 8021st line ((40100000/5000)+1=8021) + //of GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.KRnorm. The 8001st line of the KR norm file is 1.2988778370674694; + //The 8021st line of the KR norm file is 1.6080499717941548. So the corresponding KR normalized entry for the entry above is 59.0/(1.2988778370674694*1.6080499717941548) + //or 28.24776973966101. + //If the KR normalization vector file is empty or all NaNs, then the KR algorithm didn�t converge on that particular matrix (likely due to sparsity of the matrix). + private static LinkedHashSet> readNormalizedInterContactInformation(String fileToRead, String baseName, String normMethod, String chrSmaller, String chrLarger, String resolution, double minContactValue) throws IOException { + + //ReadIn normalization chr1 + TextFile inputNormChr1 = new TextFile(baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod, TextFile.R); + ArrayList normFactorSmallerChr = inputNormChr1.readAsArrayList(); + inputNormChr1.close(); + + //ReadIn normalization chr2 + TextFile inputNormChr2 = new TextFile(baseName + "\\chr" + chrLarger + "_" + resolution + "." 
+ normMethod, TextFile.R); + ArrayList normFactorLargerChr = inputNormChr2.readAsArrayList(); + + inputNormChr2.close(); + + LinkedHashSet> chrContactInfo = new LinkedHashSet>(); + + BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToRead), "UTF-8")); + + String row; + + while ((row = input.readLine()) != null) { + String[] parts = StringUtils.split(row, '\t'); + + int posChr1 = Integer.parseInt(parts[0]); + int posChr2 = Integer.parseInt(parts[1]); + + String factor1Base = normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1); + String factor2Base = normFactorLargerChr.get((posChr2 / getNumericResolution(resolution)) + 1); + + double factor1; + double factor2; + + if (StringUtils.isNumeric(factor1Base) && StringUtils.isNumeric(factor2Base)) { + factor1 = Double.parseDouble(factor1Base); + factor2 = Double.parseDouble(factor2Base); + + double contact = Double.parseDouble(parts[2]) / (factor1 * factor2); + if (contact >= minContactValue) { + chrContactInfo.add(new Pair(posChr1, posChr2)); + } + + } + } + input.close(); + return chrContactInfo; + } + + private static boolean determineContact(int posChrSmaller, int posChrLarger, LinkedHashSet> interestRegions, int resolution) { + //Determine bin1 + //Starts counting at 0-resulution + int bin1 = posChrSmaller - (posChrSmaller % resolution); + + //Determine bin2 + int bin2 = posChrLarger - (posChrLarger % resolution); + + //See if bin1 and bin2 are in the file. 
+ boolean contact = false; + + for (Pair entry : interestRegions) { + if (entry.getLeft() == bin1) { + if (entry.getRight() == bin2) { + contact = true; + break; + } else if (entry.getRight() > bin2) { + break; + } + } else if (entry.getLeft() > bin1) { + break; + } + } + return contact; + } + + private static boolean readRawInterContactInformationLowMem(String fileToReads, double minValue, int posChrSmaller, int posChrLarger, String resolution) throws IOException { + //Determine bin1 + //Starts counting at 0-resulution + int bin1 = posChrSmaller - (posChrSmaller % getNumericResolution(resolution)); + + //Determine bin2 + int bin2 = posChrLarger - (posChrLarger % getNumericResolution(resolution)); + + //See if bin1 and bin2 are in the file. + boolean contactFound = false; + + BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToReads), "UTF-8")); + + String row; + + while ((row = input.readLine()) != null) { + String[] parts = StringUtils.split(row, '\t'); + + int posChr1 = Integer.parseInt(parts[0]); + if (posChr1 == bin1) { + int posChr2 = Integer.parseInt(parts[1]); + if (posChr2 == bin2) { + double contact = Double.parseDouble(parts[2]); + if (contact >= minValue) { + contactFound = true; + } + break; + } else if (posChr2 > bin2) { + break; + } + } else if (posChr1 > bin1) { + break; + } + + } + input.close(); + return contactFound; + } + + private static boolean readNormalizedInterContactInformationLowMem(String fileToRead, String baseName, String normMethod, String chrSmaller, String chrLarger, int posChrSmaller, int posChrLarger, String resolution, double minValue) throws IOException { + //Determine bin1 + //Starts counting at 0-resulution + int bin1 = posChrSmaller - (posChrSmaller % getNumericResolution(resolution)); + + //Determine bin2 + int bin2 = posChrLarger - (posChrLarger % getNumericResolution(resolution)); + + //ReadIn normalization chr1 + TextFile inputNormChr1 = new TextFile(baseName + "\\chr" + chrSmaller + 
"_" + resolution + "." + normMethod, TextFile.R); + ArrayList normFactorSmallerChr = inputNormChr1.readAsArrayList(); + inputNormChr1.close(); + + //ReadIn normalization chr2 + TextFile inputNormChr2 = new TextFile(baseName + "\\chr" + chrLarger + "_" + resolution + "." + normMethod, TextFile.R); + ArrayList normFactorLargerChr = inputNormChr2.readAsArrayList(); + + inputNormChr2.close(); + + LinkedHashSet> chrContactInfo = new LinkedHashSet>(); + + BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToRead), "UTF-8")); + + String row; + + //See if bin1 and bin2 are in the file. + boolean contactFound = false; + + while ((row = input.readLine()) != null) { + String[] parts = StringUtils.split(row, '\t'); + + int posChr1 = Integer.parseInt(parts[0]); + if (posChr1 == bin1) { + int posChr2 = Integer.parseInt(parts[1]); + if (posChr2 == bin2) { + + String factor1Base = normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1); + String factor2Base = normFactorLargerChr.get((posChr2 / getNumericResolution(resolution)) + 1); + + double factor1; + double factor2; + + if (StringUtils.isNumeric(factor1Base) && StringUtils.isNumeric(factor2Base)) { + factor1 = Double.parseDouble(factor1Base); + factor2 = Double.parseDouble(factor2Base); + + double contact = Double.parseDouble(parts[2]) / (factor1 * factor2); + if (contact >= minValue) { + contactFound = true; + } + break; + } + + } else if (posChr2 > bin2) { + break; + } + } else if (posChr1 > bin1) { + break; + } + + } + input.close(); + return contactFound; + } + + private static int getNumericResolution(String resolution) { + if (resolution.equals("1kb")) { + return 1000; + } else if (resolution.equals("5kb")) { + return 5000; + } else { + System.out.println("\nError in resolution setting!\n"); + System.exit(-1); + } + return 0; + } +} \ No newline at end of file diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLAnnotator.java 
b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLAnnotator.java index 3fe8c4b0f..ade926b89 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLAnnotator.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLAnnotator.java @@ -49,36 +49,24 @@ public static void main(String[] args) throws IOException { // "1;8-9-10-11-12-13-14;1;4-5;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4", "snp;probe;snp;snp", null, // "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt-ExtendedInfo5.txt"); -// addAnnotationToQTLOutput( -// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt", -// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v6.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\statisticsTMM_exprssion.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSurounding_RP3Genes.txt", -// "0;209-210-211-212-213-214-215-216-217-218-219-220-221-222-223-224-225-226-227-228-229-230-231-232-233-234-235-236-237-238-239-240-241-242-243;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15", "snp;snp;probe;probe", null, -// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfo3.txt"); - -// addAnnotationToQTLOutput( -// 
"D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt", -// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v8.txt.gz", -// "0;264-265-266-267-268-269-270-271-272-273-274-275-276-277-278-279-280-281-282-283-284-285-286-287-288-289-290-291-292-293-294-295", "snp", null, -// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfo4.txt"); - -// addAnnotationToQTLOutput( -// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLsFDR0.05-SNPLevel.txt", -// "D:\\UMCG\\Methylation_GPL13534\\annotationFile\\Illumina450K_MQtlMappingFile_Extensive.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v5.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt", -// "1;8-9-10-11-12-13-14;1;4-5;0;17-18-20-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-70-71-81-133-134-141-142-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-192-193-194-195-196-197-198;0;1-2-3-4", "snp;probe;snp;snp", null, -// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLsFDR0.05-SNPLevel.txt-ExtendedInfo.txt"); - -// addAnnotationToQTLOutput( -// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLsFDR0.05-SNPLevel.txt", -// 
"D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v6.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt", -// "0;209-210-211-212-213-214-215-216-217-218-219-220-221-222-223-224-225-226-227-228-229-230-231-232-233-234-235-236-237-238-239-240-241-242-243;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15", "snp;snp", null, -// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLsFDR0.05-SNPLevel.txt_ExtendedInfo2.txt"); -// addAnnotationToQTLOutput( "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt", - "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v10_13BM.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\statisticsTMM_exprssion.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSurounding_RP3Genes.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt", - "0;8-9-10-11-12-13-14-15-16-17-18-175-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-192-193-194-195-196-197-198-199-200-201-202-203-204-205-206-207-208-291-292-293-294-295-296-297-298-299-300-301-302-303-304-305-306-307-308-309-310-311-312-313-314-315-316-317-318-319-320-321-322-323-324-325-326-327-328-329-330-331-332-333-334-335-336-337-338-339-340-341-342-343-344-345-346;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;1;4-5;0;1-2-3-4", "snp;snp;probe;probe;probe;snp", null, - 
"D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfo13BM.txt"); + "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v10.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\statisticsTMM_exprssion.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSurounding_RP3Genes.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt", + "0;8-9-10-11-12-13-14-15-16-17-18-19-20-21-22-23-24-25-26-27-28-29-30-31-32-33-34-35-36-37-38-39-40-41-42-43-44-45-46-47-48-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-69-70-71-72-73-74-75-76-77-78-79-80-81-82-83-84-85-86-87-88-89-90-91-92-93-94-95-96-97-98-99-100-101-102-103-104-105-106-107-108-109-110-111-112-113-114-115-116-117-118-119-120-121-122-123-124-125-126-127-128-129-130-131-132-133-134-135-136-137-138-139-140-141-142-143-144-145-146-147-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-192-193-194-195-196-197-198-199-200-201-202-203-204-205-206-207-208-209-210-211-212-213-214-215-216-217-218-219-220-221-222-223-224-225-226-227-228-229-230-231-232-233-234-235-236-237-238-239-240-241-242-243-244-245-246-247-248-249-250-251-252-253-254-255-256-257-258-259-260-261-262-263-264-265-266-267-268-269-270-271-272-273-274-275-276-277-278-279-280-281-282-283-284-285-286-287-288-289-290-291-292-293-294-295-296-297-298-299-300-301-302-303-304-305-306-307-308-309-310-311-312-313-314-315-316-317-318-319-320-321-322-323-324-325-326-327-328-329-330-331-332-333-334-335-336-337-338-339-340-341-342-
343-344-345-346-347-348-349-350-351-352-353-354-355-356-357;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;1;4-5;0;1-2-3-4", "snp;snp;probe;probe;probe;snp", null, + "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfTMP.txt"); +// +// addAnnotationToQTLOutput( +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt", +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v10_13BM.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\statisticsTMM_exprssion.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSurounding_RP3Genes.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt", +// "0;8-9-10-11-12-13-14-15-16-17-18-175-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-192-193-194-195-196-197-198-199-200-201-202-203-204-205-206-207-208-291-292-293-294-295-296-297-298-299-300-301-302-303-304-305-306-307-308-309-310-311-312-313-314-315-316-317-318-319-320-321-322-323-324-325-326-327-328-329-330-331-332-333-334-335-336-337-338-339-340-341-342-343-344-345-346;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;1;4-5;0;1-2-3-4", "snp;snp;probe;probe;probe;snp", null, +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\Optimal_PC_and_QTL_Corrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel_ExtendedInfo13BM.txt"); +// addAnnotationToQTLOutput( +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Artificial_eQTMs0.0_Stringent.txt", +// 
"D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\Annotation450k_AdditionMJ_v10.txt.gz;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSuroundingProbes_full.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\statisticsTMM_exprssion.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\GC_ContentSurounding_RP3Genes.txt;D:\\UMCG\\Data\\RP3_RNA_Seq\\annotation_geneIds+overlapping_v71_cut.txt;D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Annotations\\CODAM_NTR_LLS_LLD_RS_BBMRI_450K_var_mean_median.txt", +// "0;8-9-10-11-12-13-14-15-16-17-18-19-20-21-22-23-24-25-26-27-28-29-30-31-32-33-34-35-36-37-38-39-40-41-42-43-44-45-46-47-48-49-50-51-52-53-54-55-56-57-58-59-60-61-62-63-64-65-66-67-68-69-70-71-72-73-74-75-76-77-78-79-80-81-82-83-84-85-86-87-88-89-90-91-92-93-94-95-96-97-98-99-100-101-102-103-104-105-106-107-108-109-110-111-112-113-114-115-116-117-118-119-120-121-122-123-124-125-126-127-128-129-130-131-132-133-134-135-136-137-138-139-140-141-142-143-144-145-146-147-148-149-150-151-152-153-154-155-156-157-158-159-160-161-162-163-164-165-166-167-168-169-170-171-172-173-174-175-176-177-178-179-180-181-182-183-184-185-186-187-188-189-190-191-192-193-194-195-196-197-198-199-200-201-202-203-204-205-206-207-208-209-210-211-212-213-214-215-216-217-218-219-220-221-222-223-224-225-226-227-228-229-230-231-232-233-234-235-236-237-238-239-240-241-242-243-244-245-246-247-248-249-250-251-252-253-254-255-256-257-258-259-260-261-262-263-264-265-266-267-268-269-270-271-272-273-274-275-276-277-278-279-280-281-282-283-284-285-286-287-288-289-290-291-292-293-294-295-296-297-298-299-300-301-302-303-304-305-306-307-308-309-310-311-312-313-314-315-316-317-318-319-320-321-322-323-324-325-326-327-328-329-330-331-332-333-334-335-336-337-338-339-340-341-342-343-344-345-346-347-348-349-350-351-352-353-354-355-356-357;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;0;1-2-3-4;0;1-2-3-4-5-6-7-8-9-10-11-12-13-14-15;1;4-5;0;1-2-3-4", 
"snp;snp;probe;probe;probe;snp", null, +// "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\Artificial_eQTMs0.0_Stringent_ExtendedInfo.txt"); + // addAnnotationToQTLOutput( // "D:\\OnlineFolders\\AeroFS\\RP3_BIOS_Methylation\\eQTMs\\QTLCorrected\\RP3_0.25MB_TSS_extendedCis_eQTMs_2015\\eQTLSNPsFDR0.05-SNPLevel.txt", From 19e30d372703173461fc563af63828ca0e6a0599 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 7 May 2015 14:32:10 +0200 Subject: [PATCH 036/143] minor bug --- eqtl-mapping-pipeline/nb-configuration.xml | 9 +++++++++ .../binaryInteraction/ReplicateInteractions.java | 2 +- genetica-libraries/nb-configuration.xml | 9 +++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/eqtl-mapping-pipeline/nb-configuration.xml b/eqtl-mapping-pipeline/nb-configuration.xml index 49bf3691c..15fa6c5e7 100644 --- a/eqtl-mapping-pipeline/nb-configuration.xml +++ b/eqtl-mapping-pipeline/nb-configuration.xml @@ -16,4 +16,13 @@ Without this configuration present, some functionality in the IDE may be limited + + + JDK_1.7 + diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java index e092c64b8..295f00063 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/ReplicateInteractions.java @@ -490,7 +490,7 @@ public static void main(String[] args) throws FileNotFoundException, IOException writeAndOut(" - Not significant: " + numberFormat.format(notSignificant) + " (" + numberFormat.format(notSignificant * 100d / (notSignificant + significant)) + "%)", logWriter); writeAndOut(" - Significant: " + numberFormat.format(significant) + " (" + numberFormat.format(significant * 100d / (notSignificant + significant)) + "%)", logWriter); writeAndOut(" * Not in replication: 
" + numberFormat.format(notTestedInReplication) + " (" + numberFormat.format(notTestedInReplication * 100d / significant) + "%)", logWriter); - writeAndOut(" * NaN in replication: " + numberFormat.format(nanReplication) + " (" + numberFormat.format(notTestedInReplication * 100d / significant) + "%)", logWriter); + writeAndOut(" * NaN in replication: " + numberFormat.format(nanReplication) + " (" + numberFormat.format(nanReplication * 100d / significant) + "%)", logWriter); writeAndOut(" * Not significant in replication: " + numberFormat.format(notSignificantReplicationSameDirection + notSignificantReplicationOppositeDirection) + " (" + numberFormat.format((notSignificantReplicationSameDirection + notSignificantReplicationOppositeDirection) * 100d / significant) + "%)", logWriter); writeAndOut(" # Same direction: " + numberFormat.format(notSignificantReplicationSameDirection) + " (" + numberFormat.format(notSignificantReplicationSameDirection * 100d / (notSignificantReplicationSameDirection + notSignificantReplicationOppositeDirection)) + "%)", logWriter); writeAndOut(" # Opposite direction: " + numberFormat.format(notSignificantReplicationOppositeDirection) + " (" + numberFormat.format(notSignificantReplicationOppositeDirection * 100d / (notSignificantReplicationSameDirection + notSignificantReplicationOppositeDirection)) + "%)", logWriter); diff --git a/genetica-libraries/nb-configuration.xml b/genetica-libraries/nb-configuration.xml index 9d924f817..dbb1c5fb6 100644 --- a/genetica-libraries/nb-configuration.xml +++ b/genetica-libraries/nb-configuration.xml @@ -11,4 +11,13 @@ Without this configuration present, some functionality in the IDE may be limited + + + JDK_1.7 + From 489cc8c0387ab1e3605c943c9f76f9d1f0c77262 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Sat, 9 May 2015 14:27:36 +0200 Subject: [PATCH 037/143] Extended double matrix dataset --- .../math/matrix2/DoubleMatrixDataset.java | 1263 +++++++++-------- 1 file changed, 642 insertions(+), 621 
deletions(-) diff --git a/genetica-libraries/src/main/java/umcg/genetica/math/matrix2/DoubleMatrixDataset.java b/genetica-libraries/src/main/java/umcg/genetica/math/matrix2/DoubleMatrixDataset.java index 2b227f25b..2230738c9 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/math/matrix2/DoubleMatrixDataset.java +++ b/genetica-libraries/src/main/java/umcg/genetica/math/matrix2/DoubleMatrixDataset.java @@ -4,6 +4,7 @@ */ package umcg.genetica.math.matrix2; +import cern.colt.matrix.tdouble.DoubleMatrix1D; import cern.colt.matrix.tdouble.DoubleMatrix2D; import cern.colt.matrix.tdouble.impl.DenseDoubleMatrix2D; import cern.colt.matrix.tdouble.impl.DenseLargeDoubleMatrix2D; @@ -22,7 +23,6 @@ import java.util.Map; import java.util.Map.Entry; import java.util.NoSuchElementException; -import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Pattern; @@ -36,625 +36,646 @@ */ public class DoubleMatrixDataset { - static final IOException doubleMatrixDatasetNonUniqueHeaderException = new IOException("Tried to use a non-unique header set in an identifier HashMap"); - static final Logger LOGGER = Logger.getLogger(DoubleMatrixDataset.class.getName()); - protected DoubleMatrix2D matrix; - protected LinkedHashMap hashRows; - protected LinkedHashMap hashCols; - - public DoubleMatrixDataset() { - hashRows = new LinkedHashMap(); - hashCols = new LinkedHashMap(); - } - - public DoubleMatrixDataset(int rows, int columns) { - hashRows = new LinkedHashMap((int) Math.ceil(rows / 0.75)); - hashCols = new LinkedHashMap((int) Math.ceil(columns / 0.75)); - if ((rows * (long) columns) < (Integer.MAX_VALUE - 2)) { - matrix = new DenseDoubleMatrix2D(rows, columns); - } else { - matrix = new DenseLargeDoubleMatrix2D(rows, columns); - } - } - - public DoubleMatrixDataset(LinkedHashMap hashRows, LinkedHashMap hashCols) { - this.hashRows = hashRows; - this.hashCols = hashCols; - if ((hashRows.size() * (long) hashCols.size()) < 
(Integer.MAX_VALUE - 2)) { - matrix = new DenseDoubleMatrix2D(hashRows.size(), hashCols.size()); - } else { - matrix = new DenseLargeDoubleMatrix2D(hashRows.size(), hashCols.size()); - } - } - - public DoubleMatrixDataset(DoubleMatrix2D matrix, LinkedHashMap hashRows, LinkedHashMap hashCols) { - this.hashRows = hashRows; - this.hashCols = hashCols; - this.matrix = matrix; - } - - public DoubleMatrixDataset(Collection rowNames, Collection colNames) { - - hashRows = new LinkedHashMap(rowNames.size()); - hashCols = new LinkedHashMap(colNames.size()); - - int i = 0; - for (R row : rowNames) { - hashRows.put(row, i); - ++i; - } - - i = 0; - for (C col : colNames) { - hashCols.put(col, i); + static final IOException doubleMatrixDatasetNonUniqueHeaderException = new IOException("Tried to use a non-unique header set in an identifier HashMap"); + static final Logger LOGGER = Logger.getLogger(DoubleMatrixDataset.class.getName()); + + public static DoubleMatrixDataset loadDoubleTextData(String expressionDataPath, char c) { + throw new UnsupportedOperationException("Not supported yet."); + } + protected DoubleMatrix2D matrix; + protected LinkedHashMap hashRows; + protected LinkedHashMap hashCols; + + public DoubleMatrixDataset() { + hashRows = new LinkedHashMap(); + hashCols = new LinkedHashMap(); + } + + public DoubleMatrixDataset(int rows, int columns) { + hashRows = new LinkedHashMap((int) Math.ceil(rows / 0.75)); + hashCols = new LinkedHashMap((int) Math.ceil(columns / 0.75)); + if ((rows * (long) columns) < (Integer.MAX_VALUE - 2)) { + matrix = new DenseDoubleMatrix2D(rows, columns); + } else { + matrix = new DenseLargeDoubleMatrix2D(rows, columns); + } + } + + public DoubleMatrixDataset(LinkedHashMap hashRows, LinkedHashMap hashCols) { + this.hashRows = hashRows; + this.hashCols = hashCols; + if ((hashRows.size() * (long) hashCols.size()) < (Integer.MAX_VALUE - 2)) { + matrix = new DenseDoubleMatrix2D(hashRows.size(), hashCols.size()); + } else { + matrix = new 
DenseLargeDoubleMatrix2D(hashRows.size(), hashCols.size()); + } + } + + public DoubleMatrixDataset(DoubleMatrix2D matrix, LinkedHashMap hashRows, LinkedHashMap hashCols) { + this.hashRows = hashRows; + this.hashCols = hashCols; + this.matrix = matrix; + } + + public DoubleMatrixDataset(Collection rowNames, Collection colNames) { + + hashRows = new LinkedHashMap(rowNames.size()); + hashCols = new LinkedHashMap(colNames.size()); + + int i = 0; + for (R row : rowNames) { + hashRows.put(row, i); ++i; - } - - if ((hashRows.size() * (long) hashCols.size()) < (Integer.MAX_VALUE - 2)) { - matrix = new DenseDoubleMatrix2D(hashRows.size(), hashCols.size()); - } else { - matrix = new DenseLargeDoubleMatrix2D(hashRows.size(), hashCols.size()); - } - - } - - public static DoubleMatrixDataset loadDoubleData(String fileName) throws IOException { - if ((fileName.endsWith(".txt") || fileName.endsWith(".tsv") || fileName.endsWith(".txt.gz"))) { - return loadDoubleTextData(fileName, "\t"); - } else if (fileName.endsWith(".binary")) { - return loadDoubleBinaryData(fileName); - } else { - throw new IllegalArgumentException("File type must be \".txt\", \".tsv\" or \".txt.gz\" when delimiter is set to: \"tab\" \n Input filename: " + fileName); - } - } - - public static DoubleMatrixDataset loadDoubleTextData(String fileName, String delimiter) throws IOException { - if (!(fileName.endsWith(".txt") || fileName.endsWith(".tsv") || fileName.endsWith(".txt.gz"))) { - throw new IllegalArgumentException("File type must be \".txt\", \".tsv\" or \".txt.gz\" when delimiter is set. 
\n Input filename: " + fileName); - } - - Pattern splitPatern = Pattern.compile(delimiter); - - int columnOffset = 1; - - TextFile in = new TextFile(fileName, TextFile.R); - String str = in.readLine(); // header - String[] data = splitPatern.split(str); - - int tmpCols = (data.length - columnOffset); - - LinkedHashMap colMap = new LinkedHashMap((int) Math.ceil(tmpCols / 0.75)); - - for (int s = 0; s < tmpCols; s++) { - String colName = data[s + columnOffset]; - if (!colMap.containsKey(colName)) { - colMap.put(colName, s); - } else { - LOGGER.warning("Duplicated column name!"); - throw (doubleMatrixDatasetNonUniqueHeaderException); - } - } - - int tmpRows = 0; - - while (in.readLine() != null) { - tmpRows++; - } - in.close(); - - LinkedHashMap rowMap = new LinkedHashMap((int) Math.ceil(tmpRows / 0.75)); - DoubleMatrix2D tmpMatrix; - - if ((tmpRows * (long) tmpCols) < (Integer.MAX_VALUE - 2)) { - tmpMatrix = new DenseDoubleMatrix2D(tmpRows, tmpCols); - } else { - tmpMatrix = new DenseLargeDoubleMatrix2D(tmpRows, tmpCols); - } - in.open(); - in.readLine(); // read header - int row = 0; - - boolean correctData = true; - while ((str = in.readLine()) != null) { - data = splitPatern.split(str); - - if (!rowMap.containsKey(data[0])) { - rowMap.put(data[0], row); - for (int s = 0; s < tmpCols; s++) { - double d; - try { - d = Double.parseDouble(data[s + columnOffset]); - } catch (NumberFormatException e) { - correctData = false; - d = Double.NaN; - } - tmpMatrix.setQuick(row, s, d); - } - row++; - } else { - LOGGER.warning("Duplicated row name!"); - throw (doubleMatrixDatasetNonUniqueHeaderException); - } - - } - if (!correctData) { - LOGGER.warning("Your data contains NaN/unparseable values!"); - } - in.close(); - - DoubleMatrixDataset dataset = new DoubleMatrixDataset(tmpMatrix, rowMap, colMap); - - LOGGER.log(Level.INFO, "''{0}'' has been loaded, nrRows: {1} nrCols: {2}", new Object[]{fileName, dataset.matrix.rows(), dataset.matrix.columns()}); - return dataset; - } - - 
public static DoubleMatrixDataset loadSubsetOfTextDoubleData(String fileName, String delimiter, HashSet desiredRows, HashSet desiredCols) throws IOException { - if (!(fileName.endsWith(".txt") || fileName.endsWith(".txt.gz"))) { - throw new IllegalArgumentException("File type must be .txt when delimiter is given (given filename: " + fileName + ")"); - } - - LinkedHashSet desiredColPos = new LinkedHashSet(); - - Pattern splitPatern = Pattern.compile(delimiter); - - int columnOffset = 1; - - TextFile in = new TextFile(fileName, TextFile.R); - String str = in.readLine(); // header - String[] data = splitPatern.split(str); - - int tmpCols = (data.length - columnOffset); - - LinkedHashMap colMap = new LinkedHashMap((int) Math.ceil(tmpCols / 0.75)); - - int storedCols = 0; - for (int s = 0; s < tmpCols; s++) { - String colName = data[s + columnOffset]; - if (!colMap.containsKey(colName) && (desiredCols == null || desiredCols.contains(colName) || desiredCols.isEmpty())) { - colMap.put(colName, storedCols); - desiredColPos.add((s)); - storedCols++; - } else if (colMap.containsKey(colName)) { - LOGGER.warning("Duplicated column name!"); - System.out.println("Tried to add: " + colName); - throw (doubleMatrixDatasetNonUniqueHeaderException); - } - } - - LinkedHashSet desiredRowPos = new LinkedHashSet(); - int rowsToStore = 0; - int totalRows = 0; - //System.out.println(desiredRows.toString()); - while ((str = in.readLine()) != null) { - String[] info = splitPatern.split(str); - if (desiredRows == null || desiredRows.contains(info[0]) || desiredRows.isEmpty()) { - rowsToStore++; - desiredRowPos.add(totalRows); - } - totalRows++; - } - in.close(); - - DoubleMatrix2D matrix; - if ((rowsToStore * (long) tmpCols) < (Integer.MAX_VALUE - 2)) { - matrix = new DenseDoubleMatrix2D(rowsToStore, storedCols); - } else { - matrix = new DenseLargeDoubleMatrix2D(rowsToStore, storedCols); - } - - in.open(); - in.readLine(); // read header - int storingRow = 0; - totalRows = 0; - LinkedHashMap 
rowMap = new LinkedHashMap((int) Math.ceil(rowsToStore / 0.75)); - - boolean correctData = true; - while ((str = in.readLine()) != null) { - - if (desiredRowPos.contains(totalRows)) { - data = splitPatern.split(str); - if (!rowMap.containsKey(data[0])) { - rowMap.put(data[0], storingRow); - int storingCol = 0; - for (int s : desiredColPos) { - double d; - try { - d = Double.parseDouble(data[s + columnOffset]); - } catch (NumberFormatException e) { - correctData = false; - d = Double.NaN; - } - matrix.setQuick(storingRow, storingCol, d); - storingCol++; - } - storingRow++; - } else if (rowMap.containsKey(data[0])) { - LOGGER.warning("Duplicated row name!"); - System.out.println("Tried to add: " + data[0]); - throw (doubleMatrixDatasetNonUniqueHeaderException); - } - } - totalRows++; - } - if (!correctData) { - LOGGER.warning("Your data contains NaN/unparseable values!"); - } - in.close(); - - DoubleMatrixDataset dataset = new DoubleMatrixDataset(matrix, rowMap, colMap); - - LOGGER.log(Level.INFO, "''{0}'' has been loaded, nrRows: {1} nrCols: {2}", new Object[]{fileName, dataset.matrix.rows(), dataset.matrix.columns()}); - return dataset; - } - - private static DoubleMatrixDataset loadDoubleBinaryData(String fileName) throws FileNotFoundException, IOException { - //First load the raw binary data: - File fileBinary = new File(fileName + ".dat"); - BufferedInputStream in; - int nrRows; - int nrCols; - in = new BufferedInputStream(new FileInputStream(fileBinary)); - byte[] bytes = new byte[4]; - in.read(bytes, 0, 4); - nrRows = byteArrayToInt(bytes); - in.read(bytes, 0, 4); - nrCols = byteArrayToInt(bytes); - - DoubleMatrix2D matrix; - if ((nrRows * (long) nrCols) < (Integer.MAX_VALUE - 2)) { - matrix = new DenseDoubleMatrix2D(nrRows, nrCols); - } else { - matrix = new DenseLargeDoubleMatrix2D(nrRows, nrCols); - } - - //Now load the row and column identifiers from files - LinkedHashMap rowMap = loadIdentifiers(fileName + ".rows.txt"); - LinkedHashMap colMap = 
loadIdentifiers(fileName + ".cols.txt"); - - byte[] buffer = new byte[nrCols * 8]; - long bits; - for (int row = 0; row < nrRows; row++) { - in.read(buffer, 0, nrCols * 8); - int bufferLoc = 0; - for (int col = 0; col < nrCols; col++) { - bits = (long) (0xff & buffer[bufferLoc + 7]) - | (long) (0xff & buffer[bufferLoc + 6]) << 8 - | (long) (0xff & buffer[bufferLoc + 5]) << 16 - | (long) (0xff & buffer[bufferLoc + 4]) << 24 - | (long) (0xff & buffer[bufferLoc + 3]) << 32 - | (long) (0xff & buffer[bufferLoc + 2]) << 40 - | (long) (0xff & buffer[bufferLoc + 1]) << 48 - | (long) (buffer[bufferLoc]) << 56; - - matrix.setQuick(row, col, Double.longBitsToDouble(bits)); - bufferLoc += 8; - } - } - in.close(); - - DoubleMatrixDataset dataset = new DoubleMatrixDataset(matrix, rowMap, colMap); - LOGGER.log(Level.INFO, "Binary file ''{0}'' has been loaded, nrRows: {1} nrCols: {2}", new Object[]{fileName, nrRows, nrCols}); - - return dataset; - } - - private static LinkedHashMap loadIdentifiers(String filename) throws IOException { - TextFile tf = new TextFile(filename, false); - String[] rowsArr = tf.readAsArray(); - tf.close(); - LinkedHashMap rowMap = new LinkedHashMap(); - for (String row : rowsArr) { - rowMap.put(row, rowMap.size()); - } - return rowMap; - } - - public void save(File file) throws IOException { - TextFile out = new TextFile(file, TextFile.W); - - out.append('-'); - for (C col : hashCols.keySet()) { - - out.append('\t'); - out.append(col.toString()); - } - out.append('\n'); - int r = 0; - for (R row : hashRows.keySet()) { - out.append(row.toString()); - for (int c = 0; c < matrix.columns(); c++) { - out.append('\t'); - out.append(String.valueOf(matrix.getQuick(r, c))); - } - out.append('\n'); - ++r; - } - out.close(); - } - - public void save(String fileName) throws IOException { - save(new File(fileName)); - } - - public void saveDice(String fileName) throws IOException { - TextFile out = new TextFile(fileName, TextFile.W); - - out.append('-'); - for (R row 
: hashRows.keySet()) { - out.append('\t'); - out.append(row.toString()); - } - out.append('\n'); - - int c = 0; - for (C col : hashCols.keySet()) { - out.append(col.toString()); - for (int r = 0; r < matrix.rows(); r++) { - - out.append('\t'); - out.append(String.valueOf(matrix.getQuick(r, c))); - } - out.append('\n'); - ++c; - } - out.close(); - } - - private static byte[] intToByteArray(int value) { - return new byte[]{(byte) (value >>> 24), - (byte) (value >>> 16), - (byte) (value >>> 8), - (byte) value}; - } - - private static int byteArrayToInt(byte[] b) { - return (b[0] << 24) - + ((b[1] & 0xff) << 16) - + ((b[2] & 0xff) << 8) - + (b[3] & 0xff); - } - - //Getters and setters - public int rows() { - return matrix.rows(); - } - - public int columns() { - return matrix.columns(); - } - - public LinkedHashMap getHashRows() { - return hashRows; - } - - public void setHashRows(LinkedHashMap hashRows) { - this.hashRows = hashRows; - } - - public LinkedHashMap getHashCols() { - return hashCols; - } - - public void setHashCols(LinkedHashMap hashCols) { - this.hashCols = hashCols; - } - - public ArrayList getRowObjects() { - return new ArrayList(hashRows.keySet()); - } - - public void setRowObjects(List arrayList) throws Exception { - LinkedHashMap newHashRows = new LinkedHashMap((int) Math.ceil(arrayList.size() / 0.75)); - int i = 0; - for (R s : arrayList) { - if (!newHashRows.containsKey(s)) { - newHashRows.put(s, i); - } else { - System.out.println("Error, new row names contains dupilcates."); - throw (doubleMatrixDatasetNonUniqueHeaderException); - } - i++; - } - - this.hashRows = newHashRows; - } - - public ArrayList getColObjects() { - return new ArrayList(hashCols.keySet()); - } - - public void setColObjects(List arrayList) throws Exception { - LinkedHashMap newHashCols = new LinkedHashMap((int) Math.ceil(arrayList.size() / 0.75)); - int i = 0; - for (C s : arrayList) { - if (!newHashCols.containsKey(s)) { - newHashCols.put(s, i); - } else { - 
System.out.println("Error, new column names contains dupilcates."); - throw (doubleMatrixDatasetNonUniqueHeaderException); - } - i++; - } - this.hashCols = newHashCols; - } - - public DoubleMatrix2D getMatrix() { - return matrix; - } - - public void setMatrix(DoubleMatrix2D matrix) { - this.matrix = matrix; - } - - public void setMatrix(double[][] matrix) { - if ((matrix.length * (long) matrix[0].length) < (Integer.MAX_VALUE - 2)) { - this.matrix = new DenseDoubleMatrix2D(matrix); - } else { - this.matrix = new DenseLargeDoubleMatrix2D(matrix.length, matrix[0].length); - this.matrix.assign(matrix); - } - } - - /** - * Order columns - * - */ - public void OrderOnColumnnames() { - LinkedHashMap newColHash = new LinkedHashMap((int) Math.ceil(this.matrix.columns() / 0.75)); - ArrayList names = this.getColObjects(); - Collections.sort(names); - - int pos = 0; - for (C name : names) { - newColHash.put(name, pos); - pos++; - } - reorderCols(newColHash); - } - - /** - * Order rows - * - */ - public void OrderOnRownames() { - LinkedHashMap newRowHash = new LinkedHashMap((int) Math.ceil(this.matrix.rows() / 0.75)); - ArrayList names = this.getRowObjects(); - Collections.sort(names); - - int pos = -1; - for (R name : names) { - pos++; - newRowHash.put(name, pos); - } - reorderRows(newRowHash); - - } - - public void reorderRows(LinkedHashMap mappingIndex) { - boolean equal = compareHashRows(mappingIndex, this.hashRows); - if (!equal) { - DoubleMatrix2D newRawData; - if ((this.rows() * (long) this.columns()) < (Integer.MAX_VALUE - 2)) { - newRawData = new DenseDoubleMatrix2D(this.rows(), this.columns()); - } else { - newRawData = new DenseLargeDoubleMatrix2D(this.rows(), this.columns()); - } - - for (Map.Entry ent : mappingIndex.entrySet()) { - int pos = this.getHashRows().get(ent.getKey()); - for (int s = 0; s < this.columns(); ++s) { - newRawData.set(ent.getValue(), s, this.getMatrix().get(pos, s)); - } - } - this.setHashRows(mappingIndex); - this.setMatrix(newRawData); - } - 
- } - - public void reorderCols(LinkedHashMap mappingIndex) { - boolean equal = compareHashCols(mappingIndex, this.hashCols); - if (!equal) { - DoubleMatrix2D newRawData; - if ((this.rows() * (long) this.columns()) < (Integer.MAX_VALUE - 2)) { - newRawData = new DenseDoubleMatrix2D(this.rows(), this.columns()); - } else { - newRawData = new DenseLargeDoubleMatrix2D(this.rows(), this.columns()); - } - - for (Map.Entry ent : mappingIndex.entrySet()) { - int pos = this.getHashCols().get(ent.getKey()); - for (int p = 0; p < this.rows(); ++p) { - newRawData.set(p, ent.getValue(), this.getMatrix().get(p, pos)); - } - } - - this.setHashCols(mappingIndex); - this.setMatrix(newRawData); - } - } - - public DoubleMatrixDataset viewDice() { - return new DoubleMatrixDataset(matrix.viewDice(), hashCols, hashRows); - } - - private boolean compareHashCols(LinkedHashMap mappingIndex, LinkedHashMap originalHash) { - - for (Entry entry : mappingIndex.entrySet()) { - if (entry.getValue() != originalHash.get(entry.getKey())) { - return false; - } - } - return true; - } - - private boolean compareHashRows(LinkedHashMap mappingIndex, LinkedHashMap originalHash) { - - for (Entry entry : mappingIndex.entrySet()) { - if (entry.getValue() != originalHash.get(entry.getKey())) { - return false; - } - } - return true; - } - - /** - * Set a element of the dataset. - * - * @param rowName - * @param columnName - * @param value - */ - public void setElement(R rowName, C columnName, double value) { - - Integer row = hashRows.get(rowName); - Integer column = hashCols.get(columnName); - - if (row != null && column != null) { - matrix.setQuick(row, column, value); - } else { - if (row == null) { - throw new NoSuchElementException("Row not found: " + rowName.toString()); - } else { - throw new NoSuchElementException("Column not found: " + columnName.toString()); - } - - } - - } - - /** - * Get specific element. 
- * - * @param rowName - * @param columnName - * @return - */ - public double getElement(R rowName, C columnName) { - - Integer row = hashRows.get(rowName); - Integer column = hashCols.get(columnName); - - if (row != null && column != null) { - return matrix.getQuick(row, column); - } else { - if (row == null) { - throw new NoSuchElementException("Row not found: " + rowName.toString()); - } else { - throw new NoSuchElementException("Column not found: " + columnName.toString()); - } - } - } - - /** - * Get specific element. - * - * @param row - * @param column - * @return - */ - public double getElement(int row, int column) { - - return matrix.get(row, column); - } + } + + i = 0; + for (C col : colNames) { + hashCols.put(col, i); + ++i; + } + + if ((hashRows.size() * (long) hashCols.size()) < (Integer.MAX_VALUE - 2)) { + matrix = new DenseDoubleMatrix2D(hashRows.size(), hashCols.size()); + } else { + matrix = new DenseLargeDoubleMatrix2D(hashRows.size(), hashCols.size()); + } + + } + + public static DoubleMatrixDataset loadDoubleData(String fileName) throws IOException { + if ((fileName.endsWith(".txt") || fileName.endsWith(".tsv") || fileName.endsWith(".txt.gz"))) { + return loadDoubleTextData(fileName, "\t"); + } else if (fileName.endsWith(".binary")) { + return loadDoubleBinaryData(fileName); + } else { + throw new IllegalArgumentException("File type must be \".txt\", \".tsv\" or \".txt.gz\" when delimiter is set to: \"tab\" \n Input filename: " + fileName); + } + } + + public static DoubleMatrixDataset loadDoubleTextData(String fileName, String delimiter) throws IOException { + if (!(fileName.endsWith(".txt") || fileName.endsWith(".tsv") || fileName.endsWith(".txt.gz"))) { + throw new IllegalArgumentException("File type must be \".txt\", \".tsv\" or \".txt.gz\" when delimiter is set. 
\n Input filename: " + fileName); + } + + Pattern splitPatern = Pattern.compile(delimiter); + + int columnOffset = 1; + + TextFile in = new TextFile(fileName, TextFile.R); + String str = in.readLine(); // header + String[] data = splitPatern.split(str); + + int tmpCols = (data.length - columnOffset); + + LinkedHashMap colMap = new LinkedHashMap((int) Math.ceil(tmpCols / 0.75)); + + for (int s = 0; s < tmpCols; s++) { + String colName = data[s + columnOffset]; + if (!colMap.containsKey(colName)) { + colMap.put(colName, s); + } else { + LOGGER.warning("Duplicated column name!"); + throw (doubleMatrixDatasetNonUniqueHeaderException); + } + } + + int tmpRows = 0; + + while (in.readLine() != null) { + tmpRows++; + } + in.close(); + + LinkedHashMap rowMap = new LinkedHashMap((int) Math.ceil(tmpRows / 0.75)); + DoubleMatrix2D tmpMatrix; + + if ((tmpRows * (long) tmpCols) < (Integer.MAX_VALUE - 2)) { + tmpMatrix = new DenseDoubleMatrix2D(tmpRows, tmpCols); + } else { + tmpMatrix = new DenseLargeDoubleMatrix2D(tmpRows, tmpCols); + } + in.open(); + in.readLine(); // read header + int row = 0; + + boolean correctData = true; + while ((str = in.readLine()) != null) { + data = splitPatern.split(str); + + if (!rowMap.containsKey(data[0])) { + rowMap.put(data[0], row); + for (int s = 0; s < tmpCols; s++) { + double d; + try { + d = Double.parseDouble(data[s + columnOffset]); + } catch (NumberFormatException e) { + correctData = false; + d = Double.NaN; + } + tmpMatrix.setQuick(row, s, d); + } + row++; + } else { + LOGGER.warning("Duplicated row name!"); + throw (doubleMatrixDatasetNonUniqueHeaderException); + } + + } + if (!correctData) { + LOGGER.warning("Your data contains NaN/unparseable values!"); + } + in.close(); + + DoubleMatrixDataset dataset = new DoubleMatrixDataset(tmpMatrix, rowMap, colMap); + + LOGGER.log(Level.INFO, "''{0}'' has been loaded, nrRows: {1} nrCols: {2}", new Object[]{fileName, dataset.matrix.rows(), dataset.matrix.columns()}); + return dataset; + } + + 
public static DoubleMatrixDataset loadSubsetOfTextDoubleData(String fileName, String delimiter, HashSet desiredRows, HashSet desiredCols) throws IOException { + if (!(fileName.endsWith(".txt") || fileName.endsWith(".txt.gz"))) { + throw new IllegalArgumentException("File type must be .txt when delimiter is given (given filename: " + fileName + ")"); + } + + LinkedHashSet desiredColPos = new LinkedHashSet(); + + Pattern splitPatern = Pattern.compile(delimiter); + + int columnOffset = 1; + + TextFile in = new TextFile(fileName, TextFile.R); + String str = in.readLine(); // header + String[] data = splitPatern.split(str); + + int tmpCols = (data.length - columnOffset); + + LinkedHashMap colMap = new LinkedHashMap((int) Math.ceil(tmpCols / 0.75)); + + int storedCols = 0; + for (int s = 0; s < tmpCols; s++) { + String colName = data[s + columnOffset]; + if (!colMap.containsKey(colName) && (desiredCols == null || desiredCols.contains(colName) || desiredCols.isEmpty())) { + colMap.put(colName, storedCols); + desiredColPos.add((s)); + storedCols++; + } else if (colMap.containsKey(colName)) { + LOGGER.warning("Duplicated column name!"); + System.out.println("Tried to add: " + colName); + throw (doubleMatrixDatasetNonUniqueHeaderException); + } + } + + LinkedHashSet desiredRowPos = new LinkedHashSet(); + int rowsToStore = 0; + int totalRows = 0; + //System.out.println(desiredRows.toString()); + while ((str = in.readLine()) != null) { + String[] info = splitPatern.split(str); + if (desiredRows == null || desiredRows.contains(info[0]) || desiredRows.isEmpty()) { + rowsToStore++; + desiredRowPos.add(totalRows); + } + totalRows++; + } + in.close(); + + DoubleMatrix2D matrix; + if ((rowsToStore * (long) tmpCols) < (Integer.MAX_VALUE - 2)) { + matrix = new DenseDoubleMatrix2D(rowsToStore, storedCols); + } else { + matrix = new DenseLargeDoubleMatrix2D(rowsToStore, storedCols); + } + + in.open(); + in.readLine(); // read header + int storingRow = 0; + totalRows = 0; + LinkedHashMap 
rowMap = new LinkedHashMap((int) Math.ceil(rowsToStore / 0.75)); + + boolean correctData = true; + while ((str = in.readLine()) != null) { + + if (desiredRowPos.contains(totalRows)) { + data = splitPatern.split(str); + if (!rowMap.containsKey(data[0])) { + rowMap.put(data[0], storingRow); + int storingCol = 0; + for (int s : desiredColPos) { + double d; + try { + d = Double.parseDouble(data[s + columnOffset]); + } catch (NumberFormatException e) { + correctData = false; + d = Double.NaN; + } + matrix.setQuick(storingRow, storingCol, d); + storingCol++; + } + storingRow++; + } else if (rowMap.containsKey(data[0])) { + LOGGER.warning("Duplicated row name!"); + System.out.println("Tried to add: " + data[0]); + throw (doubleMatrixDatasetNonUniqueHeaderException); + } + } + totalRows++; + } + if (!correctData) { + LOGGER.warning("Your data contains NaN/unparseable values!"); + } + in.close(); + + DoubleMatrixDataset dataset = new DoubleMatrixDataset(matrix, rowMap, colMap); + + LOGGER.log(Level.INFO, "''{0}'' has been loaded, nrRows: {1} nrCols: {2}", new Object[]{fileName, dataset.matrix.rows(), dataset.matrix.columns()}); + return dataset; + } + + private static DoubleMatrixDataset loadDoubleBinaryData(String fileName) throws FileNotFoundException, IOException { + //First load the raw binary data: + File fileBinary = new File(fileName + ".dat"); + BufferedInputStream in; + int nrRows; + int nrCols; + in = new BufferedInputStream(new FileInputStream(fileBinary)); + byte[] bytes = new byte[4]; + in.read(bytes, 0, 4); + nrRows = byteArrayToInt(bytes); + in.read(bytes, 0, 4); + nrCols = byteArrayToInt(bytes); + + DoubleMatrix2D matrix; + if ((nrRows * (long) nrCols) < (Integer.MAX_VALUE - 2)) { + matrix = new DenseDoubleMatrix2D(nrRows, nrCols); + } else { + matrix = new DenseLargeDoubleMatrix2D(nrRows, nrCols); + } + + //Now load the row and column identifiers from files + LinkedHashMap rowMap = loadIdentifiers(fileName + ".rows.txt"); + LinkedHashMap colMap = 
loadIdentifiers(fileName + ".cols.txt"); + + byte[] buffer = new byte[nrCols * 8]; + long bits; + for (int row = 0; row < nrRows; row++) { + in.read(buffer, 0, nrCols * 8); + int bufferLoc = 0; + for (int col = 0; col < nrCols; col++) { + bits = (long) (0xff & buffer[bufferLoc + 7]) + | (long) (0xff & buffer[bufferLoc + 6]) << 8 + | (long) (0xff & buffer[bufferLoc + 5]) << 16 + | (long) (0xff & buffer[bufferLoc + 4]) << 24 + | (long) (0xff & buffer[bufferLoc + 3]) << 32 + | (long) (0xff & buffer[bufferLoc + 2]) << 40 + | (long) (0xff & buffer[bufferLoc + 1]) << 48 + | (long) (buffer[bufferLoc]) << 56; + + matrix.setQuick(row, col, Double.longBitsToDouble(bits)); + bufferLoc += 8; + } + } + in.close(); + + DoubleMatrixDataset dataset = new DoubleMatrixDataset(matrix, rowMap, colMap); + LOGGER.log(Level.INFO, "Binary file ''{0}'' has been loaded, nrRows: {1} nrCols: {2}", new Object[]{fileName, nrRows, nrCols}); + + return dataset; + } + + private static LinkedHashMap loadIdentifiers(String filename) throws IOException { + TextFile tf = new TextFile(filename, false); + String[] rowsArr = tf.readAsArray(); + tf.close(); + LinkedHashMap rowMap = new LinkedHashMap(); + for (String row : rowsArr) { + rowMap.put(row, rowMap.size()); + } + return rowMap; + } + + public void save(File file) throws IOException { + TextFile out = new TextFile(file, TextFile.W); + + out.append('-'); + for (C col : hashCols.keySet()) { + + out.append('\t'); + out.append(col.toString()); + } + out.append('\n'); + int r = 0; + for (R row : hashRows.keySet()) { + out.append(row.toString()); + for (int c = 0; c < matrix.columns(); c++) { + out.append('\t'); + out.append(String.valueOf(matrix.getQuick(r, c))); + } + out.append('\n'); + ++r; + } + out.close(); + } + + public void save(String fileName) throws IOException { + save(new File(fileName)); + } + + public void saveDice(String fileName) throws IOException { + TextFile out = new TextFile(fileName, TextFile.W); + + out.append('-'); + for (R row 
: hashRows.keySet()) { + out.append('\t'); + out.append(row.toString()); + } + out.append('\n'); + + int c = 0; + for (C col : hashCols.keySet()) { + out.append(col.toString()); + for (int r = 0; r < matrix.rows(); r++) { + + out.append('\t'); + out.append(String.valueOf(matrix.getQuick(r, c))); + } + out.append('\n'); + ++c; + } + out.close(); + } + + private static byte[] intToByteArray(int value) { + return new byte[]{(byte) (value >>> 24), + (byte) (value >>> 16), + (byte) (value >>> 8), + (byte) value}; + } + + private static int byteArrayToInt(byte[] b) { + return (b[0] << 24) + + ((b[1] & 0xff) << 16) + + ((b[2] & 0xff) << 8) + + (b[3] & 0xff); + } + + //Getters and setters + public int rows() { + return matrix.rows(); + } + + public int columns() { + return matrix.columns(); + } + + public LinkedHashMap getHashRows() { + return hashRows; + } + + public void setHashRows(LinkedHashMap hashRows) { + this.hashRows = hashRows; + } + + public LinkedHashMap getHashCols() { + return hashCols; + } + + public void setHashCols(LinkedHashMap hashCols) { + this.hashCols = hashCols; + } + + public ArrayList getRowObjects() { + return new ArrayList(hashRows.keySet()); + } + + public void setRowObjects(List arrayList) throws Exception { + LinkedHashMap newHashRows = new LinkedHashMap((int) Math.ceil(arrayList.size() / 0.75)); + int i = 0; + for (R s : arrayList) { + if (!newHashRows.containsKey(s)) { + newHashRows.put(s, i); + } else { + System.out.println("Error, new row names contains dupilcates."); + throw (doubleMatrixDatasetNonUniqueHeaderException); + } + i++; + } + + this.hashRows = newHashRows; + } + + public ArrayList getColObjects() { + return new ArrayList(hashCols.keySet()); + } + + public void setColObjects(List arrayList) throws Exception { + LinkedHashMap newHashCols = new LinkedHashMap((int) Math.ceil(arrayList.size() / 0.75)); + int i = 0; + for (C s : arrayList) { + if (!newHashCols.containsKey(s)) { + newHashCols.put(s, i); + } else { + 
System.out.println("Error, new column names contains dupilcates."); + throw (doubleMatrixDatasetNonUniqueHeaderException); + } + i++; + } + this.hashCols = newHashCols; + } + + public DoubleMatrix2D getMatrix() { + return matrix; + } + + public void setMatrix(DoubleMatrix2D matrix) { + this.matrix = matrix; + } + + public void setMatrix(double[][] matrix) { + if ((matrix.length * (long) matrix[0].length) < (Integer.MAX_VALUE - 2)) { + this.matrix = new DenseDoubleMatrix2D(matrix); + } else { + this.matrix = new DenseLargeDoubleMatrix2D(matrix.length, matrix[0].length); + this.matrix.assign(matrix); + } + } + + /** + * Order columns + * + */ + public void OrderOnColumnnames() { + LinkedHashMap newColHash = new LinkedHashMap((int) Math.ceil(this.matrix.columns() / 0.75)); + ArrayList names = this.getColObjects(); + Collections.sort(names); + + int pos = 0; + for (C name : names) { + newColHash.put(name, pos); + pos++; + } + reorderCols(newColHash); + } + + /** + * Order rows + * + */ + public void OrderOnRownames() { + LinkedHashMap newRowHash = new LinkedHashMap((int) Math.ceil(this.matrix.rows() / 0.75)); + ArrayList names = this.getRowObjects(); + Collections.sort(names); + + int pos = -1; + for (R name : names) { + pos++; + newRowHash.put(name, pos); + } + reorderRows(newRowHash); + + } + + public void reorderRows(LinkedHashMap mappingIndex) { + boolean equal = compareHashRows(mappingIndex, this.hashRows); + if (!equal) { + DoubleMatrix2D newRawData; + if ((this.rows() * (long) this.columns()) < (Integer.MAX_VALUE - 2)) { + newRawData = new DenseDoubleMatrix2D(this.rows(), this.columns()); + } else { + newRawData = new DenseLargeDoubleMatrix2D(this.rows(), this.columns()); + } + + for (Map.Entry ent : mappingIndex.entrySet()) { + int pos = this.getHashRows().get(ent.getKey()); + for (int s = 0; s < this.columns(); ++s) { + newRawData.set(ent.getValue(), s, this.getMatrix().get(pos, s)); + } + } + this.setHashRows(mappingIndex); + this.setMatrix(newRawData); + } + 
+ } + + public void reorderCols(LinkedHashMap mappingIndex) { + boolean equal = compareHashCols(mappingIndex, this.hashCols); + if (!equal) { + DoubleMatrix2D newRawData; + if ((this.rows() * (long) this.columns()) < (Integer.MAX_VALUE - 2)) { + newRawData = new DenseDoubleMatrix2D(this.rows(), this.columns()); + } else { + newRawData = new DenseLargeDoubleMatrix2D(this.rows(), this.columns()); + } + + for (Map.Entry ent : mappingIndex.entrySet()) { + int pos = this.getHashCols().get(ent.getKey()); + for (int p = 0; p < this.rows(); ++p) { + newRawData.set(p, ent.getValue(), this.getMatrix().get(p, pos)); + } + } + + this.setHashCols(mappingIndex); + this.setMatrix(newRawData); + } + } + + public DoubleMatrixDataset viewDice() { + return new DoubleMatrixDataset(matrix.viewDice(), hashCols, hashRows); + } + + private boolean compareHashCols(LinkedHashMap mappingIndex, LinkedHashMap originalHash) { + + for (Entry entry : mappingIndex.entrySet()) { + if (entry.getValue() != originalHash.get(entry.getKey())) { + return false; + } + } + return true; + } + + private boolean compareHashRows(LinkedHashMap mappingIndex, LinkedHashMap originalHash) { + + for (Entry entry : mappingIndex.entrySet()) { + if (entry.getValue() != originalHash.get(entry.getKey())) { + return false; + } + } + return true; + } + + /** + * Set a element of the dataset. + * + * @param rowName + * @param columnName + * @param value + */ + public void setElement(R rowName, C columnName, double value) { + + Integer row = hashRows.get(rowName); + Integer column = hashCols.get(columnName); + + if (row != null && column != null) { + matrix.setQuick(row, column, value); + } else { + if (row == null) { + throw new NoSuchElementException("Row not found: " + rowName.toString()); + } else { + throw new NoSuchElementException("Column not found: " + columnName.toString()); + } + + } + + } + + /** + * Get specific element. 
+ * + * @param rowName + * @param columnName + * @return + */ + public double getElement(R rowName, C columnName) { + + Integer row = hashRows.get(rowName); + Integer column = hashCols.get(columnName); + + if (row != null && column != null) { + return matrix.getQuick(row, column); + } else { + if (row == null) { + throw new NoSuchElementException("Row not found: " + rowName.toString()); + } else { + throw new NoSuchElementException("Column not found: " + columnName.toString()); + } + } + } + + public DoubleMatrix1D getRow (R rowName){ + Integer row = hashRows.get(rowName); + if (row != null){ + return matrix.viewRow(row); + } else { + throw new NoSuchElementException("Row not found: " + rowName.toString()); + } + } + + /** + * Get specific element. + * + * @param row + * @param column + * @return + */ + public double getElement(int row, int column) { + + return matrix.get(row, column); + } + + public boolean containsRow(R rowId){ + return hashRows.containsKey(rowId); + } + + public boolean containsCol(C colId){ + return hashCols.containsKey(colId); + } } From 483861b9b42eedcdbe2630ee18ec14e4f6c478ee Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Sat, 9 May 2015 14:27:48 +0200 Subject: [PATCH 038/143] Working on interaction direction --- .../AbstractRandomAccessGenotypeData.java | 76 ++-- .../genotype/RandomAccessGenotypeData.java | 5 + eqtl-mapping-pipeline/pom.xml | 301 ++++++++-------- ...InteractionAnalysisDetermineDirection.java | 337 ++++++++++++++++++ 4 files changed, 531 insertions(+), 188 deletions(-) create mode 100644 eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisDetermineDirection.java diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/AbstractRandomAccessGenotypeData.java b/Genotype-IO/src/main/java/org/molgenis/genotype/AbstractRandomAccessGenotypeData.java index 8770c6bc7..6335d7538 100644 --- a/Genotype-IO/src/main/java/org/molgenis/genotype/AbstractRandomAccessGenotypeData.java +++ 
b/Genotype-IO/src/main/java/org/molgenis/genotype/AbstractRandomAccessGenotypeData.java @@ -6,15 +6,14 @@ import org.molgenis.genotype.variant.GeneticVariant; import org.molgenis.genotype.variantFilter.VariantFilter; -public abstract class AbstractRandomAccessGenotypeData extends AbstractGenotypeData implements RandomAccessGenotypeData -{ +public abstract class AbstractRandomAccessGenotypeData extends AbstractGenotypeData implements RandomAccessGenotypeData { + + private HashMap fullVariantMap = null; + @Override - public Sequence getSequenceByName(String name) - { - for (Sequence sequence : getSequences()) - { - if (sequence.getName().equals(name)) - { + public Sequence getSequenceByName(String name) { + for (Sequence sequence : getSequences()) { + if (sequence.getName().equals(name)) { return sequence; } } @@ -23,14 +22,11 @@ public Sequence getSequenceByName(String name) } @Override - public GeneticVariant getSnpVariantByPos(String seqName, int startPos) - { + public GeneticVariant getSnpVariantByPos(String seqName, int startPos) { Iterable variants = getVariantsByPos(seqName, startPos); - for (GeneticVariant variant : variants) - { - if (variant.isSnp()) - { + for (GeneticVariant variant : variants) { + if (variant.isSnp()) { // only one SNP possible per position. 
Returning this SNP only return variant; } @@ -42,59 +38,63 @@ public GeneticVariant getSnpVariantByPos(String seqName, int startPos) @Override public HashMap getVariantIdMap() { - return getVariantIdMap(null); + + if (fullVariantMap == null) { + fullVariantMap = getVariantIdMap(null); + } + return fullVariantMap; + } + @Override + public void clearVariantIdMap(){ + fullVariantMap = null; + } + @Override public HashMap getVariantIdMap(VariantFilter filter) { - + HashMap variantIdMap = new HashMap(); - - for(GeneticVariant variant : this){ - if( variant.getVariantId().getPrimairyId() != null && !variant.getPrimaryVariantId().equals("") && (filter == null || filter.doesVariantPassFilter(variant))){ + + for (GeneticVariant variant : this) { + if (variant.getVariantId().getPrimairyId() != null && !variant.getPrimaryVariantId().equals("") && (filter == null || filter.doesVariantPassFilter(variant))) { variantIdMap.put(variant.getPrimaryVariantId(), variant); } } - + return variantIdMap; - + } @Override - public Iterator iterator() - { + public Iterator iterator() { return new GeneticVariantsIterator(this); } - private static class GeneticVariantsIterator implements Iterator - { + private static class GeneticVariantsIterator implements Iterator { + private Iterator seqNames; private Iterator seqGeneticVariants; private RandomAccessGenotypeData randomAccessGenotypeData; - public GeneticVariantsIterator(RandomAccessGenotypeData randomAccessGenotypeData) - { + public GeneticVariantsIterator(RandomAccessGenotypeData randomAccessGenotypeData) { seqNames = randomAccessGenotypeData.getSeqNames().iterator(); seqGeneticVariants = randomAccessGenotypeData.getSequenceGeneticVariants(seqNames.next()).iterator(); this.randomAccessGenotypeData = randomAccessGenotypeData; } @Override - public boolean hasNext() - { + public boolean hasNext() { return seqGeneticVariants.hasNext() || seqNames.hasNext(); } @Override - public GeneticVariant next() - { - if (seqGeneticVariants.hasNext()) - { 
+ public GeneticVariant next() { + if (seqGeneticVariants.hasNext()) { return seqGeneticVariants.next(); } - if (seqNames.hasNext()) - { + if (seqNames.hasNext()) { seqGeneticVariants = randomAccessGenotypeData.getSequenceGeneticVariants(seqNames.next()).iterator(); return seqGeneticVariants.next(); } @@ -103,12 +103,8 @@ public GeneticVariant next() } @Override - public void remove() - { + public void remove() { throw new UnsupportedOperationException(); } - - - } } diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/RandomAccessGenotypeData.java b/Genotype-IO/src/main/java/org/molgenis/genotype/RandomAccessGenotypeData.java index 68309c1fe..2cdccd06f 100644 --- a/Genotype-IO/src/main/java/org/molgenis/genotype/RandomAccessGenotypeData.java +++ b/Genotype-IO/src/main/java/org/molgenis/genotype/RandomAccessGenotypeData.java @@ -81,6 +81,11 @@ public interface RandomAccessGenotypeData extends GenotypeData { */ HashMap getVariantIdMap(VariantFilter filter); + /** + * Variant ID map without filter is saved as cache, use this function to clear this cache. + */ + void clearVariantIdMap(); + /** * Get a HashMap with the variants that have a primairy ID. 
* diff --git a/eqtl-mapping-pipeline/pom.xml b/eqtl-mapping-pipeline/pom.xml index e83281b89..db8281179 100644 --- a/eqtl-mapping-pipeline/pom.xml +++ b/eqtl-mapping-pipeline/pom.xml @@ -1,151 +1,156 @@ - - nl.systemsgenetics - systemsgenetics - 1.0.2-SNAPSHOT - - eqtl-mapping-pipeline - 1.3.4-SNAPSHOT - jar - 4.0.0 - - - nl.systemsgenetics - genetica-libraries - 1.0.6-SNAPSHOT - - - log4j - log4j - 1.2.17 - - - nl.systemsgenetics - Genotype-IO - 1.0.1 - - - net.sf.trove4j - trove4j - 3.0.3 - - - commons-cli - commons-cli - 1.2 - - - commons-beanutils - commons-beanutils - 1.8.3 - - - commons-codec - commons-codec - 1.5 - - - commons-digester - commons-digester - 2.0 - - - net.sourceforge.parallelcolt - parallelcolt - 0.10.0 - - - ${project.groupId} - imputation-tool - 1.0.3 - - - net.rforge - Rserve - 0.6-8.1 - - - org.testng - testng - 6.5.2 - test - - - net.sf.opencsv - opencsv - 2.3 - - - - - - src/main/resources - true - - **/version.properties - - - - eqtl-mapping-pipeline-${project.version} - - - - org.apache.maven.plugins - maven-assembly-plugin - 2.4 - - - - - src/main/assembly/assembly.xml - - - - - make-assembly - package - - single - - - - - - - org.apache.maven.plugins - maven-jar-plugin - 2.3.1 - - - - - - - - true - - lib/ - - eqtlmappingpipeline.Main - - - - - - org.apache.maven.plugins - maven-compiler-plugin - 2.3.2 - - 1.7 - 1.7 - - - - + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + + nl.systemsgenetics + systemsgenetics + 1.0.2-SNAPSHOT + + eqtl-mapping-pipeline + 1.3.4-SNAPSHOT + jar + 4.0.0 + + + nl.systemsgenetics + genetica-libraries + 1.0.6-SNAPSHOT + + + log4j + log4j + 1.2.17 + + + nl.systemsgenetics + Genotype-IO + 1.0.1 + + + net.sf.trove4j + trove4j + 3.0.3 + + + commons-cli + commons-cli + 1.2 + + + commons-beanutils + commons-beanutils + 1.8.3 + + + commons-codec + commons-codec + 1.5 + + + commons-digester + commons-digester + 2.0 + + + net.sourceforge.parallelcolt + 
parallelcolt + 0.10.0 + + + ${project.groupId} + imputation-tool + 1.0.3 + + + net.rforge + Rserve + 0.6-8.1 + + + org.testng + testng + 6.5.2 + test + + + net.sf.opencsv + opencsv + 2.3 + + + org.apache.commons + commons-collections4 + 4.0 + + + + + + src/main/resources + true + + **/version.properties + + + + eqtl-mapping-pipeline-${project.version} + + + + org.apache.maven.plugins + maven-assembly-plugin + 2.4 + + + + + src/main/assembly/assembly.xml + + + + + make-assembly + package + + single + + + + + + + org.apache.maven.plugins + maven-jar-plugin + 2.3.1 + + + + + + + + true + + lib/ + + eqtlmappingpipeline.Main + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 2.3.2 + + 1.7 + 1.7 + + + + \ No newline at end of file diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisDetermineDirection.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisDetermineDirection.java new file mode 100644 index 000000000..0da9e9ef1 --- /dev/null +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisDetermineDirection.java @@ -0,0 +1,337 @@ +package eqtlmappingpipeline.interactionanalysis; + +import gnu.trove.list.array.TDoubleArrayList; +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.Map; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.cli.PosixParser; +import org.apache.commons.collections4.BidiMap; +import 
org.apache.commons.collections4.bidimap.DualHashBidiMap; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.stat.correlation.SpearmansCorrelation; +import org.apache.commons.math3.stat.ranking.NaNStrategy; +import org.apache.commons.math3.stat.ranking.NaturalRanking; +import org.apache.commons.math3.stat.ranking.RankingAlgorithm; +import org.apache.log4j.Logger; +import org.molgenis.genotype.Allele; +import org.molgenis.genotype.Alleles; +import org.molgenis.genotype.GenotypeDataException; +import org.molgenis.genotype.GenotypeInfo; +import org.molgenis.genotype.RandomAccessGenotypeData; +import org.molgenis.genotype.RandomAccessGenotypeDataReaderFormats; +import org.molgenis.genotype.multipart.IncompatibleMultiPartGenotypeDataException; +import org.molgenis.genotype.tabix.TabixFileNotFoundException; +import org.molgenis.genotype.variant.GeneticVariant; +import umcg.genetica.math.matrix2.DoubleMatrixDataset; + +/** + * + * @author Patrick Deelen + */ +public class InteractionAnalysisDetermineDirection { + + private final RandomAccessGenotypeData genotypeData; + private final DoubleMatrixDataset expressionData; + private final DoubleMatrixDataset covariatesData; + private final BidiMap gte; + private final HashMap variantIdMap; + private static final RankingAlgorithm COV_RANKER = new NaturalRanking(NaNStrategy.FAILED, new Well19937c(1)); + private static final SpearmansCorrelation spearmanCalculator = new SpearmansCorrelation(); + private static final Options OPTIONS; + private static Logger LOGGER; + + static { + + LOGGER = Logger.getLogger(GenotypeInfo.class); + + OPTIONS = new Options(); + + OptionBuilder.withArgName("basePath"); + OptionBuilder.hasArgs(); + OptionBuilder.withDescription("The genotype"); + OptionBuilder.withLongOpt("genotypes"); + OptionBuilder.isRequired(); + OPTIONS.addOption(OptionBuilder.create("g")); + + OptionBuilder.withArgName("format"); + 
OptionBuilder.hasArg(); + OptionBuilder.withDescription("The genotype data format. If not defined will attempt to automatically select the first matching dataset on the specified path\n" + + "* PED_MAP - plink PED MAP files.\n" + + "* PLINK_BED - plink BED BIM FAM files.\n" + + "* VCF - bgziped vcf with tabix index file\n" + + "* VCFFOLDER - matches all bgziped vcf files + tabix index in a folder\n" + + "* SHAPEIT2 - shapeit2 phased haplotypes .haps & .sample\n" + + "* GEN - Oxford .gen & .sample\n" + + "* TRITYPER - TriTyper format folder"); + OptionBuilder.withLongOpt("genotypesFormat"); + OPTIONS.addOption(OptionBuilder.create("G")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Expression data"); + OptionBuilder.withLongOpt("expression"); + OptionBuilder.isRequired(); + OPTIONS.addOption(OptionBuilder.create("e")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Covariate data"); + OptionBuilder.withLongOpt("covariates"); + OptionBuilder.isRequired(); + OPTIONS.addOption(OptionBuilder.create("c")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Genotype to expression coupling"); + OptionBuilder.withLongOpt("gte"); + OptionBuilder.isRequired(); + OPTIONS.addOption(OptionBuilder.create("gte")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Query variant gene covariate. 
No header"); + OptionBuilder.withLongOpt("query"); + OptionBuilder.isRequired(); + OPTIONS.addOption(OptionBuilder.create("q")); + + } + + public static void main(String[] args) throws IOException { + + CommandLineParser parser = new PosixParser(); + final CommandLine commandLine; + try { + commandLine = parser.parse(OPTIONS, args, false); + } catch (ParseException ex) { + System.err.println("Invalid command line arguments: " + ex.getMessage()); + System.err.println(); + new HelpFormatter().printHelp(" ", OPTIONS); + System.exit(1); + return; + } + + final String[] genotypePath = commandLine.getOptionValues("g"); + final RandomAccessGenotypeDataReaderFormats genotypeFormat; + + try { + if (commandLine.hasOption("G")) { + genotypeFormat = RandomAccessGenotypeDataReaderFormats.valueOf(commandLine.getOptionValue("G").toUpperCase()); + } else { + if (genotypePath[0].endsWith(".vcf")) { + System.err.println("Only vcf.gz is supported. Please see manual on how to do create a vcf.gz file."); + System.exit(1); + return; + } + try { + genotypeFormat = RandomAccessGenotypeDataReaderFormats.matchFormatToPath(genotypePath[0]); + } catch (GenotypeDataException e) { + System.err.println("Unable to determine input 1 type based on specified path. 
Please specify --G"); + System.exit(1); + return; + } + } + } catch (IllegalArgumentException e) { + System.err.println("Error parsing --G \"" + commandLine.getOptionValue("G") + "\" is not a valid input data format"); + System.exit(1); + return; + } + + final String expressionDataPath = commandLine.getOptionValue("e"); + final String covariateDataPath = commandLine.getOptionValue("c"); + final String gtePath = commandLine.getOptionValue("gte"); + final String gueryPath = commandLine.getOptionValue("q"); + + System.out.println("Genotype data: " + genotypePath); + System.out.println("Genotype data format: " + genotypeFormat); + System.out.println("Expression data: " + expressionDataPath); + System.out.println("Covariate data: " + covariateDataPath); + System.out.println("Gte data: " + gtePath); + System.out.println("Query: " + gueryPath); + + final RandomAccessGenotypeData genotypeData; + + try { + genotypeData = genotypeFormat.createFilteredGenotypeData(genotypePath, 100, null, null, null, 0.8); + } catch (TabixFileNotFoundException e) { + LOGGER.fatal("Tabix file not found for input data at: " + e.getPath() + "\n" + + "Please see README on how to create a tabix file"); + System.exit(1); + return; + } catch (IOException e) { + LOGGER.fatal("Error reading input data: " + e.getMessage(), e); + System.exit(1); + return; + } catch (IncompatibleMultiPartGenotypeDataException e) { + LOGGER.fatal("Error combining the impute genotype data files: " + e.getMessage(), e); + System.exit(1); + return; + } catch (GenotypeDataException e) { + LOGGER.fatal("Error reading input data: " + e.getMessage(), e); + System.exit(1); + return; + } + + System.out.println("Genotype data loaded for " + genotypeData.getSampleNames().length + " individuals"); + + final DoubleMatrixDataset expressionData = DoubleMatrixDataset.loadDoubleTextData(expressionDataPath, "\t"); + + System.out.println("Loaded expression data for: " + expressionData.rows() + " genes and " + expressionData.columns() + " 
individuals"); + + final DoubleMatrixDataset covariatesData = DoubleMatrixDataset.loadDoubleTextData(covariateDataPath, "\t"); + + System.out.println("Loaded covariate data for: " + expressionData.rows() + " genes and " + expressionData.columns() + " individuals"); + + final BidiMap gte = loadGte(gtePath); + + } + + private static BidiMap loadGte(String gtePath) throws IOException { + + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(gtePath), "UTF-8")); + + String line; + + BidiMap gte = new DualHashBidiMap(); + + while ((line = reader.readLine()) != null) { + String[] elements = StringUtils.split(line, '\t'); + gte.put(elements[0], elements[1]); + } + + return gte; + } + + public InteractionAnalysisDetermineDirection(RandomAccessGenotypeData genotypeData, DoubleMatrixDataset expressionData, DoubleMatrixDataset covariatesData, BidiMap gte) { + this.genotypeData = genotypeData; + this.expressionData = expressionData; + this.covariatesData = covariatesData; + this.gte = gte; + this.variantIdMap = genotypeData.getVariantIdMap(); + + HashSet genotypedSamples = new HashSet(); + Collections.addAll(genotypedSamples, genotypeData.getSampleNames()); + + for (Iterator> it = gte.entrySet().iterator(); it.hasNext();) { + Map.Entry gteEntry = it.next(); + + if (!genotypedSamples.contains(gteEntry.getKey())) { + it.remove(); + } + + if (!expressionData.containsCol(gteEntry.getValue())) { + it.remove(); + } + + if (!covariatesData.containsCol(gteEntry.getValue())) { + it.remove(); + } + + } + + System.out.println("Samples with: genotypes, expression & covariate data: " + gte.size()); + + } + + public double calculateEffectDifference(String snpId, String geneName, String covariateName, Allele assessedAllele, double fractionOfSamplesPerGroup) { + + if (!variantIdMap.containsKey(snpId)) { + return Double.NaN; + } + + if (!expressionData.containsRow(geneName)) { + return Double.NaN; + } + + if (!covariatesData.containsRow(covariateName)) { + 
return Double.NaN; + } + + if (fractionOfSamplesPerGroup <= 0 || fractionOfSamplesPerGroup >= 1) { + throw new RuntimeException("Fraction must be between 0 and 1"); + } + + GeneticVariant variant = variantIdMap.get(snpId); + Alleles variantAlleles = variant.getVariantAlleles(); + + if (!variantAlleles.contains(assessedAllele)) { + return Double.NaN; + } + + if (variantAlleles.getAlleleCount() != 2) { + return Double.NaN; + } + + float[] dosagesAll = variant.getSampleDosages(); + String[] genotypedSamples = genotypeData.getSampleNames(); + + LinkedHashSet includedGenotypedSamples = new LinkedHashSet<>(); + TDoubleArrayList dosages = new TDoubleArrayList(dosagesAll.length); + + for (int i = 0; i < dosagesAll.length; ++i) { + if (dosagesAll[i] >= 0 && gte.containsKey(genotypedSamples[i])) { + includedGenotypedSamples.add(genotypedSamples[i]); + dosages.add(dosagesAll[i]); + } + } + + System.out.println("Included samples: " + includedGenotypedSamples.size()); + + double[] expressionLevels = new double[includedGenotypedSamples.size()]; + double[] covariateLevels = new double[includedGenotypedSamples.size()]; + + int s = 0; + for (String genotypeSample : includedGenotypedSamples) { + expressionLevels[s] = expressionData.getElement(geneName, gte.get(genotypeSample)); + covariateLevels[s] = covariatesData.getElement(covariateName, gte.get(genotypeSample)); + ++s; + } + + if (assessedAllele != variantAlleles.get(0)) { + for (int i = 0; i < dosages.size(); ++i) { + dosages.setQuick(i, dosages.getQuick(i) * -1); + } + } + + double[] covariateRanks = COV_RANKER.rank(covariateLevels); + + int samplesPerGroup = (int) Math.floor(covariateRanks.length * fractionOfSamplesPerGroup); + + System.out.println("Samples per group: " + samplesPerGroup); + + double[] dosagesLow = new double[samplesPerGroup]; + double[] expressionLow = new double[samplesPerGroup]; + + double[] dosagesHigh = new double[samplesPerGroup]; + double[] expressionHigh = new double[samplesPerGroup]; + + for (int i = 
0; i < samplesPerGroup; ++i) { + dosagesLow[i] = dosages.get((int) covariateRanks[i]); + expressionLow[i] = expressionLevels[(int) covariateRanks[i]]; + dosagesHigh[i] = dosages.get((int) covariateRanks[covariateRanks.length - 1 - i]); + expressionHigh[i] = expressionLevels[(int) covariateRanks[covariateRanks.length - 1 - i]]; + } + + double rhoLow = spearmanCalculator.correlation(dosagesLow, expressionLow); + double rhoHigh = spearmanCalculator.correlation(dosagesHigh, expressionHigh); + + System.out.println("rho low:" + rhoLow); + System.out.println("rho high:" + rhoHigh); + + return rhoHigh - rhoLow; + + } +} From f47ed4522345183311918555263c569c9cd7a5bc Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Sun, 10 May 2015 22:17:24 +0200 Subject: [PATCH 039/143] fix in settings.xml --- .../src/main/scripts/settings.xml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/eqtl-mapping-pipeline/src/main/scripts/settings.xml b/eqtl-mapping-pipeline/src/main/scripts/settings.xml index 6f1053fed..420e86859 100644 --- a/eqtl-mapping-pipeline/src/main/scripts/settings.xml +++ b/eqtl-mapping-pipeline/src/main/scripts/settings.xml @@ -1,7 +1,7 @@ - - + + 0.95 0.0001 0.05 @@ -20,8 +20,8 @@ fdr 0.05 - probe-level - false + probe-level + false 100 @@ -36,15 +36,15 @@ - + false false true - + - + Dataset1 From 92dae98cacc004e5d1c5faaa3d7d73531d948fef Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Sun, 17 May 2015 21:45:04 +0200 Subject: [PATCH 040/143] Determine direction tool --- ...InteractionAnalysisDetermineDirection.java | 95 +++++++++++++++---- 1 file changed, 75 insertions(+), 20 deletions(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisDetermineDirection.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisDetermineDirection.java index 0da9e9ef1..f52ad47d5 100644 --- 
a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisDetermineDirection.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisDetermineDirection.java @@ -1,8 +1,12 @@ package eqtlmappingpipeline.interactionanalysis; +import au.com.bytecode.opencsv.CSVReader; +import au.com.bytecode.opencsv.CSVWriter; import gnu.trove.list.array.TDoubleArrayList; import java.io.BufferedReader; import java.io.FileInputStream; +import java.io.FileReader; +import java.io.FileWriter; import java.io.IOException; import java.io.InputStreamReader; import java.util.Collections; @@ -79,7 +83,7 @@ public class InteractionAnalysisDetermineDirection { + "* TRITYPER - TriTyper format folder"); OptionBuilder.withLongOpt("genotypesFormat"); OPTIONS.addOption(OptionBuilder.create("G")); - + OptionBuilder.withArgName("path"); OptionBuilder.hasArg(); OptionBuilder.withDescription("Expression data"); @@ -93,21 +97,35 @@ public class InteractionAnalysisDetermineDirection { OptionBuilder.withLongOpt("covariates"); OptionBuilder.isRequired(); OPTIONS.addOption(OptionBuilder.create("c")); - + OptionBuilder.withArgName("path"); OptionBuilder.hasArg(); OptionBuilder.withDescription("Genotype to expression coupling"); OptionBuilder.withLongOpt("gte"); OptionBuilder.isRequired(); OPTIONS.addOption(OptionBuilder.create("gte")); - + OptionBuilder.withArgName("path"); OptionBuilder.hasArg(); - OptionBuilder.withDescription("Query variant gene covariate. No header"); + OptionBuilder.withDescription("Query variant gene covariate assessedAllele. 
No header"); OptionBuilder.withLongOpt("query"); OptionBuilder.isRequired(); OPTIONS.addOption(OptionBuilder.create("q")); + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Output file"); + OptionBuilder.withLongOpt("output"); + OptionBuilder.isRequired(); + OPTIONS.addOption(OptionBuilder.create("o")); + + OptionBuilder.withArgName("double"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Fraction of tail of either end of covarate to use."); + OptionBuilder.withLongOpt("fraction"); + OptionBuilder.isRequired(); + OPTIONS.addOption(OptionBuilder.create("f")); + } public static void main(String[] args) throws IOException { @@ -149,19 +167,23 @@ public static void main(String[] args) throws IOException { System.exit(1); return; } - + final String expressionDataPath = commandLine.getOptionValue("e"); final String covariateDataPath = commandLine.getOptionValue("c"); final String gtePath = commandLine.getOptionValue("gte"); - final String gueryPath = commandLine.getOptionValue("q"); - + final String queryPath = commandLine.getOptionValue("q"); + final String outputPath = commandLine.getOptionValue("o"); + final double fractionToUse = Double.parseDouble(commandLine.getOptionValue("f")); + System.out.println("Genotype data: " + genotypePath); System.out.println("Genotype data format: " + genotypeFormat); System.out.println("Expression data: " + expressionDataPath); System.out.println("Covariate data: " + covariateDataPath); System.out.println("Gte data: " + gtePath); - System.out.println("Query: " + gueryPath); - + System.out.println("Query: " + queryPath); + System.out.println("Output: " + outputPath); + System.out.println("Outer fractions to use: " + fractionToUse); + final RandomAccessGenotypeData genotypeData; try { @@ -186,32 +208,65 @@ public static void main(String[] args) throws IOException { } System.out.println("Genotype data loaded for " + genotypeData.getSampleNames().length + " individuals"); - + final 
DoubleMatrixDataset expressionData = DoubleMatrixDataset.loadDoubleTextData(expressionDataPath, "\t"); - + System.out.println("Loaded expression data for: " + expressionData.rows() + " genes and " + expressionData.columns() + " individuals"); - + final DoubleMatrixDataset covariatesData = DoubleMatrixDataset.loadDoubleTextData(covariateDataPath, "\t"); - + System.out.println("Loaded covariate data for: " + expressionData.rows() + " genes and " + expressionData.columns() + " individuals"); - + final BidiMap gte = loadGte(gtePath); + + InteractionAnalysisDetermineDirection directionTool = new InteractionAnalysisDetermineDirection(genotypeData, expressionData, covariatesData, gte); + + CSVReader reader = new CSVReader(new FileReader(queryPath), '\t', '\0', 1); + CSVWriter writer = new CSVWriter(new FileWriter(outputPath), '\t', CSVWriter.NO_QUOTE_CHARACTER); + + String[] outputLine = new String[5]; + + String[] nextLine; + while ((nextLine = reader.readNext()) != null) { + final String variant = nextLine[0]; + final String gene = nextLine[1]; + final String covariate = nextLine[2]; + final Allele assessedAllele = Allele.create(nextLine[3]); + + final double direction = directionTool.calculateEffectDifference(variant, gene, covariate, assessedAllele, fractionToUse); + + int c = 0; + outputLine[c++] = variant; + outputLine[c++] = gene; + outputLine[c++] = covariate; + outputLine[c++] = assessedAllele.getAlleleAsString(); + outputLine[c++] = String.valueOf(direction); + writer.writeNext(outputLine); + + } + writer.close(); + reader.close(); + System.out.println("Done"); + } - + private static BidiMap loadGte(String gtePath) throws IOException { - + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(gtePath), "UTF-8")); - + String line; - + BidiMap gte = new DualHashBidiMap(); - + while ((line = reader.readLine()) != null) { String[] elements = StringUtils.split(line, '\t'); + if (elements.length != 2) { + throw new 
RuntimeException("Error in GTE file line: " + line); + } gte.put(elements[0], elements[1]); } - + return gte; } From eafc76fb44470b55f5701fdfd2db5fab598baedb Mon Sep 17 00:00:00 2001 From: Bonder-MJ Date: Tue, 5 May 2015 22:56:39 +0200 Subject: [PATCH 041/143] hiC annotator --- .../util/HighCTransQTLAnnotator.java | 457 ++++++++++++++++++ 1 file changed, 457 insertions(+) create mode 100644 eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HighCTransQTLAnnotator.java diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HighCTransQTLAnnotator.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HighCTransQTLAnnotator.java new file mode 100644 index 000000000..1f62edc5b --- /dev/null +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HighCTransQTLAnnotator.java @@ -0,0 +1,457 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ +package eqtlmappingpipeline.util; + +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.regex.Pattern; +import org.apache.commons.lang3.StringUtils; +import umcg.genetica.containers.Pair; +import umcg.genetica.io.text.TextFile; +import umcg.genetica.io.trityper.EQTL; +import umcg.genetica.io.trityper.QTLTextFile; + +/** + * + * @author MarcJan + */ +class HighCTransQTLAnnotator { + + private static final Pattern SPLIT_TAB = Pattern.compile("\t"); + + public static void main(String[] args) throws IOException { + //"D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\eQTLsFDR0.05-ProbeLevel_BsFiltered_And_Filtered.txt" + + String QTLfile = 
"D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\eQTLsFDR0.05-ProbeLevel_BsFiltered&Filtered.txt"; + String proxyfile = "D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\proxiesMeQTLSnps.txt"; + String QTLoutfile = "D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\eQTLsFDR0.05-ProbeLevel_BsFiltered&Filtered_HiC_LD_annotated.txt"; + String folderHighC = "F:\\Contacts\\GM12878_combined_interchromosomal\\"; + String resolution = "1kb"; + String qualityCutOff = "E30"; //0 or E30 + String normMethod = null; //null / KRnorm / SQRTVCnorm / VCnorm + double minValueQuality = 0; + + boolean lowMemMode = true; + + if (!lowMemMode) { + addAnnotationToQTLOutput( + QTLfile, + proxyfile, + folderHighC, + resolution, + qualityCutOff, + normMethod, + minValueQuality, + QTLoutfile); + } else { + addAnnotationToQTLOutputLowMem( + QTLfile, + proxyfile, + folderHighC, + resolution, + qualityCutOff, + normMethod, + minValueQuality, + QTLoutfile); + } + } + + static void addAnnotationToQTLOutput(String in, String inProxies, String folderHighC, String resolution, String qualityCutOff, String normMethod, double minValue, String out) throws IOException { + QTLTextFile eqtlTextFile = new QTLTextFile(in, QTLTextFile.R); + + ArrayList qtls = eqtlTextFile.readList(); + + if (inProxies != null) { + qtls = includeProxyInfo(qtls, inProxies); + } + + HashMap>> contactBuffer = new HashMap>>(); + //Here we need to make a new Type to store the potentialy inflated files. 
+ TextFile outWriter = new TextFile(out, TextFile.W); + for (EQTL eqtl : qtls) { + String chrProbe = String.valueOf(eqtl.getProbeChr()); + String chrSnp = String.valueOf(eqtl.getRsChr()); + +// System.out.println(chrProbe+"\t"+chrSnp); + if (chrProbe.equals(chrSnp)) { + //Here we need to check how to normalize and treat intra-chromosomal data. + continue; + } + + int posChrSmaller; + int posChrLarger; + + LinkedHashSet> interestRegions = null; + if (Integer.parseInt(chrProbe) < Integer.parseInt(chrSnp)) { + posChrSmaller = eqtl.getProbeChrPos(); + posChrLarger = eqtl.getRsChrPos(); + if (contactBuffer.containsKey("chr" + chrProbe + "_chr" + chrSnp)) { + interestRegions = contactBuffer.get("chr" + chrProbe + "_chr" + chrSnp); + } else { + String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrProbe + "_chr" + chrSnp + "\\MAPQG" + qualityCutOff; + String fileToReads = baseName + "\\chr" + chrProbe + "_" + chrSnp + "_1kb.RAWobserved"; +// System.out.println("Reading: " + fileToReads); + if (normMethod == null) { + interestRegions = readRawInterContactInformation(fileToReads, minValue); + } else { + interestRegions = readNormalizedInterContactInformation(fileToReads, baseName, normMethod, chrProbe, chrSnp, resolution, minValue); + } + contactBuffer.put("chr" + chrProbe + "_chr" + chrSnp, interestRegions); + } + } else { + posChrSmaller = eqtl.getRsChrPos(); + posChrLarger = eqtl.getProbeChrPos(); + if (contactBuffer.containsKey("chr" + chrSnp + "_chr" + chrProbe)) { + interestRegions = contactBuffer.get("chr" + chrSnp + "_chr" + chrProbe); + } else { + String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrSnp + "_chr" + chrProbe + "\\MAPQG" + qualityCutOff; + String fileToReads = baseName + "\\chr" + chrSnp + "_" + chrProbe + "_1kb.RAWobserved"; +// System.out.println("Reading: " + fileToReads); + if (normMethod == null) { + interestRegions = readRawInterContactInformation(fileToReads, minValue); + } else { 
+ interestRegions = readNormalizedInterContactInformation(fileToReads, baseName, normMethod, chrSnp, chrProbe, resolution, minValue); + } + contactBuffer.put("chr" + chrSnp + "_chr" + chrProbe, interestRegions); + } + } + + if (determineContact(posChrSmaller, posChrLarger, interestRegions, getNumericResolution(resolution))) { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); + } else { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); + } + } + outWriter.close(); + } + + static void addAnnotationToQTLOutputLowMem(String in, String inProxies, String folderHighC, String resolution, String qualityCutOff, String normMethod, double minValue, String out) throws IOException { + QTLTextFile eqtlTextFile = new QTLTextFile(in, QTLTextFile.R); + + ArrayList qtls = eqtlTextFile.readList(); + + if (inProxies != null) { + qtls = includeProxyInfo(qtls, inProxies); + } + + //Here we need to make a new Type to store the potentialy inflated files. + TextFile outWriter = new TextFile(out, TextFile.W); + for (EQTL eqtl : qtls) { + String chrProbe = String.valueOf(eqtl.getProbeChr()); + String chrSnp = String.valueOf(eqtl.getRsChr()); + +// System.out.println(chrProbe+"\t"+chrSnp); + if (chrProbe.equals(chrSnp)) { + //Here we need to check how to normalize and treat intra-chromosomal data. 
+ continue; + } + + int posChrSmaller; + int posChrLarger; + + if (Integer.parseInt(chrProbe) < Integer.parseInt(chrSnp)) { + posChrSmaller = eqtl.getProbeChrPos(); + posChrLarger = eqtl.getRsChrPos(); + String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrProbe + "_chr" + chrSnp + "\\MAPQG" + qualityCutOff; + String fileToReads = baseName + "\\chr" + chrProbe + "_" + chrSnp + "_1kb.RAWobserved"; +// System.out.println("Reading: " + fileToReads); + if (normMethod == null) { + if (readRawInterContactInformationLowMem(fileToReads, minValue, posChrSmaller, posChrLarger, resolution)) { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); + } else { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); + } + } else { + if (readNormalizedInterContactInformationLowMem(fileToReads, baseName, normMethod, chrProbe, chrSnp, posChrSmaller, posChrLarger, resolution, minValue)) { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); + } else { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); + } + } + + } else { + posChrSmaller = eqtl.getRsChrPos(); + posChrLarger = eqtl.getProbeChrPos(); + + String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrSnp + "_chr" + chrProbe + "\\MAPQG" + qualityCutOff; + String fileToReads = baseName + "\\chr" + chrSnp + "_" + chrProbe + "_1kb.RAWobserved"; +// System.out.println("Reading: " + fileToReads); + if (normMethod == null) { + if (readRawInterContactInformationLowMem(fileToReads, minValue, posChrSmaller, posChrLarger, resolution)) { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); + } else { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); + } + } else { + if (readNormalizedInterContactInformationLowMem(fileToReads, baseName, normMethod, chrSnp, chrProbe, posChrSmaller, posChrLarger, resolution, minValue)) { + 
outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); + } else { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); + } + } + } + } + outWriter.close(); + } + + private static ArrayList includeProxyInfo(ArrayList qtls, String inProxies) throws IOException { + ArrayList newQtlList = new ArrayList(); + + TextFile readProxies = new TextFile(inProxies, TextFile.R); + + String line = readProxies.readLine(); +// System.out.println(line); + while ((line = readProxies.readLine()) != null) { +// System.out.println(line); + String[] lineParts = SPLIT_TAB.split(line); + String chr = lineParts[4]; + int chrPos = Integer.parseInt(lineParts[5]); + int chrNewPos = Integer.parseInt(lineParts[8]); + for (EQTL e : qtls) { + if (String.valueOf(e.getRsChr()).equals(chr) && e.getRsChrPos() == chrPos) { + EQTL newQtl = new EQTL(); + newQtl.setProbe(e.getProbe()); + newQtl.setProbeChr(e.getProbeChr()); + newQtl.setProbeChrPos(e.getProbeChrPos()); + + newQtl.setRsName(e.getRsName() + "-" + lineParts[1]); + newQtl.setRsChr(e.getRsChr()); + newQtl.setRsChrPos(chrNewPos); + newQtlList.add(newQtl); + } + } + } + + for (EQTL e : qtls) { + newQtlList.add(e); + } + + return newQtlList; + } + + //For example, here is a line from the 5kb chr1 MAPQGE30 raw observed contact matrix (GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.RAWobserved): +//40000000 40100000 59.0 + private static LinkedHashSet> readRawInterContactInformation(String fileToReads, double minContactValue) throws IOException { + LinkedHashSet> chrContactInfo = new LinkedHashSet>(); + + BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToReads), "UTF-8")); + + String row; + + while ((row = input.readLine()) != null) { + String[] parts = StringUtils.split(row, '\t'); + + int posChr1 = Integer.parseInt(parts[0]); + int posChr2 = Integer.parseInt(parts[1]); + double contact = Double.parseDouble(parts[2]); + if (contact >= 
minContactValue) { + chrContactInfo.add(new Pair(posChr1, posChr2)); + } + } + input.close(); + return chrContactInfo; + + } + + //For example, here is a line from the 5kb chr1 MAPQGE30 raw observed contact matrix (GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.RAWobserved): + //40000000 40100000 59.0 + //To normalize this entry using the KR normalization vector, one would divide 59.0 by the 8001st line ((40000000/5000)+1=8001) and the 8021st line ((40100000/5000)+1=8021) + //of GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.KRnorm. The 8001st line of the KR norm file is 1.2988778370674694; + //The 8021st line of the KR norm file is 1.6080499717941548. So the corresponding KR normalized entry for the entry above is 59.0/(1.2988778370674694*1.6080499717941548) + //or 28.24776973966101. + //If the KR normalization vector file is empty or all NaNs, then the KR algorithm didn’t converge on that particular matrix (likely due to sparsity of the matrix). + private static LinkedHashSet> readNormalizedInterContactInformation(String fileToRead, String baseName, String normMethod, String chrSmaller, String chrLarger, String resolution, double minContactValue) throws IOException { + + //ReadIn normalization chr1 + TextFile inputNormChr1 = new TextFile(baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod, TextFile.R); + ArrayList normFactorSmallerChr = inputNormChr1.readAsArrayList(); + inputNormChr1.close(); + + //ReadIn normalization chr2 + TextFile inputNormChr2 = new TextFile(baseName + "\\chr" + chrLarger + "_" + resolution + "." 
+ normMethod, TextFile.R); + ArrayList normFactorLargerChr = inputNormChr2.readAsArrayList(); + + inputNormChr2.close(); + + LinkedHashSet> chrContactInfo = new LinkedHashSet>(); + + BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToRead), "UTF-8")); + + String row; + + while ((row = input.readLine()) != null) { + String[] parts = StringUtils.split(row, '\t'); + + int posChr1 = Integer.parseInt(parts[0]); + int posChr2 = Integer.parseInt(parts[1]); + + String factor1Base = normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1); + String factor2Base = normFactorLargerChr.get((posChr2 / getNumericResolution(resolution)) + 1); + + double factor1; + double factor2; + + if (StringUtils.isNumeric(factor1Base) && StringUtils.isNumeric(factor2Base)) { + factor1 = Double.parseDouble(factor1Base); + factor2 = Double.parseDouble(factor2Base); + + double contact = Double.parseDouble(parts[2]) / (factor1 * factor2); + if (contact >= minContactValue) { + chrContactInfo.add(new Pair(posChr1, posChr2)); + } + + } + } + input.close(); + return chrContactInfo; + } + + private static boolean determineContact(int posChrSmaller, int posChrLarger, LinkedHashSet> interestRegions, int resolution) { + //Determine bin1 + //Starts counting at 0-resulution + int bin1 = posChrSmaller - (posChrSmaller % resolution); + + //Determine bin2 + int bin2 = posChrLarger - (posChrLarger % resolution); + + //See if bin1 and bin2 are in the file. 
+ boolean contact = false; + + for (Pair entry : interestRegions) { + if (entry.getLeft() == bin1) { + if (entry.getRight() == bin2) { + contact = true; + break; + } else if (entry.getRight() > bin2) { + break; + } + } else if (entry.getLeft() > bin1) { + break; + } + } + return contact; + } + + private static boolean readRawInterContactInformationLowMem(String fileToReads, double minValue, int posChrSmaller, int posChrLarger, String resolution) throws IOException { + //Determine bin1 + //Starts counting at 0-resulution + int bin1 = posChrSmaller - (posChrSmaller % getNumericResolution(resolution)); + + //Determine bin2 + int bin2 = posChrLarger - (posChrLarger % getNumericResolution(resolution)); + + //See if bin1 and bin2 are in the file. + boolean contactFound = false; + + BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToReads), "UTF-8")); + + String row; + + while ((row = input.readLine()) != null) { + String[] parts = StringUtils.split(row, '\t'); + + int posChr1 = Integer.parseInt(parts[0]); + if (posChr1 == bin1) { + int posChr2 = Integer.parseInt(parts[1]); + if (posChr2 == bin2) { + double contact = Double.parseDouble(parts[2]); + if (contact >= minValue) { + contactFound = true; + } + break; + } else if (posChr2 > bin2) { + break; + } + } else if (posChr1 > bin1) { + break; + } + + } + input.close(); + return contactFound; + } + + private static boolean readNormalizedInterContactInformationLowMem(String fileToRead, String baseName, String normMethod, String chrSmaller, String chrLarger, int posChrSmaller, int posChrLarger, String resolution, double minValue) throws IOException { + //Determine bin1 + //Starts counting at 0-resulution + int bin1 = posChrSmaller - (posChrSmaller % getNumericResolution(resolution)); + + //Determine bin2 + int bin2 = posChrLarger - (posChrLarger % getNumericResolution(resolution)); + + //ReadIn normalization chr1 + TextFile inputNormChr1 = new TextFile(baseName + "\\chr" + chrSmaller + 
"_" + resolution + "." + normMethod, TextFile.R); + ArrayList normFactorSmallerChr = inputNormChr1.readAsArrayList(); + inputNormChr1.close(); + + //ReadIn normalization chr2 + TextFile inputNormChr2 = new TextFile(baseName + "\\chr" + chrLarger + "_" + resolution + "." + normMethod, TextFile.R); + ArrayList normFactorLargerChr = inputNormChr2.readAsArrayList(); + + inputNormChr2.close(); + + LinkedHashSet> chrContactInfo = new LinkedHashSet>(); + + BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToRead), "UTF-8")); + + String row; + + //See if bin1 and bin2 are in the file. + boolean contactFound = false; + + while ((row = input.readLine()) != null) { + String[] parts = StringUtils.split(row, '\t'); + + int posChr1 = Integer.parseInt(parts[0]); + if (posChr1 == bin1) { + int posChr2 = Integer.parseInt(parts[1]); + if (posChr2 == bin2) { + + String factor1Base = normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1); + String factor2Base = normFactorLargerChr.get((posChr2 / getNumericResolution(resolution)) + 1); + + double factor1; + double factor2; + + if (StringUtils.isNumeric(factor1Base) && StringUtils.isNumeric(factor2Base)) { + factor1 = Double.parseDouble(factor1Base); + factor2 = Double.parseDouble(factor2Base); + + double contact = Double.parseDouble(parts[2]) / (factor1 * factor2); + if (contact >= minValue) { + contactFound = true; + } + break; + } + + } else if (posChr2 > bin2) { + break; + } + } else if (posChr1 > bin1) { + break; + } + + } + input.close(); + return contactFound; + } + + private static int getNumericResolution(String resolution) { + if (resolution.equals("1kb")) { + return 1000; + } else if (resolution.equals("5kb")) { + return 5000; + } else { + System.out.println("\nError in resolution setting!\n"); + System.exit(-1); + } + return 0; + } +} From cbdd92de787afa71ec107556ed8125a40f89f46f Mon Sep 17 00:00:00 2001 From: Bonder-MJ Date: Wed, 6 May 2015 08:51:37 +0200 Subject: 
[PATCH 042/143] Revert "hiC annotator" This reverts commit af134d6dbc8277d4695dc0a7324913a7e2337c9b. --- .../util/HighCTransQTLAnnotator.java | 457 ------------------ 1 file changed, 457 deletions(-) delete mode 100644 eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HighCTransQTLAnnotator.java diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HighCTransQTLAnnotator.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HighCTransQTLAnnotator.java deleted file mode 100644 index 1f62edc5b..000000000 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HighCTransQTLAnnotator.java +++ /dev/null @@ -1,457 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ -package eqtlmappingpipeline.util; - -import java.io.BufferedReader; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.LinkedHashSet; -import java.util.regex.Pattern; -import org.apache.commons.lang3.StringUtils; -import umcg.genetica.containers.Pair; -import umcg.genetica.io.text.TextFile; -import umcg.genetica.io.trityper.EQTL; -import umcg.genetica.io.trityper.QTLTextFile; - -/** - * - * @author MarcJan - */ -class HighCTransQTLAnnotator { - - private static final Pattern SPLIT_TAB = Pattern.compile("\t"); - - public static void main(String[] args) throws IOException { - //"D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\eQTLsFDR0.05-ProbeLevel_BsFiltered_And_Filtered.txt" - - String QTLfile = "D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\eQTLsFDR0.05-ProbeLevel_BsFiltered&Filtered.txt"; - String proxyfile = 
"D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\proxiesMeQTLSnps.txt"; - String QTLoutfile = "D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\eQTLsFDR0.05-ProbeLevel_BsFiltered&Filtered_HiC_LD_annotated.txt"; - String folderHighC = "F:\\Contacts\\GM12878_combined_interchromosomal\\"; - String resolution = "1kb"; - String qualityCutOff = "E30"; //0 or E30 - String normMethod = null; //null / KRnorm / SQRTVCnorm / VCnorm - double minValueQuality = 0; - - boolean lowMemMode = true; - - if (!lowMemMode) { - addAnnotationToQTLOutput( - QTLfile, - proxyfile, - folderHighC, - resolution, - qualityCutOff, - normMethod, - minValueQuality, - QTLoutfile); - } else { - addAnnotationToQTLOutputLowMem( - QTLfile, - proxyfile, - folderHighC, - resolution, - qualityCutOff, - normMethod, - minValueQuality, - QTLoutfile); - } - } - - static void addAnnotationToQTLOutput(String in, String inProxies, String folderHighC, String resolution, String qualityCutOff, String normMethod, double minValue, String out) throws IOException { - QTLTextFile eqtlTextFile = new QTLTextFile(in, QTLTextFile.R); - - ArrayList qtls = eqtlTextFile.readList(); - - if (inProxies != null) { - qtls = includeProxyInfo(qtls, inProxies); - } - - HashMap>> contactBuffer = new HashMap>>(); - //Here we need to make a new Type to store the potentialy inflated files. - TextFile outWriter = new TextFile(out, TextFile.W); - for (EQTL eqtl : qtls) { - String chrProbe = String.valueOf(eqtl.getProbeChr()); - String chrSnp = String.valueOf(eqtl.getRsChr()); - -// System.out.println(chrProbe+"\t"+chrSnp); - if (chrProbe.equals(chrSnp)) { - //Here we need to check how to normalize and treat intra-chromosomal data. 
- continue; - } - - int posChrSmaller; - int posChrLarger; - - LinkedHashSet> interestRegions = null; - if (Integer.parseInt(chrProbe) < Integer.parseInt(chrSnp)) { - posChrSmaller = eqtl.getProbeChrPos(); - posChrLarger = eqtl.getRsChrPos(); - if (contactBuffer.containsKey("chr" + chrProbe + "_chr" + chrSnp)) { - interestRegions = contactBuffer.get("chr" + chrProbe + "_chr" + chrSnp); - } else { - String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrProbe + "_chr" + chrSnp + "\\MAPQG" + qualityCutOff; - String fileToReads = baseName + "\\chr" + chrProbe + "_" + chrSnp + "_1kb.RAWobserved"; -// System.out.println("Reading: " + fileToReads); - if (normMethod == null) { - interestRegions = readRawInterContactInformation(fileToReads, minValue); - } else { - interestRegions = readNormalizedInterContactInformation(fileToReads, baseName, normMethod, chrProbe, chrSnp, resolution, minValue); - } - contactBuffer.put("chr" + chrProbe + "_chr" + chrSnp, interestRegions); - } - } else { - posChrSmaller = eqtl.getRsChrPos(); - posChrLarger = eqtl.getProbeChrPos(); - if (contactBuffer.containsKey("chr" + chrSnp + "_chr" + chrProbe)) { - interestRegions = contactBuffer.get("chr" + chrSnp + "_chr" + chrProbe); - } else { - String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrSnp + "_chr" + chrProbe + "\\MAPQG" + qualityCutOff; - String fileToReads = baseName + "\\chr" + chrSnp + "_" + chrProbe + "_1kb.RAWobserved"; -// System.out.println("Reading: " + fileToReads); - if (normMethod == null) { - interestRegions = readRawInterContactInformation(fileToReads, minValue); - } else { - interestRegions = readNormalizedInterContactInformation(fileToReads, baseName, normMethod, chrSnp, chrProbe, resolution, minValue); - } - contactBuffer.put("chr" + chrSnp + "_chr" + chrProbe, interestRegions); - } - } - - if (determineContact(posChrSmaller, posChrLarger, interestRegions, getNumericResolution(resolution))) { - 
outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); - } else { - outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); - } - } - outWriter.close(); - } - - static void addAnnotationToQTLOutputLowMem(String in, String inProxies, String folderHighC, String resolution, String qualityCutOff, String normMethod, double minValue, String out) throws IOException { - QTLTextFile eqtlTextFile = new QTLTextFile(in, QTLTextFile.R); - - ArrayList qtls = eqtlTextFile.readList(); - - if (inProxies != null) { - qtls = includeProxyInfo(qtls, inProxies); - } - - //Here we need to make a new Type to store the potentialy inflated files. - TextFile outWriter = new TextFile(out, TextFile.W); - for (EQTL eqtl : qtls) { - String chrProbe = String.valueOf(eqtl.getProbeChr()); - String chrSnp = String.valueOf(eqtl.getRsChr()); - -// System.out.println(chrProbe+"\t"+chrSnp); - if (chrProbe.equals(chrSnp)) { - //Here we need to check how to normalize and treat intra-chromosomal data. 
- continue; - } - - int posChrSmaller; - int posChrLarger; - - if (Integer.parseInt(chrProbe) < Integer.parseInt(chrSnp)) { - posChrSmaller = eqtl.getProbeChrPos(); - posChrLarger = eqtl.getRsChrPos(); - String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrProbe + "_chr" + chrSnp + "\\MAPQG" + qualityCutOff; - String fileToReads = baseName + "\\chr" + chrProbe + "_" + chrSnp + "_1kb.RAWobserved"; -// System.out.println("Reading: " + fileToReads); - if (normMethod == null) { - if (readRawInterContactInformationLowMem(fileToReads, minValue, posChrSmaller, posChrLarger, resolution)) { - outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); - } else { - outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); - } - } else { - if (readNormalizedInterContactInformationLowMem(fileToReads, baseName, normMethod, chrProbe, chrSnp, posChrSmaller, posChrLarger, resolution, minValue)) { - outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); - } else { - outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); - } - } - - } else { - posChrSmaller = eqtl.getRsChrPos(); - posChrLarger = eqtl.getProbeChrPos(); - - String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrSnp + "_chr" + chrProbe + "\\MAPQG" + qualityCutOff; - String fileToReads = baseName + "\\chr" + chrSnp + "_" + chrProbe + "_1kb.RAWobserved"; -// System.out.println("Reading: " + fileToReads); - if (normMethod == null) { - if (readRawInterContactInformationLowMem(fileToReads, minValue, posChrSmaller, posChrLarger, resolution)) { - outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); - } else { - outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); - } - } else { - if (readNormalizedInterContactInformationLowMem(fileToReads, baseName, normMethod, chrSnp, chrProbe, posChrSmaller, posChrLarger, resolution, minValue)) { - 
outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); - } else { - outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); - } - } - } - } - outWriter.close(); - } - - private static ArrayList includeProxyInfo(ArrayList qtls, String inProxies) throws IOException { - ArrayList newQtlList = new ArrayList(); - - TextFile readProxies = new TextFile(inProxies, TextFile.R); - - String line = readProxies.readLine(); -// System.out.println(line); - while ((line = readProxies.readLine()) != null) { -// System.out.println(line); - String[] lineParts = SPLIT_TAB.split(line); - String chr = lineParts[4]; - int chrPos = Integer.parseInt(lineParts[5]); - int chrNewPos = Integer.parseInt(lineParts[8]); - for (EQTL e : qtls) { - if (String.valueOf(e.getRsChr()).equals(chr) && e.getRsChrPos() == chrPos) { - EQTL newQtl = new EQTL(); - newQtl.setProbe(e.getProbe()); - newQtl.setProbeChr(e.getProbeChr()); - newQtl.setProbeChrPos(e.getProbeChrPos()); - - newQtl.setRsName(e.getRsName() + "-" + lineParts[1]); - newQtl.setRsChr(e.getRsChr()); - newQtl.setRsChrPos(chrNewPos); - newQtlList.add(newQtl); - } - } - } - - for (EQTL e : qtls) { - newQtlList.add(e); - } - - return newQtlList; - } - - //For example, here is a line from the 5kb chr1 MAPQGE30 raw observed contact matrix (GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.RAWobserved): -//40000000 40100000 59.0 - private static LinkedHashSet> readRawInterContactInformation(String fileToReads, double minContactValue) throws IOException { - LinkedHashSet> chrContactInfo = new LinkedHashSet>(); - - BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToReads), "UTF-8")); - - String row; - - while ((row = input.readLine()) != null) { - String[] parts = StringUtils.split(row, '\t'); - - int posChr1 = Integer.parseInt(parts[0]); - int posChr2 = Integer.parseInt(parts[1]); - double contact = Double.parseDouble(parts[2]); - if (contact >= 
minContactValue) { - chrContactInfo.add(new Pair(posChr1, posChr2)); - } - } - input.close(); - return chrContactInfo; - - } - - //For example, here is a line from the 5kb chr1 MAPQGE30 raw observed contact matrix (GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.RAWobserved): - //40000000 40100000 59.0 - //To normalize this entry using the KR normalization vector, one would divide 59.0 by the 8001st line ((40000000/5000)+1=8001) and the 8021st line ((40100000/5000)+1=8021) - //of GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.KRnorm. The 8001st line of the KR norm file is 1.2988778370674694; - //The 8021st line of the KR norm file is 1.6080499717941548. So the corresponding KR normalized entry for the entry above is 59.0/(1.2988778370674694*1.6080499717941548) - //or 28.24776973966101. - //If the KR normalization vector file is empty or all NaNs, then the KR algorithm didn’t converge on that particular matrix (likely due to sparsity of the matrix). - private static LinkedHashSet> readNormalizedInterContactInformation(String fileToRead, String baseName, String normMethod, String chrSmaller, String chrLarger, String resolution, double minContactValue) throws IOException { - - //ReadIn normalization chr1 - TextFile inputNormChr1 = new TextFile(baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod, TextFile.R); - ArrayList normFactorSmallerChr = inputNormChr1.readAsArrayList(); - inputNormChr1.close(); - - //ReadIn normalization chr2 - TextFile inputNormChr2 = new TextFile(baseName + "\\chr" + chrLarger + "_" + resolution + "." 
+ normMethod, TextFile.R); - ArrayList normFactorLargerChr = inputNormChr2.readAsArrayList(); - - inputNormChr2.close(); - - LinkedHashSet> chrContactInfo = new LinkedHashSet>(); - - BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToRead), "UTF-8")); - - String row; - - while ((row = input.readLine()) != null) { - String[] parts = StringUtils.split(row, '\t'); - - int posChr1 = Integer.parseInt(parts[0]); - int posChr2 = Integer.parseInt(parts[1]); - - String factor1Base = normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1); - String factor2Base = normFactorLargerChr.get((posChr2 / getNumericResolution(resolution)) + 1); - - double factor1; - double factor2; - - if (StringUtils.isNumeric(factor1Base) && StringUtils.isNumeric(factor2Base)) { - factor1 = Double.parseDouble(factor1Base); - factor2 = Double.parseDouble(factor2Base); - - double contact = Double.parseDouble(parts[2]) / (factor1 * factor2); - if (contact >= minContactValue) { - chrContactInfo.add(new Pair(posChr1, posChr2)); - } - - } - } - input.close(); - return chrContactInfo; - } - - private static boolean determineContact(int posChrSmaller, int posChrLarger, LinkedHashSet> interestRegions, int resolution) { - //Determine bin1 - //Starts counting at 0-resulution - int bin1 = posChrSmaller - (posChrSmaller % resolution); - - //Determine bin2 - int bin2 = posChrLarger - (posChrLarger % resolution); - - //See if bin1 and bin2 are in the file. 
- boolean contact = false; - - for (Pair entry : interestRegions) { - if (entry.getLeft() == bin1) { - if (entry.getRight() == bin2) { - contact = true; - break; - } else if (entry.getRight() > bin2) { - break; - } - } else if (entry.getLeft() > bin1) { - break; - } - } - return contact; - } - - private static boolean readRawInterContactInformationLowMem(String fileToReads, double minValue, int posChrSmaller, int posChrLarger, String resolution) throws IOException { - //Determine bin1 - //Starts counting at 0-resulution - int bin1 = posChrSmaller - (posChrSmaller % getNumericResolution(resolution)); - - //Determine bin2 - int bin2 = posChrLarger - (posChrLarger % getNumericResolution(resolution)); - - //See if bin1 and bin2 are in the file. - boolean contactFound = false; - - BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToReads), "UTF-8")); - - String row; - - while ((row = input.readLine()) != null) { - String[] parts = StringUtils.split(row, '\t'); - - int posChr1 = Integer.parseInt(parts[0]); - if (posChr1 == bin1) { - int posChr2 = Integer.parseInt(parts[1]); - if (posChr2 == bin2) { - double contact = Double.parseDouble(parts[2]); - if (contact >= minValue) { - contactFound = true; - } - break; - } else if (posChr2 > bin2) { - break; - } - } else if (posChr1 > bin1) { - break; - } - - } - input.close(); - return contactFound; - } - - private static boolean readNormalizedInterContactInformationLowMem(String fileToRead, String baseName, String normMethod, String chrSmaller, String chrLarger, int posChrSmaller, int posChrLarger, String resolution, double minValue) throws IOException { - //Determine bin1 - //Starts counting at 0-resulution - int bin1 = posChrSmaller - (posChrSmaller % getNumericResolution(resolution)); - - //Determine bin2 - int bin2 = posChrLarger - (posChrLarger % getNumericResolution(resolution)); - - //ReadIn normalization chr1 - TextFile inputNormChr1 = new TextFile(baseName + "\\chr" + chrSmaller + 
"_" + resolution + "." + normMethod, TextFile.R); - ArrayList normFactorSmallerChr = inputNormChr1.readAsArrayList(); - inputNormChr1.close(); - - //ReadIn normalization chr2 - TextFile inputNormChr2 = new TextFile(baseName + "\\chr" + chrLarger + "_" + resolution + "." + normMethod, TextFile.R); - ArrayList normFactorLargerChr = inputNormChr2.readAsArrayList(); - - inputNormChr2.close(); - - LinkedHashSet> chrContactInfo = new LinkedHashSet>(); - - BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToRead), "UTF-8")); - - String row; - - //See if bin1 and bin2 are in the file. - boolean contactFound = false; - - while ((row = input.readLine()) != null) { - String[] parts = StringUtils.split(row, '\t'); - - int posChr1 = Integer.parseInt(parts[0]); - if (posChr1 == bin1) { - int posChr2 = Integer.parseInt(parts[1]); - if (posChr2 == bin2) { - - String factor1Base = normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1); - String factor2Base = normFactorLargerChr.get((posChr2 / getNumericResolution(resolution)) + 1); - - double factor1; - double factor2; - - if (StringUtils.isNumeric(factor1Base) && StringUtils.isNumeric(factor2Base)) { - factor1 = Double.parseDouble(factor1Base); - factor2 = Double.parseDouble(factor2Base); - - double contact = Double.parseDouble(parts[2]) / (factor1 * factor2); - if (contact >= minValue) { - contactFound = true; - } - break; - } - - } else if (posChr2 > bin2) { - break; - } - } else if (posChr1 > bin1) { - break; - } - - } - input.close(); - return contactFound; - } - - private static int getNumericResolution(String resolution) { - if (resolution.equals("1kb")) { - return 1000; - } else if (resolution.equals("5kb")) { - return 5000; - } else { - System.out.println("\nError in resolution setting!\n"); - System.exit(-1); - } - return 0; - } -} From fa8f519b737c3703a000019563c5ad86866be8cc Mon Sep 17 00:00:00 2001 From: Bonder-MJ Date: Wed, 6 May 2015 10:05:20 +0200 Subject: 
[PATCH 043/143] Revert "Revert "hiC annotator"" This reverts commit a03c2e09640bb3a7eba26f3c6d04e1a4e364da95. --- .../util/HighCTransQTLAnnotator.java | 457 ++++++++++++++++++ 1 file changed, 457 insertions(+) create mode 100644 eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HighCTransQTLAnnotator.java diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HighCTransQTLAnnotator.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HighCTransQTLAnnotator.java new file mode 100644 index 000000000..1f62edc5b --- /dev/null +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HighCTransQTLAnnotator.java @@ -0,0 +1,457 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ +package eqtlmappingpipeline.util; + +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.regex.Pattern; +import org.apache.commons.lang3.StringUtils; +import umcg.genetica.containers.Pair; +import umcg.genetica.io.text.TextFile; +import umcg.genetica.io.trityper.EQTL; +import umcg.genetica.io.trityper.QTLTextFile; + +/** + * + * @author MarcJan + */ +class HighCTransQTLAnnotator { + + private static final Pattern SPLIT_TAB = Pattern.compile("\t"); + + public static void main(String[] args) throws IOException { + //"D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\eQTLsFDR0.05-ProbeLevel_BsFiltered_And_Filtered.txt" + + String QTLfile = "D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\eQTLsFDR0.05-ProbeLevel_BsFiltered&Filtered.txt"; + String proxyfile = 
"D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\proxiesMeQTLSnps.txt"; + String QTLoutfile = "D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\eQTLsFDR0.05-ProbeLevel_BsFiltered&Filtered_HiC_LD_annotated.txt"; + String folderHighC = "F:\\Contacts\\GM12878_combined_interchromosomal\\"; + String resolution = "1kb"; + String qualityCutOff = "E30"; //0 or E30 + String normMethod = null; //null / KRnorm / SQRTVCnorm / VCnorm + double minValueQuality = 0; + + boolean lowMemMode = true; + + if (!lowMemMode) { + addAnnotationToQTLOutput( + QTLfile, + proxyfile, + folderHighC, + resolution, + qualityCutOff, + normMethod, + minValueQuality, + QTLoutfile); + } else { + addAnnotationToQTLOutputLowMem( + QTLfile, + proxyfile, + folderHighC, + resolution, + qualityCutOff, + normMethod, + minValueQuality, + QTLoutfile); + } + } + + static void addAnnotationToQTLOutput(String in, String inProxies, String folderHighC, String resolution, String qualityCutOff, String normMethod, double minValue, String out) throws IOException { + QTLTextFile eqtlTextFile = new QTLTextFile(in, QTLTextFile.R); + + ArrayList qtls = eqtlTextFile.readList(); + + if (inProxies != null) { + qtls = includeProxyInfo(qtls, inProxies); + } + + HashMap>> contactBuffer = new HashMap>>(); + //Here we need to make a new Type to store the potentialy inflated files. + TextFile outWriter = new TextFile(out, TextFile.W); + for (EQTL eqtl : qtls) { + String chrProbe = String.valueOf(eqtl.getProbeChr()); + String chrSnp = String.valueOf(eqtl.getRsChr()); + +// System.out.println(chrProbe+"\t"+chrSnp); + if (chrProbe.equals(chrSnp)) { + //Here we need to check how to normalize and treat intra-chromosomal data. 
+ continue; + } + + int posChrSmaller; + int posChrLarger; + + LinkedHashSet> interestRegions = null; + if (Integer.parseInt(chrProbe) < Integer.parseInt(chrSnp)) { + posChrSmaller = eqtl.getProbeChrPos(); + posChrLarger = eqtl.getRsChrPos(); + if (contactBuffer.containsKey("chr" + chrProbe + "_chr" + chrSnp)) { + interestRegions = contactBuffer.get("chr" + chrProbe + "_chr" + chrSnp); + } else { + String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrProbe + "_chr" + chrSnp + "\\MAPQG" + qualityCutOff; + String fileToReads = baseName + "\\chr" + chrProbe + "_" + chrSnp + "_1kb.RAWobserved"; +// System.out.println("Reading: " + fileToReads); + if (normMethod == null) { + interestRegions = readRawInterContactInformation(fileToReads, minValue); + } else { + interestRegions = readNormalizedInterContactInformation(fileToReads, baseName, normMethod, chrProbe, chrSnp, resolution, minValue); + } + contactBuffer.put("chr" + chrProbe + "_chr" + chrSnp, interestRegions); + } + } else { + posChrSmaller = eqtl.getRsChrPos(); + posChrLarger = eqtl.getProbeChrPos(); + if (contactBuffer.containsKey("chr" + chrSnp + "_chr" + chrProbe)) { + interestRegions = contactBuffer.get("chr" + chrSnp + "_chr" + chrProbe); + } else { + String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrSnp + "_chr" + chrProbe + "\\MAPQG" + qualityCutOff; + String fileToReads = baseName + "\\chr" + chrSnp + "_" + chrProbe + "_1kb.RAWobserved"; +// System.out.println("Reading: " + fileToReads); + if (normMethod == null) { + interestRegions = readRawInterContactInformation(fileToReads, minValue); + } else { + interestRegions = readNormalizedInterContactInformation(fileToReads, baseName, normMethod, chrSnp, chrProbe, resolution, minValue); + } + contactBuffer.put("chr" + chrSnp + "_chr" + chrProbe, interestRegions); + } + } + + if (determineContact(posChrSmaller, posChrLarger, interestRegions, getNumericResolution(resolution))) { + 
outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); + } else { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); + } + } + outWriter.close(); + } + + static void addAnnotationToQTLOutputLowMem(String in, String inProxies, String folderHighC, String resolution, String qualityCutOff, String normMethod, double minValue, String out) throws IOException { + QTLTextFile eqtlTextFile = new QTLTextFile(in, QTLTextFile.R); + + ArrayList qtls = eqtlTextFile.readList(); + + if (inProxies != null) { + qtls = includeProxyInfo(qtls, inProxies); + } + + //Here we need to make a new Type to store the potentialy inflated files. + TextFile outWriter = new TextFile(out, TextFile.W); + for (EQTL eqtl : qtls) { + String chrProbe = String.valueOf(eqtl.getProbeChr()); + String chrSnp = String.valueOf(eqtl.getRsChr()); + +// System.out.println(chrProbe+"\t"+chrSnp); + if (chrProbe.equals(chrSnp)) { + //Here we need to check how to normalize and treat intra-chromosomal data. 
+ continue; + } + + int posChrSmaller; + int posChrLarger; + + if (Integer.parseInt(chrProbe) < Integer.parseInt(chrSnp)) { + posChrSmaller = eqtl.getProbeChrPos(); + posChrLarger = eqtl.getRsChrPos(); + String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrProbe + "_chr" + chrSnp + "\\MAPQG" + qualityCutOff; + String fileToReads = baseName + "\\chr" + chrProbe + "_" + chrSnp + "_1kb.RAWobserved"; +// System.out.println("Reading: " + fileToReads); + if (normMethod == null) { + if (readRawInterContactInformationLowMem(fileToReads, minValue, posChrSmaller, posChrLarger, resolution)) { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); + } else { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); + } + } else { + if (readNormalizedInterContactInformationLowMem(fileToReads, baseName, normMethod, chrProbe, chrSnp, posChrSmaller, posChrLarger, resolution, minValue)) { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); + } else { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); + } + } + + } else { + posChrSmaller = eqtl.getRsChrPos(); + posChrLarger = eqtl.getProbeChrPos(); + + String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrSnp + "_chr" + chrProbe + "\\MAPQG" + qualityCutOff; + String fileToReads = baseName + "\\chr" + chrSnp + "_" + chrProbe + "_1kb.RAWobserved"; +// System.out.println("Reading: " + fileToReads); + if (normMethod == null) { + if (readRawInterContactInformationLowMem(fileToReads, minValue, posChrSmaller, posChrLarger, resolution)) { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); + } else { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); + } + } else { + if (readNormalizedInterContactInformationLowMem(fileToReads, baseName, normMethod, chrSnp, chrProbe, posChrSmaller, posChrLarger, resolution, minValue)) { + 
outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); + } else { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); + } + } + } + } + outWriter.close(); + } + + private static ArrayList includeProxyInfo(ArrayList qtls, String inProxies) throws IOException { + ArrayList newQtlList = new ArrayList(); + + TextFile readProxies = new TextFile(inProxies, TextFile.R); + + String line = readProxies.readLine(); +// System.out.println(line); + while ((line = readProxies.readLine()) != null) { +// System.out.println(line); + String[] lineParts = SPLIT_TAB.split(line); + String chr = lineParts[4]; + int chrPos = Integer.parseInt(lineParts[5]); + int chrNewPos = Integer.parseInt(lineParts[8]); + for (EQTL e : qtls) { + if (String.valueOf(e.getRsChr()).equals(chr) && e.getRsChrPos() == chrPos) { + EQTL newQtl = new EQTL(); + newQtl.setProbe(e.getProbe()); + newQtl.setProbeChr(e.getProbeChr()); + newQtl.setProbeChrPos(e.getProbeChrPos()); + + newQtl.setRsName(e.getRsName() + "-" + lineParts[1]); + newQtl.setRsChr(e.getRsChr()); + newQtl.setRsChrPos(chrNewPos); + newQtlList.add(newQtl); + } + } + } + + for (EQTL e : qtls) { + newQtlList.add(e); + } + + return newQtlList; + } + + //For example, here is a line from the 5kb chr1 MAPQGE30 raw observed contact matrix (GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.RAWobserved): +//40000000 40100000 59.0 + private static LinkedHashSet> readRawInterContactInformation(String fileToReads, double minContactValue) throws IOException { + LinkedHashSet> chrContactInfo = new LinkedHashSet>(); + + BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToReads), "UTF-8")); + + String row; + + while ((row = input.readLine()) != null) { + String[] parts = StringUtils.split(row, '\t'); + + int posChr1 = Integer.parseInt(parts[0]); + int posChr2 = Integer.parseInt(parts[1]); + double contact = Double.parseDouble(parts[2]); + if (contact >= 
minContactValue) { + chrContactInfo.add(new Pair(posChr1, posChr2)); + } + } + input.close(); + return chrContactInfo; + + } + + //For example, here is a line from the 5kb chr1 MAPQGE30 raw observed contact matrix (GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.RAWobserved): + //40000000 40100000 59.0 + //To normalize this entry using the KR normalization vector, one would divide 59.0 by the 8001st line ((40000000/5000)+1=8001) and the 8021st line ((40100000/5000)+1=8021) + //of GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.KRnorm. The 8001st line of the KR norm file is 1.2988778370674694; + //The 8021st line of the KR norm file is 1.6080499717941548. So the corresponding KR normalized entry for the entry above is 59.0/(1.2988778370674694*1.6080499717941548) + //or 28.24776973966101. + //If the KR normalization vector file is empty or all NaNs, then the KR algorithm didn’t converge on that particular matrix (likely due to sparsity of the matrix). + private static LinkedHashSet> readNormalizedInterContactInformation(String fileToRead, String baseName, String normMethod, String chrSmaller, String chrLarger, String resolution, double minContactValue) throws IOException { + + //ReadIn normalization chr1 + TextFile inputNormChr1 = new TextFile(baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod, TextFile.R); + ArrayList normFactorSmallerChr = inputNormChr1.readAsArrayList(); + inputNormChr1.close(); + + //ReadIn normalization chr2 + TextFile inputNormChr2 = new TextFile(baseName + "\\chr" + chrLarger + "_" + resolution + "." 
+ normMethod, TextFile.R); + ArrayList normFactorLargerChr = inputNormChr2.readAsArrayList(); + + inputNormChr2.close(); + + LinkedHashSet> chrContactInfo = new LinkedHashSet>(); + + BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToRead), "UTF-8")); + + String row; + + while ((row = input.readLine()) != null) { + String[] parts = StringUtils.split(row, '\t'); + + int posChr1 = Integer.parseInt(parts[0]); + int posChr2 = Integer.parseInt(parts[1]); + + String factor1Base = normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1); + String factor2Base = normFactorLargerChr.get((posChr2 / getNumericResolution(resolution)) + 1); + + double factor1; + double factor2; + + if (StringUtils.isNumeric(factor1Base) && StringUtils.isNumeric(factor2Base)) { + factor1 = Double.parseDouble(factor1Base); + factor2 = Double.parseDouble(factor2Base); + + double contact = Double.parseDouble(parts[2]) / (factor1 * factor2); + if (contact >= minContactValue) { + chrContactInfo.add(new Pair(posChr1, posChr2)); + } + + } + } + input.close(); + return chrContactInfo; + } + + private static boolean determineContact(int posChrSmaller, int posChrLarger, LinkedHashSet> interestRegions, int resolution) { + //Determine bin1 + //Starts counting at 0-resulution + int bin1 = posChrSmaller - (posChrSmaller % resolution); + + //Determine bin2 + int bin2 = posChrLarger - (posChrLarger % resolution); + + //See if bin1 and bin2 are in the file. 
+ boolean contact = false; + + for (Pair entry : interestRegions) { + if (entry.getLeft() == bin1) { + if (entry.getRight() == bin2) { + contact = true; + break; + } else if (entry.getRight() > bin2) { + break; + } + } else if (entry.getLeft() > bin1) { + break; + } + } + return contact; + } + + private static boolean readRawInterContactInformationLowMem(String fileToReads, double minValue, int posChrSmaller, int posChrLarger, String resolution) throws IOException { + //Determine bin1 + //Starts counting at 0-resulution + int bin1 = posChrSmaller - (posChrSmaller % getNumericResolution(resolution)); + + //Determine bin2 + int bin2 = posChrLarger - (posChrLarger % getNumericResolution(resolution)); + + //See if bin1 and bin2 are in the file. + boolean contactFound = false; + + BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToReads), "UTF-8")); + + String row; + + while ((row = input.readLine()) != null) { + String[] parts = StringUtils.split(row, '\t'); + + int posChr1 = Integer.parseInt(parts[0]); + if (posChr1 == bin1) { + int posChr2 = Integer.parseInt(parts[1]); + if (posChr2 == bin2) { + double contact = Double.parseDouble(parts[2]); + if (contact >= minValue) { + contactFound = true; + } + break; + } else if (posChr2 > bin2) { + break; + } + } else if (posChr1 > bin1) { + break; + } + + } + input.close(); + return contactFound; + } + + private static boolean readNormalizedInterContactInformationLowMem(String fileToRead, String baseName, String normMethod, String chrSmaller, String chrLarger, int posChrSmaller, int posChrLarger, String resolution, double minValue) throws IOException { + //Determine bin1 + //Starts counting at 0-resulution + int bin1 = posChrSmaller - (posChrSmaller % getNumericResolution(resolution)); + + //Determine bin2 + int bin2 = posChrLarger - (posChrLarger % getNumericResolution(resolution)); + + //ReadIn normalization chr1 + TextFile inputNormChr1 = new TextFile(baseName + "\\chr" + chrSmaller + 
"_" + resolution + "." + normMethod, TextFile.R); + ArrayList normFactorSmallerChr = inputNormChr1.readAsArrayList(); + inputNormChr1.close(); + + //ReadIn normalization chr2 + TextFile inputNormChr2 = new TextFile(baseName + "\\chr" + chrLarger + "_" + resolution + "." + normMethod, TextFile.R); + ArrayList normFactorLargerChr = inputNormChr2.readAsArrayList(); + + inputNormChr2.close(); + + LinkedHashSet> chrContactInfo = new LinkedHashSet>(); + + BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToRead), "UTF-8")); + + String row; + + //See if bin1 and bin2 are in the file. + boolean contactFound = false; + + while ((row = input.readLine()) != null) { + String[] parts = StringUtils.split(row, '\t'); + + int posChr1 = Integer.parseInt(parts[0]); + if (posChr1 == bin1) { + int posChr2 = Integer.parseInt(parts[1]); + if (posChr2 == bin2) { + + String factor1Base = normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1); + String factor2Base = normFactorLargerChr.get((posChr2 / getNumericResolution(resolution)) + 1); + + double factor1; + double factor2; + + if (StringUtils.isNumeric(factor1Base) && StringUtils.isNumeric(factor2Base)) { + factor1 = Double.parseDouble(factor1Base); + factor2 = Double.parseDouble(factor2Base); + + double contact = Double.parseDouble(parts[2]) / (factor1 * factor2); + if (contact >= minValue) { + contactFound = true; + } + break; + } + + } else if (posChr2 > bin2) { + break; + } + } else if (posChr1 > bin1) { + break; + } + + } + input.close(); + return contactFound; + } + + private static int getNumericResolution(String resolution) { + if (resolution.equals("1kb")) { + return 1000; + } else if (resolution.equals("5kb")) { + return 5000; + } else { + System.out.println("\nError in resolution setting!\n"); + System.exit(-1); + } + return 0; + } +} From 08d7e6060915554db71ad8bf22c99706cef8e0c8 Mon Sep 17 00:00:00 2001 From: Bonder-MJ Date: Tue, 19 May 2015 14:36:17 +0200 Subject: 
[PATCH 044/143] Contact sorter --- .../io/chrContacts/InterChrContact.java | 52 +++++++++++++ .../io/chrContacts/SortInterChrContacts.java | 76 +++++++++++++++++++ 2 files changed, 128 insertions(+) create mode 100644 genetica-libraries/src/main/java/umcg/genetica/io/chrContacts/InterChrContact.java create mode 100644 genetica-libraries/src/main/java/umcg/genetica/io/chrContacts/SortInterChrContacts.java diff --git a/genetica-libraries/src/main/java/umcg/genetica/io/chrContacts/InterChrContact.java b/genetica-libraries/src/main/java/umcg/genetica/io/chrContacts/InterChrContact.java new file mode 100644 index 000000000..d1fe25323 --- /dev/null +++ b/genetica-libraries/src/main/java/umcg/genetica/io/chrContacts/InterChrContact.java @@ -0,0 +1,52 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package umcg.genetica.io.chrContacts; + +/** + * + * @author MaKKie_Admin + */ +public class InterChrContact implements Comparable { + + final int chrLocationSmaller; + final int chrLocationLarger; + final double contactValue; + + public InterChrContact(int chrLocSmal, int chrLocLarge, double contactVal) { + this.chrLocationLarger = chrLocLarge; + this.chrLocationSmaller = chrLocSmal; + this.contactValue = contactVal; + } + + @Override + public int compareTo(InterChrContact other) { + if (other.getChrLocationSmaller() > this.chrLocationSmaller) { + return -1; + } else if (other.getChrLocationSmaller() < this.chrLocationSmaller) { + return 1; + } else { + if (other.getChrLocationLarger() > this.chrLocationLarger) { + return -1; + } else if (other.getChrLocationLarger() < this.chrLocationLarger) { + return 1; + } else { + return 0; + } + } + } + + public int getChrLocationSmaller() { + return chrLocationSmaller; + } + + public int getChrLocationLarger() { + return chrLocationLarger; + } + + public double getContactValue() { + return 
contactValue; + } +} diff --git a/genetica-libraries/src/main/java/umcg/genetica/io/chrContacts/SortInterChrContacts.java b/genetica-libraries/src/main/java/umcg/genetica/io/chrContacts/SortInterChrContacts.java new file mode 100644 index 000000000..20875f8e5 --- /dev/null +++ b/genetica-libraries/src/main/java/umcg/genetica/io/chrContacts/SortInterChrContacts.java @@ -0,0 +1,76 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package umcg.genetica.io.chrContacts; + +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.Collections; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.apache.commons.lang3.StringUtils; +import umcg.genetica.io.text.TextFile; + +/** + * + * @author MaKKie_Admin + */ +public class SortInterChrContacts { + + public static void readNonSortedWriteSorted(String fileToReads, String fileToWrite){ + ArrayList contacts = null; + try { + contacts = readRawInterContactInformation(fileToReads); + } catch (IOException ex) { + Logger.getLogger(SortInterChrContacts.class.getName()).log(Level.SEVERE, null, ex); + } + Collections.sort(contacts); + + try { + writeRawInterContactInformation(contacts, fileToWrite); + } catch (IOException ex) { + Logger.getLogger(SortInterChrContacts.class.getName()).log(Level.SEVERE, null, ex); + } + + } + + private static ArrayList readRawInterContactInformation(String fileToReads) throws IOException { + ArrayList chrContactInfo = new ArrayList(); + + BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToReads), "UTF-8")); + + String row; + + while ((row = input.readLine()) != null) { + String[] parts = StringUtils.split(row, '\t'); + + int posChr1 = Integer.parseInt(parts[0]); + int posChr2 = 
Integer.parseInt(parts[1]); + double contact = Double.parseDouble(parts[2]); + chrContactInfo.add(new InterChrContact(posChr1, posChr2, contact)); + } + input.close(); + return chrContactInfo; + + } + + private static ArrayList writeRawInterContactInformation(ArrayList contacts, String fileToWrite) throws IOException { + ArrayList chrContactInfo = new ArrayList(); + + TextFile outWriter = new TextFile(fileToWrite, TextFile.W); + + String row; + + for(InterChrContact contact : contacts){ + outWriter.writeln(contact.getChrLocationSmaller()+"\t"+contact.getChrLocationLarger()+"\t"+contact.getContactValue()); + } + outWriter.close(); + return chrContactInfo; + + } +} From dbb8baf4b3c34d1f07c2ae9605e7ac461c0d3042 Mon Sep 17 00:00:00 2001 From: Bonder-MJ Date: Tue, 19 May 2015 14:36:53 +0200 Subject: [PATCH 045/143] Updates to Contact annotator --- .../util/HighCTransQTLAnnotator.java | 303 +++++------------- 1 file changed, 79 insertions(+), 224 deletions(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HighCTransQTLAnnotator.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HighCTransQTLAnnotator.java index 1f62edc5b..066f68d07 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HighCTransQTLAnnotator.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/HighCTransQTLAnnotator.java @@ -10,10 +10,9 @@ import java.io.InputStreamReader; import java.util.ArrayList; import java.util.HashMap; -import java.util.LinkedHashSet; import java.util.regex.Pattern; import org.apache.commons.lang3.StringUtils; -import umcg.genetica.containers.Pair; +import umcg.genetica.io.Gpio; import umcg.genetica.io.text.TextFile; import umcg.genetica.io.trityper.EQTL; import umcg.genetica.io.trityper.QTLTextFile; @@ -24,13 +23,15 @@ */ class HighCTransQTLAnnotator { + //ToDo Tiedy up the code. To fit the objects made for sorting better. + //Remove hihg-memory part! 
private static final Pattern SPLIT_TAB = Pattern.compile("\t"); public static void main(String[] args) throws IOException { - //"D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\eQTLsFDR0.05-ProbeLevel_BsFiltered_And_Filtered.txt" String QTLfile = "D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\eQTLsFDR0.05-ProbeLevel_BsFiltered&Filtered.txt"; String proxyfile = "D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\proxiesMeQTLSnps.txt"; +// String proxyfile = null; String QTLoutfile = "D:\\WebFolders\\OwnCloud\\AeroFS\\RP3_BIOS_Methylation\\meQTLs\\Trans_Pc22c_CisMeQTLc_meQTLs\\RegressedOut_CisEffects_New\\eQTLsFDR0.05-ProbeLevel_BsFiltered&Filtered_HiC_LD_annotated.txt"; String folderHighC = "F:\\Contacts\\GM12878_combined_interchromosomal\\"; String resolution = "1kb"; @@ -38,29 +39,16 @@ public static void main(String[] args) throws IOException { String normMethod = null; //null / KRnorm / SQRTVCnorm / VCnorm double minValueQuality = 0; - boolean lowMemMode = true; - - if (!lowMemMode) { - addAnnotationToQTLOutput( - QTLfile, - proxyfile, - folderHighC, - resolution, - qualityCutOff, - normMethod, - minValueQuality, - QTLoutfile); - } else { - addAnnotationToQTLOutputLowMem( - QTLfile, - proxyfile, - folderHighC, - resolution, - qualityCutOff, - normMethod, - minValueQuality, - QTLoutfile); - } + addAnnotationToQTLOutput( + QTLfile, + proxyfile, + folderHighC, + resolution, + qualityCutOff, + normMethod, + minValueQuality, + QTLoutfile); + } static void addAnnotationToQTLOutput(String in, String inProxies, String folderHighC, String resolution, String qualityCutOff, String normMethod, double minValue, String out) throws IOException { @@ -72,7 +60,6 @@ static void addAnnotationToQTLOutput(String in, String inProxies, String folderH qtls = 
includeProxyInfo(qtls, inProxies); } - HashMap>> contactBuffer = new HashMap>>(); //Here we need to make a new Type to store the potentialy inflated files. TextFile outWriter = new TextFile(out, TextFile.W); for (EQTL eqtl : qtls) { @@ -87,116 +74,77 @@ static void addAnnotationToQTLOutput(String in, String inProxies, String folderH int posChrSmaller; int posChrLarger; + String ChrSmaller; + String ChrLarger; + int bin1; + int bin2; + String baseName; + String fileToReads; + + HashMap contactBuffer = new HashMap(); - LinkedHashSet> interestRegions = null; if (Integer.parseInt(chrProbe) < Integer.parseInt(chrSnp)) { posChrSmaller = eqtl.getProbeChrPos(); posChrLarger = eqtl.getRsChrPos(); - if (contactBuffer.containsKey("chr" + chrProbe + "_chr" + chrSnp)) { - interestRegions = contactBuffer.get("chr" + chrProbe + "_chr" + chrSnp); - } else { - String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrProbe + "_chr" + chrSnp + "\\MAPQG" + qualityCutOff; - String fileToReads = baseName + "\\chr" + chrProbe + "_" + chrSnp + "_1kb.RAWobserved"; + + ChrSmaller = chrProbe; + ChrLarger = chrSnp; + + //Determine bin1 + //Startscounting at 0-resulution + bin1 = posChrSmaller - (posChrSmaller % getNumericResolution(resolution)); + // System.out.println("\t"+bin1); + //Determine bin2 + bin2 = posChrLarger - (posChrLarger % getNumericResolution(resolution)); + + baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrProbe + "_chr" + chrSnp + "\\MAPQG" + qualityCutOff; + fileToReads = baseName + "\\chr" + chrProbe + "_" + chrSnp + "_" + resolution + ".RAWobserved"; // System.out.println("Reading: " + fileToReads); - if (normMethod == null) { - interestRegions = readRawInterContactInformation(fileToReads, minValue); - } else { - interestRegions = readNormalizedInterContactInformation(fileToReads, baseName, normMethod, chrProbe, chrSnp, resolution, minValue); - } - contactBuffer.put("chr" + chrProbe + "_chr" + chrSnp, 
interestRegions); - } + } else { posChrSmaller = eqtl.getRsChrPos(); posChrLarger = eqtl.getProbeChrPos(); - if (contactBuffer.containsKey("chr" + chrSnp + "_chr" + chrProbe)) { - interestRegions = contactBuffer.get("chr" + chrSnp + "_chr" + chrProbe); - } else { - String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrSnp + "_chr" + chrProbe + "\\MAPQG" + qualityCutOff; - String fileToReads = baseName + "\\chr" + chrSnp + "_" + chrProbe + "_1kb.RAWobserved"; -// System.out.println("Reading: " + fileToReads); - if (normMethod == null) { - interestRegions = readRawInterContactInformation(fileToReads, minValue); - } else { - interestRegions = readNormalizedInterContactInformation(fileToReads, baseName, normMethod, chrSnp, chrProbe, resolution, minValue); - } - contactBuffer.put("chr" + chrSnp + "_chr" + chrProbe, interestRegions); - } - } - if (determineContact(posChrSmaller, posChrLarger, interestRegions, getNumericResolution(resolution))) { - outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); - } else { - outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); - } - } - outWriter.close(); - } + ChrSmaller = chrSnp; + ChrLarger = chrProbe; - static void addAnnotationToQTLOutputLowMem(String in, String inProxies, String folderHighC, String resolution, String qualityCutOff, String normMethod, double minValue, String out) throws IOException { - QTLTextFile eqtlTextFile = new QTLTextFile(in, QTLTextFile.R); - - ArrayList qtls = eqtlTextFile.readList(); - - if (inProxies != null) { - qtls = includeProxyInfo(qtls, inProxies); - } + bin1 = posChrSmaller - (posChrSmaller % getNumericResolution(resolution)); + // System.out.println("\t"+bin1); + //Determine bin2 + bin2 = posChrLarger - (posChrLarger % getNumericResolution(resolution)); - //Here we need to make a new Type to store the potentialy inflated files. 
- TextFile outWriter = new TextFile(out, TextFile.W); - for (EQTL eqtl : qtls) { - String chrProbe = String.valueOf(eqtl.getProbeChr()); - String chrSnp = String.valueOf(eqtl.getRsChr()); + baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrSnp + "_chr" + chrProbe + "\\MAPQG" + qualityCutOff; + fileToReads = baseName + "\\chr" + chrSnp + "_" + chrProbe + "_" + resolution + ".RAWobserved"; +// System.out.println("Reading: " + fileToReads); -// System.out.println(chrProbe+"\t"+chrSnp); - if (chrProbe.equals(chrSnp)) { - //Here we need to check how to normalize and treat intra-chromosomal data. - continue; } - int posChrSmaller; - int posChrLarger; - - if (Integer.parseInt(chrProbe) < Integer.parseInt(chrSnp)) { - posChrSmaller = eqtl.getProbeChrPos(); - posChrLarger = eqtl.getRsChrPos(); - String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrProbe + "_chr" + chrSnp + "\\MAPQG" + qualityCutOff; - String fileToReads = baseName + "\\chr" + chrProbe + "_" + chrSnp + "_1kb.RAWobserved"; -// System.out.println("Reading: " + fileToReads); - if (normMethod == null) { - if (readRawInterContactInformationLowMem(fileToReads, minValue, posChrSmaller, posChrLarger, resolution)) { - outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); - } else { - outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); - } + if (normMethod == null) { + if (contactBuffer.containsKey(ChrSmaller + "_" + ChrLarger + "_" + bin1 + "_" + bin2)) { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t" + contactBuffer.get(ChrSmaller + "_" + ChrLarger + "_" + bin1 + "_" + bin2)); } else { - if (readNormalizedInterContactInformationLowMem(fileToReads, baseName, normMethod, chrProbe, chrSnp, posChrSmaller, posChrLarger, resolution, minValue)) { + if (readRawInterContactInformation(fileToReads, minValue, bin1, bin2)) { outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); + 
contactBuffer.put(ChrSmaller + "_" + ChrLarger + "_" + bin1 + "_" + bin2, "Contact"); } else { outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); + contactBuffer.put(ChrSmaller + "_" + ChrLarger + "_" + bin1 + "_" + bin2, "-"); } } - } else { - posChrSmaller = eqtl.getRsChrPos(); - posChrLarger = eqtl.getProbeChrPos(); - - String baseName = folderHighC + resolution + "_resolution_interchromosomal\\chr" + chrSnp + "_chr" + chrProbe + "\\MAPQG" + qualityCutOff; - String fileToReads = baseName + "\\chr" + chrSnp + "_" + chrProbe + "_1kb.RAWobserved"; -// System.out.println("Reading: " + fileToReads); - if (normMethod == null) { - if (readRawInterContactInformationLowMem(fileToReads, minValue, posChrSmaller, posChrLarger, resolution)) { - outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); - } else { - outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); - } + if (contactBuffer.containsKey(ChrSmaller + "_" + ChrLarger + "_" + bin1 + "_" + bin2)) { + outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t" + contactBuffer.get(ChrSmaller + "_" + ChrLarger + "_" + bin1 + "_" + bin2)); } else { - if (readNormalizedInterContactInformationLowMem(fileToReads, baseName, normMethod, chrSnp, chrProbe, posChrSmaller, posChrLarger, resolution, minValue)) { + if (readNormalizedInterContactInformation(fileToReads, baseName, normMethod, ChrSmaller, ChrLarger, posChrSmaller, posChrLarger, resolution, minValue)) { outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\tContact"); + contactBuffer.put(ChrSmaller + "_" + ChrLarger + "_" + bin1 + "_" + bin2, "Contact"); } else { outWriter.writeln(eqtl.getRsName() + "\t" + eqtl.getProbe() + "\t-"); + contactBuffer.put(ChrSmaller + "_" + ChrLarger + "_" + bin1 + "_" + bin2, "-"); } } } + } outWriter.close(); } @@ -238,125 +186,25 @@ private static ArrayList includeProxyInfo(ArrayList qtls, String inP //For example, here is a line from the 5kb chr1 MAPQGE30 raw 
observed contact matrix (GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.RAWobserved): //40000000 40100000 59.0 - private static LinkedHashSet> readRawInterContactInformation(String fileToReads, double minContactValue) throws IOException { - LinkedHashSet> chrContactInfo = new LinkedHashSet>(); - - BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToReads), "UTF-8")); - - String row; - - while ((row = input.readLine()) != null) { - String[] parts = StringUtils.split(row, '\t'); - - int posChr1 = Integer.parseInt(parts[0]); - int posChr2 = Integer.parseInt(parts[1]); - double contact = Double.parseDouble(parts[2]); - if (contact >= minContactValue) { - chrContactInfo.add(new Pair(posChr1, posChr2)); - } - } - input.close(); - return chrContactInfo; - - } - - //For example, here is a line from the 5kb chr1 MAPQGE30 raw observed contact matrix (GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.RAWobserved): - //40000000 40100000 59.0 - //To normalize this entry using the KR normalization vector, one would divide 59.0 by the 8001st line ((40000000/5000)+1=8001) and the 8021st line ((40100000/5000)+1=8021) - //of GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.KRnorm. The 8001st line of the KR norm file is 1.2988778370674694; - //The 8021st line of the KR norm file is 1.6080499717941548. So the corresponding KR normalized entry for the entry above is 59.0/(1.2988778370674694*1.6080499717941548) - //or 28.24776973966101. - //If the KR normalization vector file is empty or all NaNs, then the KR algorithm didn’t converge on that particular matrix (likely due to sparsity of the matrix). 
- private static LinkedHashSet> readNormalizedInterContactInformation(String fileToRead, String baseName, String normMethod, String chrSmaller, String chrLarger, String resolution, double minContactValue) throws IOException { - - //ReadIn normalization chr1 - TextFile inputNormChr1 = new TextFile(baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod, TextFile.R); - ArrayList normFactorSmallerChr = inputNormChr1.readAsArrayList(); - inputNormChr1.close(); - - //ReadIn normalization chr2 - TextFile inputNormChr2 = new TextFile(baseName + "\\chr" + chrLarger + "_" + resolution + "." + normMethod, TextFile.R); - ArrayList normFactorLargerChr = inputNormChr2.readAsArrayList(); - - inputNormChr2.close(); - - LinkedHashSet> chrContactInfo = new LinkedHashSet>(); - - BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToRead), "UTF-8")); - - String row; - - while ((row = input.readLine()) != null) { - String[] parts = StringUtils.split(row, '\t'); - - int posChr1 = Integer.parseInt(parts[0]); - int posChr2 = Integer.parseInt(parts[1]); - - String factor1Base = normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1); - String factor2Base = normFactorLargerChr.get((posChr2 / getNumericResolution(resolution)) + 1); - - double factor1; - double factor2; - - if (StringUtils.isNumeric(factor1Base) && StringUtils.isNumeric(factor2Base)) { - factor1 = Double.parseDouble(factor1Base); - factor2 = Double.parseDouble(factor2Base); - - double contact = Double.parseDouble(parts[2]) / (factor1 * factor2); - if (contact >= minContactValue) { - chrContactInfo.add(new Pair(posChr1, posChr2)); - } - - } - } - input.close(); - return chrContactInfo; - } - - private static boolean determineContact(int posChrSmaller, int posChrLarger, LinkedHashSet> interestRegions, int resolution) { - //Determine bin1 - //Starts counting at 0-resulution - int bin1 = posChrSmaller - (posChrSmaller % resolution); - - //Determine bin2 - int 
bin2 = posChrLarger - (posChrLarger % resolution); - + private static boolean readRawInterContactInformation(String fileToReads, double minValue, int bin1, int bin2) throws IOException { +// System.out.println("\t\t"+fileToReads); +// System.out.println("\t"+bin2); //See if bin1 and bin2 are in the file. - boolean contact = false; + boolean contactFound = false; - for (Pair entry : interestRegions) { - if (entry.getLeft() == bin1) { - if (entry.getRight() == bin2) { - contact = true; - break; - } else if (entry.getRight() > bin2) { - break; - } - } else if (entry.getLeft() > bin1) { - break; - } + //Check if sorted version is available + //If not make sorted available. + if (!Gpio.exists(fileToReads + ".sorted")) { + umcg.genetica.io.chrContacts.SortInterChrContacts.readNonSortedWriteSorted(fileToReads, fileToReads + ".sorted"); } - return contact; - } - - private static boolean readRawInterContactInformationLowMem(String fileToReads, double minValue, int posChrSmaller, int posChrLarger, String resolution) throws IOException { - //Determine bin1 - //Starts counting at 0-resulution - int bin1 = posChrSmaller - (posChrSmaller % getNumericResolution(resolution)); - //Determine bin2 - int bin2 = posChrLarger - (posChrLarger % getNumericResolution(resolution)); - - //See if bin1 and bin2 are in the file. 
- boolean contactFound = false; - - BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToReads), "UTF-8")); + BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToReads + ".sorted"), "UTF-8")); String row; while ((row = input.readLine()) != null) { String[] parts = StringUtils.split(row, '\t'); - +// System.out.println(row); int posChr1 = Integer.parseInt(parts[0]); if (posChr1 == bin1) { int posChr2 = Integer.parseInt(parts[1]); @@ -372,13 +220,19 @@ private static boolean readRawInterContactInformationLowMem(String fileToReads, } else if (posChr1 > bin1) { break; } - } input.close(); return contactFound; } - private static boolean readNormalizedInterContactInformationLowMem(String fileToRead, String baseName, String normMethod, String chrSmaller, String chrLarger, int posChrSmaller, int posChrLarger, String resolution, double minValue) throws IOException { + //For example, here is a line from the 5kb chr1 MAPQGE30 raw observed contact matrix (GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.RAWobserved): + //40000000 40100000 59.0 + //To normalize this entry using the KR normalization vector, one would divide 59.0 by the 8001st line ((40000000/5000)+1=8001) and the 8021st line ((40100000/5000)+1=8021) + //of GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.KRnorm. The 8001st line of the KR norm file is 1.2988778370674694; + //The 8021st line of the KR norm file is 1.6080499717941548. So the corresponding KR normalized entry for the entry above is 59.0/(1.2988778370674694*1.6080499717941548) + //or 28.24776973966101. + //If the KR normalization vector file is empty or all NaNs, then the KR algorithm didn’t converge on that particular matrix (likely due to sparsity of the matrix). 
+ private static boolean readNormalizedInterContactInformation(String fileToRead, String baseName, String normMethod, String chrSmaller, String chrLarger, int posChrSmaller, int posChrLarger, String resolution, double minValue) throws IOException { //Determine bin1 //Starts counting at 0-resulution int bin1 = posChrSmaller - (posChrSmaller % getNumericResolution(resolution)); @@ -397,9 +251,11 @@ private static boolean readNormalizedInterContactInformationLowMem(String fileTo inputNormChr2.close(); - LinkedHashSet> chrContactInfo = new LinkedHashSet>(); + if (!Gpio.exists(fileToRead + ".sorted")) { + umcg.genetica.io.chrContacts.SortInterChrContacts.readNonSortedWriteSorted(fileToRead, fileToRead + ".sorted"); + } - BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToRead), "UTF-8")); + BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(fileToRead + ".sorted"), "UTF-8")); String row; @@ -437,7 +293,6 @@ private static boolean readNormalizedInterContactInformationLowMem(String fileTo } else if (posChr1 > bin1) { break; } - } input.close(); return contactFound; From da3d9ec470abf48e493a23cfc4f19b5c6210f8f5 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 20 May 2015 09:57:45 +0200 Subject: [PATCH 046/143] Proper plink chromosome names when writing --- Genotype-Harmonizer/pom.xml | 2 +- Genotype-IO/pom.xml | 2 + .../plink/BedBimFamGenotypeWriter.java | 2 +- .../genotype/plink/FormatPlinkChr.java | 31 +++++++++ .../genotype/plink/PedMapGenotypeWriter.java | 2 +- ...InteractionAnalysisDetermineDirection.java | 64 +++++++++++++------ 6 files changed, 82 insertions(+), 21 deletions(-) create mode 100644 Genotype-IO/src/main/java/org/molgenis/genotype/plink/FormatPlinkChr.java diff --git a/Genotype-Harmonizer/pom.xml b/Genotype-Harmonizer/pom.xml index a2cc8496a..4f200ecef 100644 --- a/Genotype-Harmonizer/pom.xml +++ b/Genotype-Harmonizer/pom.xml @@ -7,7 +7,7 @@ 4.0.0 Genotype-Harmonizer - 
1.4.12-SNAPSHOT + 1.4.13-SNAPSHOT Genotype Harmonizer jar diff --git a/Genotype-IO/pom.xml b/Genotype-IO/pom.xml index da4b14682..d071df3b4 100644 --- a/Genotype-IO/pom.xml +++ b/Genotype-IO/pom.xml @@ -82,6 +82,8 @@ 2.3.2 UTF-8 + 1.7 + 1.7 diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/plink/BedBimFamGenotypeWriter.java b/Genotype-IO/src/main/java/org/molgenis/genotype/plink/BedBimFamGenotypeWriter.java index a79b9836d..963a9dd2f 100644 --- a/Genotype-IO/src/main/java/org/molgenis/genotype/plink/BedBimFamGenotypeWriter.java +++ b/Genotype-IO/src/main/java/org/molgenis/genotype/plink/BedBimFamGenotypeWriter.java @@ -143,7 +143,7 @@ private void writeBimBedFile(File bimFile, File bedFile) throws IOException { continue; } - bimFileWriter.append(variant.getSequenceName()); + bimFileWriter.append(FormatPlinkChr.formatChr(variant.getSequenceName())); bimFileWriter.append(SEPARATOR); bimFileWriter.append(variant.getPrimaryVariantId() == null ? variant.getSequenceName() + ":" + variant.getStartPos() : variant.getPrimaryVariantId()); bimFileWriter.append(SEPARATOR); diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/plink/FormatPlinkChr.java b/Genotype-IO/src/main/java/org/molgenis/genotype/plink/FormatPlinkChr.java new file mode 100644 index 000000000..92bcbbe70 --- /dev/null +++ b/Genotype-IO/src/main/java/org/molgenis/genotype/plink/FormatPlinkChr.java @@ -0,0 +1,31 @@ +package org.molgenis.genotype.plink; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * + * @author Patrick Deelen + */ +public class FormatPlinkChr { + + private static final Pattern CHR_PATTERN = Pattern.compile("^chr(.*)$", Pattern.CASE_INSENSITIVE); + + public static String formatChr(String chrName){ + + Matcher chrMatcher = CHR_PATTERN.matcher(chrName); + if (chrMatcher.find()) { + chrName = chrMatcher.group(1); + } + + switch(chrName){ + case "X": return "23"; + case "Y": return "24"; + case "XY": return "25"; + case "MT": return "26"; + default: 
return chrName; + } + + } + +} diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/plink/PedMapGenotypeWriter.java b/Genotype-IO/src/main/java/org/molgenis/genotype/plink/PedMapGenotypeWriter.java index d6c091cb7..d5ec45c7b 100644 --- a/Genotype-IO/src/main/java/org/molgenis/genotype/plink/PedMapGenotypeWriter.java +++ b/Genotype-IO/src/main/java/org/molgenis/genotype/plink/PedMapGenotypeWriter.java @@ -76,7 +76,7 @@ private void writeMapFile(File mapFile) throws IOException { continue; } - mapFileWriter.append(variant.getSequenceName()); + mapFileWriter.append(FormatPlinkChr.formatChr(variant.getSequenceName())); mapFileWriter.append(SEPARATOR); mapFileWriter.append(variant.getPrimaryVariantId() == null ? variant.getSequenceName() + ":" + variant.getStartPos() : variant.getPrimaryVariantId()); mapFileWriter.append(SEPARATOR); diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisDetermineDirection.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisDetermineDirection.java index f52ad47d5..3b4f520c6 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisDetermineDirection.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisDetermineDirection.java @@ -111,7 +111,7 @@ public class InteractionAnalysisDetermineDirection { OptionBuilder.withLongOpt("query"); OptionBuilder.isRequired(); OPTIONS.addOption(OptionBuilder.create("q")); - + OptionBuilder.withArgName("path"); OptionBuilder.hasArg(); OptionBuilder.withDescription("Output file"); @@ -223,27 +223,36 @@ public static void main(String[] args) throws IOException { CSVReader reader = new CSVReader(new FileReader(queryPath), '\t', '\0', 1); CSVWriter writer = new CSVWriter(new FileWriter(outputPath), '\t', CSVWriter.NO_QUOTE_CHARACTER); - - String[] outputLine = new String[5]; - - + + String[] 
outputLine = new String[6]; + int c = 0; + outputLine[c++] = "variant"; + outputLine[c++] = "gene"; + outputLine[c++] = "covariate"; + outputLine[c++] = "assessedAllele"; + outputLine[c++] = "rhoLow"; + outputLine[c++] = "rhoHigh"; + writer.writeNext(outputLine); + String[] nextLine; while ((nextLine = reader.readNext()) != null) { + final String variant = nextLine[0]; final String gene = nextLine[1]; final String covariate = nextLine[2]; final Allele assessedAllele = Allele.create(nextLine[3]); - - final double direction = directionTool.calculateEffectDifference(variant, gene, covariate, assessedAllele, fractionToUse); - - int c = 0; + + final EffectDiffResult effectDiff = directionTool.calculateEffectDifference(variant, gene, covariate, assessedAllele, fractionToUse); + + c = 0; outputLine[c++] = variant; outputLine[c++] = gene; outputLine[c++] = covariate; outputLine[c++] = assessedAllele.getAlleleAsString(); - outputLine[c++] = String.valueOf(direction); + outputLine[c++] = String.valueOf(effectDiff.getRhoLow()); + outputLine[c++] = String.valueOf(effectDiff.getRhoHigh()); writer.writeNext(outputLine); - + } writer.close(); reader.close(); @@ -301,18 +310,18 @@ public InteractionAnalysisDetermineDirection(RandomAccessGenotypeData genotypeDa } - public double calculateEffectDifference(String snpId, String geneName, String covariateName, Allele assessedAllele, double fractionOfSamplesPerGroup) { + public EffectDiffResult calculateEffectDifference(String snpId, String geneName, String covariateName, Allele assessedAllele, double fractionOfSamplesPerGroup) { if (!variantIdMap.containsKey(snpId)) { - return Double.NaN; + return new EffectDiffResult(Double.NaN, Double.NaN); } if (!expressionData.containsRow(geneName)) { - return Double.NaN; + return new EffectDiffResult(Double.NaN, Double.NaN); } if (!covariatesData.containsRow(covariateName)) { - return Double.NaN; + return new EffectDiffResult(Double.NaN, Double.NaN); } if (fractionOfSamplesPerGroup <= 0 || 
fractionOfSamplesPerGroup >= 1) { @@ -323,11 +332,11 @@ public double calculateEffectDifference(String snpId, String geneName, String co Alleles variantAlleles = variant.getVariantAlleles(); if (!variantAlleles.contains(assessedAllele)) { - return Double.NaN; + return new EffectDiffResult(Double.NaN, Double.NaN); } if (variantAlleles.getAlleleCount() != 2) { - return Double.NaN; + return new EffectDiffResult(Double.NaN, Double.NaN); } float[] dosagesAll = variant.getSampleDosages(); @@ -386,7 +395,26 @@ public double calculateEffectDifference(String snpId, String geneName, String co System.out.println("rho low:" + rhoLow); System.out.println("rho high:" + rhoHigh); - return rhoHigh - rhoLow; + return new EffectDiffResult(rhoLow, rhoHigh); } + + static class EffectDiffResult { + + private final double rhoLow; + private final double rhoHigh; + + public EffectDiffResult(double rhoLow, double rhoHigh) { + this.rhoLow = rhoLow; + this.rhoHigh = rhoHigh; + } + + public double getRhoLow() { + return rhoLow; + } + + public double getRhoHigh() { + return rhoHigh; + } + } } From ccfba958af43bf67301185209e9d8d00d3aeb527 Mon Sep 17 00:00:00 2001 From: Bonder-MJ Date: Wed, 20 May 2015 22:39:01 +0200 Subject: [PATCH 047/143] test with other ranker --- .../java/umcg/genetica/util/RankArray.java | 103 +++++++++--------- 1 file changed, 50 insertions(+), 53 deletions(-) diff --git a/genetica-libraries/src/main/java/umcg/genetica/util/RankArray.java b/genetica-libraries/src/main/java/umcg/genetica/util/RankArray.java index 33aa00a1a..0dc405b15 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/util/RankArray.java +++ b/genetica-libraries/src/main/java/umcg/genetica/util/RankArray.java @@ -7,8 +7,12 @@ import cern.colt.GenericSorting; import java.util.HashSet; import org.apache.commons.collections.primitives.ArrayDoubleList; +import org.apache.commons.math3.stat.ranking.RankingAlgorithm; import cern.colt.function.tint.IntComparator; import cern.colt.Swapper; +import 
org.apache.commons.math3.stat.ranking.NaNStrategy; +import org.apache.commons.math3.stat.ranking.NaturalRanking; +import org.apache.commons.math3.stat.ranking.TiesStrategy; /** * @@ -38,7 +42,8 @@ public class RankArray { // } // return dranks; // } - + private static final RankingAlgorithm COV_RANKER_TIE = new NaturalRanking(NaNStrategy.FAILED, TiesStrategy.AVERAGE); + private static final RankingAlgorithm COV_RANKER = new NaturalRanking(NaNStrategy.FAILED, TiesStrategy.SEQUENTIAL); public double[] xdouble = null; public int[] ydouble = null; public Swapper swapperdouble = null; @@ -93,24 +98,16 @@ public int compare(int a, int b) { } public double[] rank(double[] x, boolean giveTiesSameRank) { - - this.xdouble = x.clone(); - ydouble = new int[x.length]; - for (int v = 0; v < x.length; v++) { - ydouble[v] = v; - } - GenericSorting.quickSort(0, x.length, compdouble, swapperdouble); - double[] rank = new double[x.length]; - for (int v = 0; v < x.length; v++) { - rank[ydouble[v]] = v; - } - + double[] rank; if (!giveTiesSameRank) { - return rank; + rank = COV_RANKER.rank(x); } else { - fixTiesDouble(rank, x); - return rank; + rank = COV_RANKER_TIE.rank(x); } + for (int v = 0; v < rank.length; v++) { + rank[v] = rank[v]-1; + } + return rank; } public float[] rank(float[] x, boolean giveTiesSameRank) { @@ -133,43 +130,43 @@ public float[] rank(float[] x, boolean giveTiesSameRank) { } } - private void fixTiesDouble(double[] rank, double[] x) { - HashSet fixedValues = new HashSet(); - - for(int i=0; i fixedValues = new HashSet(); +// +// for(int i=0; i fixedValues = new HashSet(); From ccf498e54f3878dd594e743dc1572b58ba2e3b3e Mon Sep 17 00:00:00 2001 From: Marc Jan Bonder Date: Fri, 22 May 2015 09:52:41 +0200 Subject: [PATCH 048/143] Speed up QN using math3 natural rank --- .../math/stats/QuantileNormalization.java | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git 
a/genetica-libraries/src/main/java/umcg/genetica/math/stats/QuantileNormalization.java b/genetica-libraries/src/main/java/umcg/genetica/math/stats/QuantileNormalization.java index 396513bad..0b2bf4af0 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/math/stats/QuantileNormalization.java +++ b/genetica-libraries/src/main/java/umcg/genetica/math/stats/QuantileNormalization.java @@ -7,15 +7,20 @@ import java.util.ArrayList; import java.util.Arrays; import org.apache.commons.collections.primitives.ArrayDoubleList; +import org.apache.commons.math3.stat.ranking.NaNStrategy; +import org.apache.commons.math3.stat.ranking.NaturalRanking; +import org.apache.commons.math3.stat.ranking.RankingAlgorithm; +import org.apache.commons.math3.stat.ranking.TiesStrategy; import umcg.genetica.math.matrix.DoubleMatrixDataset; import umcg.genetica.util.RankArray; + /** * * @author Harm Jan & Marc Jan Bonder */ public class QuantileNormalization { - + private static final RankingAlgorithm COV_RANKER_TIE = new NaturalRanking(NaNStrategy.FAILED, TiesStrategy.AVERAGE); /** * Quantile normalize a double[][] double[probes][sample] * @@ -53,25 +58,24 @@ public static void quantilenormalize(double[][] rawData) { rankedMeanClasses[probeID] = ((rankedMean[probeID]+rankedMean[probeID+1])/2); } - RankArray rda = new RankArray(); //Iterate through each sample: for (int s = 0; s < sampleCount; s++) { double[] probes = new double[probeCount]; for (int p = 0; p < probeCount; p++) { probes[p] = rawData[p][s]; } - double[] probesRanked = rda.rank(probes, true); + double[] probesRanked = COV_RANKER_TIE.rank(probes); double[] probesQuantileNormalized = new double[probeCount]; for (int p = 0; p < probeCount; p++) { if((probesRanked[p]%1)!=0){ - probesQuantileNormalized[p] = rankedMeanClasses[(int)Math.floor(probesRanked[p])]; - rawData[p][s] = probesQuantileNormalized[p]; + probesQuantileNormalized[p] = rankedMeanClasses[(int)Math.floor((probesRanked[p]-1))]; } else { - probesQuantileNormalized[p] 
= rankedMean[(int) probesRanked[p]]; - rawData[p][s] = probesQuantileNormalized[p]; + probesQuantileNormalized[p] = rankedMean[(int) (probesRanked[p]-1)]; } + + rawData[p][s] = probesQuantileNormalized[p]; } // double[] probesRankedAfterQQNorm = rda.rank(probesQuantileNormalized, false); From 2c785f880c72326a6a8045d5e6309fccfe9bada6 Mon Sep 17 00:00:00 2001 From: Marc Jan Bonder Date: Fri, 22 May 2015 09:52:56 +0200 Subject: [PATCH 049/143] Depricated Rank array due to speed --- .../src/main/java/umcg/genetica/util/RankArray.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/genetica-libraries/src/main/java/umcg/genetica/util/RankArray.java b/genetica-libraries/src/main/java/umcg/genetica/util/RankArray.java index 0dc405b15..f97f57a9b 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/util/RankArray.java +++ b/genetica-libraries/src/main/java/umcg/genetica/util/RankArray.java @@ -18,6 +18,12 @@ * * @author harmjan */ + +//Please use the math3 natural ranker, especialy in tie resolving it is much faster! +//Note we can also chose to make this depend on natural ranker and do some more speed tweaks, no NaN strategy and only one tie fixer. +//Also make it directly 0 based in this case! 
+ +@Deprecated public class RankArray { // public static double[] rank(double[] x){ // umcg.genetica.util.Rank rank = new umcg.genetica.util.Rank(x, 0d); From 3d8248c5a07e0bd11d2eb3e33fc67b25de14b287 Mon Sep 17 00:00:00 2001 From: Marc Jan Bonder Date: Fri, 22 May 2015 09:53:02 +0200 Subject: [PATCH 050/143] Bump --- eqtl-mapping-pipeline/pom.xml | 4 ++-- genetica-libraries/pom.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/eqtl-mapping-pipeline/pom.xml b/eqtl-mapping-pipeline/pom.xml index db8281179..47279a84f 100644 --- a/eqtl-mapping-pipeline/pom.xml +++ b/eqtl-mapping-pipeline/pom.xml @@ -7,14 +7,14 @@ 1.0.2-SNAPSHOT eqtl-mapping-pipeline - 1.3.4-SNAPSHOT + 1.3.5-SNAPSHOT jar 4.0.0 nl.systemsgenetics genetica-libraries - 1.0.6-SNAPSHOT + 1.0.7-SNAPSHOT log4j diff --git a/genetica-libraries/pom.xml b/genetica-libraries/pom.xml index 06700db71..e2ae6e6c0 100644 --- a/genetica-libraries/pom.xml +++ b/genetica-libraries/pom.xml @@ -7,7 +7,7 @@ 1.0.2-SNAPSHOT genetica-libraries - 1.0.6-SNAPSHOT + 1.0.7-SNAPSHOT jar 4.0.0 From 354906ae5e4f2ca42709f7318e930027141e1604 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Fri, 22 May 2015 13:35:50 +0200 Subject: [PATCH 051/143] Minor fix normalizer --- .../eqtlmappingpipeline/normalization/Normalizer.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/normalization/Normalizer.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/normalization/Normalizer.java index 6243337d4..0e59f10e6 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/normalization/Normalizer.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/normalization/Normalizer.java @@ -407,12 +407,14 @@ public String adjustCovariates(DoubleMatrixDataset traitData, St } } - DoubleMatrixDataset datasetNormalized = new DoubleMatrixDataset(rawdata, traitData.rowObjects, traitData.colObjects); - fileNamePrefix += 
".CovariatesRemoved"; - datasetNormalized.save(fileNamePrefix + ".txt.gz"); - traitData.rawData = rawdata; + + //Why was this done??????? + //DoubleMatrixDataset datasetNormalized = new DoubleMatrixDataset(rawdata, traitData.rowObjects, traitData.colObjects); + fileNamePrefix += ".CovariatesRemoved"; + traitData.save(fileNamePrefix + ".txt.gz"); + return fileNamePrefix; } From 215f70dbd88eb3262f1a516b3bac0d24bcb32353 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Mon, 1 Jun 2015 09:43:17 +0200 Subject: [PATCH 052/143] covariate importance gene filter --- .../CovariateImportance.java | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/CovariateImportance.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/CovariateImportance.java index 32576274d..48d0b130f 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/CovariateImportance.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/CovariateImportance.java @@ -55,6 +55,12 @@ public class CovariateImportance { OptionBuilder.withDescription("File with covariates to include in analysis"); OptionBuilder.withLongOpt("covariats"); OPTIONS.addOption(OptionBuilder.create("c")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("File with eQTL genes to include in analysis"); + OptionBuilder.withLongOpt("genes"); + OPTIONS.addOption(OptionBuilder.create("g")); } @@ -66,6 +72,7 @@ public static void main(String[] args) throws FileNotFoundException, IOException final File inputInteractionFile; final File outputFile; final File covariatesToIncludeFile; + final File genesToIncludeFile; try { final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); @@ -78,6 +85,12 @@ public static void main(String[] args) throws FileNotFoundException, IOException } else { 
covariatesToIncludeFile = null; } + + if (commandLine.hasOption("g")) { + genesToIncludeFile = new File(commandLine.getOptionValue("g")); + } else { + genesToIncludeFile = null; + } } catch (ParseException ex) { System.err.println("Invalid command line arguments: "); @@ -93,6 +106,23 @@ public static void main(String[] args) throws FileNotFoundException, IOException if (covariatesToIncludeFile != null) { System.out.println("Covariates to include: " + covariatesToIncludeFile.getAbsolutePath()); } + if (genesToIncludeFile != null) { + System.out.println("eQTL genes to include: " + genesToIncludeFile.getAbsolutePath()); + } + + final HashSet genesToInclude; + if (genesToIncludeFile != null) { + genesToInclude = new HashSet(); + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(genesToIncludeFile), "UTF-8")); + String line; + while ((line = reader.readLine()) != null) { + genesToInclude.add(line.trim()); + } + System.out.println("eQTL genes included: " + genesToInclude.size()); + System.out.println(""); + } else { + genesToInclude = null; + } final HashSet covariantsToInclude; if (covariatesToIncludeFile != null) { @@ -118,9 +148,14 @@ public static void main(String[] args) throws FileNotFoundException, IOException String variantName = variant.getName(); int[] genePointers = variant.getGenePointers(); + genes: for (int genePointer : genePointers) { BinaryInteractionGene gene = inputFile.getGene(genePointer); + + if (genesToInclude != null && !genesToInclude.contains(gene.getName())) { + continue genes; + } covariates: for (Iterator iterator = inputFile.readVariantGeneResults(variantName, gene.getName()); iterator.hasNext();) { From 4528a39b546e1740b95fb45ca55b5667b30312d5 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Mon, 15 Jun 2015 20:38:17 +0200 Subject: [PATCH 053/143] Improvements GH --- Genotype-Harmonizer/nb-configuration.xml | 9 ++ Genotype-Harmonizer/pom.xml | 11 ++- .../deelenp/genotypeharmonizer/Aligner.java | 83 
+++++++------------ .../GenotypeHarmonizerParamaters.java | 2 +- .../GenotypeHarmonizerTest.java | 12 +-- ...RandomAccessGenotypeDataReaderFormats.java | 4 + 6 files changed, 59 insertions(+), 62 deletions(-) diff --git a/Genotype-Harmonizer/nb-configuration.xml b/Genotype-Harmonizer/nb-configuration.xml index 5f8c56a87..4c7dcce1c 100644 --- a/Genotype-Harmonizer/nb-configuration.xml +++ b/Genotype-Harmonizer/nb-configuration.xml @@ -12,4 +12,13 @@ Without this configuration present, some functionality in the IDE may be limited + + + JDK_1.7 + diff --git a/Genotype-Harmonizer/pom.xml b/Genotype-Harmonizer/pom.xml index 4f200ecef..472f3d33e 100644 --- a/Genotype-Harmonizer/pom.xml +++ b/Genotype-Harmonizer/pom.xml @@ -7,7 +7,7 @@ 4.0.0 Genotype-Harmonizer - 1.4.13-SNAPSHOT + 1.4.15 Genotype Harmonizer jar @@ -108,6 +108,15 @@ + + org.apache.maven.plugins + maven-compiler-plugin + 2.3.2 + + 1.7 + 1.7 + + diff --git a/Genotype-Harmonizer/src/main/java/nl/umcg/deelenp/genotypeharmonizer/Aligner.java b/Genotype-Harmonizer/src/main/java/nl/umcg/deelenp/genotypeharmonizer/Aligner.java index 27153622f..1a68bf6cb 100644 --- a/Genotype-Harmonizer/src/main/java/nl/umcg/deelenp/genotypeharmonizer/Aligner.java +++ b/Genotype-Harmonizer/src/main/java/nl/umcg/deelenp/genotypeharmonizer/Aligner.java @@ -1,7 +1,6 @@ package nl.umcg.deelenp.genotypeharmonizer; import static JSci.maths.ArrayMath.covariance; -import static JSci.maths.ArrayMath.variance; import com.google.common.collect.Lists; import java.io.BufferedWriter; import java.io.File; @@ -69,21 +68,21 @@ public ModifiableGenotypeData alignToRef(RandomAccessGenotypeData study, RandomA //In this loop we filter the variants present in the reference and swap the AG, AC, TC, TG SNPs. 
studyVariants: for (ModifiableGeneticVariant studyVariant : aligendStudyData.getModifiableGeneticVariants()) { - + ++iterationCounter; if (iterationCounter % 10000 == 0) { //LOGGER.info("Iteration 1 - " + GenotypeHarmonizer.DEFAULT_NUMBER_FORMATTER.format(iterationCounter) + " variants processed"); System.out.println("Iteration 1 - " + GenotypeHarmonizer.DEFAULT_NUMBER_FORMATTER.format(iterationCounter) + " variants processed"); } - + if (!studyVariant.isMapped()) { snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, "No mapping"); studyVariant.exclude(); continue studyVariants; } - - if(studyVariant.getStartPos() == 0){ + + if (studyVariant.getStartPos() == 0) { snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, "No mapping"); studyVariant.exclude(); continue studyVariants; @@ -176,19 +175,19 @@ public ModifiableGenotypeData alignToRef(RandomAccessGenotypeData study, RandomA //If we get here we have found a variant is our reference data on the same position with comparable alleles. 
- //We have to exclude maf of zero otherwise we cannot do LD calculation - if (!(studyVariant.getMinorAlleleFrequency() > 0)) { - snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, "MAF of 0 in study data"); - studyVariant.exclude(); - continue studyVariants; - } - - //We have to exclude maf of zero otherwise we can not do LD calculation - if (!(refVariant.getMinorAlleleFrequency() > 0)) { - snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, "MAF of 0 in reference data"); - studyVariant.exclude(); - continue studyVariants; - } +// //We have to exclude maf of zero otherwise we cannot do LD calculation +// if (!(studyVariant.getMinorAlleleFrequency() > 0)) { +// snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, "MAF of 0 in study data"); +// studyVariant.exclude(); +// continue studyVariants; +// } +// +// //We have to exclude maf of zero otherwise we can not do LD calculation +// if (!(refVariant.getMinorAlleleFrequency() > 0)) { +// snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, "MAF of 0 in reference data"); +// studyVariant.exclude(); +// continue studyVariants; +// } @@ -238,8 +237,8 @@ public ModifiableGenotypeData alignToRef(RandomAccessGenotypeData study, RandomA if (updateId) { snpUpdateWriter.close(); } - - if(iterationCounter == 0){ + + if (iterationCounter == 0) { throw new GenotypeAlignmentException("No variants where found in the input genotype data. 
Please check your variant filter options"); } @@ -283,7 +282,7 @@ public ModifiableGenotypeData alignToRef(RandomAccessGenotypeData study, RandomA if (!studyVariant.isAtOrGcSnp()) { //Correlate the haps with both these snps between study and ref - correlationResults hapCor = correlateHaplotypes(minLdToIncludeAlign, + CorrelationResults hapCor = correlateHaplotypes(minLdToIncludeAlign, flankSnpsToConsider, studyVariantList, refVariantList, variantIndex, studyVariant, refVariant); @@ -345,7 +344,7 @@ public ModifiableGenotypeData alignToRef(RandomAccessGenotypeData study, RandomA ++GcAtSnpsEncountered; //Correlate the haps with both these snps between study and ref - correlationResults hapCor = correlateHaplotypes(minLdToIncludeAlign, + CorrelationResults hapCor = correlateHaplotypes(minLdToIncludeAlign, flankSnpsToConsider, studyVariantList, refVariantList, variantIndex, studyVariant, refVariant); @@ -396,7 +395,7 @@ public ModifiableGenotypeData alignToRef(RandomAccessGenotypeData study, RandomA //Ld pattern should be okay now. 
but we are going to do the extra check //Correlate the haps with both these snps between study and ref - correlationResults hapCorSwapped = correlateHaplotypes(minLdToIncludeAlign, + CorrelationResults hapCorSwapped = correlateHaplotypes(minLdToIncludeAlign, flankSnpsToConsider, studyVariantList, refVariantList, variantIndex, studyVariant, refVariant); @@ -445,7 +444,7 @@ public ModifiableGenotypeData alignToRef(RandomAccessGenotypeData study, RandomA } - private correlationResults correlateHaplotypes(double minLdToIncludeAlignBase, + private CorrelationResults correlateHaplotypes(double minLdToIncludeAlignBase, int flankSnpsToConsider, ArrayList studyVariantList, ArrayList refVariantList, int variantIndex, @@ -488,36 +487,12 @@ private correlationResults correlateHaplotypes(double minLdToIncludeAlignBase, ldStudy = LdCalculator.calculateLd(snpStudyVariant, otherSnpStudyVariant); ldRef = LdCalculator.calculateLd(refVariant, otherRefVariant); } catch (LdCalculatorException e) { - LOGGER.warn("Error in LD calculation, skipping this comparison when comparing haplotype structure. Following error occurred: " + e.getMessage()); + LOGGER.debug("Error in LD calculation, skipping this comparison when comparing haplotype structure. 
Following error occurred: " + e.getMessage()); continue; } -// if(snpStudyVariant.getPrimaryVariantId().equals("rs1001945")){ -// LOGGER.debug(" * Other variant: " + otherSnpStudyVariant.getPrimaryVariantId() + -// "\nstudy alleles: " + otherSnpStudyVariant.getVariantAlleles() + " ref alleles: " + otherRefVariant.getVariantAlleles() + "\n" -// + "maf study: " + otherSnpStudyVariant.getMinorAlleleFrequency() + "(" + otherSnpStudyVariant.getMinorAllele() + ") maf ref: " + otherRefVariant.getMinorAlleleFrequency() + "(" + otherRefVariant.getMinorAllele() + ")\n" + -// "LD study, R2: " + ldStudy.getR2() + " D': " + ldStudy.getDPrime() + "\n" + -// "LD ref, R2: " + ldRef.getR2() + " D': " + ldRef.getDPrime() + "\n"); -// -// -// StringBuilder s = new StringBuilder(); -// for(byte b : snpStudyVariant.getSampleCalledDosages()){ -// s.append(b); -// } -// LOGGER.debug(s); -// -// s = new StringBuilder(); -// for(byte b : otherSnpStudyVariant.getSampleCalledDosages()){ -// s.append(b); -// } -// LOGGER.debug(s); -// -// -// -// -// } //only use SNPs with min R2 in both study as ref - if (ldStudy.getR2() >= minLdToIncludeAlignBase && ldRef.getR2() >= minLdToIncludeAlignBase) { + if ( !Double.isNaN(ldStudy.getR2()) && !Double.isNaN(ldRef.getR2()) && ldStudy.getR2() >= minLdToIncludeAlignBase && ldRef.getR2() >= minLdToIncludeAlignBase) { //Put in tree map to sort haplotypes. 
This can differ in the case of different reference allele TreeMap studyHapFreq = new TreeMap(ldStudy.getHaplotypesFreq()); @@ -542,14 +517,14 @@ private correlationResults correlateHaplotypes(double minLdToIncludeAlignBase, ++posCor; } - } + } } } - return new correlationResults(posCor, negCor); + return new CorrelationResults(posCor, negCor); } private double[] createDoubleArrayFromCollection( @@ -567,12 +542,12 @@ private double[] createDoubleArrayFromCollection( return array; } - private static class correlationResults { + private static class CorrelationResults { private final int posCor; private final int negCor; - public correlationResults(int posCor, int negCor) { + public CorrelationResults(int posCor, int negCor) { super(); this.posCor = posCor; this.negCor = negCor; diff --git a/Genotype-Harmonizer/src/main/java/nl/umcg/deelenp/genotypeharmonizer/GenotypeHarmonizerParamaters.java b/Genotype-Harmonizer/src/main/java/nl/umcg/deelenp/genotypeharmonizer/GenotypeHarmonizerParamaters.java index 29d70adf1..8ce8b3cdc 100644 --- a/Genotype-Harmonizer/src/main/java/nl/umcg/deelenp/genotypeharmonizer/GenotypeHarmonizerParamaters.java +++ b/Genotype-Harmonizer/src/main/java/nl/umcg/deelenp/genotypeharmonizer/GenotypeHarmonizerParamaters.java @@ -282,7 +282,7 @@ public GenotypeHarmonizerParamaters(String... args) throws ParseException { try { if (commandLine.hasOption('I')) { - inputType = RandomAccessGenotypeDataReaderFormats.valueOf(commandLine.getOptionValue('I').toUpperCase()); + inputType = RandomAccessGenotypeDataReaderFormats.valueOfSmart(commandLine.getOptionValue('I').toUpperCase()); } else { if (inputBasePaths[0].endsWith(".vcf")) { throw new ParseException("Only vcf.gz is supported. 
Please see manual on how to do create a vcf.gz file."); diff --git a/Genotype-Harmonizer/src/test/java/nl/umcg/deelenp/genotypeharmonizer/GenotypeHarmonizerTest.java b/Genotype-Harmonizer/src/test/java/nl/umcg/deelenp/genotypeharmonizer/GenotypeHarmonizerTest.java index 4fc4fa4c7..a4d338bac 100644 --- a/Genotype-Harmonizer/src/test/java/nl/umcg/deelenp/genotypeharmonizer/GenotypeHarmonizerTest.java +++ b/Genotype-Harmonizer/src/test/java/nl/umcg/deelenp/genotypeharmonizer/GenotypeHarmonizerTest.java @@ -115,7 +115,7 @@ public void testMain() throws Exception { } - assertEquals(variantCounter, 3745); + assertEquals(variantCounter, 3747); //Check if ID is updated based on 1000G assertEquals(aligenedHapmap3Data.getSnpVariantByPos("20", 809930).getPrimaryVariantId(), "rs78472400"); @@ -183,7 +183,7 @@ public void testMain2() throws Exception { } - assertEquals(variantCounter, 4086); + assertEquals(variantCounter, 4088); //Check if number of samples is correct assertEquals(aligenedHapmap3Data.getSamples().size(), 165); @@ -252,7 +252,7 @@ public void testMain3() throws Exception { } - assertEquals(variantCounter, 4086); + assertEquals(variantCounter, 4088); //Check if ID is updated based on 1000G assertEquals(aligenedHapmap3Data.getSnpVariantByPos("20", 809930).getPrimaryVariantId(), "rs78472400"); @@ -358,7 +358,7 @@ public void testMain5() throws Exception { } - assertEquals(variantCounter, 3778); + assertEquals(variantCounter, 3780); //Check if ID is updated based on 1000G assertEquals(aligenedHapmap3Data.getSnpVariantByPos("20", 809930).getPrimaryVariantId(), "rs78472400"); @@ -423,7 +423,7 @@ public void testMain6() throws Exception { } - assertEquals(variantCounter, 3745); + assertEquals(variantCounter, 3747); //Check if ID is updated based on 1000G assertEquals(aligenedHapmap3Data.getSnpVariantByPos("20", 809930).getPrimaryVariantId(), "rs78472400"); @@ -486,7 +486,7 @@ public void testMain7() throws Exception { } - assertEquals(variantCounter, 4078); + 
assertEquals(variantCounter, 4087); //Check if number of samples is correct assertEquals(aligenedHapmap3Data.getSamples().size(), 155); diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/RandomAccessGenotypeDataReaderFormats.java b/Genotype-IO/src/main/java/org/molgenis/genotype/RandomAccessGenotypeDataReaderFormats.java index 37c1ef10d..bafbfd8bb 100644 --- a/Genotype-IO/src/main/java/org/molgenis/genotype/RandomAccessGenotypeDataReaderFormats.java +++ b/Genotype-IO/src/main/java/org/molgenis/genotype/RandomAccessGenotypeDataReaderFormats.java @@ -310,6 +310,10 @@ public static RandomAccessGenotypeDataReaderFormats valueOfSmart(String value){ return PLINK_BED; } else if (value.equals("B_PLINK")){ return PLINK_BED; + } else if (value.equals("PLINKB")){ + return PLINK_BED; + } else if (value.equals("PLINK_B")){ + return PLINK_BED; } return RandomAccessGenotypeDataReaderFormats.valueOf(value); From 3073f8a736fa7e282373adfcccd518ca983842b1 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Tue, 30 Jun 2015 11:10:23 +0200 Subject: [PATCH 054/143] Added lude interaction analysis --- .gitignore | 8 +- Genotype-Harmonizer/pom.xml | 2 +- .../trityper/TriTyperGenotypeData.java | 11 +- eQTLInteractionAnalyser/pom.xml | 81 ++ .../DoubleArrayIntegerObject.java | 21 + .../DoubleArrayIntegerObjectSorter.java | 29 + .../EQTLInteractionAnalyser.java | 26 + .../ExpressionDataset.java | 580 ++++++++++++ ...ormInteractionAnalysisPermutationTask.java | 77 ++ .../StringIntegerObject.java | 21 + .../StringIntegerObjectSorter.java | 36 + .../TestEQTLDatasetForInteractions.java | 878 ++++++++++++++++++ .../eqtlinteractionanalyser/VectorSorter.java | 129 +++ .../eqtlinteractionanalyser/AppTest.java | 38 + .../QueryBinaryInteraction.java | 1 + pom.xml | 3 +- 16 files changed, 1937 insertions(+), 4 deletions(-) create mode 100644 eQTLInteractionAnalyser/pom.xml create mode 100644 
eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/DoubleArrayIntegerObject.java create mode 100644 eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/DoubleArrayIntegerObjectSorter.java create mode 100644 eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java create mode 100644 eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/ExpressionDataset.java create mode 100644 eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java create mode 100644 eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/StringIntegerObject.java create mode 100644 eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/StringIntegerObjectSorter.java create mode 100644 eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java create mode 100644 eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/VectorSorter.java create mode 100644 eQTLInteractionAnalyser/src/test/java/nl/systemsgenetics/eqtlinteractionanalyser/AppTest.java diff --git a/.gitignore b/.gitignore index 3aef1432f..f98d6fb6f 100644 --- a/.gitignore +++ b/.gitignore @@ -20,4 +20,10 @@ nbactions*.xml nb-configuration.xml -eqtl-mapping-pipeline/nb-configuration.xml \ No newline at end of file +eqtl-mapping-pipeline/nb-configuration.xml +/eQTLInteractionAnalyser/nbproject/private/ +/eQTLInteractionAnalyser/build/ +/eQTLInteractionAnalyser/dist/ +/eQTLInteractionAnalyser2/build/ +/eQTLInteractionAnalyser2/dist/ +/eQTLInteractionAnalyser2/nbproject/private/ \ No newline at 
end of file diff --git a/Genotype-Harmonizer/pom.xml b/Genotype-Harmonizer/pom.xml index 472f3d33e..915a595c5 100644 --- a/Genotype-Harmonizer/pom.xml +++ b/Genotype-Harmonizer/pom.xml @@ -7,7 +7,7 @@ 4.0.0 Genotype-Harmonizer - 1.4.15 + 1.4.16-SNAPSHOT Genotype Harmonizer jar diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/trityper/TriTyperGenotypeData.java b/Genotype-IO/src/main/java/org/molgenis/genotype/trityper/TriTyperGenotypeData.java index 4ce5477c7..6653e6e10 100644 --- a/Genotype-IO/src/main/java/org/molgenis/genotype/trityper/TriTyperGenotypeData.java +++ b/Genotype-IO/src/main/java/org/molgenis/genotype/trityper/TriTyperGenotypeData.java @@ -345,6 +345,9 @@ private void loadSNPAnnotation(GeneticVariantRange.GeneticVariantRangeCreate snp String line; while ((line = snpFileReader.readLine()) != null) { + if(allSNPHash.contains(line)){ + throw new GenotypeDataException("SNP found twice: " + line + ". All SNP ID's must be unique"); + } if (variantFilter == null || variantFilter.doesIdPassFilter(line)) { allSNPHash.put(line, unfilteredSnpCount); } @@ -459,11 +462,17 @@ public List getSampleVariants(GeneticVariant variant) { try { genotypeHandle.seek(indexLong); if (genotypeHandle.read(buffer) != buffer.length) { + + LOG.fatal("ERROR loading trityper SNP: " + variant.getPrimaryVariantId() + " at: " + variant.getSequenceName() + ":" + variant.getStartPos() + " variant index: " + index); + throw new GenotypeDataException("Could not read bytes from: " + indexLong + " in genotype file " + genotypeDataFile.getAbsolutePath() + " (size: " + genotypeDataFile.length() + ")"); } } catch (IOException e) { - throw new GenotypeDataException("Could not read bytes from: " + indexLong + " in genotype file " + genotypeDataFile.getAbsolutePath() + " (size: " + genotypeDataFile.length() + ")"); + + LOG.fatal("ERROR loading trityper SNP: " + variant.getPrimaryVariantId() + " at: " + variant.getSequenceName() + ":" + variant.getStartPos() + " variant index: " + 
index); + + throw new GenotypeDataException("Could not read bytes from: " + indexLong + " in genotype file " + genotypeDataFile.getAbsolutePath() + " (size: " + genotypeDataFile.length() + ")", e); } List alleles = new ArrayList(includedSamples.size()); diff --git a/eQTLInteractionAnalyser/pom.xml b/eQTLInteractionAnalyser/pom.xml new file mode 100644 index 000000000..936e42c66 --- /dev/null +++ b/eQTLInteractionAnalyser/pom.xml @@ -0,0 +1,81 @@ + + + 4.0.0 + + nl.systemsgenetics + systemsgenetics + 1.0.2-SNAPSHOT + + nl.systemsgenetics + eQTLInteractionAnalyser + 1.0-SNAPSHOT + eQTLInteractionAnalyser + http://maven.apache.org + + UTF-8 + + + + net.sf.jsci + jsci + 1.2 + + + org.apache.commons + commons-math3 + 3.2 + + + net.sourceforge.parallelcolt + parallelcolt + 0.10.0 + + + gov.nist.math + jama + 1.0.3 + + + junit + junit + 3.8.1 + test + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 2.3.2 + + UTF-8 + + + + maven-assembly-plugin + + + jar-with-dependencies + + + + nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser.EQTLInteractionAnalyser + true + true + + + + + + package + + single + + + + + + + diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/DoubleArrayIntegerObject.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/DoubleArrayIntegerObject.java new file mode 100644 index 000000000..6438d9215 --- /dev/null +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/DoubleArrayIntegerObject.java @@ -0,0 +1,21 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. 
+ */ + +package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; + +/** + * + * @author ludefranke + */ +public class DoubleArrayIntegerObject { + + public double[] doubleArray; + public int intValue; + public DoubleArrayIntegerObject(double[] doubleArray, int intValue) { + this.doubleArray = doubleArray; + this.intValue = intValue; + } + +} diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/DoubleArrayIntegerObjectSorter.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/DoubleArrayIntegerObjectSorter.java new file mode 100644 index 000000000..3e639d144 --- /dev/null +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/DoubleArrayIntegerObjectSorter.java @@ -0,0 +1,29 @@ +/* + * GeneLocationObjectSorter.java + * + * Created on 23 December 2003, 17:14 + */ + +package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; + +/** + * + * @author Like + */ +public class DoubleArrayIntegerObjectSorter extends VectorSorter { + + /** Creates a new instance of GeneLocationObjectSorter */ + public DoubleArrayIntegerObjectSorter() { + super(); + } + + /** Override object comparer + * @param a the first GeneLocationObject to be compared + * @param b the second GeneLocationObject to be compared + * @return true if the first GeneLocationObject.getChrStart() is lower than the second one + */ + protected boolean lt (Object a, Object b) { + return (((DoubleArrayIntegerObject)a).intValue < ((DoubleArrayIntegerObject)b).intValue); + } + +} \ No newline at end of file diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java new file mode 100644 
index 000000000..808d8d861 --- /dev/null +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -0,0 +1,26 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ + +package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; + +/** + * + * @author lude + */ +public class EQTLInteractionAnalyser { + + /** + * @param args the command line arguments + */ + public static void main(String[] args) { + // TODO code application logic here + + new TestEQTLDatasetForInteractions("/Users/lude/Documents/InteractionAnalyser/"); + //new TestEQTLDatasetForInteractions(args[0]); + + } + +} diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/ExpressionDataset.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/ExpressionDataset.java new file mode 100644 index 000000000..944dbeef4 --- /dev/null +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/ExpressionDataset.java @@ -0,0 +1,580 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. 
+ */ + +package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; + +import java.io.*; +import java.net.*; +import java.util.*; +import java.awt.image.BufferedImage; +import java.awt.image.*; +import java.awt.*; +import java.awt.geom.*; +import java.lang.Math; +import javax.imageio.*; + +/** + * + * @author lude + */ +public class ExpressionDataset { + + public double[][] rawData = null; + public int nrSamples = 0; + public int nrProbes = 0; + public String[] probeNames = null; + public String[] sampleNames = null; + public HashMap hashSamples = new HashMap(); + public HashMap hashProbes = new HashMap(); + private HashMap hashProbesToInclude = null; + private HashMap hashSamplesToInclude = null; + public String fileName = null; + + public ExpressionDataset(String fileName) { + if (fileName.endsWith(".binary")) { + loadExpressionDataInBinaryFormat(fileName); + } else { + loadExpressionData(fileName, "\t"); + } + } + + public ExpressionDataset(String fileName, String delimiter) { + if (fileName.endsWith(".binary")) { + loadExpressionDataInBinaryFormat(fileName); + } else { + loadExpressionData(fileName, delimiter); + } + } + + public ExpressionDataset(String fileName, String delimiter, HashMap hashProbesToInclude) { + this.hashProbesToInclude = hashProbesToInclude; + if (fileName.endsWith(".binary")) { + loadExpressionDataInBinaryFormat(fileName); + } else { + loadExpressionData(fileName, delimiter); + } + } + + public ExpressionDataset(String fileName, String delimiter, HashMap hashProbesToInclude, HashMap hashSamplesToInclude) { + this.hashProbesToInclude = hashProbesToInclude; + this.hashSamplesToInclude = hashSamplesToInclude; + if (fileName.endsWith(".binary")) { + loadExpressionDataInBinaryFormat(fileName); + } else { + loadExpressionData(fileName, delimiter); + } + } + + public ExpressionDataset(int nrProbes, int nrSamples) { + this.nrProbes = nrProbes; + this.nrSamples = nrSamples; + sampleNames = new String[nrSamples]; + for (int s=0; s2 && 
data[1].length() > 0 && data[1].equals("MultipleHits")) { + dataIsInTriTyperFormat = true; + sampleOffset = 9; + + } + + if (hashSamplesToInclude==null) { + nrSamples = data.length - sampleOffset; + sampleNames = new String[nrSamples]; + sampleIndex = new int[nrSamples]; + for (int s=0; s> 56); + buffer[bufferLoc + 1] = (byte) (bits >> 48 & 0xff); + buffer[bufferLoc + 2] = (byte) (bits >> 40 & 0xff); + buffer[bufferLoc + 3] = (byte) (bits >> 32 & 0xff); + buffer[bufferLoc + 4] = (byte) (bits >> 24 & 0xff); + buffer[bufferLoc + 5] = (byte) (bits >> 16 & 0xff); + buffer[bufferLoc + 6] = (byte) (bits >> 8 & 0xff); + buffer[bufferLoc + 7] = (byte) (bits & 0xff); + bufferLoc += 8; + } + try { + out.write(buffer); + } catch (IOException e) { + System.err.println("Can't write to " + fileBinary.getName() + ": " + e.getMessage()); + System.exit(1); + } + } + try { + out.close(); + } catch (IOException e) { + e.printStackTrace(); + } + File fileProbes = new File(fileName + ".rows.txt"); + try { + java.io.BufferedWriter outProbes = new java.io.BufferedWriter(new java.io.FileWriter(fileProbes)); + for (int p=0; p>> 24), + (byte) (value >>> 16), + (byte) (value >>> 8), + (byte) value}; + } + + private int byteArrayToInt(byte[] b) { + return (b[0] << 24) + + ((b[1] & 0xff) << 16) + + ((b[2] & 0xff) << 8) + + (b[3] & 0xff); + } + + +} diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java new file mode 100644 index 000000000..98c8a3802 --- /dev/null +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java @@ -0,0 +1,77 @@ +/* + * To change this license header, choose License Headers in Project Properties. 
+ * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ + +package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; + +import cern.jet.random.tdouble.engine.DoubleRandomEngine; +import java.util.concurrent.Callable; +/** + * + * @author lude + */ +public class PerformInteractionAnalysisPermutationTask implements Callable { + + public ExpressionDataset datasetGenotypes; + public ExpressionDataset datasetExpression; + public ExpressionDataset datasetCovariates; + public int covToTest = -1; + public int nrSamples = -1; + public org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression regression = null; + public cern.jet.random.tdouble.StudentT tDistColt = null; + + public PerformInteractionAnalysisPermutationTask(ExpressionDataset datasetGenotypes, ExpressionDataset datasetExpression, ExpressionDataset datasetCovariates, int covToTest) { + this.datasetGenotypes = datasetGenotypes; + this.datasetExpression = datasetExpression; + this.datasetCovariates = datasetCovariates; + this.covToTest = covToTest; + this.nrSamples = datasetGenotypes.nrSamples; + + this.regression = new org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression(); + cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = new cern.jet.random.tdouble.engine.DRand(); + this.tDistColt = new cern.jet.random.tdouble.StudentT(this.nrSamples - 4, randomEngine); + + } + + @Override + public DoubleArrayIntegerObject call() throws Exception { + + double[] zScores = new double[datasetGenotypes.nrProbes]; + + for (int snp=0; snp= 0) { + return false; + } else { + return true; + } + } + +} \ No newline at end of file diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java new 
file mode 100644 index 000000000..7907f799a --- /dev/null +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -0,0 +1,878 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ + +package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; + +import java.awt.Color; +import java.awt.Graphics2D; +import java.awt.RenderingHints; +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Vector; +import java.util.concurrent.CompletionService; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.Executors; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.apache.commons.math3.stat.ranking.NaturalRanking; + +/** + * + * @author lude + */ +public class TestEQTLDatasetForInteractions { + + String inputDir = null; + + public TestEQTLDatasetForInteractions(String inputDir) { + + this.inputDir = inputDir; + + //preprocessData(); + + String[] covsToCorrect = {"gender","GC","MEDIAN_5PRIME_BIAS","MEDIAN_3PRIME_BIAS"}; + performInteractionAnalysis(covsToCorrect); + + } + + public void preprocessData() { + + HashMap hashGenotypes = new HashMap(); + HashMap hashExpression = new HashMap(); + HashMap hashEQTLs = new HashMap(); + try { + java.io.BufferedReader in = new java.io.BufferedReader(new java.io.FileReader(new File(inputDir + "/bigTableLude.txt"))); + String str = in.readLine(); + String[] data = str.split("\t"); + for (int d=0; d3) hashSamplesToExclude.put(datasetCovariates.sampleNames[s], null); + } + } + if (1==1) { + int index = ((Integer) datasetCovariates.hashProbes.get("MEDIAN_3PRIME_BIAS")).intValue(); + double mean = 
JSci.maths.ArrayMath.mean(datasetCovariates.rawData[index]); + double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[index]); + for (int s=0; s3) hashSamplesToExclude.put(datasetCovariates.sampleNames[s], null); + } + } + hashSamples = new HashMap(); + for (int s=0; s0) { + for (int comp=0; comp1E-5) { + double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetExpression.rawData[snp]); + for (int s=0; s1E-5) { + double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetCovariates.rawData[p]); + for (int s=0; s + double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); + //Convert the pValue to a Z-Score: + double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); + datasetCovariates.rawData[p][s] = zScore; //Replace original expression value with the Z-Score + } + } + + } + + cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = new cern.jet.random.tdouble.engine.DRand(); + cern.jet.random.tdouble.StudentT tDistColt = new cern.jet.random.tdouble.StudentT(nrSamples - 4, randomEngine); + + ExpressionDataset datasetExpressionBeforeEQTLCorrection = new ExpressionDataset(datasetExpression.nrProbes, datasetExpression.nrSamples); + for (int p=0; p + double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); + //Convert the pValue to a Z-Score: + double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); + datasetExpression.rawData[p][s] = zScore; //Replace original expression value with the Z-Score + } + } + + } + + if (1==2) { + System.out.println("WARNING: PERMUTING GENOTYPE DATA!!!!"); + String[] cohorts = {"LLDeep","LLS","RS","CODAM"}; + int[] permSampleIDs = new int[datasetGenotypes.nrSamples]; + for (int p=0; p pool = new ExecutorCompletionService(threadPool); + int nrTasks = 0; + for (int cov = 0; cov 0) { + PerformInteractionAnalysisPermutationTask task = new 
PerformInteractionAnalysisPermutationTask(datasetGenotypes, datasetExpression, datasetCovariates, cov); + pool.submit(task); + nrTasks++; + } + } + try { + + for (int task = 0; task0 && task%1000==0) { + datasetZScores.save(inputDir + "/InteractionZScoresMatrix.txt"); + } + } catch (ExecutionException ex) { + Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); + } + } + threadPool.shutdown(); + } catch (Exception e) { + e.printStackTrace(); + System.out.println(e.getMessage()); + } + + datasetZScores.save(inputDir + "/InteractionZScoresMatrix.txt"); + + System.exit(0); + } + + System.exit(0); + } + + public void makeInteractionPlot(String fileName, double[] genotype, double[] expression, double[] covariate, String[] sampleNames) { + + int nrSamples = genotype.length; + + int[] cohortIndex = new int[4]; + String[] cohorts = {"LLDeep","LLS","RS","CODAM"}; + for (int cohort=0; cohort=0; rep--) { + for (int s=0; s1.5) { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); + g2d.setColor(new Color(171, 178, 114)); + } else { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); + g2d.setColor(new Color(98,175,255)); + } + } + g2d.fillOval(posX - 3 - rep * 4, posY - 3 - rep * 4, 7 + rep * 8, 7 + rep * 8); + + } + } + + //Draw the four independent cohorts seperately: + //int[] cohortIndex = {0,626,1280,1933}; + for (int rep=2; rep>=0; rep--) { + for (int s=0; s=cohortIndex[c]) cohort = c; + } + + int posY = marginTop + 100 + cohort * 125 - (int) ((expression[s] - minY) / (maxY - minY) * 100); + int posX = marginLeft + innerWidth + 50 + (int) ((covariate[s] - minX) / (maxX - minX) * 100); + if (genotype[s]<0.5) { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); + g2d.setColor(new Color(204,86,78)); + } else { + if 
(genotype[s]>1.5) { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); + g2d.setColor(new Color(171, 178, 114)); + } else { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); + g2d.setColor(new Color(98,175,255)); + } + } + g2d.fillOval(posX - 1 - rep * 2, posY - 1 - rep * 2, 3 + rep * 4, 3 + rep * 4); + + } + } + + + g2d.setComposite(alphaComposite50); + double[][] valsX = new double[nrSamples][3]; + for (int s=0; s 0.001) pValueString = (new java.text.DecimalFormat("##.###;-##.###", new java.text.DecimalFormatSymbols(java.util.Locale.US))).format(pValueInteraction); + g2d.setFont(new java.awt.Font("Arial", java.awt.Font.BOLD, 14)); + g2d.setColor(new Color(0, 0, 0)); + int posX = marginLeft; + int posY = marginTop + innerHeight + 20; + g2d.drawString("Interaction P-Value: " + pValueString, posX, posY); + + + for (int g=0; g<=2; g++) { + + double valMin = betas[0] + betas[1] * g + minX * betas[2] + betas[3] * g * minX; + double valMax = betas[0] + betas[1] * g + maxX * betas[2] + betas[3] * g * maxX; + int posXMin = marginLeft + (int) ((minX - minX) / (maxX - minX) * innerWidth); + int posYMin = marginTop + innerHeight - (int) ((valMin - minY) / (maxY - minY) * innerHeight); + int posXMax = marginLeft + (int) ((maxX - minX) / (maxX - minX) * innerWidth); + int posYMax = marginTop + innerHeight - (int) ((valMax - minY) / (maxY - minY) * innerHeight); + + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.8f)); + g2d.setColor(new Color(255,255,255)); + g2d.setStroke(new java.awt.BasicStroke(5.0f, java.awt.BasicStroke.CAP_ROUND, java.awt.BasicStroke.JOIN_ROUND)); + g2d.drawLine(posXMin, posYMin, posXMax, posYMax); + if (g<0.5) { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.30f)); + g2d.setColor(new Color(204,86,78)); + } else { + if (g>1.5) { + 
g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.50f)); + g2d.setColor(new Color(171, 178, 114)); + } else { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.50f)); + g2d.setColor(new Color(98,175,255)); + } + } + g2d.setStroke(new java.awt.BasicStroke(3.0f, java.awt.BasicStroke.CAP_ROUND, java.awt.BasicStroke.JOIN_ROUND)); + g2d.drawLine(posXMin, posYMin, posXMax, posYMax); + + } + + try { + javax.imageio.ImageIO.write(bimage, "png", new File(fileName)); + } catch (IOException e) { + System.out.println(e.getMessage()); + e.printStackTrace(); + } + + + } + + public void orthogonalizeDataset(String inputFile) { + + ExpressionDataset dataset = new ExpressionDataset(inputFile); + dataset.transposeDataset(); + dataset.standardNormalizeData(); + int nrVars = dataset.nrProbes; + int nrSamples = dataset.nrSamples; + + double[][] matrix = new double[nrVars][nrSamples]; + for (int s=0; s1) covariance = 1d; + if (covariance<-1) covariance = -1d; + correlationMatrix[p][q] = covariance; + correlationMatrix[q][p] = covariance; + } + } + Jama.EigenvalueDecomposition eig = eigenValueDecomposition(correlationMatrix); + double[] eigenValues = eig.getRealEigenvalues(); + int nrCompsWithPositiveEigenvalues = 0; + for (int e=0; e1e-10) nrCompsWithPositiveEigenvalues++; + } + + ExpressionDataset datasetEigenvectors = new ExpressionDataset(correlationMatrix.length, correlationMatrix.length); + for (int pca = 0; pca < correlationMatrix.length; pca++) { + datasetEigenvectors.rawData[pca] = getEigenVector(eig, pca); + } + datasetEigenvectors.transposeDataset(); + + //Calculate principal components: + ExpressionDataset datasetPCs = new ExpressionDataset(dataset.nrSamples, nrCompsWithPositiveEigenvalues); + for (int pca = 0; pca < nrCompsWithPositiveEigenvalues; pca++) { + datasetPCs.sampleNames[pca] = "Comp" + (pca + 1); + } + for (int p=0; plt(Object,Object)). + *

+ * It implements a generic version of C.A.R Hoare's Quick Sort + * algorithm. + *

+ * The code is based on example given in java swing package. + *

+ * + * This is an example how to use this class to sort a vector of strings: + *

+ *    Vector v = new Vector();
+ *    v.addElement ("X");
+ *    v.addElement ("A");
+ *    new VectorSorter().sort (v);
+ * 
+ * + * @author Martin Senger + * @version $Id: VectorSorter.java,v 1.1.1.1 2004/01/26 09:27:02 lude Exp $ + * @see TestSorter + */ +public class VectorSorter { + + /**************************************************************************** + * A default constructor. It does nothing. + ****************************************************************************/ + public VectorSorter() { + } + + /**************************************************************************** + * Sort the given vector. + * By default it is assumed that the vector contains elements of type String. + * If not a subclass must be written which overwrites method + * lt(Object,Object). + *

+ * @param v a vector to be sorted + ****************************************************************************/ + public void sort (Vector v) { + quickSort (v, 0, v.size() - 1); + } + + /**************************************************************************** + * Compare two objects. + *

+ * By default this method works for Strings. It is meant to be overwritten + * for other objects. + *

+ * @param a the first object to be compared + * @param b the second object to be compared + * @return true if the first object is lower than the second one + ****************************************************************************/ + protected boolean lt (Object a, Object b) { + return ((String)a).compareTo ((String)b) < 0; + } + + /**************************************************************************** + * The main algorithm. + ****************************************************************************/ + private void quickSort (Vector v, int lo0, int hi0) { + int lo = lo0; + int hi = hi0; + Object mid; + + if (hi0 > lo0) { + // Arbitrarily establishing partition element as the midpoint of + // the array. + mid = v.elementAt ((lo0 + hi0) / 2); + + // loop through the array until indices cross + while (lo <= hi) { + // find the first element that is greater than or equal to + // the partition element starting from the left Index. + while ((lo < hi0) && lt (v.elementAt (lo), mid)) { + ++lo; + } + + // find an element that is smaller than or equal to + // the partition element starting from the right Index. + while ((hi > lo0) && lt (mid, v.elementAt(hi))) { + --hi; + } + + // if the indexes have not crossed, swap + if (lo <= hi) { + swap (v, lo, hi); + ++lo; + --hi; + } + } + + + // If the right index has not reached the left side of array + // must now sort the left partition. + if (lo0 < hi) { + quickSort (v, lo0, hi); + } + + // If the left index has not reached the right side of array + // must now sort the right partition. 
+ if (lo < hi0) { + quickSort (v, lo, hi0); + } + } + } + + private static void swap (Vector a, int i, int j) { + Object T = a.elementAt(i); + a.setElementAt (a.elementAt(j), i); + a.setElementAt (T, j); + } + +} diff --git a/eQTLInteractionAnalyser/src/test/java/nl/systemsgenetics/eqtlinteractionanalyser/AppTest.java b/eQTLInteractionAnalyser/src/test/java/nl/systemsgenetics/eqtlinteractionanalyser/AppTest.java new file mode 100644 index 000000000..fd6b2c4e5 --- /dev/null +++ b/eQTLInteractionAnalyser/src/test/java/nl/systemsgenetics/eqtlinteractionanalyser/AppTest.java @@ -0,0 +1,38 @@ +package nl.systemsgenetics.eqtlinteractionanalyser; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +/** + * Unit test for simple App. + */ +public class AppTest + extends TestCase +{ + /** + * Create the test case + * + * @param testName name of the test case + */ + public AppTest( String testName ) + { + super( testName ); + } + + /** + * @return the suite of tests being tested + */ + public static Test suite() + { + return new TestSuite( AppTest.class ); + } + + /** + * Rigourous Test :-) + */ + public void testApp() + { + assertTrue( true ); + } +} diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/QueryBinaryInteraction.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/QueryBinaryInteraction.java index ebe9246e9..17978aafb 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/QueryBinaryInteraction.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binaryInteraction/QueryBinaryInteraction.java @@ -24,6 +24,7 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.cli.PosixParser; +import org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression; import umcg.genetica.containers.Pair; import 
umcg.genetica.io.binInteraction.BinaryInteractionCohort; import umcg.genetica.io.binInteraction.BinaryInteractionFile; diff --git a/pom.xml b/pom.xml index f8da03aba..afe972e4f 100644 --- a/pom.xml +++ b/pom.xml @@ -82,5 +82,6 @@ eqtl-functional-enrichment GeneticRiskScoreCalculator BinaryMetaAnalyzer - + eQTLInteractionAnalyser + \ No newline at end of file From 3ef35d7b7ff3457925b056ac314f2ba29e05a526 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Tue, 30 Jun 2015 12:35:34 +0200 Subject: [PATCH 055/143] Fix? --- eQTLInteractionAnalyser/pom.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/eQTLInteractionAnalyser/pom.xml b/eQTLInteractionAnalyser/pom.xml index 936e42c66..02dffd52a 100644 --- a/eQTLInteractionAnalyser/pom.xml +++ b/eQTLInteractionAnalyser/pom.xml @@ -32,9 +32,9 @@ 0.10.0 - gov.nist.math - jama - 1.0.3 + gov.nist.math.jama + gov.nist.math.jama + 1.1.1 junit From dc96dc173d9d0723a194a10bf5aff0d7803acdf1 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 2 Jul 2015 09:59:15 +0200 Subject: [PATCH 056/143] Regress out covariates --- .../TestEQTLDatasetForInteractions.java | 72 ++++++++++++++++--- 1 file changed, 61 insertions(+), 11 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 7907f799a..2d0d47dbc 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -36,10 +36,56 @@ public TestEQTLDatasetForInteractions(String inputDir) { //preprocessData(); - String[] covsToCorrect = 
{"gender","GC","MEDIAN_5PRIME_BIAS","MEDIAN_3PRIME_BIAS"}; - performInteractionAnalysis(covsToCorrect); + if (1==2) { + String[] covsToCorrect = {"gender","GC","MEDIAN_5PRIME_BIAS","MEDIAN_3PRIME_BIAS","ENSG00000116701","ENSG00000126353"}; + while (1==1) { + String topCov = performInteractionAnalysis(covsToCorrect); + String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; + for (int c=0;c 4 && absZDiff > maxAbsZDiff) { + maxAbsZDiff = absZDiff; + output = nrCovsRemoved + "\t" + p + "\t" + dataset.probeNames[p] + "\t" + q + "\t" + dataset.sampleNames[q] + "\t" + dataset.rawData[p][q] + "\t" + dataset2.rawData[p][q] + "\t" + zDiff; + } + } + if (maxAbsZDiff > 4) { + System.out.println(output); + } + } + } + + System.exit(0); + } public void preprocessData() { @@ -85,7 +131,7 @@ public void preprocessData() { } - public void performInteractionAnalysis(String[] covsToCorrect) { + public String performInteractionAnalysis(String[] covsToCorrect) { HashMap hashEQTLs = null; HashMap hashSamples = new HashMap(); @@ -285,7 +331,7 @@ public void performInteractionAnalysis(String[] covsToCorrect) { System.exit(0); } - if (1==1) { + if (1==2) { ExpressionDataset datasetICA = new ExpressionDataset("/Users/lude/Documents/ICA/mixingmatrix.txt"); //ExpressionDataset datasetICA = new ExpressionDataset("/Users/lude/Documents/ICA/signals.txt"); datasetICA.transposeDataset(); @@ -328,7 +374,6 @@ public void performInteractionAnalysis(String[] covsToCorrect) { } cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = new cern.jet.random.tdouble.engine.DRand(); - cern.jet.random.tdouble.StudentT tDistColt = new cern.jet.random.tdouble.StudentT(nrSamples - 4, randomEngine); ExpressionDataset datasetExpressionBeforeEQTLCorrection = new ExpressionDataset(datasetExpression.nrProbes, datasetExpression.nrSamples); for (int p=0; p0 && task%1000==0) { - datasetZScores.save(inputDir + "/InteractionZScoresMatrix.txt"); + if (chi2Sum > maxChi2) { + maxChi2 = chi2Sum; + 
maxChi2Cov = datasetCovariates.probeNames[cov]; } + System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); } catch (ExecutionException ex) { Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); } @@ -462,12 +511,13 @@ public void performInteractionAnalysis(String[] covsToCorrect) { System.out.println(e.getMessage()); } - datasetZScores.save(inputDir + "/InteractionZScoresMatrix.txt"); + System.out.println("Top covariate:\t" + maxChi2 + "\t" + maxChi2Cov); + datasetZScores.save("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + covsToCorrect.length + "Covariates.txt"); - System.exit(0); + return maxChi2Cov; } - System.exit(0); + return null; } public void makeInteractionPlot(String fileName, double[] genotype, double[] expression, double[] covariate, String[] sampleNames) { From 024fca53798cbbe09b3b5dca21996deff5056002 Mon Sep 17 00:00:00 2001 From: Dasha Zhernakova Date: Mon, 6 Jul 2015 10:53:21 +0300 Subject: [PATCH 057/143] small edits to Lude's interaction analysis --- eQTLInteractionAnalyser/pom.xml | 7 +- .../EQTLInteractionAnalyser.java | 9 +- .../TestEQTLDatasetForInteractions.java | 107 +++++++++++++----- .../InteractionAnalysisResults.java | 26 +++++ 4 files changed, 113 insertions(+), 36 deletions(-) diff --git a/eQTLInteractionAnalyser/pom.xml b/eQTLInteractionAnalyser/pom.xml index 02dffd52a..46ab78409 100644 --- a/eQTLInteractionAnalyser/pom.xml +++ b/eQTLInteractionAnalyser/pom.xml @@ -42,6 +42,11 @@ 3.8.1 test + + nl.systemsgenetics + genetica-libraries + 1.0.7-SNAPSHOT + @@ -75,7 +80,7 @@ - + diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index 808d8d861..2b8fd66cf 100644 --- 
a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -6,21 +6,22 @@ package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; +import java.io.IOException; + /** * * @author lude */ public class EQTLInteractionAnalyser { + /** * @param args the command line arguments */ - public static void main(String[] args) { + public static void main(String[] args) throws IOException { // TODO code application logic here - new TestEQTLDatasetForInteractions("/Users/lude/Documents/InteractionAnalyser/"); - //new TestEQTLDatasetForInteractions(args[0]); - + new TestEQTLDatasetForInteractions(args[0], args[1], args[2]); } } diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 2d0d47dbc..bc2196b8c 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -6,12 +6,12 @@ package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; -import java.awt.Color; -import java.awt.Graphics2D; -import java.awt.RenderingHints; +import java.awt.*; import java.awt.image.BufferedImage; +import java.io.BufferedReader; import java.io.File; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; import java.util.Vector; import java.util.concurrent.CompletionService; @@ -21,6 +21,7 @@ import java.util.logging.Level; import 
java.util.logging.Logger; import org.apache.commons.math3.stat.ranking.NaturalRanking; +import umcg.genetica.io.text.TextFile; /** * @@ -29,17 +30,18 @@ public class TestEQTLDatasetForInteractions { String inputDir = null; + String outputDir = null; - public TestEQTLDatasetForInteractions(String inputDir) { + public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws IOException { this.inputDir = inputDir; - + this.outputDir = outputDir; //preprocessData(); - if (1==2) { - String[] covsToCorrect = {"gender","GC","MEDIAN_5PRIME_BIAS","MEDIAN_3PRIME_BIAS","ENSG00000116701","ENSG00000126353"}; + if (1==1) { + String[] covsToCorrect = {"gender","GC","MEDIAN_5PRIME_BIAS","MEDIAN_3PRIME_BIAS"}; while (1==1) { - String topCov = performInteractionAnalysis(covsToCorrect); + String topCov = performInteractionAnalysis(covsToCorrect, null, null); String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; for (int c=0;c eqtlGenes = getEqtls(eQTLfileName); + + String[] covsToCorrect = {"gender","GC","MEDIAN_5PRIME_BIAS","MEDIAN_3PRIME_BIAS"}; + int cnt = 0; + int maxNumTopCovs = 300; + while (cnt < maxNumTopCovs) { + String topCov = performInteractionAnalysis(covsToCorrect, eqtlGenes, outputTopCovs); + String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; + for (int c=0;c getEqtls(String fname) throws IOException { + TextFile file = new TextFile(fname, false); + ArrayList genes = file.readAsArrayList(4, TextFile.tab); + HashMap eqtlGenes = new HashMap(); + for (String gene : genes){ + eqtlGenes.put(gene, null); + } + file.close(); + return eqtlGenes; + + } public void interpretInteractionZScoreMatrix() { - /* - for (int nrCovsRemoved = 11; nrCovsRemoved<=14; nrCovsRemoved++) { - ExpressionDataset dataset = new ExpressionDataset("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt"); + + for (int nrCovsRemoved = 4; nrCovsRemoved<=50; nrCovsRemoved++) { + ExpressionDataset dataset = new 
ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt"); dataset.save(dataset.fileName + ".binary"); } - */ + - for (int nrCovsRemoved = 6; nrCovsRemoved<=13; nrCovsRemoved++) { + for (int nrCovsRemoved = 4; nrCovsRemoved<=50; nrCovsRemoved++) { - ExpressionDataset dataset = new ExpressionDataset("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); - ExpressionDataset dataset2 = new ExpressionDataset("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + (nrCovsRemoved + 1) + "Covariates.txt.binary"); + //ExpressionDataset dataset = new ExpressionDataset("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); + //ExpressionDataset dataset2 = new ExpressionDataset("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + (nrCovsRemoved + 1) + "Covariates.txt.binary"); + ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); + ExpressionDataset dataset2 = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + (nrCovsRemoved + 1) + "Covariates.txt.binary"); + for (int q=0; q pool = new ExecutorCompletionService(threadPool); int nrTasks = 0; @@ -500,7 +541,7 @@ public String performInteractionAnalysis(String[] covsToCorrect) { maxChi2 = chi2Sum; maxChi2Cov = datasetCovariates.probeNames[cov]; } - System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); + //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); } catch (ExecutionException ex) { Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); } @@ -512,14 +553,18 @@ public String performInteractionAnalysis(String[] covsToCorrect) { } System.out.println("Top covariate:\t" + maxChi2 + "\t" + maxChi2Cov); - 
datasetZScores.save("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + covsToCorrect.length + "Covariates.txt"); + outputTopCovs.writeln("Top covariate:\t" + maxChi2 + "\t" + maxChi2Cov); + //datasetZScores.save("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + covsToCorrect.length + "Covariates.txt"); + datasetZScores.save(outputDir + "/InteractionZScoresMatrix-" + covsToCorrect.length + "Covariates.txt"); return maxChi2Cov; } return null; } - + + + public void makeInteractionPlot(String fileName, double[] genotype, double[] expression, double[] covariate, String[] sampleNames) { int nrSamples = genotype.length; diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisResults.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisResults.java index 0e1abfaee..95fe9d84a 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisResults.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/interactionanalysis/InteractionAnalysisResults.java @@ -5,7 +5,12 @@ package eqtlmappingpipeline.interactionanalysis; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedHashSet; + import umcg.genetica.containers.Pair; +import umcg.genetica.io.trityper.SNP; /** * @@ -86,6 +91,7 @@ public class InteractionAnalysisResults { this.rsquared = rsquaredMatrix; } + public String getQcString() { return qcString; } @@ -142,4 +148,24 @@ public double[][] getCovariateSE() { return covariateSE; } + public ArrayList getProbeIds() { + ArrayList probeIds = new ArrayList(); + + for (Pair eqtl : eQTLsTested){ + String gene = eqtl.getRight(); + if (! probeIds.contains(gene)) + probeIds.add(gene); + } + return probeIds; + } + public ArrayList getSNPIds() { + ArrayList snpIds = new ArrayList(); + + for (Pair eqtl : eQTLsTested){ + String snp = eqtl.getLeft(); + if (! 
snpIds.contains(snp)) + snpIds.add(snp); + } + return snpIds; + } } From 257409fbfff0bbb3ba66f8d53cebe6ce8879e303 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Mon, 6 Jul 2015 12:22:33 +0200 Subject: [PATCH 058/143] minor --- .../TestEQTLDatasetForInteractions.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index bc2196b8c..7531de0fe 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -57,6 +57,10 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName) throws IOException { + System.out.println("Input dir: " + inputDir); + System.out.println("Output dir: " + outputDir); + System.out.println("eQTL file: " + eQTLfileName); + this.inputDir = inputDir; this.outputDir = outputDir; TextFile outputTopCovs = new TextFile(outputDir + "/outputTopCovariates.txt", true); @@ -169,7 +173,7 @@ public void preprocessData() { } - public String performInteractionAnalysis(String[] covsToCorrect, HashMap hashEQTLs, TextFile outputTopCovs) throws IOException { + public final String performInteractionAnalysis(String[] covsToCorrect, HashMap hashEQTLs, TextFile outputTopCovs) throws IOException { HashMap hashSamples = new HashMap(); From 2118c6df5c1d93fac993026a1226acd2e26ae653 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Mon, 6 Jul 2015 15:44:12 +0200 Subject: [PATCH 059/143] geuvadis interaction --- 
.../TestEQTLDatasetForInteractions.java | 1892 +++++++++-------- 1 file changed, 952 insertions(+), 940 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 7531de0fe..48f42e33e 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -3,7 +3,6 @@ * To change this template file, choose Tools | Templates * and open the template in the editor. */ - package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; import java.awt.*; @@ -28,950 +27,963 @@ * @author lude */ public class TestEQTLDatasetForInteractions { - - String inputDir = null; - String outputDir = null; - - public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws IOException { - - this.inputDir = inputDir; - this.outputDir = outputDir; - //preprocessData(); - - if (1==1) { - String[] covsToCorrect = {"gender","GC","MEDIAN_5PRIME_BIAS","MEDIAN_3PRIME_BIAS"}; - while (1==1) { - String topCov = performInteractionAnalysis(covsToCorrect, null, null); - String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; - for (int c=0;c eqtlGenes = getEqtls(eQTLfileName); - - String[] covsToCorrect = {"gender","GC","MEDIAN_5PRIME_BIAS","MEDIAN_3PRIME_BIAS"}; - int cnt = 0; - int maxNumTopCovs = 300; - while (cnt < maxNumTopCovs) { - String topCov = performInteractionAnalysis(covsToCorrect, eqtlGenes, outputTopCovs); - String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; - for (int c=0;c getEqtls(String fname) throws IOException { - TextFile file = new 
TextFile(fname, false); - ArrayList genes = file.readAsArrayList(4, TextFile.tab); - HashMap eqtlGenes = new HashMap(); - for (String gene : genes){ - eqtlGenes.put(gene, null); - } - file.close(); - return eqtlGenes; - - } - public void interpretInteractionZScoreMatrix() { - - - for (int nrCovsRemoved = 4; nrCovsRemoved<=50; nrCovsRemoved++) { - ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt"); - dataset.save(dataset.fileName + ".binary"); - } - - - for (int nrCovsRemoved = 4; nrCovsRemoved<=50; nrCovsRemoved++) { - - //ExpressionDataset dataset = new ExpressionDataset("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); - //ExpressionDataset dataset2 = new ExpressionDataset("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + (nrCovsRemoved + 1) + "Covariates.txt.binary"); - ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); - ExpressionDataset dataset2 = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + (nrCovsRemoved + 1) + "Covariates.txt.binary"); - - for (int q=0; q 4 && absZDiff > maxAbsZDiff) { - maxAbsZDiff = absZDiff; - output = nrCovsRemoved + "\t" + p + "\t" + dataset.probeNames[p] + "\t" + q + "\t" + dataset.sampleNames[q] + "\t" + dataset.rawData[p][q] + "\t" + dataset2.rawData[p][q] + "\t" + zDiff; - } - } - if (maxAbsZDiff > 4) { - System.out.println(output); - } - } - } - - System.exit(0); - } - - public void preprocessData() { - - HashMap hashGenotypes = new HashMap(); - HashMap hashExpression = new HashMap(); - HashMap hashEQTLs = new HashMap(); - try { - java.io.BufferedReader in = new java.io.BufferedReader(new java.io.FileReader(new File(inputDir + "/bigTableLude.txt"))); - String str = in.readLine(); - String[] data = str.split("\t"); - for (int d=0; d3) hashSamplesToExclude.put(datasetCovariates.sampleNames[s], null); 
- } - } - if (1==1) { - int index = ((Integer) datasetCovariates.hashProbes.get("MEDIAN_3PRIME_BIAS")).intValue(); - double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[index]); - double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[index]); - for (int s=0; s3) hashSamplesToExclude.put(datasetCovariates.sampleNames[s], null); - } - } - hashSamples = new HashMap(); - for (int s=0; s0) { - for (int comp=0; comp1E-5) { - double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetExpression.rawData[snp]); - for (int s=0; s1E-5) { - double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetCovariates.rawData[p]); - for (int s=0; s - double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); - //Convert the pValue to a Z-Score: - double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); - datasetCovariates.rawData[p][s] = zScore; //Replace original expression value with the Z-Score - } - } - - } - - cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = new cern.jet.random.tdouble.engine.DRand(); - - ExpressionDataset datasetExpressionBeforeEQTLCorrection = new ExpressionDataset(datasetExpression.nrProbes, datasetExpression.nrSamples); - for (int p=0; p eqtlGenes = getEqtls(eQTLfileName); + + String[] covsToCorrect = {"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "CEU", "GBR", "FIN", "TSI", "YRI"}; + int cnt = 0; + int maxNumTopCovs = 300; + while (cnt < maxNumTopCovs) { + String topCov = performInteractionAnalysis(covsToCorrect, eqtlGenes, outputTopCovs); + String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; + for (int c = 0; c < covsToCorrect.length; c++) { + covsToCorrectNew[c] = covsToCorrect[c]; + } + covsToCorrectNew[covsToCorrect.length] = topCov; + covsToCorrect = covsToCorrectNew; + cnt++; + } + outputTopCovs.close(); + } + + private HashMap getEqtls(String fname) throws 
IOException { + TextFile file = new TextFile(fname, false); + ArrayList genes = file.readAsArrayList(4, TextFile.tab); + HashMap eqtlGenes = new HashMap(); + for (String gene : genes) { + eqtlGenes.put(gene, null); + } + file.close(); + return eqtlGenes; + + } + + public void interpretInteractionZScoreMatrix() { + + + for (int nrCovsRemoved = 4; nrCovsRemoved <= 50; nrCovsRemoved++) { + ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt"); + dataset.save(dataset.fileName + ".binary"); + } + + + for (int nrCovsRemoved = 4; nrCovsRemoved <= 50; nrCovsRemoved++) { + + //ExpressionDataset dataset = new ExpressionDataset("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); + //ExpressionDataset dataset2 = new ExpressionDataset("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + (nrCovsRemoved + 1) + "Covariates.txt.binary"); + ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); + ExpressionDataset dataset2 = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + (nrCovsRemoved + 1) + "Covariates.txt.binary"); + + for (int q = 0; q < dataset.nrSamples; q++) { + double maxAbsZDiff = 0; + String output = ""; + for (int p = 0; p < dataset.nrProbes; p++) { + double zDiff = dataset.rawData[p][q] - dataset2.rawData[p][q]; + double absZDiff = Math.abs(zDiff); + if (absZDiff > 4 && absZDiff > maxAbsZDiff) { + maxAbsZDiff = absZDiff; + output = nrCovsRemoved + "\t" + p + "\t" + dataset.probeNames[p] + "\t" + q + "\t" + dataset.sampleNames[q] + "\t" + dataset.rawData[p][q] + "\t" + dataset2.rawData[p][q] + "\t" + zDiff; + } + } + if (maxAbsZDiff > 4) { + System.out.println(output); + } + } + } + + System.exit(0); + } + + public void preprocessData() { + + HashMap hashGenotypes = new HashMap(); + HashMap hashExpression = new HashMap(); + HashMap hashEQTLs = new 
HashMap(); + try { + java.io.BufferedReader in = new java.io.BufferedReader(new java.io.FileReader(new File(inputDir + "/bigTableLude.txt"))); + String str = in.readLine(); + String[] data = str.split("\t"); + for (int d = 0; d < data.length; d++) { + System.out.println(d + "\t" + data[d]); + if (data[d].endsWith("_dosage")) { + hashGenotypes.put(data[d], null); + } + if (data[d].endsWith("_exp")) { + hashExpression.put(data[d], null); + } + } + int itr = 0; + while ((str = in.readLine()) != null) { + if (!str.contains("NA")) { + data = str.split("\t"); + hashEQTLs.put(data[0], null); + itr++; + if (itr % 100 == 0) { + System.out.println(itr); + } + } + } + } catch (Exception e) { + System.out.println("Error:\t" + e.getMessage()); + e.printStackTrace(); + } + + ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt", "\t", hashEQTLs, hashGenotypes); + ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt", "\t", hashEQTLs, hashExpression); + datasetGenotypes.save(datasetGenotypes.fileName + ".Genotypes.binary"); + datasetExpression.save(datasetGenotypes.fileName + ".Expression.binary"); + + ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt"); + datasetCovariates.save(datasetCovariates.fileName + ".Covariates.binary"); + System.exit(0); + + } + + public final String performInteractionAnalysis(String[] covsToCorrect, HashMap hashEQTLs, TextFile outputTopCovs) throws IOException { + + HashMap hashSamples = new HashMap(); + + if (1 == 1) { + + System.out.println("Removing outlier samples!!!"); + HashMap hashCovariates = new HashMap(); + hashCovariates.put("MEDIAN_5PRIME_BIAS", null); + hashCovariates.put("MEDIAN_3PRIME_BIAS", null); + ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", hashCovariates, null); + hashSamples = new HashMap(); + for (int s = 0; s < 
datasetCovariates.nrSamples; s++) { + if (datasetCovariates.rawData[0][s] != 0) { + hashSamples.put(datasetCovariates.sampleNames[s], null); + } + } + datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", hashCovariates, hashSamples); + HashMap hashSamplesToExclude = new HashMap(); + if (1 == 1) { + int index = ((Integer) datasetCovariates.hashProbes.get("MEDIAN_5PRIME_BIAS")).intValue(); + double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[index]); + double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[index]); + for (int s = 0; s < datasetCovariates.nrSamples; s++) { + double z = (datasetCovariates.rawData[index][s] - mean) / stdev; + if (Math.abs(z) > 3) { + hashSamplesToExclude.put(datasetCovariates.sampleNames[s], null); + } + } + } + if (1 == 1) { + int index = ((Integer) datasetCovariates.hashProbes.get("MEDIAN_3PRIME_BIAS")).intValue(); + double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[index]); + double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[index]); + for (int s = 0; s < datasetCovariates.nrSamples; s++) { + double z = (datasetCovariates.rawData[index][s] - mean) / stdev; + if (Math.abs(z) > 3) { + hashSamplesToExclude.put(datasetCovariates.sampleNames[s], null); + } + } + } + hashSamples = new HashMap(); + for (int s = 0; s < datasetCovariates.nrSamples; s++) { + if (!hashSamplesToExclude.containsKey(datasetCovariates.sampleNames[s])) { + hashSamples.put(datasetCovariates.sampleNames[s], null); + hashSamples.put(datasetCovariates.sampleNames[s] + "_exp", null); + hashSamples.put(datasetCovariates.sampleNames[s] + "_dosage", null); + } + } + } + + ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", "\t", hashEQTLs, hashSamples); + ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt.Expression.binary", "\t", hashEQTLs, 
hashSamples); + ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", null, hashSamples); + + org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression regression = new org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression(); + int nrSamples = datasetGenotypes.nrSamples; + + + if (1 == 1) { + //Define a set of covariates that we want to use as correction: + System.out.println("Correcting gene expression data for cohort specific effects and top 25 components"); + //String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; + int nrCompsToCorrectFor = 25; + ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(nrCompsToCorrectFor, datasetGenotypes.nrSamples); + datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; +// for (int p = 0; p < cohorts.length; p++) { +// for (int s = 0; s < datasetGenotypes.nrSamples; s++) { +// if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { +// datasetCovariatesToCorrectFor.rawData[p][s] = 1; +// } +// } +// } + if (nrCompsToCorrectFor > 0) { + for (int comp = 0; comp < nrCompsToCorrectFor; comp++) { + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariatesToCorrectFor.rawData[comp][s] = datasetCovariates.rawData[datasetCovariates.nrProbes - 51 + comp][s]; + } + } + } + + datasetCovariatesToCorrectFor.transposeDataset(); + + datasetCovariatesToCorrectFor.save(inputDir + "/CovariatesToCorrectFor.txt"); + orthogonalizeDataset(inputDir + "/CovariatesToCorrectFor.txt"); + datasetCovariatesToCorrectFor = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.PrincipalComponents.txt"); + datasetCovariatesToCorrectFor.transposeDataset(); + ExpressionDataset datasetCovariatesToCorrectForEigenvalues = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.Eigenvalues.txt"); + for (int snp = 0; snp < datasetExpression.nrProbes; snp++) { + for (int cov = 0; cov < 
datasetCovariatesToCorrectFor.nrProbes; cov++) { + if (datasetCovariatesToCorrectForEigenvalues.rawData[cov][0] > 1E-5) { + double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetExpression.rawData[snp]); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetExpression.rawData[snp][s] -= rc[0] * datasetCovariatesToCorrectFor.rawData[cov][s]; + } + } + } + } + + + } + + + double[] mainEQTLCorr = new double[datasetGenotypes.nrProbes]; + if (1 == 1) { + System.out.println("Enforcing for every eQTL that the genotype dosage positively correlated with gene expression levels:"); + for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { + double corr = JSci.maths.ArrayMath.correlation(datasetGenotypes.rawData[snp], datasetExpression.rawData[snp]); + //System.out.println(datasetExpression.probeNames[snp] + "\t" + snp + "\t" + corr); + + if (corr < 0) { + corr = -corr; + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetGenotypes.rawData[snp][s] = 2 - datasetGenotypes.rawData[snp][s]; + } + } + + mainEQTLCorr[snp] = corr; + } + } + + if (1 == 1) { + + if (1 == 1) { + System.out.println("Correcting covariate data for cohort specific effects:"); +// String[] cohorts = {"LLDeep","LLS","RS","CODAM"}; + ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect.length, datasetGenotypes.nrSamples); + datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; +// for (int p=0; p 1E-5) { + double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetCovariates.rawData[p]); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariates.rawData[p][s] -= rc[0] * datasetCovariatesToCorrectFor.rawData[cov][s]; + } + } + } + double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[p]); + double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[p]); + if (stdev < 1E-5) { + for (int s = 0; s < 
datasetGenotypes.nrSamples; s++) { + datasetCovariates.rawData[p][s] = mean; + } + } + } + } + + + } + + if (1 == 1) { + System.out.println("Correcting covariate data for cis-eQTL effects:"); + for (int p = 0; p < datasetCovariates.nrProbes; p++) { + if (datasetExpression.hashProbes.containsKey(datasetCovariates.probeNames[p])) { + int index = ((Integer) datasetExpression.hashProbes.get(datasetCovariates.probeNames[p])).intValue(); + double[] rc = getLinearRegressionCoefficients(datasetGenotypes.rawData[index], datasetCovariates.rawData[p]); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariates.rawData[p][s] -= rc[0] * datasetGenotypes.rawData[index][s]; + } + } + } + } + + if (1 == 2) { + datasetCovariates.save(inputDir + "/CovariatesCorrected.txt"); + HashMap hashProbesToFilter = new HashMap(); + for (int p = 0; p < datasetCovariates.nrProbes; p++) { + if (datasetCovariates.probeNames[p].startsWith("ENSG")) { + hashProbesToFilter.put(datasetCovariates.probeNames[p], null); + } + } + ExpressionDataset datasetCovariatesCorrected = new ExpressionDataset(inputDir + "/CovariatesCorrected.txt", "\t", hashProbesToFilter, null); + datasetCovariatesCorrected.transposeDataset(); + datasetCovariatesCorrected.save(inputDir + "/CovariatesCorrected.txt"); + System.exit(0); + } + + if (1 == 2) { + ExpressionDataset datasetICA = new ExpressionDataset("/Users/lude/Documents/ICA/mixingmatrix.txt"); + //ExpressionDataset datasetICA = new ExpressionDataset("/Users/lude/Documents/ICA/signals.txt"); + datasetICA.transposeDataset(); + for (int p = 0; p < datasetICA.nrProbes; p++) { + datasetCovariates.rawData[p] = datasetICA.rawData[p]; + datasetCovariates.probeNames[p] = datasetICA.probeNames[p]; + if (p == 7) { + for (int q = 0; q < datasetCovariates.nrProbes; q++) { + double corr = JSci.maths.ArrayMath.correlation(datasetICA.rawData[p], datasetCovariates.rawData[q]); + System.out.println(p + "\t" + datasetICA.probeNames[p] + "\t" + q + "\t" + 
datasetCovariates.probeNames[q] + "\t" + corr + "\t" + corr * corr); + } + } + } + + orthogonalizeDataset("/Users/lude/Documents/ICA/mixingmatrix.txt"); + //System.exit(0); + } + + System.out.println("Enforcing normal distribution on covariates"); + NaturalRanking ranker = new NaturalRanking(); - - for (int p=0; p - double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); - //Convert the pValue to a Z-Score: - double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); - datasetExpression.rawData[p][s] = zScore; //Replace original expression value with the Z-Score - } - } - - } - - if (1==2) { - System.out.println("WARNING: PERMUTING GENOTYPE DATA!!!!"); - String[] cohorts = {"LLDeep","LLS","RS","CODAM"}; - int[] permSampleIDs = new int[datasetGenotypes.nrSamples]; - for (int p=0; p pool = new ExecutorCompletionService(threadPool); - int nrTasks = 0; - for (int cov = 0; cov 0) { - PerformInteractionAnalysisPermutationTask task = new PerformInteractionAnalysisPermutationTask(datasetGenotypes, datasetExpression, datasetCovariates, cov); - pool.submit(task); - nrTasks++; - } - } - - String maxChi2Cov = ""; - double maxChi2 = 0; - try { - - for (int task = 0; task maxChi2) { - maxChi2 = chi2Sum; - maxChi2Cov = datasetCovariates.probeNames[cov]; - } - //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); - } catch (ExecutionException ex) { - Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); - } - } - threadPool.shutdown(); - } catch (Exception e) { - e.printStackTrace(); - System.out.println(e.getMessage()); - } - - System.out.println("Top covariate:\t" + maxChi2 + "\t" + maxChi2Cov); - outputTopCovs.writeln("Top covariate:\t" + maxChi2 + "\t" + maxChi2Cov); - //datasetZScores.save("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + covsToCorrect.length + "Covariates.txt"); - datasetZScores.save(outputDir + 
"/InteractionZScoresMatrix-" + covsToCorrect.length + "Covariates.txt"); - - return maxChi2Cov; - } - - return null; - } - - - - public void makeInteractionPlot(String fileName, double[] genotype, double[] expression, double[] covariate, String[] sampleNames) { - - int nrSamples = genotype.length; - - int[] cohortIndex = new int[4]; - String[] cohorts = {"LLDeep","LLS","RS","CODAM"}; - for (int cohort=0; cohort=0; rep--) { - for (int s=0; s1.5) { - g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); - g2d.setColor(new Color(171, 178, 114)); - } else { - g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); - g2d.setColor(new Color(98,175,255)); - } - } - g2d.fillOval(posX - 3 - rep * 4, posY - 3 - rep * 4, 7 + rep * 8, 7 + rep * 8); - - } - } - - //Draw the four independent cohorts seperately: - //int[] cohortIndex = {0,626,1280,1933}; - for (int rep=2; rep>=0; rep--) { - for (int s=0; s=cohortIndex[c]) cohort = c; - } - - int posY = marginTop + 100 + cohort * 125 - (int) ((expression[s] - minY) / (maxY - minY) * 100); - int posX = marginLeft + innerWidth + 50 + (int) ((covariate[s] - minX) / (maxX - minX) * 100); - if (genotype[s]<0.5) { - g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); - g2d.setColor(new Color(204,86,78)); - } else { - if (genotype[s]>1.5) { - g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); - g2d.setColor(new Color(171, 178, 114)); - } else { - g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); - g2d.setColor(new Color(98,175,255)); - } - } - g2d.fillOval(posX - 1 - rep * 2, posY - 1 - rep * 2, 3 + rep * 4, 3 + rep * 4); - - } - } - - - g2d.setComposite(alphaComposite50); - double[][] valsX = new double[nrSamples][3]; - for 
(int s=0; s 0.001) pValueString = (new java.text.DecimalFormat("##.###;-##.###", new java.text.DecimalFormatSymbols(java.util.Locale.US))).format(pValueInteraction); - g2d.setFont(new java.awt.Font("Arial", java.awt.Font.BOLD, 14)); - g2d.setColor(new Color(0, 0, 0)); - int posX = marginLeft; - int posY = marginTop + innerHeight + 20; - g2d.drawString("Interaction P-Value: " + pValueString, posX, posY); - - - for (int g=0; g<=2; g++) { - - double valMin = betas[0] + betas[1] * g + minX * betas[2] + betas[3] * g * minX; - double valMax = betas[0] + betas[1] * g + maxX * betas[2] + betas[3] * g * maxX; - int posXMin = marginLeft + (int) ((minX - minX) / (maxX - minX) * innerWidth); - int posYMin = marginTop + innerHeight - (int) ((valMin - minY) / (maxY - minY) * innerHeight); - int posXMax = marginLeft + (int) ((maxX - minX) / (maxX - minX) * innerWidth); - int posYMax = marginTop + innerHeight - (int) ((valMax - minY) / (maxY - minY) * innerHeight); - - g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.8f)); - g2d.setColor(new Color(255,255,255)); - g2d.setStroke(new java.awt.BasicStroke(5.0f, java.awt.BasicStroke.CAP_ROUND, java.awt.BasicStroke.JOIN_ROUND)); - g2d.drawLine(posXMin, posYMin, posXMax, posYMax); - if (g<0.5) { - g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.30f)); - g2d.setColor(new Color(204,86,78)); - } else { - if (g>1.5) { - g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.50f)); - g2d.setColor(new Color(171, 178, 114)); - } else { - g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.50f)); - g2d.setColor(new Color(98,175,255)); - } - } - g2d.setStroke(new java.awt.BasicStroke(3.0f, java.awt.BasicStroke.CAP_ROUND, java.awt.BasicStroke.JOIN_ROUND)); - g2d.drawLine(posXMin, posYMin, posXMax, posYMax); - - } - - try { - javax.imageio.ImageIO.write(bimage, "png", new File(fileName)); - } catch 
(IOException e) { - System.out.println(e.getMessage()); - e.printStackTrace(); - } - - - } - - public void orthogonalizeDataset(String inputFile) { - - ExpressionDataset dataset = new ExpressionDataset(inputFile); - dataset.transposeDataset(); - dataset.standardNormalizeData(); - int nrVars = dataset.nrProbes; - int nrSamples = dataset.nrSamples; - - double[][] matrix = new double[nrVars][nrSamples]; - for (int s=0; s1) covariance = 1d; - if (covariance<-1) covariance = -1d; - correlationMatrix[p][q] = covariance; - correlationMatrix[q][p] = covariance; - } - } - Jama.EigenvalueDecomposition eig = eigenValueDecomposition(correlationMatrix); - double[] eigenValues = eig.getRealEigenvalues(); - int nrCompsWithPositiveEigenvalues = 0; - for (int e=0; e1e-10) nrCompsWithPositiveEigenvalues++; - } - - ExpressionDataset datasetEigenvectors = new ExpressionDataset(correlationMatrix.length, correlationMatrix.length); - for (int pca = 0; pca < correlationMatrix.length; pca++) { - datasetEigenvectors.rawData[pca] = getEigenVector(eig, pca); - } - datasetEigenvectors.transposeDataset(); - - //Calculate principal components: - ExpressionDataset datasetPCs = new ExpressionDataset(dataset.nrSamples, nrCompsWithPositiveEigenvalues); - for (int pca = 0; pca < nrCompsWithPositiveEigenvalues; pca++) { - datasetPCs.sampleNames[pca] = "Comp" + (pca + 1); - } - for (int p=0; p + double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); + //Convert the pValue to a Z-Score: + double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); + datasetCovariates.rawData[p][s] = zScore; //Replace original expression value with the Z-Score + } + } + + } + + cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = new cern.jet.random.tdouble.engine.DRand(); + + ExpressionDataset datasetExpressionBeforeEQTLCorrection = new ExpressionDataset(datasetExpression.nrProbes, datasetExpression.nrSamples); + for (int p = 0; p < datasetExpression.nrProbes; p++) { + 
for (int s = 0; s < datasetExpression.nrSamples; s++) { + datasetExpressionBeforeEQTLCorrection.rawData[p][s] = datasetExpression.rawData[p][s]; + } + } + + if (1 == 1) { + System.out.println("Correcting expression data for predefined gene environment interaction effects (GC content, Gender, 5'Median Bias, 3'Median Bias):"); + int[] covsToCorrectIndex = new int[covsToCorrect.length]; + for (int c = 0; c < covsToCorrect.length; c++) { + covsToCorrectIndex[c] = ((Integer) datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); + } + for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { + double[][] valsX = new double[nrSamples][1 + covsToCorrect.length * 2]; //store genotypes, covariates, interactions + for (int s = 0; s < nrSamples; s++) { + valsX[s][0] = datasetGenotypes.rawData[snp][s]; //genotypes + } + for (int c = 0; c < covsToCorrect.length; c++) { + for (int s = 0; s < nrSamples; s++) { + valsX[s][c * 2 + 1] = datasetCovariates.rawData[covsToCorrectIndex[c]][s]; //covariate + valsX[s][c * 2 + 2] = valsX[s][0] * valsX[s][c * 2 + 1]; //interction + } + } + double[] valsY = datasetExpression.rawData[snp]; + regression.newSampleData(valsY, valsX); + datasetExpression.rawData[snp] = regression.estimateResiduals(); + } + } + + + if (1 == 1) { + System.out.println("Enforcing normal distribution on expression data:"); + + NaturalRanking ranker = new NaturalRanking(); + for (int p = 0; p < datasetExpression.nrProbes; p++) { + //Rank order the expression values: + double[] values = new double[datasetExpression.nrSamples]; + for (int s = 0; s < datasetExpression.nrSamples; s++) { + values[s] = datasetExpression.rawData[p][s]; + } + + double[] rankedValues = ranker.rank(values); + //Replace the original expression value with the standard distribution enforce: + for (int s = 0; s < datasetExpression.nrSamples; s++) { + //Convert the rank to a proportion, with range <0, 1> + double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); + 
//Convert the pValue to a Z-Score: + double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); + datasetExpression.rawData[p][s] = zScore; //Replace original expression value with the Z-Score + } + } + + } + + if (1 == 2) { + System.out.println("WARNING: PERMUTING GENOTYPE DATA!!!!"); + String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; + int[] permSampleIDs = new int[datasetGenotypes.nrSamples]; + for (int p = 0; p < cohorts.length; p++) { + Vector vecSamples = new Vector(); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { + vecSamples.add(s); + } + } + int nrSamplesThisCohort = vecSamples.size(); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { + int randomSample = ((Integer) vecSamples.remove((int) ((double) vecSamples.size() * Math.random()))).intValue(); + permSampleIDs[s] = randomSample; + } + } + } + ExpressionDataset datasetGenotypes2 = new ExpressionDataset(datasetGenotypes.nrProbes, datasetGenotypes.nrSamples); + datasetGenotypes2.probeNames = datasetGenotypes.probeNames; + datasetGenotypes2.sampleNames = datasetGenotypes.sampleNames; + datasetGenotypes2.recalculateHashMaps(); + for (int p = 0; p < datasetGenotypes2.nrProbes; p++) { + for (int s = 0; s < datasetGenotypes2.nrSamples; s++) { + datasetGenotypes2.rawData[p][s] = datasetGenotypes.rawData[p][permSampleIDs[s]]; + } + } + datasetGenotypes = datasetGenotypes2; + } + + + if (1 == 1) { + + + + ExpressionDataset datasetZScores = new ExpressionDataset(datasetCovariates.nrProbes, datasetExpression.nrProbes); + datasetZScores.probeNames = datasetCovariates.probeNames; + datasetZScores.sampleNames = datasetGenotypes.probeNames; + datasetZScores.recalculateHashMaps(); + + + + java.util.concurrent.ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + CompletionService pool = new 
ExecutorCompletionService(threadPool); + int nrTasks = 0; + for (int cov = 0; cov < datasetCovariates.nrProbes; cov++) { + double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[cov]); + if (stdev > 0) { + PerformInteractionAnalysisPermutationTask task = new PerformInteractionAnalysisPermutationTask(datasetGenotypes, datasetExpression, datasetCovariates, cov); + pool.submit(task); + nrTasks++; + } + } + + String maxChi2Cov = ""; + double maxChi2 = 0; + try { + + for (int task = 0; task < nrTasks; task++) { + try { + DoubleArrayIntegerObject result = pool.take().get(); + int cov = result.intValue; + double chi2Sum = 0; + for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { + double z = result.doubleArray[snp]; + datasetZScores.rawData[cov][snp] = z; + chi2Sum += z * z; + } + if (chi2Sum > maxChi2) { + maxChi2 = chi2Sum; + maxChi2Cov = datasetCovariates.probeNames[cov]; + } + //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); + } catch (ExecutionException ex) { + Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); + } + } + threadPool.shutdown(); + } catch (Exception e) { + e.printStackTrace(); + System.out.println(e.getMessage()); + } + + System.out.println("Top covariate:\t" + maxChi2 + "\t" + maxChi2Cov); + outputTopCovs.writeln("Top covariate:\t" + maxChi2 + "\t" + maxChi2Cov); + //datasetZScores.save("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + covsToCorrect.length + "Covariates.txt"); + datasetZScores.save(outputDir + "/InteractionZScoresMatrix-" + covsToCorrect.length + "Covariates.txt"); + + return maxChi2Cov; + } + + return null; + } + + public void makeInteractionPlot(String fileName, double[] genotype, double[] expression, double[] covariate, String[] sampleNames) { + + int nrSamples = genotype.length; + + int[] cohortIndex = new int[4]; + String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; + 
for (int cohort = 0; cohort < cohorts.length; cohort++) { + for (int s = 0; s < nrSamples; s++) { + if (sampleNames[s].startsWith(cohorts[cohort])) { + cohortIndex[cohort] = s; + break; + } + } + } + + int marginLeft = 100; + int marginRight = 200; + int marginTop = 100; + int marginBottom = 100; + int innerHeight = 500; + int innerWidth = 500; + int docWidth = marginLeft + marginRight + innerWidth; + int docHeight = marginTop + marginBottom + innerHeight; + + BufferedImage bimage = new BufferedImage(docWidth, docHeight, BufferedImage.TYPE_INT_RGB); + Graphics2D g2d = bimage.createGraphics(); + + g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON); + g2d.setColor(new Color(255, 255, 255)); + g2d.fillRect(0, 0, docWidth, docHeight); + java.awt.AlphaComposite alphaComposite10 = java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.10f); + java.awt.AlphaComposite alphaComposite25 = java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.25f); + java.awt.AlphaComposite alphaComposite50 = java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.50f); + java.awt.AlphaComposite alphaComposite100 = java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC, 1.00f); + + float fontSize = 12f; + java.awt.Font font = new java.awt.Font("Gill Sans MT", java.awt.Font.PLAIN, (int) fontSize); + java.awt.Font fontBold = new java.awt.Font("Gill Sans MT", java.awt.Font.BOLD, (int) fontSize); + java.awt.Font fontSmall = new java.awt.Font("Gill Sans MT", java.awt.Font.PLAIN, 8); + java.awt.Font fontBoldSmall = new java.awt.Font("Gill Sans MT", java.awt.Font.BOLD, 8); + + java.awt.Color dataColor[] = new Color[10]; + dataColor[0] = new java.awt.Color(167, 72, 20); + dataColor[1] = new java.awt.Color(62, 138, 20); + dataColor[2] = new java.awt.Color(228, 171, 0); + dataColor[3] = new java.awt.Color(0, 148, 183); + dataColor[4] = new java.awt.Color(119, 80, 152); + dataColor[5] = new 
java.awt.Color(106, 106, 106); + dataColor[6] = new java.awt.Color(212, 215, 10); + dataColor[7] = new java.awt.Color(210, 111, 0); + dataColor[8] = new java.awt.Color(0, 0, 141); + dataColor[9] = new java.awt.Color(190, 190, 190); + + g2d.setComposite(alphaComposite50); + g2d.setColor(new Color(0, 0, 0)); + g2d.drawLine(marginLeft, marginTop, marginLeft, marginTop + innerHeight); + g2d.drawLine(marginLeft, marginTop + innerHeight, marginLeft + innerWidth, marginTop + innerHeight); + + double minX = JSci.maths.ArrayMath.min(covariate); + double maxX = JSci.maths.ArrayMath.max(covariate); + double minY = JSci.maths.ArrayMath.min(expression); + double maxY = JSci.maths.ArrayMath.max(expression); + + g2d.setComposite(alphaComposite10); + for (int rep = 2; rep >= 0; rep--) { + for (int s = 0; s < nrSamples; s++) { + int posY = marginTop + innerHeight - (int) ((expression[s] - minY) / (maxY - minY) * innerHeight); + int posX = marginLeft + (int) ((covariate[s] - minX) / (maxX - minX) * innerWidth); + if (genotype[s] < 0.5) { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); + g2d.setColor(new Color(204, 86, 78)); + } else { + if (genotype[s] > 1.5) { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); + g2d.setColor(new Color(171, 178, 114)); + } else { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); + g2d.setColor(new Color(98, 175, 255)); + } + } + g2d.fillOval(posX - 3 - rep * 4, posY - 3 - rep * 4, 7 + rep * 8, 7 + rep * 8); + + } + } + + //Draw the four independent cohorts seperately: + //int[] cohortIndex = {0,626,1280,1933}; + for (int rep = 2; rep >= 0; rep--) { + for (int s = 0; s < nrSamples; s++) { + int cohort = 0; + for (int c = 0; c < cohortIndex.length; c++) { + if (s >= cohortIndex[c]) { + cohort = c; + } + } + + int posY = marginTop + 100 + cohort * 125 
- (int) ((expression[s] - minY) / (maxY - minY) * 100); + int posX = marginLeft + innerWidth + 50 + (int) ((covariate[s] - minX) / (maxX - minX) * 100); + if (genotype[s] < 0.5) { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); + g2d.setColor(new Color(204, 86, 78)); + } else { + if (genotype[s] > 1.5) { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); + g2d.setColor(new Color(171, 178, 114)); + } else { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); + g2d.setColor(new Color(98, 175, 255)); + } + } + g2d.fillOval(posX - 1 - rep * 2, posY - 1 - rep * 2, 3 + rep * 4, 3 + rep * 4); + + } + } + + + g2d.setComposite(alphaComposite50); + double[][] valsX = new double[nrSamples][3]; + for (int s = 0; s < nrSamples; s++) { + valsX[s][0] = genotype[s]; + valsX[s][1] = covariate[s]; + valsX[s][2] = valsX[s][0] * valsX[s][1]; + } + double[] valsY = expression; + org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression regression = new org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression(); + regression.newSampleData(valsY, valsX); + double[] betas = regression.estimateRegressionParameters(); + double betaInteraction = betas[3]; + double seInteraction = regression.estimateRegressionParametersStandardErrors()[3]; + double tInteraction = betaInteraction / seInteraction; + double pValueInteraction = 1; + double zScoreInteraction = 0; + cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = new cern.jet.random.tdouble.engine.DRand(); + cern.jet.random.tdouble.StudentT tDistColt = new cern.jet.random.tdouble.StudentT(genotype.length - 4, randomEngine); + if (tInteraction < 0) { + pValueInteraction = tDistColt.cdf(tInteraction); + if (pValueInteraction < 2.0E-323) { + pValueInteraction = 2.0E-323; + } + zScoreInteraction = 
cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction); + } else { + pValueInteraction = tDistColt.cdf(-tInteraction); + if (pValueInteraction < 2.0E-323) { + pValueInteraction = 2.0E-323; + } + zScoreInteraction = -cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction); + } + pValueInteraction *= 2; + + String pValueString = (new java.text.DecimalFormat("0.##E0", new java.text.DecimalFormatSymbols(java.util.Locale.US))).format(pValueInteraction); + if (pValueInteraction > 0.001) { + pValueString = (new java.text.DecimalFormat("##.###;-##.###", new java.text.DecimalFormatSymbols(java.util.Locale.US))).format(pValueInteraction); + } + g2d.setFont(new java.awt.Font("Arial", java.awt.Font.BOLD, 14)); + g2d.setColor(new Color(0, 0, 0)); + int posX = marginLeft; + int posY = marginTop + innerHeight + 20; + g2d.drawString("Interaction P-Value: " + pValueString, posX, posY); + + + for (int g = 0; g <= 2; g++) { + + double valMin = betas[0] + betas[1] * g + minX * betas[2] + betas[3] * g * minX; + double valMax = betas[0] + betas[1] * g + maxX * betas[2] + betas[3] * g * maxX; + int posXMin = marginLeft + (int) ((minX - minX) / (maxX - minX) * innerWidth); + int posYMin = marginTop + innerHeight - (int) ((valMin - minY) / (maxY - minY) * innerHeight); + int posXMax = marginLeft + (int) ((maxX - minX) / (maxX - minX) * innerWidth); + int posYMax = marginTop + innerHeight - (int) ((valMax - minY) / (maxY - minY) * innerHeight); + + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.8f)); + g2d.setColor(new Color(255, 255, 255)); + g2d.setStroke(new java.awt.BasicStroke(5.0f, java.awt.BasicStroke.CAP_ROUND, java.awt.BasicStroke.JOIN_ROUND)); + g2d.drawLine(posXMin, posYMin, posXMax, posYMax); + if (g < 0.5) { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.30f)); + g2d.setColor(new Color(204, 86, 78)); + } else { + if (g > 1.5) { + 
g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.50f)); + g2d.setColor(new Color(171, 178, 114)); + } else { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.50f)); + g2d.setColor(new Color(98, 175, 255)); + } + } + g2d.setStroke(new java.awt.BasicStroke(3.0f, java.awt.BasicStroke.CAP_ROUND, java.awt.BasicStroke.JOIN_ROUND)); + g2d.drawLine(posXMin, posYMin, posXMax, posYMax); + + } + + try { + javax.imageio.ImageIO.write(bimage, "png", new File(fileName)); + } catch (IOException e) { + System.out.println(e.getMessage()); + e.printStackTrace(); + } + + + } + + public void orthogonalizeDataset(String inputFile) { + + ExpressionDataset dataset = new ExpressionDataset(inputFile); + dataset.transposeDataset(); + dataset.standardNormalizeData(); + int nrVars = dataset.nrProbes; + int nrSamples = dataset.nrSamples; + + double[][] matrix = new double[nrVars][nrSamples]; + for (int s = 0; s < nrVars; s++) { + for (int sample = 0; sample < nrSamples; sample++) { + matrix[s][sample] = dataset.rawData[s][sample]; + } + } + double[][] correlationMatrix = new double[nrVars][nrVars]; + for (int p = 0; p < nrVars; p++) { + correlationMatrix[p][p] = 1d; + for (int q = p + 1; q < nrVars; q++) { + double covariance = 0; + for (int sample = 0; sample < nrSamples; sample++) { + covariance += matrix[p][sample] * matrix[q][sample]; + } + covariance /= (double) (nrSamples - 1); + correlationMatrix[p][q] = covariance; + correlationMatrix[q][p] = covariance; + } + } + Jama.EigenvalueDecomposition eig = eigenValueDecomposition(correlationMatrix); + double[] eigenValues = eig.getRealEigenvalues(); + + double[][] eigenVectors = new double[correlationMatrix.length][correlationMatrix.length]; + ExpressionDataset datasetEigenvectors = new ExpressionDataset(correlationMatrix.length, correlationMatrix.length); + ExpressionDataset datasetEigenvalues = new ExpressionDataset(correlationMatrix.length, 2); + for (int pca 
= 0; pca < correlationMatrix.length; pca++) { + datasetEigenvectors.probeNames[pca] = "Comp" + (pca + 1); + datasetEigenvalues.probeNames[pca] = "Comp" + (pca + 1); + datasetEigenvectors.sampleNames[pca] = dataset.probeNames[pca]; + } + datasetEigenvalues.sampleNames[0] = "Eigenvalues"; + datasetEigenvalues.sampleNames[1] = "ExplainedVariance"; + for (int pca = 0; pca < correlationMatrix.length; pca++) { + datasetEigenvectors.rawData[pca] = getEigenVector(eig, pca); + datasetEigenvalues.rawData[pca][0] = eigenValues[eigenValues.length - 1 - pca]; + datasetEigenvalues.rawData[pca][1] = getEigenValueVar(eigenValues, pca); + System.out.println(pca + "\tExplainedVariance:\t" + getEigenValueVar(eigenValues, pca) + "\tEigenvalue:\t" + eigenValues[eigenValues.length - 1 - pca]); + } + datasetEigenvectors.transposeDataset(); + datasetEigenvectors.save(inputFile + ".Eigenvectors.txt"); + datasetEigenvalues.save(inputFile + ".Eigenvalues.txt"); + + //Calculate principal components: + ExpressionDataset datasetPCs = new ExpressionDataset(dataset.nrSamples, correlationMatrix.length); + for (int pca = 0; pca < correlationMatrix.length; pca++) { + datasetPCs.sampleNames[pca] = "Comp" + (pca + 1); + } + for (int p = 0; p < datasetPCs.nrProbes; p++) { + datasetPCs.probeNames[p] = dataset.sampleNames[p]; + } + for (int pca = 0; pca < correlationMatrix.length; pca++) { + for (int p = 0; p < dataset.nrProbes; p++) { + for (int s = 0; s < dataset.nrSamples; s++) { + datasetPCs.rawData[s][pca] += datasetEigenvectors.rawData[p][pca] * dataset.rawData[p][s]; + } + } + } + datasetPCs.save(dataset.fileName + ".PrincipalComponents.txt"); + + ExpressionDataset datasetFactorloadings = new ExpressionDataset(correlationMatrix.length, correlationMatrix.length); + datasetPCs.transposeDataset(); + for (int p = 0; p < dataset.nrProbes; p++) { + datasetFactorloadings.probeNames[p] = dataset.probeNames[p]; + } + for (int pca = 0; pca < datasetPCs.nrProbes; pca++) { + 
datasetFactorloadings.sampleNames[pca] = "Comp" + (pca + 1); + for (int p = 0; p < dataset.nrProbes; p++) { + datasetFactorloadings.rawData[p][pca] = JSci.maths.ArrayMath.correlation(datasetPCs.rawData[pca], dataset.rawData[p]); + } + } + datasetFactorloadings.save(dataset.fileName + ".Factorloadings.txt"); + + } + + public ExpressionDataset orthogonalizeMatrix(ExpressionDataset dataset) { + + dataset.standardNormalizeData(); + int nrVars = dataset.nrProbes; + int nrSamples = dataset.nrSamples; + double[][] matrix = new double[nrVars][nrSamples]; + for (int s = 0; s < nrVars; s++) { + for (int sample = 0; sample < nrSamples; sample++) { + matrix[s][sample] = dataset.rawData[s][sample]; + } + } + double[][] correlationMatrix = new double[nrVars][nrVars]; + for (int p = 0; p < nrVars; p++) { + correlationMatrix[p][p] = 1d; + for (int q = p + 1; q < nrVars; q++) { + double covariance = 0; + for (int sample = 0; sample < nrSamples; sample++) { + covariance += matrix[p][sample] * matrix[q][sample]; + } + covariance /= (double) (nrSamples - 1); + if (covariance > 1) { + covariance = 1d; + } + if (covariance < -1) { + covariance = -1d; + } + correlationMatrix[p][q] = covariance; + correlationMatrix[q][p] = covariance; + } + } + Jama.EigenvalueDecomposition eig = eigenValueDecomposition(correlationMatrix); + double[] eigenValues = eig.getRealEigenvalues(); + int nrCompsWithPositiveEigenvalues = 0; + for (int e = 0; e < eigenValues.length; e++) { + //System.out.println(e + "\t" + eigenValues[e]); + if (eigenValues[e] > 1e-10) { + nrCompsWithPositiveEigenvalues++; + } + } + + ExpressionDataset datasetEigenvectors = new ExpressionDataset(correlationMatrix.length, correlationMatrix.length); + for (int pca = 0; pca < correlationMatrix.length; pca++) { + datasetEigenvectors.rawData[pca] = getEigenVector(eig, pca); + } + datasetEigenvectors.transposeDataset(); + + //Calculate principal components: + ExpressionDataset datasetPCs = new ExpressionDataset(dataset.nrSamples, 
nrCompsWithPositiveEigenvalues); + for (int pca = 0; pca < nrCompsWithPositiveEigenvalues; pca++) { + datasetPCs.sampleNames[pca] = "Comp" + (pca + 1); + } + for (int p = 0; p < datasetPCs.nrProbes; p++) { + datasetPCs.probeNames[p] = dataset.sampleNames[p]; + } + for (int pca = 0; pca < nrCompsWithPositiveEigenvalues; pca++) { + for (int p = 0; p < dataset.nrProbes; p++) { + for (int s = 0; s < dataset.nrSamples; s++) { + datasetPCs.rawData[s][pca] += datasetEigenvectors.rawData[p][pca] * dataset.rawData[p][s]; + } + } + } + datasetPCs.transposeDataset(); + return datasetPCs; + + } + + public double[] getLinearRegressionCoefficients(double[] xVal, double[] yVal) { + double n = (double) xVal.length; + double sumX = 0; + double sumXX = 0; + double sumY = 0; + double sumXY = 0; + for (int x = 0; x < xVal.length; x++) { + sumX += xVal[x]; + sumXX += xVal[x] * xVal[x]; + sumY += yVal[x]; + sumXY += xVal[x] * yVal[x]; + } + double sXX = sumXX - sumX * sumX / n; + double sXY = sumXY - sumX * sumY / n; + double a = sXY / sXX; + double b = (sumY - a * sumX) / n; + double[] regressionCoefficients = new double[2]; + regressionCoefficients[0] = a; + regressionCoefficients[1] = b; + return regressionCoefficients; + } + + private Jama.EigenvalueDecomposition eigenValueDecomposition(double[][] data) { + Jama.Matrix m = new Jama.Matrix(data); + Jama.EigenvalueDecomposition eig = m.eig(); + return eig; + } + + private double[] getEigenVector(Jama.EigenvalueDecomposition eig, double[] eigenValues, int pca) { + Jama.Matrix eigenValueMatrix = eig.getV(); + double[][] eigenValueMat = eigenValueMatrix.getArray(); + double[] eigenVector = new double[eigenValueMat.length]; + for (int i = 0; i < eigenValueMat.length; i++) { + eigenVector[i] = eigenValueMat[i][eigenValueMat.length - 1 - pca]; // * Math.sqrt(eigenValues[eigenValues.length - 1 - pca]); + } + return eigenVector; + } + + private double[] getEigenVector(Jama.EigenvalueDecomposition eig, int pca) { + Jama.Matrix eigenValueMatrix 
= eig.getV(); + double[][] eigenValueMat = eigenValueMatrix.getArray(); + double[] eigenVector = new double[eigenValueMat.length]; + for (int i = 0; i < eigenValueMat.length; i++) { + eigenVector[i] = eigenValueMat[i][eigenValueMat.length - 1 - pca]; // * Math.sqrt(eigenValues[eigenValues.length - 1 - pca]); + } + return eigenVector; + } + + private double getEigenValueVar(double[] eigenValues, int pca) { + double sumEigenvalues = 0.0; + for (Double d : eigenValues) { + sumEigenvalues += Math.abs(d); + } + double result = eigenValues[eigenValues.length - 1 - pca] / sumEigenvalues; + return result; + } + + private double[] getEigenVectorSVD(Jama.SingularValueDecomposition svd, double[] singularValues, int pca) { + Jama.Matrix eigenValueMatrix = svd.getV(); + double[][] eigenValueMat = eigenValueMatrix.getArray(); + double[] eigenVector = new double[eigenValueMat.length]; + for (int i = 0; i < eigenValueMat.length; i++) { + eigenVector[i] = eigenValueMat[i][pca] * Math.sqrt(singularValues[pca]); + } + return eigenVector; + } } From 70c84965193263699a12a5ae03f0607f19d39292 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Mon, 6 Jul 2015 15:55:24 +0200 Subject: [PATCH 060/143] minor --- .../eqtlinteractionanalyser/ExpressionDataset.java | 1 + 1 file changed, 1 insertion(+) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/ExpressionDataset.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/ExpressionDataset.java index 944dbeef4..4876bf0da 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/ExpressionDataset.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/ExpressionDataset.java @@ -447,6 +447,7 @@ public void standardNormalizeData() { rawData[probeID][s] = (float) (vals[s] / standardDeviation); } } + 
System.out.println("Mean and stdev done"); } From ed97176ad8a2158aed2a56da4845d45594f86579 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Mon, 6 Jul 2015 19:07:55 +0200 Subject: [PATCH 061/143] interactions --- .../TestEQTLDatasetForInteractions.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 48f42e33e..33416d389 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -62,6 +62,9 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String this.inputDir = inputDir; this.outputDir = outputDir; + + //preprocessData(); + TextFile outputTopCovs = new TextFile(outputDir + "/outputTopCovariates.txt", true); HashMap eqtlGenes = getEqtls(eQTLfileName); @@ -225,7 +228,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h } } - ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", "\t", hashEQTLs, hashSamples); + ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", "\t", hashEQTLs, hashSamples); ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt.Expression.binary", "\t", hashEQTLs, hashSamples); ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", null, hashSamples); @@ -475,6 +478,8 @@ public final String performInteractionAnalysis(String[] 
covsToCorrect, HashMap h } } +System.out.println("Expression data now force normal"); + } if (1 == 2) { @@ -551,6 +556,9 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h maxChi2Cov = datasetCovariates.probeNames[cov]; } //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); + if((task + 1) % 512 == 0){ +System.out.println(task + 1 + " tasks processed"); +} } catch (ExecutionException ex) { Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); } From a6969b10582fbad07ee045ff82c7d529ee041a97 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Mon, 6 Jul 2015 19:15:25 +0200 Subject: [PATCH 062/143] interactions --- ...ormInteractionAnalysisPermutationTask.java | 124 +++++++++--------- .../TestEQTLDatasetForInteractions.java | 16 ++- 2 files changed, 73 insertions(+), 67 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java index 98c8a3802..46cd488d5 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java @@ -3,75 +3,79 @@ * To change this template file, choose Tools | Templates * and open the template in the editor. 
*/ - package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; import cern.jet.random.tdouble.engine.DoubleRandomEngine; import java.util.concurrent.Callable; +import org.apache.commons.math3.linear.SingularMatrixException; + /** * * @author lude */ public class PerformInteractionAnalysisPermutationTask implements Callable { - public ExpressionDataset datasetGenotypes; - public ExpressionDataset datasetExpression; - public ExpressionDataset datasetCovariates; - public int covToTest = -1; - public int nrSamples = -1; - public org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression regression = null; - public cern.jet.random.tdouble.StudentT tDistColt = null; - - public PerformInteractionAnalysisPermutationTask(ExpressionDataset datasetGenotypes, ExpressionDataset datasetExpression, ExpressionDataset datasetCovariates, int covToTest) { - this.datasetGenotypes = datasetGenotypes; - this.datasetExpression = datasetExpression; - this.datasetCovariates = datasetCovariates; - this.covToTest = covToTest; - this.nrSamples = datasetGenotypes.nrSamples; - - this.regression = new org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression(); - cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = new cern.jet.random.tdouble.engine.DRand(); - this.tDistColt = new cern.jet.random.tdouble.StudentT(this.nrSamples - 4, randomEngine); - - } - - @Override - public DoubleArrayIntegerObject call() throws Exception { - - double[] zScores = new double[datasetGenotypes.nrProbes]; - - for (int snp=0; snp eqtlGenes = getEqtls(eQTLfileName); @@ -228,7 +228,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h } } - ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", "\t", hashEQTLs, hashSamples); + ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", "\t", hashEQTLs, hashSamples); ExpressionDataset 
datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt.Expression.binary", "\t", hashEQTLs, hashSamples); ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", null, hashSamples); @@ -478,7 +478,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h } } -System.out.println("Expression data now force normal"); + System.out.println("Expression data now force normal"); } @@ -549,16 +549,18 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { double z = result.doubleArray[snp]; datasetZScores.rawData[cov][snp] = z; - chi2Sum += z * z; + if(!Double.isNaN(z)){ + chi2Sum += z * z; + } } if (chi2Sum > maxChi2) { maxChi2 = chi2Sum; maxChi2Cov = datasetCovariates.probeNames[cov]; } //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); - if((task + 1) % 512 == 0){ -System.out.println(task + 1 + " tasks processed"); -} + if ((task + 1) % 512 == 0) { + System.out.println(task + 1 + " tasks processed"); + } } catch (ExecutionException ex) { Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); } From ab581799a6d0123bc8eb9aa3de5c0d82639de131 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Mon, 6 Jul 2015 19:18:53 +0200 Subject: [PATCH 063/143] interaction --- .../TestEQTLDatasetForInteractions.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 2e963e07d..0b732ab0a 100644 --- 
a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -546,9 +546,10 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h DoubleArrayIntegerObject result = pool.take().get(); int cov = result.intValue; double chi2Sum = 0; + double[] covZ = datasetZScores.rawData[cov]; for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { double z = result.doubleArray[snp]; - datasetZScores.rawData[cov][snp] = z; + covZ[snp] = z; if(!Double.isNaN(z)){ chi2Sum += z * z; } From bc1d40bd19e30a1a30dcdfc40ad1cfd1f0bd6e1c Mon Sep 17 00:00:00 2001 From: Dasha Zhernakova Date: Wed, 8 Jul 2015 13:29:23 +0300 Subject: [PATCH 064/143] added command line options --- .DS_Store | Bin 0 -> 6148 bytes .../EQTLInteractionAnalyser.java | 100 ++++++++++++++++-- .../TestEQTLDatasetForInteractions.java | 32 ++---- 3 files changed, 103 insertions(+), 29 deletions(-) create mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 eqtlGenes = getEqtls(eQTLfileName); String[] covsToCorrect = {"gender","GC","MEDIAN_5PRIME_BIAS","MEDIAN_3PRIME_BIAS"}; int cnt = 0; - int maxNumTopCovs = 300; + while (cnt < maxNumTopCovs) { + System.out.println("Counter: " + cnt); String topCov = performInteractionAnalysis(covsToCorrect, eqtlGenes, outputTopCovs); String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; for (int c=0;c getEqtls(String fname) throws IOException { @@ -100,8 +88,6 @@ public void interpretInteractionZScoreMatrix() { for (int 
nrCovsRemoved = 4; nrCovsRemoved<=50; nrCovsRemoved++) { - //ExpressionDataset dataset = new ExpressionDataset("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); - //ExpressionDataset dataset2 = new ExpressionDataset("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + (nrCovsRemoved + 1) + "Covariates.txt.binary"); ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); ExpressionDataset dataset2 = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + (nrCovsRemoved + 1) + "Covariates.txt.binary"); From 323a31cee976bb00fe461867d0375d85ded66f0f Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 9 Jul 2015 14:59:17 +0200 Subject: [PATCH 065/143] Interaction --- .../CompareToGeuvadis.java | 16 ++++++++++++++++ .../EQTLInteractionAnalyser.java | 17 ++++++++--------- .../ExpressionDataset.java | 4 ++-- 3 files changed, 26 insertions(+), 11 deletions(-) create mode 100644 eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java new file mode 100644 index 000000000..131a2db04 --- /dev/null +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java @@ -0,0 +1,16 @@ +package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; + +/** + * + * @author Patrick Deelen + */ +public class CompareToGeuvadis { + + /** + * @param args the command line arguments + */ + public static void main(String[] args) { + // TODO code application logic here + } + +} diff --git 
a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index 2b8fd66cf..582bd7ce2 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -3,7 +3,6 @@ * To change this template file, choose Tools | Templates * and open the template in the editor. */ - package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; import java.io.IOException; @@ -15,13 +14,13 @@ public class EQTLInteractionAnalyser { - /** - * @param args the command line arguments - */ - public static void main(String[] args) throws IOException { - // TODO code application logic here - + /** + * @param args the command line arguments + */ + public static void main(String[] args) throws IOException { + // TODO code application logic here + new TestEQTLDatasetForInteractions(args[0], args[1], args[2]); } - -} + + } diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/ExpressionDataset.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/ExpressionDataset.java index 4876bf0da..ccb212160 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/ExpressionDataset.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/ExpressionDataset.java @@ -26,8 +26,8 @@ public class ExpressionDataset { public int nrProbes = 0; public String[] probeNames = null; public String[] sampleNames = null; - public HashMap 
hashSamples = new HashMap(); - public HashMap hashProbes = new HashMap(); + public HashMap hashSamples = new HashMap(); + public HashMap hashProbes = new HashMap(); private HashMap hashProbesToInclude = null; private HashMap hashSamplesToInclude = null; public String fileName = null; From 380c631f3a7c8c6b473e9eaaef3d0d45dc9794fd Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 9 Jul 2015 21:04:41 +0200 Subject: [PATCH 066/143] Interactions --- .../CompareToGeuvadis.java | 51 +- .../InteractionPlotter.java | 561 ++++++++++++++++++ .../TestEQTLDatasetForInteractions.java | 218 +------ 3 files changed, 620 insertions(+), 210 deletions(-) create mode 100644 eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/InteractionPlotter.java diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java index 131a2db04..f53b55a67 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java @@ -1,5 +1,8 @@ package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; +import java.util.HashSet; +import java.util.Map; + /** * * @author Patrick Deelen @@ -10,7 +13,53 @@ public class CompareToGeuvadis { * @param args the command line arguments */ public static void main(String[] args) { - // TODO code application logic here + + ExpressionDataset bios = new ExpressionDataset("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-4Covariates.txt.binary"); + ExpressionDataset geuvadis = new 
ExpressionDataset("/Volumes/Promise_RAID/projects/BBMRI/interactionsGeuvadisRegressOut/InteractionZScoresMatrix-9Covariates.txt.binary"); + + HashSet covariatesReplicated = new HashSet(); + HashSet genesReplicated = new HashSet(); + int interactionsReplicated = 0; + + for (Map.Entry covariateEntry : bios.hashProbes.entrySet()) { + for (Map.Entry eQtlGeneEntry : bios.hashSamples.entrySet()) { + + String covariate = covariateEntry.getKey(); + String eQtlGene = eQtlGeneEntry.getKey(); + + double biosInteractionZ = bios.rawData[covariateEntry.getValue()][eQtlGeneEntry.getValue()]; + + if (biosInteractionZ >= 4 || biosInteractionZ <= -4) { + + Integer geuvadisCovI = geuvadis.hashProbes.get(covariate); + Integer geuvadisGenI = geuvadis.hashSamples.get(eQtlGene); + + if (geuvadisCovI != null && geuvadisGenI != null) { + + double geuvadisInteractionZ = geuvadis.rawData[geuvadisCovI][geuvadisGenI]; + + if (geuvadisInteractionZ >= 4 || geuvadisInteractionZ <= -4) { + + covariatesReplicated.add(covariate); + genesReplicated.add(eQtlGene); + interactionsReplicated++; + + System.out.println(covariate + "\t" + eQtlGene + "\t" + biosInteractionZ + "\t" + geuvadisInteractionZ); + + } + + } + + + } + + } + } + + System.out.println("Covariates replicated: " + covariatesReplicated.size()); + System.out.println("Genes replicated: " + genesReplicated.size()); + System.out.println("Interactions replicated: " + interactionsReplicated); + } } diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/InteractionPlotter.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/InteractionPlotter.java new file mode 100644 index 000000000..d3542834e --- /dev/null +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/InteractionPlotter.java @@ -0,0 +1,561 @@ +package 
nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; + +import java.awt.Color; +import java.awt.Graphics2D; +import java.awt.RenderingHints; +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import static nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser.TestEQTLDatasetForInteractions.getEqtls; +import static nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser.TestEQTLDatasetForInteractions.getLinearRegressionCoefficients; +import static nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser.TestEQTLDatasetForInteractions.orthogonalizeDataset; +import org.apache.commons.math3.stat.ranking.NaturalRanking; + +/** + * + * @author Patrick Deelen + */ +public class InteractionPlotter { + + static String inputDir = null; + static String outputDir = null; + + /** + * @param args the command line arguments + */ + public static void main(String[] args) throws IOException { + + //makeInteractionPlot("D:\\tmp\\test.png", new double[]{0,0,0,0.2,1,1,1,1,2,2,2}, new double[]{5,4,3,0.2,8,12,6,7,23,5,7}, new double[]{3,2,1,0.2,2,6,4,6,20,2,5}); + + inputDir = args[1]; + outputDir = args[2]; + String eQTLfileName = args[3]; + + System.out.println("Input dir: " + inputDir); + System.out.println("Output dir: " + outputDir); + System.out.println("eQTL file: " + eQTLfileName); + + + String[] covsToCorrect = {"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "CEU", "GBR", "FIN", "TSI", "YRI"}; + HashMap hashEQTLs = getEqtls(eQTLfileName); + + HashMap hashSamples = new HashMap(); + + if (1 == 1) { + + System.out.println("Removing outlier samples!!!"); + HashMap hashCovariates = new HashMap(); + hashCovariates.put("MEDIAN_5PRIME_BIAS", null); + hashCovariates.put("MEDIAN_3PRIME_BIAS", null); + ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", hashCovariates, null); + hashSamples = new 
HashMap(); + for (int s = 0; s < datasetCovariates.nrSamples; s++) { + if (datasetCovariates.rawData[0][s] != 0) { + hashSamples.put(datasetCovariates.sampleNames[s], null); + } + } + datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", hashCovariates, hashSamples); + HashMap hashSamplesToExclude = new HashMap(); + if (1 == 1) { + int index = ((Integer) datasetCovariates.hashProbes.get("MEDIAN_5PRIME_BIAS")).intValue(); + double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[index]); + double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[index]); + for (int s = 0; s < datasetCovariates.nrSamples; s++) { + double z = (datasetCovariates.rawData[index][s] - mean) / stdev; + if (Math.abs(z) > 3) { + hashSamplesToExclude.put(datasetCovariates.sampleNames[s], null); + } + } + } + if (1 == 1) { + int index = ((Integer) datasetCovariates.hashProbes.get("MEDIAN_3PRIME_BIAS")).intValue(); + double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[index]); + double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[index]); + for (int s = 0; s < datasetCovariates.nrSamples; s++) { + double z = (datasetCovariates.rawData[index][s] - mean) / stdev; + if (Math.abs(z) > 3) { + hashSamplesToExclude.put(datasetCovariates.sampleNames[s], null); + } + } + } + hashSamples = new HashMap(); + for (int s = 0; s < datasetCovariates.nrSamples; s++) { + if (!hashSamplesToExclude.containsKey(datasetCovariates.sampleNames[s])) { + hashSamples.put(datasetCovariates.sampleNames[s], null); + hashSamples.put(datasetCovariates.sampleNames[s] + "_exp", null); + hashSamples.put(datasetCovariates.sampleNames[s] + "_dosage", null); + } + } + } + + ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", "\t", hashEQTLs, hashSamples); + ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + 
"/bigTableLude.txt.Expression.binary", "\t", hashEQTLs, hashSamples); + ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", null, hashSamples); + + org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression regression = new org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression(); + int nrSamples = datasetGenotypes.nrSamples; + + + if (1 == 1) { + //Define a set of covariates that we want to use as correction: + System.out.println("Correcting gene expression data for cohort specific effects and top 25 components"); + //String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; + int nrCompsToCorrectFor = 25; + ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(nrCompsToCorrectFor, datasetGenotypes.nrSamples); + datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; +// for (int p = 0; p < cohorts.length; p++) { +// for (int s = 0; s < datasetGenotypes.nrSamples; s++) { +// if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { +// datasetCovariatesToCorrectFor.rawData[p][s] = 1; +// } +// } +// } + if (nrCompsToCorrectFor > 0) { + for (int comp = 0; comp < nrCompsToCorrectFor; comp++) { + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariatesToCorrectFor.rawData[comp][s] = datasetCovariates.rawData[datasetCovariates.nrProbes - 51 + comp][s]; + } + } + } + + datasetCovariatesToCorrectFor.transposeDataset(); + + datasetCovariatesToCorrectFor.save(inputDir + "/CovariatesToCorrectFor.txt"); + orthogonalizeDataset(inputDir + "/CovariatesToCorrectFor.txt"); + datasetCovariatesToCorrectFor = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.PrincipalComponents.txt"); + datasetCovariatesToCorrectFor.transposeDataset(); + ExpressionDataset datasetCovariatesToCorrectForEigenvalues = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.Eigenvalues.txt"); + for (int snp = 0; snp < 
datasetExpression.nrProbes; snp++) { + for (int cov = 0; cov < datasetCovariatesToCorrectFor.nrProbes; cov++) { + if (datasetCovariatesToCorrectForEigenvalues.rawData[cov][0] > 1E-5) { + double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetExpression.rawData[snp]); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetExpression.rawData[snp][s] -= rc[0] * datasetCovariatesToCorrectFor.rawData[cov][s]; + } + } + } + } + + + } + + + double[] mainEQTLCorr = new double[datasetGenotypes.nrProbes]; + if (1 == 1) { + System.out.println("Enforcing for every eQTL that the genotype dosage positively correlated with gene expression levels:"); + for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { + double corr = JSci.maths.ArrayMath.correlation(datasetGenotypes.rawData[snp], datasetExpression.rawData[snp]); + //System.out.println(datasetExpression.probeNames[snp] + "\t" + snp + "\t" + corr); + + if (corr < 0) { + corr = -corr; + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetGenotypes.rawData[snp][s] = 2 - datasetGenotypes.rawData[snp][s]; + } + } + + mainEQTLCorr[snp] = corr; + } + } + + if (1 == 1) { + + if (1 == 1) { + System.out.println("Correcting covariate data for cohort specific effects:"); +// String[] cohorts = {"LLDeep","LLS","RS","CODAM"}; + ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect.length, datasetGenotypes.nrSamples); + datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; +// for (int p=0; p 1E-5) { + double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetCovariates.rawData[p]); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariates.rawData[p][s] -= rc[0] * datasetCovariatesToCorrectFor.rawData[cov][s]; + } + } + } + double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[p]); + double mean = 
JSci.maths.ArrayMath.mean(datasetCovariates.rawData[p]); + if (stdev < 1E-5) { + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariates.rawData[p][s] = mean; + } + } + } + } + + + } + + if (1 == 1) { + System.out.println("Correcting covariate data for cis-eQTL effects:"); + for (int p = 0; p < datasetCovariates.nrProbes; p++) { + if (datasetExpression.hashProbes.containsKey(datasetCovariates.probeNames[p])) { + int index = ((Integer) datasetExpression.hashProbes.get(datasetCovariates.probeNames[p])).intValue(); + double[] rc = getLinearRegressionCoefficients(datasetGenotypes.rawData[index], datasetCovariates.rawData[p]); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariates.rawData[p][s] -= rc[0] * datasetGenotypes.rawData[index][s]; + } + } + } + } + + if (1 == 2) { + datasetCovariates.save(inputDir + "/CovariatesCorrected.txt"); + HashMap hashProbesToFilter = new HashMap(); + for (int p = 0; p < datasetCovariates.nrProbes; p++) { + if (datasetCovariates.probeNames[p].startsWith("ENSG")) { + hashProbesToFilter.put(datasetCovariates.probeNames[p], null); + } + } + ExpressionDataset datasetCovariatesCorrected = new ExpressionDataset(inputDir + "/CovariatesCorrected.txt", "\t", hashProbesToFilter, null); + datasetCovariatesCorrected.transposeDataset(); + datasetCovariatesCorrected.save(inputDir + "/CovariatesCorrected.txt"); + System.exit(0); + } + + if (1 == 2) { + ExpressionDataset datasetICA = new ExpressionDataset("/Users/lude/Documents/ICA/mixingmatrix.txt"); + //ExpressionDataset datasetICA = new ExpressionDataset("/Users/lude/Documents/ICA/signals.txt"); + datasetICA.transposeDataset(); + for (int p = 0; p < datasetICA.nrProbes; p++) { + datasetCovariates.rawData[p] = datasetICA.rawData[p]; + datasetCovariates.probeNames[p] = datasetICA.probeNames[p]; + if (p == 7) { + for (int q = 0; q < datasetCovariates.nrProbes; q++) { + double corr = JSci.maths.ArrayMath.correlation(datasetICA.rawData[p], 
datasetCovariates.rawData[q]); + System.out.println(p + "\t" + datasetICA.probeNames[p] + "\t" + q + "\t" + datasetCovariates.probeNames[q] + "\t" + corr + "\t" + corr * corr); + } + } + } + + orthogonalizeDataset("/Users/lude/Documents/ICA/mixingmatrix.txt"); + //System.exit(0); + } + + System.out.println("Enforcing normal distribution on covariates"); + + NaturalRanking ranker = new NaturalRanking(); + + for (int p = 0; p < datasetCovariates.nrProbes; p++) { + //Rank order the expression values: + double[] values = new double[datasetCovariates.nrSamples]; + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + values[s] = datasetCovariates.rawData[p][s]; + } + double[] rankedValues = ranker.rank(values); + //Replace the original expression value with the standard distribution enforce: + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + //Convert the rank to a proportion, with range <0, 1> + double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); + //Convert the pValue to a Z-Score: + double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); + datasetCovariates.rawData[p][s] = zScore; //Replace original expression value with the Z-Score + } + } + + } + + cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = new cern.jet.random.tdouble.engine.DRand(); + + ExpressionDataset datasetExpressionBeforeEQTLCorrection = new ExpressionDataset(datasetExpression.nrProbes, datasetExpression.nrSamples); + for (int p = 0; p < datasetExpression.nrProbes; p++) { + for (int s = 0; s < datasetExpression.nrSamples; s++) { + datasetExpressionBeforeEQTLCorrection.rawData[p][s] = datasetExpression.rawData[p][s]; + } + } + + if (1 == 1) { + System.out.println("Correcting expression data for predefined gene environment interaction effects (GC content, Gender, 5'Median Bias, 3'Median Bias):"); + int[] covsToCorrectIndex = new int[covsToCorrect.length]; + for (int c = 0; c < covsToCorrect.length; c++) { + covsToCorrectIndex[c] = 
((Integer) datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); + } + for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { + double[][] valsX = new double[nrSamples][1 + covsToCorrect.length * 2]; //store genotypes, covariates, interactions + for (int s = 0; s < nrSamples; s++) { + valsX[s][0] = datasetGenotypes.rawData[snp][s]; //genotypes + } + for (int c = 0; c < covsToCorrect.length; c++) { + for (int s = 0; s < nrSamples; s++) { + valsX[s][c * 2 + 1] = datasetCovariates.rawData[covsToCorrectIndex[c]][s]; //covariate + valsX[s][c * 2 + 2] = valsX[s][0] * valsX[s][c * 2 + 1]; //interction + } + } + double[] valsY = datasetExpression.rawData[snp]; + regression.newSampleData(valsY, valsX); + datasetExpression.rawData[snp] = regression.estimateResiduals(); + } + } + + + if (1 == 1) { + System.out.println("Enforcing normal distribution on expression data:"); + + NaturalRanking ranker = new NaturalRanking(); + + for (int p = 0; p < datasetExpression.nrProbes; p++) { + //Rank order the expression values: + double[] values = new double[datasetExpression.nrSamples]; + for (int s = 0; s < datasetExpression.nrSamples; s++) { + values[s] = datasetExpression.rawData[p][s]; + } + + double[] rankedValues = ranker.rank(values); + //Replace the original expression value with the standard distribution enforce: + for (int s = 0; s < datasetExpression.nrSamples; s++) { + //Convert the rank to a proportion, with range <0, 1> + double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); + //Convert the pValue to a Z-Score: + double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); + datasetExpression.rawData[p][s] = zScore; //Replace original expression value with the Z-Score + } + } + + System.out.println("Expression data now force normal"); + + } + + String eQtlGene = "ENSG00000084072"; + String covariate = ""; + + Integer eQtlGeneI = datasetExpression.hashProbes.get(eQtlGene); + Integer covariateI = 
datasetCovariates.hashProbes.get(covariate); + Integer snpI = eQtlGeneI; + + makeInteractionPlot("D:\\tmp\test2.png", datasetGenotypes.rawData[snpI], datasetExpression.rawData[eQtlGeneI], datasetCovariates.rawData[covariateI]); + + + + + } + + public static void makeInteractionPlot(String fileName, double[] genotype, double[] expression, double[] covariate) { + + int nrSamples = genotype.length; + +// int[] cohortIndex = new int[4]; +// String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; +// for (int cohort = 0; cohort < cohorts.length; cohort++) { +// for (int s = 0; s < nrSamples; s++) { +// if (sampleNames[s].startsWith(cohorts[cohort])) { +// cohortIndex[cohort] = s; +// break; +// } +// } +// } + + int marginLeft = 100; + int marginRight = 200; + int marginTop = 100; + int marginBottom = 100; + int innerHeight = 500; + int innerWidth = 500; + int docWidth = marginLeft + marginRight + innerWidth; + int docHeight = marginTop + marginBottom + innerHeight; + + BufferedImage bimage = new BufferedImage(docWidth, docHeight, BufferedImage.TYPE_INT_RGB); + Graphics2D g2d = bimage.createGraphics(); + + g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON); + g2d.setColor(new Color(255, 255, 255)); + g2d.fillRect(0, 0, docWidth, docHeight); + java.awt.AlphaComposite alphaComposite10 = java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.10f); + java.awt.AlphaComposite alphaComposite25 = java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.25f); + java.awt.AlphaComposite alphaComposite50 = java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.50f); + java.awt.AlphaComposite alphaComposite100 = java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC, 1.00f); + + float fontSize = 12f; + java.awt.Font font = new java.awt.Font("Gill Sans MT", java.awt.Font.PLAIN, (int) fontSize); + java.awt.Font fontBold = new java.awt.Font("Gill Sans MT", java.awt.Font.BOLD, (int) fontSize); + 
java.awt.Font fontSmall = new java.awt.Font("Gill Sans MT", java.awt.Font.PLAIN, 8); + java.awt.Font fontBoldSmall = new java.awt.Font("Gill Sans MT", java.awt.Font.BOLD, 8); + + java.awt.Color dataColor[] = new Color[10]; + dataColor[0] = new java.awt.Color(167, 72, 20); + dataColor[1] = new java.awt.Color(62, 138, 20); + dataColor[2] = new java.awt.Color(228, 171, 0); + dataColor[3] = new java.awt.Color(0, 148, 183); + dataColor[4] = new java.awt.Color(119, 80, 152); + dataColor[5] = new java.awt.Color(106, 106, 106); + dataColor[6] = new java.awt.Color(212, 215, 10); + dataColor[7] = new java.awt.Color(210, 111, 0); + dataColor[8] = new java.awt.Color(0, 0, 141); + dataColor[9] = new java.awt.Color(190, 190, 190); + + g2d.setComposite(alphaComposite50); + g2d.setColor(new Color(0, 0, 0)); + g2d.drawLine(marginLeft, marginTop, marginLeft, marginTop + innerHeight); + g2d.drawLine(marginLeft, marginTop + innerHeight, marginLeft + innerWidth, marginTop + innerHeight); + + double minX = JSci.maths.ArrayMath.min(covariate); + double maxX = JSci.maths.ArrayMath.max(covariate); + double minY = JSci.maths.ArrayMath.min(expression); + double maxY = JSci.maths.ArrayMath.max(expression); + + g2d.setComposite(alphaComposite10); + for (int rep = 1; rep >= 1; rep--) { + for (int s = 0; s < nrSamples; s++) { + int posY = marginTop + innerHeight - (int) ((expression[s] - minY) / (maxY - minY) * innerHeight); + int posX = marginLeft + (int) ((covariate[s] - minX) / (maxX - minX) * innerWidth); + if (genotype[s] < 0.5) { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); + g2d.setColor(new Color(204, 86, 78)); + } else { + if (genotype[s] > 1.5) { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); + g2d.setColor(new Color(171, 178, 114)); + } else { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - 
(float) rep / 10f)); + g2d.setColor(new Color(98, 175, 255)); + } + } + + g2d.fillOval(posX - 5 - rep * 4, posY - 5 - rep * 4, 7 + rep * 8, 7 + rep * 8); + + } + } + + //Draw the four independent cohorts seperately: + //int[] cohortIndex = {0,626,1280,1933}; +// for (int rep = 2; rep >= 0; rep--) { +// for (int s = 0; s < nrSamples; s++) { +// int cohort = 0; +// for (int c = 0; c < cohortIndex.length; c++) { +// if (s >= cohortIndex[c]) { +// cohort = c; +// } +// } +// +// int posY = marginTop + 100 + cohort * 125 - (int) ((expression[s] - minY) / (maxY - minY) * 100); +// int posX = marginLeft + innerWidth + 50 + (int) ((covariate[s] - minX) / (maxX - minX) * 100); +// if (genotype[s] < 0.5) { +// g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); +// g2d.setColor(new Color(204, 86, 78)); +// } else { +// if (genotype[s] > 1.5) { +// g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); +// g2d.setColor(new Color(171, 178, 114)); +// } else { +// g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); +// g2d.setColor(new Color(98, 175, 255)); +// } +// } +// g2d.fillOval(posX - 1 - rep * 2, posY - 1 - rep * 2, 3 + rep * 4, 3 + rep * 4); +// +// } +// } + + + g2d.setComposite(alphaComposite50); + double[][] valsX = new double[nrSamples][3]; + for (int s = 0; s < nrSamples; s++) { + valsX[s][0] = genotype[s]; + valsX[s][1] = covariate[s]; + valsX[s][2] = valsX[s][0] * valsX[s][1]; + } + double[] valsY = expression; + org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression regression = new org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression(); + regression.newSampleData(valsY, valsX); + double[] betas = regression.estimateRegressionParameters(); + double betaInteraction = betas[3]; + double seInteraction = 
regression.estimateRegressionParametersStandardErrors()[3]; + double tInteraction = betaInteraction / seInteraction; + double pValueInteraction = 1; + double zScoreInteraction = 0; + cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = new cern.jet.random.tdouble.engine.DRand(); + cern.jet.random.tdouble.StudentT tDistColt = new cern.jet.random.tdouble.StudentT(genotype.length - 4, randomEngine); + if (tInteraction < 0) { + pValueInteraction = tDistColt.cdf(tInteraction); + if (pValueInteraction < 2.0E-323) { + pValueInteraction = 2.0E-323; + } + zScoreInteraction = cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction); + } else { + pValueInteraction = tDistColt.cdf(-tInteraction); + if (pValueInteraction < 2.0E-323) { + pValueInteraction = 2.0E-323; + } + zScoreInteraction = -cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction); + } + pValueInteraction *= 2; + + String pValueString = (new java.text.DecimalFormat("0.##E0", new java.text.DecimalFormatSymbols(java.util.Locale.US))).format(pValueInteraction); + if (pValueInteraction > 0.001) { + pValueString = (new java.text.DecimalFormat("##.###;-##.###", new java.text.DecimalFormatSymbols(java.util.Locale.US))).format(pValueInteraction); + } + g2d.setFont(new java.awt.Font("Arial", java.awt.Font.BOLD, 14)); + g2d.setColor(new Color(0, 0, 0)); + int posX = marginLeft; + int posY = marginTop + innerHeight + 20; + g2d.drawString("Interaction P-Value: " + pValueString, posX, posY); + + + for (int g = 0; g <= 2; g++) { + + double valMin = betas[0] + betas[1] * g + minX * betas[2] + betas[3] * g * minX; + double valMax = betas[0] + betas[1] * g + maxX * betas[2] + betas[3] * g * maxX; + int posXMin = marginLeft + (int) ((minX - minX) / (maxX - minX) * innerWidth); + int posYMin = marginTop + innerHeight - (int) ((valMin - minY) / (maxY - minY) * innerHeight); + int posXMax = marginLeft + (int) ((maxX - minX) / (maxX - minX) * innerWidth); + int posYMax = marginTop + innerHeight - 
(int) ((valMax - minY) / (maxY - minY) * innerHeight); + + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.8f)); + g2d.setColor(new Color(255, 255, 255)); + g2d.setStroke(new java.awt.BasicStroke(5.0f, java.awt.BasicStroke.CAP_ROUND, java.awt.BasicStroke.JOIN_ROUND)); + g2d.drawLine(posXMin, posYMin, posXMax, posYMax); + if (g < 0.5) { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.30f)); + g2d.setColor(new Color(204, 86, 78)); + } else { + if (g > 1.5) { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.50f)); + g2d.setColor(new Color(171, 178, 114)); + } else { + g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.50f)); + g2d.setColor(new Color(98, 175, 255)); + } + } + g2d.setStroke(new java.awt.BasicStroke(3.0f, java.awt.BasicStroke.CAP_ROUND, java.awt.BasicStroke.JOIN_ROUND)); + g2d.drawLine(posXMin, posYMin, posXMax, posYMax); + + } + + try { + javax.imageio.ImageIO.write(bimage, "png", new File(fileName)); + } catch (IOException e) { + System.out.println(e.getMessage()); + e.printStackTrace(); + } + + + } + +} diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 0b732ab0a..dace0e1b1 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -84,7 +84,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String outputTopCovs.close(); } - private HashMap getEqtls(String fname) throws 
IOException { + static public HashMap getEqtls(String fname) throws IOException { TextFile file = new TextFile(fname, false); ArrayList genes = file.readAsArrayList(4, TextFile.tab); HashMap eqtlGenes = new HashMap(); @@ -583,207 +583,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h return null; } - public void makeInteractionPlot(String fileName, double[] genotype, double[] expression, double[] covariate, String[] sampleNames) { - - int nrSamples = genotype.length; - - int[] cohortIndex = new int[4]; - String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; - for (int cohort = 0; cohort < cohorts.length; cohort++) { - for (int s = 0; s < nrSamples; s++) { - if (sampleNames[s].startsWith(cohorts[cohort])) { - cohortIndex[cohort] = s; - break; - } - } - } - - int marginLeft = 100; - int marginRight = 200; - int marginTop = 100; - int marginBottom = 100; - int innerHeight = 500; - int innerWidth = 500; - int docWidth = marginLeft + marginRight + innerWidth; - int docHeight = marginTop + marginBottom + innerHeight; - - BufferedImage bimage = new BufferedImage(docWidth, docHeight, BufferedImage.TYPE_INT_RGB); - Graphics2D g2d = bimage.createGraphics(); - - g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON); - g2d.setColor(new Color(255, 255, 255)); - g2d.fillRect(0, 0, docWidth, docHeight); - java.awt.AlphaComposite alphaComposite10 = java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.10f); - java.awt.AlphaComposite alphaComposite25 = java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.25f); - java.awt.AlphaComposite alphaComposite50 = java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.50f); - java.awt.AlphaComposite alphaComposite100 = java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC, 1.00f); - - float fontSize = 12f; - java.awt.Font font = new java.awt.Font("Gill Sans MT", java.awt.Font.PLAIN, (int) fontSize); - 
java.awt.Font fontBold = new java.awt.Font("Gill Sans MT", java.awt.Font.BOLD, (int) fontSize); - java.awt.Font fontSmall = new java.awt.Font("Gill Sans MT", java.awt.Font.PLAIN, 8); - java.awt.Font fontBoldSmall = new java.awt.Font("Gill Sans MT", java.awt.Font.BOLD, 8); - - java.awt.Color dataColor[] = new Color[10]; - dataColor[0] = new java.awt.Color(167, 72, 20); - dataColor[1] = new java.awt.Color(62, 138, 20); - dataColor[2] = new java.awt.Color(228, 171, 0); - dataColor[3] = new java.awt.Color(0, 148, 183); - dataColor[4] = new java.awt.Color(119, 80, 152); - dataColor[5] = new java.awt.Color(106, 106, 106); - dataColor[6] = new java.awt.Color(212, 215, 10); - dataColor[7] = new java.awt.Color(210, 111, 0); - dataColor[8] = new java.awt.Color(0, 0, 141); - dataColor[9] = new java.awt.Color(190, 190, 190); - - g2d.setComposite(alphaComposite50); - g2d.setColor(new Color(0, 0, 0)); - g2d.drawLine(marginLeft, marginTop, marginLeft, marginTop + innerHeight); - g2d.drawLine(marginLeft, marginTop + innerHeight, marginLeft + innerWidth, marginTop + innerHeight); - - double minX = JSci.maths.ArrayMath.min(covariate); - double maxX = JSci.maths.ArrayMath.max(covariate); - double minY = JSci.maths.ArrayMath.min(expression); - double maxY = JSci.maths.ArrayMath.max(expression); - - g2d.setComposite(alphaComposite10); - for (int rep = 2; rep >= 0; rep--) { - for (int s = 0; s < nrSamples; s++) { - int posY = marginTop + innerHeight - (int) ((expression[s] - minY) / (maxY - minY) * innerHeight); - int posX = marginLeft + (int) ((covariate[s] - minX) / (maxX - minX) * innerWidth); - if (genotype[s] < 0.5) { - g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); - g2d.setColor(new Color(204, 86, 78)); - } else { - if (genotype[s] > 1.5) { - g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); - g2d.setColor(new Color(171, 178, 114)); - } else { - 
g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); - g2d.setColor(new Color(98, 175, 255)); - } - } - g2d.fillOval(posX - 3 - rep * 4, posY - 3 - rep * 4, 7 + rep * 8, 7 + rep * 8); - - } - } - - //Draw the four independent cohorts seperately: - //int[] cohortIndex = {0,626,1280,1933}; - for (int rep = 2; rep >= 0; rep--) { - for (int s = 0; s < nrSamples; s++) { - int cohort = 0; - for (int c = 0; c < cohortIndex.length; c++) { - if (s >= cohortIndex[c]) { - cohort = c; - } - } - - int posY = marginTop + 100 + cohort * 125 - (int) ((expression[s] - minY) / (maxY - minY) * 100); - int posX = marginLeft + innerWidth + 50 + (int) ((covariate[s] - minX) / (maxX - minX) * 100); - if (genotype[s] < 0.5) { - g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); - g2d.setColor(new Color(204, 86, 78)); - } else { - if (genotype[s] > 1.5) { - g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); - g2d.setColor(new Color(171, 178, 114)); - } else { - g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_ATOP, 0.30f - (float) rep / 10f)); - g2d.setColor(new Color(98, 175, 255)); - } - } - g2d.fillOval(posX - 1 - rep * 2, posY - 1 - rep * 2, 3 + rep * 4, 3 + rep * 4); - - } - } - - - g2d.setComposite(alphaComposite50); - double[][] valsX = new double[nrSamples][3]; - for (int s = 0; s < nrSamples; s++) { - valsX[s][0] = genotype[s]; - valsX[s][1] = covariate[s]; - valsX[s][2] = valsX[s][0] * valsX[s][1]; - } - double[] valsY = expression; - org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression regression = new org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression(); - regression.newSampleData(valsY, valsX); - double[] betas = regression.estimateRegressionParameters(); - double betaInteraction = betas[3]; - double seInteraction = 
regression.estimateRegressionParametersStandardErrors()[3]; - double tInteraction = betaInteraction / seInteraction; - double pValueInteraction = 1; - double zScoreInteraction = 0; - cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = new cern.jet.random.tdouble.engine.DRand(); - cern.jet.random.tdouble.StudentT tDistColt = new cern.jet.random.tdouble.StudentT(genotype.length - 4, randomEngine); - if (tInteraction < 0) { - pValueInteraction = tDistColt.cdf(tInteraction); - if (pValueInteraction < 2.0E-323) { - pValueInteraction = 2.0E-323; - } - zScoreInteraction = cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction); - } else { - pValueInteraction = tDistColt.cdf(-tInteraction); - if (pValueInteraction < 2.0E-323) { - pValueInteraction = 2.0E-323; - } - zScoreInteraction = -cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction); - } - pValueInteraction *= 2; - - String pValueString = (new java.text.DecimalFormat("0.##E0", new java.text.DecimalFormatSymbols(java.util.Locale.US))).format(pValueInteraction); - if (pValueInteraction > 0.001) { - pValueString = (new java.text.DecimalFormat("##.###;-##.###", new java.text.DecimalFormatSymbols(java.util.Locale.US))).format(pValueInteraction); - } - g2d.setFont(new java.awt.Font("Arial", java.awt.Font.BOLD, 14)); - g2d.setColor(new Color(0, 0, 0)); - int posX = marginLeft; - int posY = marginTop + innerHeight + 20; - g2d.drawString("Interaction P-Value: " + pValueString, posX, posY); - - - for (int g = 0; g <= 2; g++) { - - double valMin = betas[0] + betas[1] * g + minX * betas[2] + betas[3] * g * minX; - double valMax = betas[0] + betas[1] * g + maxX * betas[2] + betas[3] * g * maxX; - int posXMin = marginLeft + (int) ((minX - minX) / (maxX - minX) * innerWidth); - int posYMin = marginTop + innerHeight - (int) ((valMin - minY) / (maxY - minY) * innerHeight); - int posXMax = marginLeft + (int) ((maxX - minX) / (maxX - minX) * innerWidth); - int posYMax = marginTop + innerHeight - 
(int) ((valMax - minY) / (maxY - minY) * innerHeight); - - g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.8f)); - g2d.setColor(new Color(255, 255, 255)); - g2d.setStroke(new java.awt.BasicStroke(5.0f, java.awt.BasicStroke.CAP_ROUND, java.awt.BasicStroke.JOIN_ROUND)); - g2d.drawLine(posXMin, posYMin, posXMax, posYMax); - if (g < 0.5) { - g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.30f)); - g2d.setColor(new Color(204, 86, 78)); - } else { - if (g > 1.5) { - g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.50f)); - g2d.setColor(new Color(171, 178, 114)); - } else { - g2d.setComposite(java.awt.AlphaComposite.getInstance(java.awt.AlphaComposite.SRC_OVER, 0.50f)); - g2d.setColor(new Color(98, 175, 255)); - } - } - g2d.setStroke(new java.awt.BasicStroke(3.0f, java.awt.BasicStroke.CAP_ROUND, java.awt.BasicStroke.JOIN_ROUND)); - g2d.drawLine(posXMin, posYMin, posXMax, posYMax); - - } - - try { - javax.imageio.ImageIO.write(bimage, "png", new File(fileName)); - } catch (IOException e) { - System.out.println(e.getMessage()); - e.printStackTrace(); - } - - - } - - public void orthogonalizeDataset(String inputFile) { + static public void orthogonalizeDataset(String inputFile) { ExpressionDataset dataset = new ExpressionDataset(inputFile); dataset.transposeDataset(); @@ -865,7 +665,7 @@ public void orthogonalizeDataset(String inputFile) { } - public ExpressionDataset orthogonalizeMatrix(ExpressionDataset dataset) { + static public ExpressionDataset orthogonalizeMatrix(ExpressionDataset dataset) { dataset.standardNormalizeData(); int nrVars = dataset.nrProbes; @@ -931,7 +731,7 @@ public ExpressionDataset orthogonalizeMatrix(ExpressionDataset dataset) { } - public double[] getLinearRegressionCoefficients(double[] xVal, double[] yVal) { + static public double[] getLinearRegressionCoefficients(double[] xVal, double[] yVal) { double n = (double) xVal.length; 
double sumX = 0; double sumXX = 0; @@ -953,13 +753,13 @@ public double[] getLinearRegressionCoefficients(double[] xVal, double[] yVal) { return regressionCoefficients; } - private Jama.EigenvalueDecomposition eigenValueDecomposition(double[][] data) { + static public Jama.EigenvalueDecomposition eigenValueDecomposition(double[][] data) { Jama.Matrix m = new Jama.Matrix(data); Jama.EigenvalueDecomposition eig = m.eig(); return eig; } - private double[] getEigenVector(Jama.EigenvalueDecomposition eig, double[] eigenValues, int pca) { + static public double[] getEigenVector(Jama.EigenvalueDecomposition eig, double[] eigenValues, int pca) { Jama.Matrix eigenValueMatrix = eig.getV(); double[][] eigenValueMat = eigenValueMatrix.getArray(); double[] eigenVector = new double[eigenValueMat.length]; @@ -969,7 +769,7 @@ private double[] getEigenVector(Jama.EigenvalueDecomposition eig, double[] eigen return eigenVector; } - private double[] getEigenVector(Jama.EigenvalueDecomposition eig, int pca) { + static public double[] getEigenVector(Jama.EigenvalueDecomposition eig, int pca) { Jama.Matrix eigenValueMatrix = eig.getV(); double[][] eigenValueMat = eigenValueMatrix.getArray(); double[] eigenVector = new double[eigenValueMat.length]; @@ -979,7 +779,7 @@ private double[] getEigenVector(Jama.EigenvalueDecomposition eig, int pca) { return eigenVector; } - private double getEigenValueVar(double[] eigenValues, int pca) { + static public double getEigenValueVar(double[] eigenValues, int pca) { double sumEigenvalues = 0.0; for (Double d : eigenValues) { sumEigenvalues += Math.abs(d); @@ -988,7 +788,7 @@ private double getEigenValueVar(double[] eigenValues, int pca) { return result; } - private double[] getEigenVectorSVD(Jama.SingularValueDecomposition svd, double[] singularValues, int pca) { + static public double[] getEigenVectorSVD(Jama.SingularValueDecomposition svd, double[] singularValues, int pca) { Jama.Matrix eigenValueMatrix = svd.getV(); double[][] eigenValueMat = 
eigenValueMatrix.getArray(); double[] eigenVector = new double[eigenValueMat.length]; From 5c6012106151c48fbb593a5c0f0ffac6f1f80542 Mon Sep 17 00:00:00 2001 From: Dasha Zhernakova Date: Thu, 9 Jul 2015 22:26:15 +0300 Subject: [PATCH 067/143] Added options and gene distance limit --- .../EQTLInteractionAnalyser.java | 138 +++++++++- .../TestEQTLDatasetForInteractions.java | 246 ++++++++++++++---- 2 files changed, 323 insertions(+), 61 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index fc257859d..6a3a8f9fe 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -3,9 +3,16 @@ * To change this template file, choose Tools | Templates * and open the template in the editor. 
*/ + package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; +import org.apache.commons.cli.*; +import umcg.genetica.io.text.TextFile; + import java.io.IOException; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.Date; /** * @@ -13,14 +20,133 @@ */ public class EQTLInteractionAnalyser { + private static final DateFormat DATE_TIME_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + private static final Date currentDataTime = new Date(); + private static final Options OPTIONS; + + static { + + OPTIONS = new Options(); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArgs(); + OptionBuilder.withDescription("Path to the folder containing expression and genotype data"); + OptionBuilder.withLongOpt("input"); + OptionBuilder.isRequired(); + OPTIONS.addOption(OptionBuilder.create("i")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Path to the output folder"); + OptionBuilder.withLongOpt("output"); + OptionBuilder.isRequired(); + OPTIONS.addOption(OptionBuilder.create("o")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Path to the eQTL file to test for interactions"); + OptionBuilder.withLongOpt("eqtls"); + OPTIONS.addOption(OptionBuilder.create("e")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Path to the gene annotation file in the format of eQTL mapping pipeline"); + OptionBuilder.withLongOpt("annot"); + OPTIONS.addOption(OptionBuilder.create("a")); + + OptionBuilder.withArgName("int"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Maximum number of covariates to regress out"); + OptionBuilder.withLongOpt("maxcov"); + OPTIONS.addOption(OptionBuilder.create("n")); + + OptionBuilder.withArgName("boolean"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Interpret the z-score matrices"); + 
OptionBuilder.withLongOpt("interpret"); + OPTIONS.addOption(OptionBuilder.create("it")); + + OptionBuilder.withArgName("boolean"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Find chi2sum differences for each covariate between 2 consequtive interaction runs"); + OptionBuilder.withLongOpt("chi2sumDiff"); + OPTIONS.addOption(OptionBuilder.create("dif")); + + OptionBuilder.withArgName("string"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("covariates to correct for before running the interaction analysis"); + OptionBuilder.withLongOpt("cov"); + OPTIONS.addOption(OptionBuilder.create("c")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("File containing the covariates to correct for before running the interaction analysis. No header, each covariate on a separate line"); + OptionBuilder.withLongOpt("covFile"); + OPTIONS.addOption(OptionBuilder.create("cf")); + } - /** - * @param args the command line arguments - */ public static void main(String[] args) throws IOException { - // TODO code application logic here + System.out.println("Starting interaction analysis"); + System.out.println("Current date and time: " + DATE_TIME_FORMAT.format(currentDataTime)); + System.out.println(); + + args = new String[] {"-c", "gender", "-c", "LLS", "-a", "/Users/dashazhernakova/Documents/UMCG/hg19/v71/annotations/annotation_geneIds+overlapping_v71_cut.24-06-14.txt", "-e", "/Users/dashazhernakova/Documents/UMCG/data/BBMRI/first_run_final/genes/LL+RS+CODAM+LLS_eqtls_genes_23062014/GWAS/Crohns.txt", "-i", "/Users/dashazhernakova/Documents/UMCG/data/BBMRI/interactionWithTFs/allGenes/LudeInteractionAnalyser", "-o", "/Users/dashazhernakova/Documents/UMCG/data/BBMRI/interactionWithTFs/allGenes/LudeInteractionAnalyser/output/test3/", "-n", "2", "--interpret", "False"}; + + String inputDir, outputDir, eqtlFile = null, annotationFile = null; + int maxNumCovariatesToRegress = 20; + boolean interpret = false, chi2sumDiff 
= false; + String[] covariates = null; + try { + final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); + + inputDir = commandLine.getOptionValue("i"); + outputDir = commandLine.getOptionValue("o"); + + if (commandLine.hasOption('e')) { + eqtlFile = commandLine.getOptionValue("e"); + } + if (commandLine.hasOption('n')) { + maxNumCovariatesToRegress = Integer.parseInt(commandLine.getOptionValue("n")); + } + if (commandLine.hasOption("it")) { + interpret = Boolean.parseBoolean(commandLine.getOptionValue("t")); + } + if (commandLine.hasOption("dif")) { + chi2sumDiff = Boolean.parseBoolean(commandLine.getOptionValue("d")); + } + if (commandLine.hasOption('a')) { + annotationFile = commandLine.getOptionValue("a"); + } + + if (commandLine.hasOption("cf")) { + TextFile covFile = new TextFile(commandLine.getOptionValue("cf"), false); + covariates = covFile.readAsArray(); + covFile.close(); + } + else if (commandLine.hasOption("c")){ + covariates = commandLine.getOptionValues("cf"); + } + + } catch (ParseException ex) { + System.err.println("Invalid command line arguments: "); + System.err.println(ex.getMessage()); + System.err.println(); + new HelpFormatter().printHelp(" ", OPTIONS); + System.exit(1); + return; + } - new TestEQTLDatasetForInteractions(args[0], args[1], args[2]); + if (interpret){ + TestEQTLDatasetForInteractions interactor = new TestEQTLDatasetForInteractions(inputDir, outputDir); + interactor.interpretInteractionZScoreMatrix(maxNumCovariatesToRegress); + } + else if (chi2sumDiff){ + TestEQTLDatasetForInteractions interactor = new TestEQTLDatasetForInteractions(inputDir, outputDir); + interactor.findChi2SumDifferences(maxNumCovariatesToRegress); + } + else { + new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates); + } } -} \ No newline at end of file +} diff --git 
a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 2e751b5f3..74684e3bc 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -7,7 +7,6 @@ import java.awt.*; import java.awt.image.BufferedImage; -import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.util.ArrayList; @@ -20,6 +19,7 @@ import java.util.logging.Level; import java.util.logging.Logger; import org.apache.commons.math3.stat.ranking.NaturalRanking; +import umcg.genetica.genomicboundaries.GenomicBoundary; import umcg.genetica.io.text.TextFile; /** @@ -30,47 +30,55 @@ public class TestEQTLDatasetForInteractions { String inputDir = null; String outputDir = null; + HashMap> geneDistanceMap = null; + String[] primaryCovsToCorrect; public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws IOException { this.inputDir = inputDir; this.outputDir = outputDir; //preprocessData(); - - if (1 == 1) { - String[] covsToCorrect = {"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS"}; - while (1 == 1) { - String topCov = performInteractionAnalysis(covsToCorrect, null, null); - String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; - for (int c = 0; c < covsToCorrect.length; c++) { - covsToCorrectNew[c] = covsToCorrect[c]; - } - covsToCorrectNew[covsToCorrect.length] = topCov; - covsToCorrect = covsToCorrectNew; - } - } - - //interpretInteractionZScoreMatrix(); - } - public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName) throws 
IOException { + public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect) throws IOException { System.out.println("Input dir: " + inputDir); System.out.println("Output dir: " + outputDir); System.out.println("eQTL file: " + eQTLfileName); + System.out.println("Maximum number of covariates to regress out: " + maxNumTopCovs); this.inputDir = inputDir; this.outputDir = outputDir; + + HashMap eqtlGenes = getEqtls(eQTLfileName); + + if (annotationFile != null) { + createGeneDistanceMap(annotationFile); + } + //preprocessData(); TextFile outputTopCovs = new TextFile(outputDir + "/outputTopCovariates.txt", true); - HashMap eqtlGenes = getEqtls(eQTLfileName); - String[] covsToCorrect = {"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "CEU", "GBR", "FIN", "TSI", "YRI"}; + + if (covariatesToCorrect != null){ + primaryCovsToCorrect = covariatesToCorrect; + } + else { + primaryCovsToCorrect = new String[]{"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "LLdeep", "RS", "CODAM", "LLS"}; + } + + + System.out.print("\nPrimary covariates to correct for before running interaction analysis: "); + for (String cov : primaryCovsToCorrect){ + System.out.print("\n\t" + cov); + } + System.out.println(); + + + String[] covsToCorrect = primaryCovsToCorrect; int cnt = 0; - int maxNumTopCovs = 300; while (cnt < maxNumTopCovs) { String topCov = performInteractionAnalysis(covsToCorrect, eqtlGenes, outputTopCovs); String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; @@ -84,6 +92,12 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String outputTopCovs.close(); } + /** + * Extracts eQTL gene names + * @param fname - eQTL file (in the eqtlmappingpipeline format) + * @return gene names in keys of a HashMap + * @throws IOException + */ private HashMap getEqtls(String fname) throws IOException { TextFile file = new TextFile(fname, 
false); ArrayList genes = file.readAsArrayList(4, TextFile.tab); @@ -96,34 +110,34 @@ private HashMap getEqtls(String fname) throws IOException { } - public void interpretInteractionZScoreMatrix() { + public void interpretInteractionZScoreMatrix(int maxNumRegressedCovariates) { + System.out.println("Interpreting the z-score matrix"); - for (int nrCovsRemoved = 4; nrCovsRemoved <= 50; nrCovsRemoved++) { + int numPrimaryCovsToCorrect = primaryCovsToCorrect.length; + for (int nrCovsRemoved = numPrimaryCovsToCorrect; nrCovsRemoved < numPrimaryCovsToCorrect + maxNumRegressedCovariates; nrCovsRemoved++) { ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt"); dataset.save(dataset.fileName + ".binary"); } - for (int nrCovsRemoved = 4; nrCovsRemoved <= 50; nrCovsRemoved++) { + for (int nrCovsRemoved = numPrimaryCovsToCorrect; nrCovsRemoved < numPrimaryCovsToCorrect + maxNumRegressedCovariates; nrCovsRemoved++) { - //ExpressionDataset dataset = new ExpressionDataset("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); - //ExpressionDataset dataset2 = new ExpressionDataset("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + (nrCovsRemoved + 1) + "Covariates.txt.binary"); ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); ExpressionDataset dataset2 = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + (nrCovsRemoved + 1) + "Covariates.txt.binary"); - for (int q = 0; q < dataset.nrSamples; q++) { + for (int q=0; q 4 && absZDiff > maxAbsZDiff) { + if (absZDiff > 2 && absZDiff > maxAbsZDiff) { maxAbsZDiff = absZDiff; output = nrCovsRemoved + "\t" + p + "\t" + dataset.probeNames[p] + "\t" + q + "\t" + dataset.sampleNames[q] + "\t" + dataset.rawData[p][q] + "\t" + dataset2.rawData[p][q] + "\t" + zDiff; } } - if (maxAbsZDiff > 4) { + if (maxAbsZDiff > 2) { 
System.out.println(output); } } @@ -132,6 +146,37 @@ public void interpretInteractionZScoreMatrix() { System.exit(0); } + public void findChi2SumDifferences(int maxNumRegressedCovariates) { + + int numPrimaryCovsToCorrect = primaryCovsToCorrect.length; + System.out.println("Interpreting the z-score matrix"); + System.out.println("Preparing the data"); + for (int nrCovsRemoved = numPrimaryCovsToCorrect; nrCovsRemoved < numPrimaryCovsToCorrect + maxNumRegressedCovariates; nrCovsRemoved++) { + ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt"); + dataset.save(dataset.fileName + ".binary"); + } + + System.out.println("Comparing chi2sums"); + for (int nrCovsRemoved = numPrimaryCovsToCorrect; nrCovsRemoved < numPrimaryCovsToCorrect + maxNumRegressedCovariates; nrCovsRemoved++) { + + ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); + ExpressionDataset dataset2 = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + (nrCovsRemoved + 1) + "Covariates.txt.binary"); + + for (int covariate = 0; covariate < dataset.nrProbes; covariate++) { + double chi2Sum1 = 0, chi2Sum2 = 0; + for (int gene = 0; gene < dataset.nrSamples; gene++) { + double z_before = dataset.rawData[covariate][gene]; + chi2Sum1 += z_before * z_before; + double z_after = dataset2.rawData[covariate][gene]; + chi2Sum2 += z_after * z_after; + + } + System.out.println(nrCovsRemoved + "\t" + dataset.probeNames[covariate] + "\t" + chi2Sum1 + "\t" + chi2Sum2 + "\t" + (chi2Sum1 - chi2Sum2)); + } + } + } + + public void preprocessData() { HashMap hashGenotypes = new HashMap(); @@ -316,8 +361,12 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h int[] covsToCorrectIndex = new int[covsToCorrect.length]; for (int c = 0; c < covsToCorrect.length; c++) { hashCovsToCorrect.put(covsToCorrect[c], null); - 
covsToCorrectIndex[c] = ((Integer) datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + try { + covsToCorrectIndex[c] = ((Integer) datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); + } catch (Exception e){ + System.out.println("test"); + } + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { datasetCovariatesToCorrectFor.rawData[c][s] = datasetCovariates.rawData[covsToCorrectIndex[c]][s]; } } @@ -435,7 +484,11 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h System.out.println("Correcting expression data for predefined gene environment interaction effects (GC content, Gender, 5'Median Bias, 3'Median Bias):"); int[] covsToCorrectIndex = new int[covsToCorrect.length]; for (int c = 0; c < covsToCorrect.length; c++) { - covsToCorrectIndex[c] = ((Integer) datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); + try { + covsToCorrectIndex[c] = ((Integer) datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); + } catch (Exception e){ + System.out.println("test"); + } } for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { double[][] valsX = new double[nrSamples][1 + covsToCorrect.length * 2]; //store genotypes, covariates, interactions @@ -540,30 +593,62 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h String maxChi2Cov = ""; double maxChi2 = 0; try { - - for (int task = 0; task < nrTasks; task++) { - try { - DoubleArrayIntegerObject result = pool.take().get(); - int cov = result.intValue; - double chi2Sum = 0; - double[] covZ = datasetZScores.rawData[cov]; - for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { - double z = result.doubleArray[snp]; - covZ[snp] = z; - if(!Double.isNaN(z)){ - chi2Sum += z * z; + // If gene annotation provided, for chi2sum calculation use only genes that are 1mb apart + if (geneDistanceMap != null) { + for (int task = 0; task < nrTasks; task++) { + try { + 
DoubleArrayIntegerObject result = pool.take().get(); + int cov = result.intValue; + double chi2Sum = 0; + double[] covZ = datasetZScores.rawData[cov]; + for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { + if (genesFarAway(datasetZScores.sampleNames[snp], datasetZScores.probeNames[cov])) { + double z = result.doubleArray[snp]; + covZ[snp] = z; + if (!Double.isNaN(z)) { + chi2Sum += z * z; + } + } } + if (chi2Sum > maxChi2) { + maxChi2 = chi2Sum; + maxChi2Cov = datasetCovariates.probeNames[cov]; + } + //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); + if ((task + 1) % 512 == 0) { + System.out.println(task + 1 + " tasks processed"); + } + } catch (ExecutionException ex) { + Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); } - if (chi2Sum > maxChi2) { - maxChi2 = chi2Sum; - maxChi2Cov = datasetCovariates.probeNames[cov]; - } - //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); - if ((task + 1) % 512 == 0) { - System.out.println(task + 1 + " tasks processed"); + } + } + //If gene annotation not provided, use all gene pairs + else { + for (int task = 0; task < nrTasks; task++) { + try { + DoubleArrayIntegerObject result = pool.take().get(); + int cov = result.intValue; + double chi2Sum = 0; + double[] covZ = datasetZScores.rawData[cov]; + for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { + double z = result.doubleArray[snp]; + covZ[snp] = z; + if (!Double.isNaN(z)) { + chi2Sum += z * z; + } + } + if (chi2Sum > maxChi2) { + maxChi2 = chi2Sum; + maxChi2Cov = datasetCovariates.probeNames[cov]; + } + //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); + if ((task + 1) % 512 == 0) { + System.out.println(task + 1 + " tasks processed"); + } + } catch (ExecutionException ex) { + 
Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); } - } catch (ExecutionException ex) { - Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); } } threadPool.shutdown(); @@ -574,7 +659,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h System.out.println("Top covariate:\t" + maxChi2 + "\t" + maxChi2Cov); outputTopCovs.writeln("Top covariate:\t" + maxChi2 + "\t" + maxChi2Cov); - //datasetZScores.save("/Volumes/Promise_RAID/lude/InteractionZScoresMatrix-" + covsToCorrect.length + "Covariates.txt"); + outputTopCovs.flush(); datasetZScores.save(outputDir + "/InteractionZScoresMatrix-" + covsToCorrect.length + "Covariates.txt"); return maxChi2Cov; @@ -583,6 +668,57 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h return null; } + /** + * Creates a map of gene name to GenomicBoundary containing gene coordinates and the coordinate of its midpoint as annotation + * @param annotFname - path to the annotation file (in the eqtlmappingpipeline format) + * @throws IOException + */ + private void createGeneDistanceMap(String annotFname) throws IOException { + System.out.println("Creating a gene distance map from " + annotFname); + + geneDistanceMap = new HashMap>(); + + TextFile annotFile = new TextFile(annotFname, false); + String els[] = annotFile.readLineElems(TextFile.tab); + + while ((els = annotFile.readLineElems(TextFile.tab)) != null){ + int start = Integer.parseInt(els[4]), end = Integer.parseInt(els[5]), middle = start + (end - start)/2; + GenomicBoundary genomicboundary = new GenomicBoundary(els[3], Integer.parseInt(els[4]), Integer.parseInt(els[5]), middle); + geneDistanceMap.put(els[1], genomicboundary); + } + annotFile.close(); + } + + /** + * Checks if the genomic distance between 2 genes is more than 1mb + * @param gene1 + * @param gene2 + * @return true if the genes are more than 1mb 
apart + */ + private boolean genesFarAway(String gene1, String gene2) { + // if one of the covariates is a technical bias or a cell count etc + if ((! gene1.startsWith("ENS")) || (! gene2.startsWith("ENS"))){ + return true; + } + + GenomicBoundary gb1 = null, gb2 = null; + try { + gb1 = geneDistanceMap.get(gene1); + gb2 = geneDistanceMap.get(gene2); + + if (gb1.getChromosome() != gb2.getChromosome()){ + return true; + } + if (Math.abs(gb1.getAnnotation() - gb2.getAnnotation()) > 1000000){ + return true; + } + } catch (Exception e){ + System.out.println("Error: gene annotation doesn't contain one of these genes: " + gene1 + " or " + gene2); + System.exit(1); + } + return false; + } + public void makeInteractionPlot(String fileName, double[] genotype, double[] expression, double[] covariate, String[] sampleNames) { int nrSamples = genotype.length; From b10857ace7511162ca397b5ad525fc73f11e7871 Mon Sep 17 00:00:00 2001 From: Dasha Zhernakova Date: Thu, 9 Jul 2015 22:30:58 +0300 Subject: [PATCH 068/143] added options and gene distance check -small edit --- .../eqtlinteractionanalyser/EQTLInteractionAnalyser.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index 6a3a8f9fe..48555185f 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -89,9 +89,7 @@ public static void main(String[] args) throws IOException { System.out.println("Starting interaction analysis"); System.out.println("Current date and time: " + DATE_TIME_FORMAT.format(currentDataTime)); 
System.out.println(); - - args = new String[] {"-c", "gender", "-c", "LLS", "-a", "/Users/dashazhernakova/Documents/UMCG/hg19/v71/annotations/annotation_geneIds+overlapping_v71_cut.24-06-14.txt", "-e", "/Users/dashazhernakova/Documents/UMCG/data/BBMRI/first_run_final/genes/LL+RS+CODAM+LLS_eqtls_genes_23062014/GWAS/Crohns.txt", "-i", "/Users/dashazhernakova/Documents/UMCG/data/BBMRI/interactionWithTFs/allGenes/LudeInteractionAnalyser", "-o", "/Users/dashazhernakova/Documents/UMCG/data/BBMRI/interactionWithTFs/allGenes/LudeInteractionAnalyser/output/test3/", "-n", "2", "--interpret", "False"}; - + String inputDir, outputDir, eqtlFile = null, annotationFile = null; int maxNumCovariatesToRegress = 20; boolean interpret = false, chi2sumDiff = false; From 597ef74639e2300588202be7c571ec57b5f6716e Mon Sep 17 00:00:00 2001 From: Dasha Zhernakova Date: Fri, 10 Jul 2015 10:19:05 +0300 Subject: [PATCH 069/143] minor edits --- .../TestEQTLDatasetForInteractions.java | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 74684e3bc..94ae9de6c 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -361,12 +361,8 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h int[] covsToCorrectIndex = new int[covsToCorrect.length]; for (int c = 0; c < covsToCorrect.length; c++) { hashCovsToCorrect.put(covsToCorrect[c], null); - try { - covsToCorrectIndex[c] = ((Integer) 
datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); - } catch (Exception e){ - System.out.println("test"); - } - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + covsToCorrectIndex[c] = ((Integer) datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { datasetCovariatesToCorrectFor.rawData[c][s] = datasetCovariates.rawData[covsToCorrectIndex[c]][s]; } } @@ -484,11 +480,8 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h System.out.println("Correcting expression data for predefined gene environment interaction effects (GC content, Gender, 5'Median Bias, 3'Median Bias):"); int[] covsToCorrectIndex = new int[covsToCorrect.length]; for (int c = 0; c < covsToCorrect.length; c++) { - try { - covsToCorrectIndex[c] = ((Integer) datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); - } catch (Exception e){ - System.out.println("test"); - } + covsToCorrectIndex[c] = ((Integer) datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); + } for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { double[][] valsX = new double[nrSamples][1 + covsToCorrect.length * 2]; //store genotypes, covariates, interactions From a6a0685a09b8f803d0c1f5d98c91a1292d0116a9 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Fri, 10 Jul 2015 09:27:16 +0200 Subject: [PATCH 070/143] Interactions --- .../CompareToGeuvadis.java | 8 ++++++-- .../InteractionPlotter.java | 19 +++++++++++-------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java index f53b55a67..67d609657 100644 --- 
a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java @@ -26,10 +26,14 @@ public static void main(String[] args) { String covariate = covariateEntry.getKey(); String eQtlGene = eQtlGeneEntry.getKey(); + + if(!covariate.equals("ENSG00000084072")){ + continue; + } double biosInteractionZ = bios.rawData[covariateEntry.getValue()][eQtlGeneEntry.getValue()]; - if (biosInteractionZ >= 4 || biosInteractionZ <= -4) { + if (biosInteractionZ >= 3 || biosInteractionZ <= -3) { Integer geuvadisCovI = geuvadis.hashProbes.get(covariate); Integer geuvadisGenI = geuvadis.hashSamples.get(eQtlGene); @@ -38,7 +42,7 @@ public static void main(String[] args) { double geuvadisInteractionZ = geuvadis.rawData[geuvadisCovI][geuvadisGenI]; - if (geuvadisInteractionZ >= 4 || geuvadisInteractionZ <= -4) { + if (geuvadisInteractionZ >= 2 || geuvadisInteractionZ <= -2) { covariatesReplicated.add(covariate); genesReplicated.add(eQtlGene); diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/InteractionPlotter.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/InteractionPlotter.java index d3542834e..9df3bc053 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/InteractionPlotter.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/InteractionPlotter.java @@ -28,16 +28,17 @@ public static void main(String[] args) throws IOException { //makeInteractionPlot("D:\\tmp\\test.png", new double[]{0,0,0,0.2,1,1,1,1,2,2,2}, new double[]{5,4,3,0.2,8,12,6,7,23,5,7}, new double[]{3,2,1,0.2,2,6,4,6,20,2,5}); - inputDir = args[1]; - outputDir = args[2]; - 
String eQTLfileName = args[3]; + inputDir = args[0]; + outputDir = args[1]; + String eQTLfileName = args[2]; System.out.println("Input dir: " + inputDir); System.out.println("Output dir: " + outputDir); System.out.println("eQTL file: " + eQTLfileName); - String[] covsToCorrect = {"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "CEU", "GBR", "FIN", "TSI", "YRI"}; + //String[] covsToCorrect = {"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "CEU", "GBR", "FIN", "TSI", "YRI"}; + String[] covsToCorrect = {"age", "gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "LLdeep", "LLS", "RS", "CODAM"}; HashMap hashEQTLs = getEqtls(eQTLfileName); HashMap hashSamples = new HashMap(); @@ -343,14 +344,16 @@ public static void main(String[] args) throws IOException { } - String eQtlGene = "ENSG00000084072"; - String covariate = ""; + String eQtlGene = "ENSG00000116688"; + String covariate = "ENSG00000084072"; Integer eQtlGeneI = datasetExpression.hashProbes.get(eQtlGene); Integer covariateI = datasetCovariates.hashProbes.get(covariate); Integer snpI = eQtlGeneI; - makeInteractionPlot("D:\\tmp\test2.png", datasetGenotypes.rawData[snpI], datasetExpression.rawData[eQtlGeneI], datasetCovariates.rawData[covariateI]); + + + makeInteractionPlot(outputDir + "/" + covariate + "-" + eQtlGene + ".png" , datasetGenotypes.rawData[snpI], datasetExpression.rawData[eQtlGeneI], datasetCovariates.rawData[covariateI]); @@ -421,7 +424,7 @@ public static void makeInteractionPlot(String fileName, double[] genotype, doubl double maxY = JSci.maths.ArrayMath.max(expression); g2d.setComposite(alphaComposite10); - for (int rep = 1; rep >= 1; rep--) { + for (int rep = 0; rep >= 0; rep--) { for (int s = 0; s < nrSamples; s++) { int posY = marginTop + innerHeight - (int) ((expression[s] - minY) / (maxY - minY) * innerHeight); int posX = marginLeft + (int) ((covariate[s] - minX) / (maxX - minX) * innerWidth); From e2f94c32c2db9efec839b5341073e9a1f2c3f8c2 Mon Sep 17 00:00:00 2001 
From: Patrick Deelen Date: Fri, 10 Jul 2015 10:09:37 +0200 Subject: [PATCH 071/143] interactions --- eQTLInteractionAnalyser/pom.xml | 6 ++ .../InteractionPlotter.java | 66 +++++++++++-------- 2 files changed, 43 insertions(+), 29 deletions(-) diff --git a/eQTLInteractionAnalyser/pom.xml b/eQTLInteractionAnalyser/pom.xml index 46ab78409..c93865306 100644 --- a/eQTLInteractionAnalyser/pom.xml +++ b/eQTLInteractionAnalyser/pom.xml @@ -47,6 +47,12 @@ genetica-libraries 1.0.7-SNAPSHOT + + com.opencsv + opencsv + 3.4 + + diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/InteractionPlotter.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/InteractionPlotter.java index 9df3bc053..88e8e008b 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/InteractionPlotter.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/InteractionPlotter.java @@ -1,10 +1,13 @@ package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; +import au.com.bytecode.opencsv.CSVReader; +import au.com.bytecode.opencsv.CSVWriter; import java.awt.Color; import java.awt.Graphics2D; import java.awt.RenderingHints; import java.awt.image.BufferedImage; import java.io.File; +import java.io.FileReader; import java.io.IOException; import java.util.HashMap; import static nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser.TestEQTLDatasetForInteractions.getEqtls; @@ -20,27 +23,32 @@ public class InteractionPlotter { static String inputDir = null; static String outputDir = null; - - /** - * @param args the command line arguments - */ - public static void main(String[] args) throws IOException { - + + /** + * @param args the command line arguments + */ + public static void main(String[] args) throws IOException { + 
//makeInteractionPlot("D:\\tmp\\test.png", new double[]{0,0,0,0.2,1,1,1,1,2,2,2}, new double[]{5,4,3,0.2,8,12,6,7,23,5,7}, new double[]{3,2,1,0.2,2,6,4,6,20,2,5}); - + inputDir = args[0]; outputDir = args[1]; String eQTLfileName = args[2]; - + String covariate = args[3]; + File genesFile = new File(args[4]); + + + System.out.println("Input dir: " + inputDir); System.out.println("Output dir: " + outputDir); System.out.println("eQTL file: " + eQTLfileName); - + System.out.println("covariate: " + covariate); + System.out.println("genes file: " + genesFile.getAbsolutePath()); //String[] covsToCorrect = {"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "CEU", "GBR", "FIN", "TSI", "YRI"}; String[] covsToCorrect = {"age", "gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "LLdeep", "LLS", "RS", "CODAM"}; HashMap hashEQTLs = getEqtls(eQTLfileName); - + HashMap hashSamples = new HashMap(); if (1 == 1) { @@ -344,22 +352,23 @@ public static void main(String[] args) throws IOException { } - String eQtlGene = "ENSG00000116688"; - String covariate = "ENSG00000084072"; - - Integer eQtlGeneI = datasetExpression.hashProbes.get(eQtlGene); - Integer covariateI = datasetCovariates.hashProbes.get(covariate); - Integer snpI = eQtlGeneI; - - - - makeInteractionPlot(outputDir + "/" + covariate + "-" + eQtlGene + ".png" , datasetGenotypes.rawData[snpI], datasetExpression.rawData[eQtlGeneI], datasetCovariates.rawData[covariateI]); - - - - - } - + + CSVReader reader = new CSVReader(new FileReader(genesFile), '\t', CSVWriter.NO_QUOTE_CHARACTER); + String[] nextLine; + while ((nextLine = reader.readNext()) != null) { + + String eQtlGene = nextLine[0]; + + Integer eQtlGeneI = datasetExpression.hashProbes.get(eQtlGene); + Integer covariateI = datasetCovariates.hashProbes.get(covariate); + Integer snpI = eQtlGeneI; + + makeInteractionPlot(outputDir + "/" + covariate + "-" + eQtlGene + ".png", datasetGenotypes.rawData[snpI], datasetExpression.rawData[eQtlGeneI], 
datasetCovariates.rawData[covariateI]); + + } + + } + public static void makeInteractionPlot(String fileName, double[] genotype, double[] expression, double[] covariate) { int nrSamples = genotype.length; @@ -440,9 +449,9 @@ public static void makeInteractionPlot(String fileName, double[] genotype, doubl g2d.setColor(new Color(98, 175, 255)); } } - + g2d.fillOval(posX - 5 - rep * 4, posY - 5 - rep * 4, 7 + rep * 8, 7 + rep * 8); - + } } @@ -560,5 +569,4 @@ public static void makeInteractionPlot(String fileName, double[] genotype, doubl } - } From 75478abc92dc549754d58b944ead1d5f9380d274 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Fri, 10 Jul 2015 11:20:59 +0200 Subject: [PATCH 072/143] interactions --- .../eqtlinteractionanalyser/InteractionPlotter.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/InteractionPlotter.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/InteractionPlotter.java index 88e8e008b..5679bf317 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/InteractionPlotter.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/InteractionPlotter.java @@ -45,8 +45,8 @@ public static void main(String[] args) throws IOException { System.out.println("covariate: " + covariate); System.out.println("genes file: " + genesFile.getAbsolutePath()); - //String[] covsToCorrect = {"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "CEU", "GBR", "FIN", "TSI", "YRI"}; - String[] covsToCorrect = {"age", "gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "LLdeep", "LLS", "RS", "CODAM"}; + String[] covsToCorrect = {"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "CEU", "GBR", "FIN", "TSI", "YRI"}; + //String[] 
covsToCorrect = {"age", "gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "LLdeep", "LLS", "RS", "CODAM"}; HashMap hashEQTLs = getEqtls(eQTLfileName); HashMap hashSamples = new HashMap(); @@ -358,6 +358,8 @@ public static void main(String[] args) throws IOException { while ((nextLine = reader.readNext()) != null) { String eQtlGene = nextLine[0]; + + System.out.println(eQtlGene); Integer eQtlGeneI = datasetExpression.hashProbes.get(eQtlGene); Integer covariateI = datasetCovariates.hashProbes.get(covariate); From aecd4f10cf6d527e11b0854ee2f4fcadf9302b53 Mon Sep 17 00:00:00 2001 From: Dasha Zhernakova Date: Fri, 10 Jul 2015 16:10:08 +0300 Subject: [PATCH 073/143] fixed no eQTL confinement case --- .../TestEQTLDatasetForInteractions.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index c32ceca1a..d0fc53c14 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -20,6 +20,7 @@ import java.util.logging.Logger; import org.apache.commons.math3.stat.ranking.NaturalRanking; import umcg.genetica.genomicboundaries.GenomicBoundary; +import umcg.genetica.io.Gpio; import umcg.genetica.io.text.TextFile; /** @@ -49,7 +50,9 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String this.inputDir = inputDir; this.outputDir = outputDir; - + if (!Gpio.exists(outputDir)) { + Gpio.createDir(outputDir); + } HashMap eqtlGenes = getEqtls(eQTLfileName); @@ -99,6 +102,9 @@ public 
TestEQTLDatasetForInteractions(String inputDir, String outputDir, String * @throws IOException */ public static HashMap getEqtls(String fname) throws IOException { + if (fname == null){ + return null; + } TextFile file = new TextFile(fname, false); ArrayList genes = file.readAsArrayList(4, TextFile.tab); HashMap eqtlGenes = new HashMap(); From 73703e397a02f66b6d1ceab27f9fa01db2723395 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Mon, 13 Jul 2015 14:47:04 +0200 Subject: [PATCH 074/143] interactions --- .../eqtlinteractionanalyser/CompareToGeuvadis.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java index 67d609657..612f17b35 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java @@ -20,6 +20,8 @@ public static void main(String[] args) { HashSet covariatesReplicated = new HashSet(); HashSet genesReplicated = new HashSet(); int interactionsReplicated = 0; + int sameDirection = 0; + int oppositeDirection = 0; for (Map.Entry covariateEntry : bios.hashProbes.entrySet()) { for (Map.Entry eQtlGeneEntry : bios.hashSamples.entrySet()) { @@ -48,6 +50,12 @@ public static void main(String[] args) { genesReplicated.add(eQtlGene); interactionsReplicated++; + if(biosInteractionZ * geuvadisInteractionZ > 0){ + sameDirection++; + } else { + oppositeDirection++; + } + System.out.println(covariate + "\t" + eQtlGene + "\t" + biosInteractionZ + "\t" + geuvadisInteractionZ); } @@ -63,6 +71,8 @@ public static void main(String[] args) { System.out.println("Covariates replicated: " + 
covariatesReplicated.size()); System.out.println("Genes replicated: " + genesReplicated.size()); System.out.println("Interactions replicated: " + interactionsReplicated); + System.out.println("Interactions replicated same: " + sameDirection); + System.out.println("Interactions replicated opposite: " + oppositeDirection); } From 85f52a89d8e7df9a96aa52db230896eda99380bd Mon Sep 17 00:00:00 2001 From: Dasha Zhernakova Date: Mon, 13 Jul 2015 17:36:19 +0300 Subject: [PATCH 075/143] small bug fixes --- .../eqtlinteractionanalyser/EQTLInteractionAnalyser.java | 4 ++-- .../TestEQTLDatasetForInteractions.java | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index 48555185f..c5180fe75 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -89,7 +89,7 @@ public static void main(String[] args) throws IOException { System.out.println("Starting interaction analysis"); System.out.println("Current date and time: " + DATE_TIME_FORMAT.format(currentDataTime)); System.out.println(); - + String inputDir, outputDir, eqtlFile = null, annotationFile = null; int maxNumCovariatesToRegress = 20; boolean interpret = false, chi2sumDiff = false; @@ -110,7 +110,7 @@ public static void main(String[] args) throws IOException { interpret = Boolean.parseBoolean(commandLine.getOptionValue("t")); } if (commandLine.hasOption("dif")) { - chi2sumDiff = Boolean.parseBoolean(commandLine.getOptionValue("d")); + chi2sumDiff = Boolean.parseBoolean(commandLine.getOptionValue("dif")); } if 
(commandLine.hasOption('a')) { annotationFile = commandLine.getOptionValue("a"); diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index d0fc53c14..b32f7636e 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -38,6 +38,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws this.inputDir = inputDir; this.outputDir = outputDir; + primaryCovsToCorrect = new String[]{"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "LLdeep", "RS", "CODAM", "LLS"}; //preprocessData(); } From 652731c498557a232e21e9a4cae453b1bb65e996 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Mon, 13 Jul 2015 16:40:17 +0200 Subject: [PATCH 076/143] interactions --- .../eqtlinteractionanalyser/CompareToGeuvadis.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java index 612f17b35..799d77c2f 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/CompareToGeuvadis.java @@ -29,13 +29,13 @@ public static void main(String[] args) { String covariate = covariateEntry.getKey(); String eQtlGene = 
eQtlGeneEntry.getKey(); - if(!covariate.equals("ENSG00000084072")){ - continue; - } +// if(!covariate.equals("ENSG00000084072")){ +// continue; +// } double biosInteractionZ = bios.rawData[covariateEntry.getValue()][eQtlGeneEntry.getValue()]; - if (biosInteractionZ >= 3 || biosInteractionZ <= -3) { + if (biosInteractionZ >= 6 || biosInteractionZ <= -6) { Integer geuvadisCovI = geuvadis.hashProbes.get(covariate); Integer geuvadisGenI = geuvadis.hashSamples.get(eQtlGene); @@ -44,7 +44,7 @@ public static void main(String[] args) { double geuvadisInteractionZ = geuvadis.rawData[geuvadisCovI][geuvadisGenI]; - if (geuvadisInteractionZ >= 2 || geuvadisInteractionZ <= -2) { + if (geuvadisInteractionZ >= 5 || geuvadisInteractionZ <= -5) { covariatesReplicated.add(covariate); genesReplicated.add(eQtlGene); From 3710709f1f6c24dd6b6927960084c098d4471178 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Tue, 14 Jul 2015 09:28:09 +0200 Subject: [PATCH 077/143] Interactions --- .../EQTLInteractionAnalyser.java | 18 +- .../TestEQTLDatasetForInteractions.java | 1398 +++++++++-------- 2 files changed, 738 insertions(+), 678 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index c5180fe75..f21d00705 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -6,6 +6,7 @@ package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; +import java.io.File; import org.apache.commons.cli.*; import umcg.genetica.io.text.TextFile; @@ -83,6 +84,12 @@ public class EQTLInteractionAnalyser { 
OptionBuilder.withDescription("File containing the covariates to correct for before running the interaction analysis. No header, each covariate on a separate line"); OptionBuilder.withLongOpt("covFile"); OPTIONS.addOption(OptionBuilder.create("cf")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("File containing the SNPs to swap"); + OptionBuilder.withLongOpt("swap"); + OPTIONS.addOption(OptionBuilder.create("sw")); } public static void main(String[] args) throws IOException { @@ -91,6 +98,7 @@ public static void main(String[] args) throws IOException { System.out.println(); String inputDir, outputDir, eqtlFile = null, annotationFile = null; + final File snpsToSwapFile; int maxNumCovariatesToRegress = 20; boolean interpret = false, chi2sumDiff = false; String[] covariates = null; @@ -122,8 +130,14 @@ public static void main(String[] args) throws IOException { covFile.close(); } else if (commandLine.hasOption("c")){ - covariates = commandLine.getOptionValues("cf"); + covariates = commandLine.getOptionValues("c"); } + + if (commandLine.hasOption("sw")){ + snpsToSwapFile = new File(commandLine.getOptionValue("sw")); + } else { + snpsToSwapFile = null; + } } catch (ParseException ex) { System.err.println("Invalid command line arguments: "); @@ -143,7 +157,7 @@ else if (chi2sumDiff){ interactor.findChi2SumDifferences(maxNumCovariatesToRegress); } else { - new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates); + new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, snpsToSwapFile); } } diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 
b32f7636e..f52b428f3 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -5,12 +5,17 @@ */ package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; -import java.awt.*; -import java.awt.image.BufferedImage; +import java.io.BufferedReader; +import java.io.BufferedWriter; import java.io.File; +import java.io.FileInputStream; +import java.io.FileWriter; import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Writer; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.Vector; import java.util.concurrent.CompletionService; import java.util.concurrent.ExecutionException; @@ -29,272 +34,271 @@ */ public class TestEQTLDatasetForInteractions { - String inputDir = null; - String outputDir = null; - HashMap> geneDistanceMap = null; - String[] primaryCovsToCorrect; - - public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws IOException { - - this.inputDir = inputDir; - this.outputDir = outputDir; - primaryCovsToCorrect = new String[]{"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "LLdeep", "RS", "CODAM", "LLS"}; - //preprocessData(); - } - - public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect) throws IOException { - - System.out.println("Input dir: " + inputDir); - System.out.println("Output dir: " + outputDir); - System.out.println("eQTL file: " + eQTLfileName); - System.out.println("Maximum number of covariates to regress out: " + maxNumTopCovs); - - this.inputDir = inputDir; - this.outputDir = outputDir; - if (!Gpio.exists(outputDir)) { - Gpio.createDir(outputDir); - } - - HashMap eqtlGenes = 
getEqtls(eQTLfileName); - - if (annotationFile != null) { - createGeneDistanceMap(annotationFile); - } - - //preprocessData(); - - TextFile outputTopCovs = new TextFile(outputDir + "/outputTopCovariates.txt", true); - - - if (covariatesToCorrect != null){ - primaryCovsToCorrect = covariatesToCorrect; - } - else { - primaryCovsToCorrect = new String[]{"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "LLdeep", "RS", "CODAM", "LLS"}; - } - - - System.out.print("\nPrimary covariates to correct for before running interaction analysis: "); - for (String cov : primaryCovsToCorrect){ - System.out.print("\n\t" + cov); - } - System.out.println(); - - - String[] covsToCorrect = primaryCovsToCorrect; - int cnt = 0; - while (cnt < maxNumTopCovs) { - String topCov = performInteractionAnalysis(covsToCorrect, eqtlGenes, outputTopCovs); - String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; - for (int c = 0; c < covsToCorrect.length; c++) { - covsToCorrectNew[c] = covsToCorrect[c]; - } - covsToCorrectNew[covsToCorrect.length] = topCov; - covsToCorrect = covsToCorrectNew; - cnt++; - } - outputTopCovs.close(); - } - - /** - * Extracts eQTL gene names - * @param fname - eQTL file (in the eqtlmappingpipeline format) - * @return gene names in keys of a HashMap - * @throws IOException - */ - public static HashMap getEqtls(String fname) throws IOException { - if (fname == null){ - return null; - } - TextFile file = new TextFile(fname, false); - ArrayList genes = file.readAsArrayList(4, TextFile.tab); - HashMap eqtlGenes = new HashMap(); - for (String gene : genes) { - eqtlGenes.put(gene, null); - } - file.close(); - return eqtlGenes; - - } - - public void interpretInteractionZScoreMatrix(int maxNumRegressedCovariates) { - - System.out.println("Interpreting the z-score matrix"); - - int numPrimaryCovsToCorrect = primaryCovsToCorrect.length; - for (int nrCovsRemoved = numPrimaryCovsToCorrect; nrCovsRemoved < numPrimaryCovsToCorrect + maxNumRegressedCovariates; 
nrCovsRemoved++) { - ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt"); - dataset.save(dataset.fileName + ".binary"); - } - - - for (int nrCovsRemoved = numPrimaryCovsToCorrect; nrCovsRemoved < numPrimaryCovsToCorrect + maxNumRegressedCovariates; nrCovsRemoved++) { - - ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); - ExpressionDataset dataset2 = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + (nrCovsRemoved + 1) + "Covariates.txt.binary"); - - for (int q=0; q 2 && absZDiff > maxAbsZDiff) { - maxAbsZDiff = absZDiff; - output = nrCovsRemoved + "\t" + p + "\t" + dataset.probeNames[p] + "\t" + q + "\t" + dataset.sampleNames[q] + "\t" + dataset.rawData[p][q] + "\t" + dataset2.rawData[p][q] + "\t" + zDiff; - } - } - if (maxAbsZDiff > 2) { - System.out.println(output); - } - } - } - - System.exit(0); - } - - public void findChi2SumDifferences(int maxNumRegressedCovariates) { - - int numPrimaryCovsToCorrect = primaryCovsToCorrect.length; - System.out.println("Interpreting the z-score matrix"); - System.out.println("Preparing the data"); - for (int nrCovsRemoved = numPrimaryCovsToCorrect; nrCovsRemoved < numPrimaryCovsToCorrect + maxNumRegressedCovariates; nrCovsRemoved++) { - ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt"); - dataset.save(dataset.fileName + ".binary"); - } - - System.out.println("Comparing chi2sums"); - for (int nrCovsRemoved = numPrimaryCovsToCorrect; nrCovsRemoved < numPrimaryCovsToCorrect + maxNumRegressedCovariates; nrCovsRemoved++) { - - ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); - ExpressionDataset dataset2 = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + (nrCovsRemoved + 1) + 
"Covariates.txt.binary"); - - for (int covariate = 0; covariate < dataset.nrProbes; covariate++) { - double chi2Sum1 = 0, chi2Sum2 = 0; - for (int gene = 0; gene < dataset.nrSamples; gene++) { - double z_before = dataset.rawData[covariate][gene]; - chi2Sum1 += z_before * z_before; - double z_after = dataset2.rawData[covariate][gene]; - chi2Sum2 += z_after * z_after; - - } - System.out.println(nrCovsRemoved + "\t" + dataset.probeNames[covariate] + "\t" + chi2Sum1 + "\t" + chi2Sum2 + "\t" + (chi2Sum1 - chi2Sum2)); - } - } - } - - - public void preprocessData() { - - HashMap hashGenotypes = new HashMap(); - HashMap hashExpression = new HashMap(); - HashMap hashEQTLs = new HashMap(); - try { - java.io.BufferedReader in = new java.io.BufferedReader(new java.io.FileReader(new File(inputDir + "/bigTableLude.txt"))); - String str = in.readLine(); - String[] data = str.split("\t"); - for (int d = 0; d < data.length; d++) { - System.out.println(d + "\t" + data[d]); - if (data[d].endsWith("_dosage")) { - hashGenotypes.put(data[d], null); - } - if (data[d].endsWith("_exp")) { - hashExpression.put(data[d], null); - } - } - int itr = 0; - while ((str = in.readLine()) != null) { - if (!str.contains("NA")) { - data = str.split("\t"); - hashEQTLs.put(data[0], null); - itr++; - if (itr % 100 == 0) { - System.out.println(itr); - } - } - } - } catch (Exception e) { - System.out.println("Error:\t" + e.getMessage()); - e.printStackTrace(); - } - - ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt", "\t", hashEQTLs, hashGenotypes); - ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt", "\t", hashEQTLs, hashExpression); - datasetGenotypes.save(datasetGenotypes.fileName + ".Genotypes.binary"); - datasetExpression.save(datasetGenotypes.fileName + ".Expression.binary"); - - ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt"); - 
datasetCovariates.save(datasetCovariates.fileName + ".Covariates.binary"); - System.exit(0); - - } - - public final String performInteractionAnalysis(String[] covsToCorrect, HashMap hashEQTLs, TextFile outputTopCovs) throws IOException { - - HashMap hashSamples = new HashMap(); - - if (1 == 1) { - - System.out.println("Removing outlier samples!!!"); - HashMap hashCovariates = new HashMap(); - hashCovariates.put("MEDIAN_5PRIME_BIAS", null); - hashCovariates.put("MEDIAN_3PRIME_BIAS", null); - ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", hashCovariates, null); - hashSamples = new HashMap(); - for (int s = 0; s < datasetCovariates.nrSamples; s++) { - if (datasetCovariates.rawData[0][s] != 0) { - hashSamples.put(datasetCovariates.sampleNames[s], null); - } - } - datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", hashCovariates, hashSamples); - HashMap hashSamplesToExclude = new HashMap(); - if (1 == 1) { - int index = ((Integer) datasetCovariates.hashProbes.get("MEDIAN_5PRIME_BIAS")).intValue(); - double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[index]); - double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[index]); - for (int s = 0; s < datasetCovariates.nrSamples; s++) { - double z = (datasetCovariates.rawData[index][s] - mean) / stdev; - if (Math.abs(z) > 3) { - hashSamplesToExclude.put(datasetCovariates.sampleNames[s], null); - } - } - } - if (1 == 1) { - int index = ((Integer) datasetCovariates.hashProbes.get("MEDIAN_3PRIME_BIAS")).intValue(); - double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[index]); - double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[index]); - for (int s = 0; s < datasetCovariates.nrSamples; s++) { - double z = (datasetCovariates.rawData[index][s] - mean) / stdev; - if (Math.abs(z) > 3) { - 
hashSamplesToExclude.put(datasetCovariates.sampleNames[s], null); - } - } - } - hashSamples = new HashMap(); - for (int s = 0; s < datasetCovariates.nrSamples; s++) { - if (!hashSamplesToExclude.containsKey(datasetCovariates.sampleNames[s])) { - hashSamples.put(datasetCovariates.sampleNames[s], null); - hashSamples.put(datasetCovariates.sampleNames[s] + "_exp", null); - hashSamples.put(datasetCovariates.sampleNames[s] + "_dosage", null); - } - } - } - - ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", "\t", hashEQTLs, hashSamples); - ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt.Expression.binary", "\t", hashEQTLs, hashSamples); - ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", null, hashSamples); - - org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression regression = new org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression(); - int nrSamples = datasetGenotypes.nrSamples; - - - if (1 == 1) { - //Define a set of covariates that we want to use as correction: - System.out.println("Correcting gene expression data for cohort specific effects and top 25 components"); - //String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; - int nrCompsToCorrectFor = 25; - ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(nrCompsToCorrectFor, datasetGenotypes.nrSamples); - datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; + String inputDir = null; + String outputDir = null; + HashMap> geneDistanceMap = null; + String[] primaryCovsToCorrect; + + public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws IOException { + + this.inputDir = inputDir; + this.outputDir = outputDir; + primaryCovsToCorrect = new String[]{"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "LLdeep", "RS", "CODAM", "LLS"}; + 
//preprocessData(); + } + + public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, File snpsToSwapFile) throws IOException { + + System.out.println("Input dir: " + inputDir); + System.out.println("Output dir: " + outputDir); + System.out.println("eQTL file: " + eQTLfileName); + System.out.println("Maximum number of covariates to regress out: " + maxNumTopCovs); + + this.inputDir = inputDir; + this.outputDir = outputDir; + if (!Gpio.exists(outputDir)) { + Gpio.createDir(outputDir); + } + + HashMap eqtlGenes = getEqtls(eQTLfileName); + + if (annotationFile != null) { + createGeneDistanceMap(annotationFile); + } + + //preprocessData(); + + TextFile outputTopCovs = new TextFile(outputDir + "/outputTopCovariates.txt", true); + + + if (covariatesToCorrect != null) { + primaryCovsToCorrect = covariatesToCorrect; + } else { + primaryCovsToCorrect = new String[]{"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "LLdeep", "RS", "CODAM", "LLS"}; + } + + + System.out.print("\nPrimary covariates to correct for before running interaction analysis: "); + for (String cov : primaryCovsToCorrect) { + System.out.print("\n\t" + cov); + } + System.out.println(); + + + String[] covsToCorrect = primaryCovsToCorrect; + int cnt = 0; + while (cnt < maxNumTopCovs) { + String topCov = performInteractionAnalysis(covsToCorrect, eqtlGenes, outputTopCovs, snpsToSwapFile); + String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; + for (int c = 0; c < covsToCorrect.length; c++) { + covsToCorrectNew[c] = covsToCorrect[c]; + } + covsToCorrectNew[covsToCorrect.length] = topCov; + covsToCorrect = covsToCorrectNew; + cnt++; + } + outputTopCovs.close(); + } + + /** + * Extracts eQTL gene names + * + * @param fname - eQTL file (in the eqtlmappingpipeline format) + * @return gene names in keys of a HashMap + * @throws IOException + */ + public static HashMap getEqtls(String 
fname) throws IOException { + if (fname == null) { + return null; + } + TextFile file = new TextFile(fname, false); + ArrayList genes = file.readAsArrayList(4, TextFile.tab); + HashMap eqtlGenes = new HashMap(); + for (String gene : genes) { + eqtlGenes.put(gene, null); + } + file.close(); + return eqtlGenes; + + } + + public void interpretInteractionZScoreMatrix(int maxNumRegressedCovariates) { + + System.out.println("Interpreting the z-score matrix"); + + int numPrimaryCovsToCorrect = primaryCovsToCorrect.length; + for (int nrCovsRemoved = numPrimaryCovsToCorrect; nrCovsRemoved < numPrimaryCovsToCorrect + maxNumRegressedCovariates; nrCovsRemoved++) { + ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt"); + dataset.save(dataset.fileName + ".binary"); + } + + + for (int nrCovsRemoved = numPrimaryCovsToCorrect; nrCovsRemoved < numPrimaryCovsToCorrect + maxNumRegressedCovariates; nrCovsRemoved++) { + + ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); + ExpressionDataset dataset2 = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + (nrCovsRemoved + 1) + "Covariates.txt.binary"); + + for (int q = 0; q < dataset.nrSamples; q++) { + double maxAbsZDiff = 0; + String output = ""; + for (int p = 0; p < dataset.nrProbes; p++) { + double zDiff = dataset.rawData[p][q] - dataset2.rawData[p][q]; + double absZDiff = Math.abs(zDiff); + if (absZDiff > 2 && absZDiff > maxAbsZDiff) { + maxAbsZDiff = absZDiff; + output = nrCovsRemoved + "\t" + p + "\t" + dataset.probeNames[p] + "\t" + q + "\t" + dataset.sampleNames[q] + "\t" + dataset.rawData[p][q] + "\t" + dataset2.rawData[p][q] + "\t" + zDiff; + } + } + if (maxAbsZDiff > 2) { + System.out.println(output); + } + } + } + + System.exit(0); + } + + public void findChi2SumDifferences(int maxNumRegressedCovariates) { + + int numPrimaryCovsToCorrect = 
primaryCovsToCorrect.length; + System.out.println("Interpreting the z-score matrix"); + System.out.println("Preparing the data"); + for (int nrCovsRemoved = numPrimaryCovsToCorrect; nrCovsRemoved < numPrimaryCovsToCorrect + maxNumRegressedCovariates; nrCovsRemoved++) { + ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt"); + dataset.save(dataset.fileName + ".binary"); + } + + System.out.println("Comparing chi2sums"); + for (int nrCovsRemoved = numPrimaryCovsToCorrect; nrCovsRemoved < numPrimaryCovsToCorrect + maxNumRegressedCovariates; nrCovsRemoved++) { + + ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); + ExpressionDataset dataset2 = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + (nrCovsRemoved + 1) + "Covariates.txt.binary"); + + for (int covariate = 0; covariate < dataset.nrProbes; covariate++) { + double chi2Sum1 = 0, chi2Sum2 = 0; + for (int gene = 0; gene < dataset.nrSamples; gene++) { + double z_before = dataset.rawData[covariate][gene]; + chi2Sum1 += z_before * z_before; + double z_after = dataset2.rawData[covariate][gene]; + chi2Sum2 += z_after * z_after; + + } + System.out.println(nrCovsRemoved + "\t" + dataset.probeNames[covariate] + "\t" + chi2Sum1 + "\t" + chi2Sum2 + "\t" + (chi2Sum1 - chi2Sum2)); + } + } + } + + public void preprocessData() { + + HashMap hashGenotypes = new HashMap(); + HashMap hashExpression = new HashMap(); + HashMap hashEQTLs = new HashMap(); + try { + java.io.BufferedReader in = new java.io.BufferedReader(new java.io.FileReader(new File(inputDir + "/bigTableLude.txt"))); + String str = in.readLine(); + String[] data = str.split("\t"); + for (int d = 0; d < data.length; d++) { + System.out.println(d + "\t" + data[d]); + if (data[d].endsWith("_dosage")) { + hashGenotypes.put(data[d], null); + } + if (data[d].endsWith("_exp")) { + 
hashExpression.put(data[d], null); + } + } + int itr = 0; + while ((str = in.readLine()) != null) { + if (!str.contains("NA")) { + data = str.split("\t"); + hashEQTLs.put(data[0], null); + itr++; + if (itr % 100 == 0) { + System.out.println(itr); + } + } + } + } catch (Exception e) { + System.out.println("Error:\t" + e.getMessage()); + e.printStackTrace(); + } + + ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt", "\t", hashEQTLs, hashGenotypes); + ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt", "\t", hashEQTLs, hashExpression); + datasetGenotypes.save(datasetGenotypes.fileName + ".Genotypes.binary"); + datasetExpression.save(datasetGenotypes.fileName + ".Expression.binary"); + + ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt"); + datasetCovariates.save(datasetCovariates.fileName + ".Covariates.binary"); + System.exit(0); + + } + + public final String performInteractionAnalysis(String[] covsToCorrect, HashMap hashEQTLs, TextFile outputTopCovs, File snpsToSwapFile) throws IOException { + + HashMap hashSamples = new HashMap(); + + if (1 == 1) { + + System.out.println("Removing outlier samples!!!"); + HashMap hashCovariates = new HashMap(); + hashCovariates.put("MEDIAN_5PRIME_BIAS", null); + hashCovariates.put("MEDIAN_3PRIME_BIAS", null); + ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", hashCovariates, null); + hashSamples = new HashMap(); + for (int s = 0; s < datasetCovariates.nrSamples; s++) { + if (datasetCovariates.rawData[0][s] != 0) { + hashSamples.put(datasetCovariates.sampleNames[s], null); + } + } + datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", hashCovariates, hashSamples); + HashMap hashSamplesToExclude = new HashMap(); + if (1 == 1) { + int index = ((Integer) 
datasetCovariates.hashProbes.get("MEDIAN_5PRIME_BIAS")).intValue(); + double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[index]); + double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[index]); + for (int s = 0; s < datasetCovariates.nrSamples; s++) { + double z = (datasetCovariates.rawData[index][s] - mean) / stdev; + if (Math.abs(z) > 3) { + hashSamplesToExclude.put(datasetCovariates.sampleNames[s], null); + } + } + } + if (1 == 1) { + int index = ((Integer) datasetCovariates.hashProbes.get("MEDIAN_3PRIME_BIAS")).intValue(); + double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[index]); + double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[index]); + for (int s = 0; s < datasetCovariates.nrSamples; s++) { + double z = (datasetCovariates.rawData[index][s] - mean) / stdev; + if (Math.abs(z) > 3) { + hashSamplesToExclude.put(datasetCovariates.sampleNames[s], null); + } + } + } + hashSamples = new HashMap(); + for (int s = 0; s < datasetCovariates.nrSamples; s++) { + if (!hashSamplesToExclude.containsKey(datasetCovariates.sampleNames[s])) { + hashSamples.put(datasetCovariates.sampleNames[s], null); + hashSamples.put(datasetCovariates.sampleNames[s] + "_exp", null); + hashSamples.put(datasetCovariates.sampleNames[s] + "_dosage", null); + } + } + } + + ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", "\t", hashEQTLs, hashSamples); + ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt.Expression.binary", "\t", hashEQTLs, hashSamples); + ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", null, hashSamples); + + org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression regression = new org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression(); + int nrSamples = datasetGenotypes.nrSamples; + + + if (1 
== 1) { + //Define a set of covariates that we want to use as correction: + System.out.println("Correcting gene expression data for cohort specific effects and top 25 components"); + //String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; + int nrCompsToCorrectFor = 25; + ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(nrCompsToCorrectFor, datasetGenotypes.nrSamples); + datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; // for (int p = 0; p < cohorts.length; p++) { // for (int s = 0; s < datasetGenotypes.nrSamples; s++) { // if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { @@ -302,61 +306,100 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h // } // } // } - if (nrCompsToCorrectFor > 0) { - for (int comp = 0; comp < nrCompsToCorrectFor; comp++) { - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetCovariatesToCorrectFor.rawData[comp][s] = datasetCovariates.rawData[datasetCovariates.nrProbes - 51 + comp][s]; - } - } - } - - datasetCovariatesToCorrectFor.transposeDataset(); - - datasetCovariatesToCorrectFor.save(inputDir + "/CovariatesToCorrectFor.txt"); - orthogonalizeDataset(inputDir + "/CovariatesToCorrectFor.txt"); - datasetCovariatesToCorrectFor = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.PrincipalComponents.txt"); - datasetCovariatesToCorrectFor.transposeDataset(); - ExpressionDataset datasetCovariatesToCorrectForEigenvalues = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.Eigenvalues.txt"); - for (int snp = 0; snp < datasetExpression.nrProbes; snp++) { - for (int cov = 0; cov < datasetCovariatesToCorrectFor.nrProbes; cov++) { - if (datasetCovariatesToCorrectForEigenvalues.rawData[cov][0] > 1E-5) { - double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetExpression.rawData[snp]); - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetExpression.rawData[snp][s] -= rc[0] 
* datasetCovariatesToCorrectFor.rawData[cov][s]; - } - } - } - } - - - } - - - double[] mainEQTLCorr = new double[datasetGenotypes.nrProbes]; - if (1 == 1) { - System.out.println("Enforcing for every eQTL that the genotype dosage positively correlated with gene expression levels:"); - for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { - double corr = JSci.maths.ArrayMath.correlation(datasetGenotypes.rawData[snp], datasetExpression.rawData[snp]); - //System.out.println(datasetExpression.probeNames[snp] + "\t" + snp + "\t" + corr); - - if (corr < 0) { - corr = -corr; - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetGenotypes.rawData[snp][s] = 2 - datasetGenotypes.rawData[snp][s]; - } - } - - mainEQTLCorr[snp] = corr; - } - } - - if (1 == 1) { - - if (1 == 1) { - System.out.println("Correcting covariate data for cohort specific effects:"); + if (nrCompsToCorrectFor > 0) { + for (int comp = 0; comp < nrCompsToCorrectFor; comp++) { + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariatesToCorrectFor.rawData[comp][s] = datasetCovariates.rawData[datasetCovariates.nrProbes - 51 + comp][s]; + } + } + } + + datasetCovariatesToCorrectFor.transposeDataset(); + + datasetCovariatesToCorrectFor.save(inputDir + "/CovariatesToCorrectFor.txt"); + orthogonalizeDataset(inputDir + "/CovariatesToCorrectFor.txt"); + datasetCovariatesToCorrectFor = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.PrincipalComponents.txt"); + datasetCovariatesToCorrectFor.transposeDataset(); + ExpressionDataset datasetCovariatesToCorrectForEigenvalues = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.Eigenvalues.txt"); + for (int snp = 0; snp < datasetExpression.nrProbes; snp++) { + for (int cov = 0; cov < datasetCovariatesToCorrectFor.nrProbes; cov++) { + if (datasetCovariatesToCorrectForEigenvalues.rawData[cov][0] > 1E-5) { + double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], 
datasetExpression.rawData[snp]); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetExpression.rawData[snp][s] -= rc[0] * datasetCovariatesToCorrectFor.rawData[cov][s]; + } + } + } + } + + + } + + + + + + //double[] mainEQTLCorr = new double[datasetGenotypes.nrProbes]; + + + if (snpsToSwapFile != null) { + System.out.println("Enforcing for every eQTL that the genotype dosage is swapped based on: " + snpsToSwapFile.getAbsolutePath()); + + HashSet snpsToSwap = new HashSet(); + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(snpsToSwapFile), "UTF-8")); + String line; + while ((line = reader.readLine()) != null) { + snpsToSwap.add(line); + } + reader.close(); + + for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { + + if (snpsToSwap.contains(datasetGenotypes.probeNames[snp])) { + + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetGenotypes.rawData[snp][s] = 2 - datasetGenotypes.rawData[snp][s]; + } + + } + + //mainEQTLCorr[snp] = corr; + } + + + } else { + System.out.println("Enforcing for every eQTL that the genotype dosage positively correlated with gene expression levels:"); + + Writer writer = new BufferedWriter(new FileWriter(outputDir + "/swappedDosages.txt")); + for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { + double corr = JSci.maths.ArrayMath.correlation(datasetGenotypes.rawData[snp], datasetExpression.rawData[snp]); + //System.out.println(datasetExpression.probeNames[snp] + "\t" + snp + "\t" + corr); + + if (corr < 0) { + corr = -corr; + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetGenotypes.rawData[snp][s] = 2 - datasetGenotypes.rawData[snp][s]; + } + writer.append(datasetGenotypes.probeNames[snp]); + writer.append('\n'); + } + + //mainEQTLCorr[snp] = corr; + } + writer.close(); + + } + + System.exit(0); + + + if (1 == 1) { + + if (1 == 1) { + System.out.println("Correcting covariate data for cohort specific effects:"); // String[] cohorts = 
{"LLDeep","LLS","RS","CODAM"}; - ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect.length, datasetGenotypes.nrSamples); - datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; + ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect.length, datasetGenotypes.nrSamples); + datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; // for (int p=0; p 1E-5) { - double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetCovariates.rawData[p]); - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetCovariates.rawData[p][s] -= rc[0] * datasetCovariatesToCorrectFor.rawData[cov][s]; - } - } - } - double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[p]); - double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[p]); - if (stdev < 1E-5) { - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetCovariates.rawData[p][s] = mean; - } - } - } - } - - - } - - if (1 == 1) { - System.out.println("Correcting covariate data for cis-eQTL effects:"); - for (int p = 0; p < datasetCovariates.nrProbes; p++) { - if (datasetExpression.hashProbes.containsKey(datasetCovariates.probeNames[p])) { - int index = ((Integer) datasetExpression.hashProbes.get(datasetCovariates.probeNames[p])).intValue(); - double[] rc = getLinearRegressionCoefficients(datasetGenotypes.rawData[index], datasetCovariates.rawData[p]); - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetCovariates.rawData[p][s] -= rc[0] * datasetGenotypes.rawData[index][s]; - } - } - } - } - - if (1 == 2) { - datasetCovariates.save(inputDir + "/CovariatesCorrected.txt"); - HashMap hashProbesToFilter = new HashMap(); - for (int p = 0; p < datasetCovariates.nrProbes; p++) { - if (datasetCovariates.probeNames[p].startsWith("ENSG")) { - hashProbesToFilter.put(datasetCovariates.probeNames[p], null); - } - } - ExpressionDataset 
datasetCovariatesCorrected = new ExpressionDataset(inputDir + "/CovariatesCorrected.txt", "\t", hashProbesToFilter, null); - datasetCovariatesCorrected.transposeDataset(); - datasetCovariatesCorrected.save(inputDir + "/CovariatesCorrected.txt"); - System.exit(0); - } - - if (1 == 2) { - ExpressionDataset datasetICA = new ExpressionDataset("/Users/lude/Documents/ICA/mixingmatrix.txt"); - //ExpressionDataset datasetICA = new ExpressionDataset("/Users/lude/Documents/ICA/signals.txt"); - datasetICA.transposeDataset(); - for (int p = 0; p < datasetICA.nrProbes; p++) { - datasetCovariates.rawData[p] = datasetICA.rawData[p]; - datasetCovariates.probeNames[p] = datasetICA.probeNames[p]; - if (p == 7) { - for (int q = 0; q < datasetCovariates.nrProbes; q++) { - double corr = JSci.maths.ArrayMath.correlation(datasetICA.rawData[p], datasetCovariates.rawData[q]); - System.out.println(p + "\t" + datasetICA.probeNames[p] + "\t" + q + "\t" + datasetCovariates.probeNames[q] + "\t" + corr + "\t" + corr * corr); - } - } - } - - orthogonalizeDataset("/Users/lude/Documents/ICA/mixingmatrix.txt"); - //System.exit(0); - } - - System.out.println("Enforcing normal distribution on covariates"); - - NaturalRanking ranker = new NaturalRanking(); - - for (int p = 0; p < datasetCovariates.nrProbes; p++) { - //Rank order the expression values: - double[] values = new double[datasetCovariates.nrSamples]; - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - values[s] = datasetCovariates.rawData[p][s]; - } - double[] rankedValues = ranker.rank(values); - //Replace the original expression value with the standard distribution enforce: - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - //Convert the rank to a proportion, with range <0, 1> - double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); - //Convert the pValue to a Z-Score: - double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); - datasetCovariates.rawData[p][s] = zScore; //Replace 
original expression value with the Z-Score - } - } - - } - - cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = new cern.jet.random.tdouble.engine.DRand(); - - ExpressionDataset datasetExpressionBeforeEQTLCorrection = new ExpressionDataset(datasetExpression.nrProbes, datasetExpression.nrSamples); - for (int p = 0; p < datasetExpression.nrProbes; p++) { - for (int s = 0; s < datasetExpression.nrSamples; s++) { - datasetExpressionBeforeEQTLCorrection.rawData[p][s] = datasetExpression.rawData[p][s]; - } - } - - if (1 == 1) { - System.out.println("Correcting expression data for predefined gene environment interaction effects (GC content, Gender, 5'Median Bias, 3'Median Bias):"); - int[] covsToCorrectIndex = new int[covsToCorrect.length]; - for (int c = 0; c < covsToCorrect.length; c++) { - covsToCorrectIndex[c] = ((Integer) datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); - - } - for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { - double[][] valsX = new double[nrSamples][1 + covsToCorrect.length * 2]; //store genotypes, covariates, interactions - for (int s = 0; s < nrSamples; s++) { - valsX[s][0] = datasetGenotypes.rawData[snp][s]; //genotypes - } - for (int c = 0; c < covsToCorrect.length; c++) { - for (int s = 0; s < nrSamples; s++) { - valsX[s][c * 2 + 1] = datasetCovariates.rawData[covsToCorrectIndex[c]][s]; //covariate - valsX[s][c * 2 + 2] = valsX[s][0] * valsX[s][c * 2 + 1]; //interction - } - } - double[] valsY = datasetExpression.rawData[snp]; - regression.newSampleData(valsY, valsX); - datasetExpression.rawData[snp] = regression.estimateResiduals(); - } - } - - - if (1 == 1) { - System.out.println("Enforcing normal distribution on expression data:"); - - NaturalRanking ranker = new NaturalRanking(); - - for (int p = 0; p < datasetExpression.nrProbes; p++) { - //Rank order the expression values: - double[] values = new double[datasetExpression.nrSamples]; - for (int s = 0; s < datasetExpression.nrSamples; s++) { - 
values[s] = datasetExpression.rawData[p][s]; - } - - double[] rankedValues = ranker.rank(values); - //Replace the original expression value with the standard distribution enforce: - for (int s = 0; s < datasetExpression.nrSamples; s++) { - //Convert the rank to a proportion, with range <0, 1> - double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); - //Convert the pValue to a Z-Score: - double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); - datasetExpression.rawData[p][s] = zScore; //Replace original expression value with the Z-Score - } - } - - System.out.println("Expression data now force normal"); - - } - - if (1 == 2) { - System.out.println("WARNING: PERMUTING GENOTYPE DATA!!!!"); - String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; - int[] permSampleIDs = new int[datasetGenotypes.nrSamples]; - for (int p = 0; p < cohorts.length; p++) { - Vector vecSamples = new Vector(); - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { - vecSamples.add(s); - } - } - int nrSamplesThisCohort = vecSamples.size(); - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { - int randomSample = ((Integer) vecSamples.remove((int) ((double) vecSamples.size() * Math.random()))).intValue(); - permSampleIDs[s] = randomSample; - } - } - } - ExpressionDataset datasetGenotypes2 = new ExpressionDataset(datasetGenotypes.nrProbes, datasetGenotypes.nrSamples); - datasetGenotypes2.probeNames = datasetGenotypes.probeNames; - datasetGenotypes2.sampleNames = datasetGenotypes.sampleNames; - datasetGenotypes2.recalculateHashMaps(); - for (int p = 0; p < datasetGenotypes2.nrProbes; p++) { - for (int s = 0; s < datasetGenotypes2.nrSamples; s++) { - datasetGenotypes2.rawData[p][s] = datasetGenotypes.rawData[p][permSampleIDs[s]]; - } - } - datasetGenotypes = datasetGenotypes2; - } - - - if (1 == 1) { - - - - ExpressionDataset 
datasetZScores = new ExpressionDataset(datasetCovariates.nrProbes, datasetExpression.nrProbes); - datasetZScores.probeNames = datasetCovariates.probeNames; - datasetZScores.sampleNames = datasetGenotypes.probeNames; - datasetZScores.recalculateHashMaps(); - - - - java.util.concurrent.ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); - CompletionService pool = new ExecutorCompletionService(threadPool); - int nrTasks = 0; - for (int cov = 0; cov < datasetCovariates.nrProbes; cov++) { - double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[cov]); - if (stdev > 0) { - PerformInteractionAnalysisPermutationTask task = new PerformInteractionAnalysisPermutationTask(datasetGenotypes, datasetExpression, datasetCovariates, cov); - pool.submit(task); - nrTasks++; - } - } - - String maxChi2Cov = ""; - double maxChi2 = 0; - try { - // If gene annotation provided, for chi2sum calculation use only genes that are 1mb apart - if (geneDistanceMap != null) { - for (int task = 0; task < nrTasks; task++) { - try { - DoubleArrayIntegerObject result = pool.take().get(); - int cov = result.intValue; - double chi2Sum = 0; - double[] covZ = datasetZScores.rawData[cov]; - for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { - if (genesFarAway(datasetZScores.sampleNames[snp], datasetZScores.probeNames[cov])) { - double z = result.doubleArray[snp]; - covZ[snp] = z; - if (!Double.isNaN(z)) { - chi2Sum += z * z; - } - } - } - if (chi2Sum > maxChi2) { - maxChi2 = chi2Sum; - maxChi2Cov = datasetCovariates.probeNames[cov]; - } - //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); - if ((task + 1) % 512 == 0) { - System.out.println(task + 1 + " tasks processed"); - } - } catch (ExecutionException ex) { - Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); - } - } - } - //If gene annotation not 
provided, use all gene pairs - else { - for (int task = 0; task < nrTasks; task++) { - try { - DoubleArrayIntegerObject result = pool.take().get(); - int cov = result.intValue; - double chi2Sum = 0; - double[] covZ = datasetZScores.rawData[cov]; - for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { - double z = result.doubleArray[snp]; - covZ[snp] = z; - if (!Double.isNaN(z)) { - chi2Sum += z * z; - } - } - if (chi2Sum > maxChi2) { - maxChi2 = chi2Sum; - maxChi2Cov = datasetCovariates.probeNames[cov]; - } - //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); - if ((task + 1) % 512 == 0) { - System.out.println(task + 1 + " tasks processed"); - } - } catch (ExecutionException ex) { - Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); - } - } - } - threadPool.shutdown(); - } catch (Exception e) { - e.printStackTrace(); - System.out.println(e.getMessage()); - } - - System.out.println("Top covariate:\t" + maxChi2 + "\t" + maxChi2Cov); - outputTopCovs.writeln("Top covariate:\t" + maxChi2 + "\t" + maxChi2Cov); - outputTopCovs.flush(); - datasetZScores.save(outputDir + "/InteractionZScoresMatrix-" + covsToCorrect.length + "Covariates.txt"); - - return maxChi2Cov; - } - - return null; - } - - /** - * Creates a map of gene name to GenomicBoundary containing gene coordinates and the coordinate of its midpoint as annotation - * @param annotFname - path to the annotation file (in the eqtlmappingpipeline format) - * @throws IOException - */ - private void createGeneDistanceMap(String annotFname) throws IOException { - System.out.println("Creating a gene distance map from " + annotFname); - - geneDistanceMap = new HashMap>(); - - TextFile annotFile = new TextFile(annotFname, false); - String els[] = annotFile.readLineElems(TextFile.tab); - - while ((els = annotFile.readLineElems(TextFile.tab)) != null){ - int start = Integer.parseInt(els[4]), end = 
Integer.parseInt(els[5]), middle = start + (end - start)/2; - GenomicBoundary genomicboundary = new GenomicBoundary(els[3], Integer.parseInt(els[4]), Integer.parseInt(els[5]), middle); - geneDistanceMap.put(els[1], genomicboundary); - } - annotFile.close(); - } - - /** - * Checks if the genomic distance between 2 genes is more than 1mb - * @param gene1 - * @param gene2 - * @return true if the genes are more than 1mb apart - */ - private boolean genesFarAway(String gene1, String gene2) { - // if one of the covariates is a technical bias or a cell count etc - if ((! gene1.startsWith("ENS")) || (! gene2.startsWith("ENS"))){ - return true; - } - - GenomicBoundary gb1 = null, gb2 = null; - try { - gb1 = geneDistanceMap.get(gene1); - gb2 = geneDistanceMap.get(gene2); - - if (gb1.getChromosome() != gb2.getChromosome()){ - return true; - } - if (Math.abs(gb1.getAnnotation() - gb2.getAnnotation()) > 1000000){ - return true; - } - } catch (Exception e){ - System.out.println("Error: gene annotation doesn't contain one of these genes: " + gene1 + " or " + gene2); - System.exit(1); - } - return false; - } + HashMap hashCovsToCorrect = new HashMap(); + int[] covsToCorrectIndex = new int[covsToCorrect.length]; + for (int c = 0; c < covsToCorrect.length; c++) { + hashCovsToCorrect.put(covsToCorrect[c], null); + covsToCorrectIndex[c] = ((Integer) datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariatesToCorrectFor.rawData[c][s] = datasetCovariates.rawData[covsToCorrectIndex[c]][s]; + } + } + + datasetCovariatesToCorrectFor.transposeDataset(); + + datasetCovariatesToCorrectFor.save(inputDir + "/CovariatesToCorrectFor.txt"); + orthogonalizeDataset(inputDir + "/CovariatesToCorrectFor.txt"); + datasetCovariatesToCorrectFor = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.PrincipalComponents.txt"); + datasetCovariatesToCorrectFor.transposeDataset(); + ExpressionDataset 
datasetCovariatesToCorrectForEigenvalues = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.Eigenvalues.txt"); + + for (int p = 0; p < datasetCovariates.nrProbes; p++) { + if (!hashCovsToCorrect.containsKey(datasetCovariates.probeNames[p])) { + for (int cov = 0; cov < datasetCovariatesToCorrectFor.nrProbes; cov++) { + if (datasetCovariatesToCorrectForEigenvalues.rawData[cov][0] > 1E-5) { + double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetCovariates.rawData[p]); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariates.rawData[p][s] -= rc[0] * datasetCovariatesToCorrectFor.rawData[cov][s]; + } + } + } + double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[p]); + double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[p]); + if (stdev < 1E-5) { + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariates.rawData[p][s] = mean; + } + } + } + } + + + } + + if (1 == 1) { + System.out.println("Correcting covariate data for cis-eQTL effects:"); + for (int p = 0; p < datasetCovariates.nrProbes; p++) { + if (datasetExpression.hashProbes.containsKey(datasetCovariates.probeNames[p])) { + int index = ((Integer) datasetExpression.hashProbes.get(datasetCovariates.probeNames[p])).intValue(); + double[] rc = getLinearRegressionCoefficients(datasetGenotypes.rawData[index], datasetCovariates.rawData[p]); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariates.rawData[p][s] -= rc[0] * datasetGenotypes.rawData[index][s]; + } + } + } + } + + if (1 == 2) { + datasetCovariates.save(inputDir + "/CovariatesCorrected.txt"); + HashMap hashProbesToFilter = new HashMap(); + for (int p = 0; p < datasetCovariates.nrProbes; p++) { + if (datasetCovariates.probeNames[p].startsWith("ENSG")) { + hashProbesToFilter.put(datasetCovariates.probeNames[p], null); + } + } + ExpressionDataset datasetCovariatesCorrected = new ExpressionDataset(inputDir + 
"/CovariatesCorrected.txt", "\t", hashProbesToFilter, null); + datasetCovariatesCorrected.transposeDataset(); + datasetCovariatesCorrected.save(inputDir + "/CovariatesCorrected.txt"); + System.exit(0); + } + + if (1 == 2) { + ExpressionDataset datasetICA = new ExpressionDataset("/Users/lude/Documents/ICA/mixingmatrix.txt"); + //ExpressionDataset datasetICA = new ExpressionDataset("/Users/lude/Documents/ICA/signals.txt"); + datasetICA.transposeDataset(); + for (int p = 0; p < datasetICA.nrProbes; p++) { + datasetCovariates.rawData[p] = datasetICA.rawData[p]; + datasetCovariates.probeNames[p] = datasetICA.probeNames[p]; + if (p == 7) { + for (int q = 0; q < datasetCovariates.nrProbes; q++) { + double corr = JSci.maths.ArrayMath.correlation(datasetICA.rawData[p], datasetCovariates.rawData[q]); + System.out.println(p + "\t" + datasetICA.probeNames[p] + "\t" + q + "\t" + datasetCovariates.probeNames[q] + "\t" + corr + "\t" + corr * corr); + } + } + } + + orthogonalizeDataset("/Users/lude/Documents/ICA/mixingmatrix.txt"); + //System.exit(0); + } + + System.out.println("Enforcing normal distribution on covariates"); + + NaturalRanking ranker = new NaturalRanking(); + + for (int p = 0; p < datasetCovariates.nrProbes; p++) { + //Rank order the expression values: + double[] values = new double[datasetCovariates.nrSamples]; + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + values[s] = datasetCovariates.rawData[p][s]; + } + double[] rankedValues = ranker.rank(values); + //Replace the original expression value with the standard distribution enforce: + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + //Convert the rank to a proportion, with range <0, 1> + double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); + //Convert the pValue to a Z-Score: + double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); + datasetCovariates.rawData[p][s] = zScore; //Replace original expression value with the Z-Score + } + } + + } + + 
cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = new cern.jet.random.tdouble.engine.DRand(); + + ExpressionDataset datasetExpressionBeforeEQTLCorrection = new ExpressionDataset(datasetExpression.nrProbes, datasetExpression.nrSamples); + for (int p = 0; p < datasetExpression.nrProbes; p++) { + for (int s = 0; s < datasetExpression.nrSamples; s++) { + datasetExpressionBeforeEQTLCorrection.rawData[p][s] = datasetExpression.rawData[p][s]; + } + } + + if (1 == 1) { + System.out.println("Correcting expression data for predefined gene environment interaction effects (GC content, Gender, 5'Median Bias, 3'Median Bias):"); + int[] covsToCorrectIndex = new int[covsToCorrect.length]; + for (int c = 0; c < covsToCorrect.length; c++) { + covsToCorrectIndex[c] = ((Integer) datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); + + } + for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { + double[][] valsX = new double[nrSamples][1 + covsToCorrect.length * 2]; //store genotypes, covariates, interactions + for (int s = 0; s < nrSamples; s++) { + valsX[s][0] = datasetGenotypes.rawData[snp][s]; //genotypes + } + for (int c = 0; c < covsToCorrect.length; c++) { + for (int s = 0; s < nrSamples; s++) { + valsX[s][c * 2 + 1] = datasetCovariates.rawData[covsToCorrectIndex[c]][s]; //covariate + valsX[s][c * 2 + 2] = valsX[s][0] * valsX[s][c * 2 + 1]; //interction + } + } + double[] valsY = datasetExpression.rawData[snp]; + regression.newSampleData(valsY, valsX); + datasetExpression.rawData[snp] = regression.estimateResiduals(); + } + } + + + if (1 == 1) { + System.out.println("Enforcing normal distribution on expression data:"); + + NaturalRanking ranker = new NaturalRanking(); + + for (int p = 0; p < datasetExpression.nrProbes; p++) { + //Rank order the expression values: + double[] values = new double[datasetExpression.nrSamples]; + for (int s = 0; s < datasetExpression.nrSamples; s++) { + values[s] = datasetExpression.rawData[p][s]; + } + + double[] 
rankedValues = ranker.rank(values); + //Replace the original expression value with the standard distribution enforce: + for (int s = 0; s < datasetExpression.nrSamples; s++) { + //Convert the rank to a proportion, with range <0, 1> + double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); + //Convert the pValue to a Z-Score: + double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); + datasetExpression.rawData[p][s] = zScore; //Replace original expression value with the Z-Score + } + } + + System.out.println("Expression data now force normal"); + + } + + if (1 == 2) { + System.out.println("WARNING: PERMUTING GENOTYPE DATA!!!!"); + String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; + int[] permSampleIDs = new int[datasetGenotypes.nrSamples]; + for (int p = 0; p < cohorts.length; p++) { + Vector vecSamples = new Vector(); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { + vecSamples.add(s); + } + } + int nrSamplesThisCohort = vecSamples.size(); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { + int randomSample = ((Integer) vecSamples.remove((int) ((double) vecSamples.size() * Math.random()))).intValue(); + permSampleIDs[s] = randomSample; + } + } + } + ExpressionDataset datasetGenotypes2 = new ExpressionDataset(datasetGenotypes.nrProbes, datasetGenotypes.nrSamples); + datasetGenotypes2.probeNames = datasetGenotypes.probeNames; + datasetGenotypes2.sampleNames = datasetGenotypes.sampleNames; + datasetGenotypes2.recalculateHashMaps(); + for (int p = 0; p < datasetGenotypes2.nrProbes; p++) { + for (int s = 0; s < datasetGenotypes2.nrSamples; s++) { + datasetGenotypes2.rawData[p][s] = datasetGenotypes.rawData[p][permSampleIDs[s]]; + } + } + datasetGenotypes = datasetGenotypes2; + } + + + if (1 == 1) { + + + + ExpressionDataset datasetZScores = new 
ExpressionDataset(datasetCovariates.nrProbes, datasetExpression.nrProbes); + datasetZScores.probeNames = datasetCovariates.probeNames; + datasetZScores.sampleNames = datasetGenotypes.probeNames; + datasetZScores.recalculateHashMaps(); + + + + java.util.concurrent.ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + CompletionService pool = new ExecutorCompletionService(threadPool); + int nrTasks = 0; + for (int cov = 0; cov < datasetCovariates.nrProbes; cov++) { + double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[cov]); + if (stdev > 0) { + PerformInteractionAnalysisPermutationTask task = new PerformInteractionAnalysisPermutationTask(datasetGenotypes, datasetExpression, datasetCovariates, cov); + pool.submit(task); + nrTasks++; + } + } + + String maxChi2Cov = ""; + double maxChi2 = 0; + try { + // If gene annotation provided, for chi2sum calculation use only genes that are 1mb apart + if (geneDistanceMap != null) { + for (int task = 0; task < nrTasks; task++) { + try { + DoubleArrayIntegerObject result = pool.take().get(); + int cov = result.intValue; + double chi2Sum = 0; + double[] covZ = datasetZScores.rawData[cov]; + for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { + if (genesFarAway(datasetZScores.sampleNames[snp], datasetZScores.probeNames[cov])) { + double z = result.doubleArray[snp]; + covZ[snp] = z; + if (!Double.isNaN(z)) { + chi2Sum += z * z; + } + } + } + if (chi2Sum > maxChi2) { + maxChi2 = chi2Sum; + maxChi2Cov = datasetCovariates.probeNames[cov]; + } + //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); + if ((task + 1) % 512 == 0) { + System.out.println(task + 1 + " tasks processed"); + } + } catch (ExecutionException ex) { + Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); + } + } + } //If gene annotation not provided, use all gene pairs 
+ else { + for (int task = 0; task < nrTasks; task++) { + try { + DoubleArrayIntegerObject result = pool.take().get(); + int cov = result.intValue; + double chi2Sum = 0; + double[] covZ = datasetZScores.rawData[cov]; + for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { + double z = result.doubleArray[snp]; + covZ[snp] = z; + if (!Double.isNaN(z)) { + chi2Sum += z * z; + } + } + if (chi2Sum > maxChi2) { + maxChi2 = chi2Sum; + maxChi2Cov = datasetCovariates.probeNames[cov]; + } + //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); + if ((task + 1) % 512 == 0) { + System.out.println(task + 1 + " tasks processed"); + } + } catch (ExecutionException ex) { + Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); + } + } + } + threadPool.shutdown(); + } catch (Exception e) { + e.printStackTrace(); + System.out.println(e.getMessage()); + } + + System.out.println("Top covariate:\t" + maxChi2 + "\t" + maxChi2Cov); + outputTopCovs.writeln("Top covariate:\t" + maxChi2 + "\t" + maxChi2Cov); + outputTopCovs.flush(); + datasetZScores.save(outputDir + "/InteractionZScoresMatrix-" + covsToCorrect.length + "Covariates.txt"); + + return maxChi2Cov; + } + + return null; + } + + /** + * Creates a map of gene name to GenomicBoundary containing gene coordinates + * and the coordinate of its midpoint as annotation + * + * @param annotFname - path to the annotation file (in the + * eqtlmappingpipeline format) + * @throws IOException + */ + private void createGeneDistanceMap(String annotFname) throws IOException { + System.out.println("Creating a gene distance map from " + annotFname); + + geneDistanceMap = new HashMap>(); + + TextFile annotFile = new TextFile(annotFname, false); + String els[] = annotFile.readLineElems(TextFile.tab); + + while ((els = annotFile.readLineElems(TextFile.tab)) != null) { + int start = Integer.parseInt(els[4]), end = 
Integer.parseInt(els[5]), middle = start + (end - start) / 2; + GenomicBoundary genomicboundary = new GenomicBoundary(els[3], Integer.parseInt(els[4]), Integer.parseInt(els[5]), middle); + geneDistanceMap.put(els[1], genomicboundary); + } + annotFile.close(); + } + + /** + * Checks if the genomic distance between 2 genes is more than 1mb + * + * @param gene1 + * @param gene2 + * @return true if the genes are more than 1mb apart + */ + private boolean genesFarAway(String gene1, String gene2) { + // if one of the covariates is a technical bias or a cell count etc + if ((!gene1.startsWith("ENS")) || (!gene2.startsWith("ENS"))) { + return true; + } + + GenomicBoundary gb1 = null, gb2 = null; + try { + gb1 = geneDistanceMap.get(gene1); + gb2 = geneDistanceMap.get(gene2); + + if (gb1.getChromosome() != gb2.getChromosome()) { + return true; + } + if (Math.abs(gb1.getAnnotation() - gb2.getAnnotation()) > 1000000) { + return true; + } + } catch (Exception e) { + System.out.println("Error: gene annotation doesn't contain one of these genes: " + gene1 + " or " + gene2); + System.exit(1); + } + return false; + } static public void orthogonalizeDataset(String inputFile) { From bb1777cdcd55d36858ee999a82d3ab1c39433c46 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Tue, 14 Jul 2015 09:33:07 +0200 Subject: [PATCH 078/143] Fix in parameters --- .../EQTLInteractionAnalyser.java | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index f21d00705..40bd6e3e8 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ 
b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -61,14 +61,10 @@ public class EQTLInteractionAnalyser { OptionBuilder.withLongOpt("maxcov"); OPTIONS.addOption(OptionBuilder.create("n")); - OptionBuilder.withArgName("boolean"); - OptionBuilder.hasArg(); OptionBuilder.withDescription("Interpret the z-score matrices"); OptionBuilder.withLongOpt("interpret"); OPTIONS.addOption(OptionBuilder.create("it")); - OptionBuilder.withArgName("boolean"); - OptionBuilder.hasArg(); OptionBuilder.withDescription("Find chi2sum differences for each covariate between 2 consequtive interaction runs"); OptionBuilder.withLongOpt("chi2sumDiff"); OPTIONS.addOption(OptionBuilder.create("dif")); @@ -100,7 +96,8 @@ public static void main(String[] args) throws IOException { String inputDir, outputDir, eqtlFile = null, annotationFile = null; final File snpsToSwapFile; int maxNumCovariatesToRegress = 20; - boolean interpret = false, chi2sumDiff = false; + final boolean interpret, chi2sumDiff; + String[] covariates = null; try { final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); @@ -114,12 +111,10 @@ public static void main(String[] args) throws IOException { if (commandLine.hasOption('n')) { maxNumCovariatesToRegress = Integer.parseInt(commandLine.getOptionValue("n")); } - if (commandLine.hasOption("it")) { - interpret = Boolean.parseBoolean(commandLine.getOptionValue("t")); - } - if (commandLine.hasOption("dif")) { - chi2sumDiff = Boolean.parseBoolean(commandLine.getOptionValue("dif")); - } + + interpret = commandLine.hasOption("t"); + chi2sumDiff = commandLine.hasOption("dif"); + if (commandLine.hasOption('a')) { annotationFile = commandLine.getOptionValue("a"); } From 73254e114fc1b96df47873bc1fc42fa064a6d497 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Tue, 14 Jul 2015 09:59:52 +0200 Subject: [PATCH 079/143] Interactions --- .../TestEQTLDatasetForInteractions.java | 3 
--- 1 file changed, 3 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index f52b428f3..05f3049c1 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -390,9 +390,6 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h } - System.exit(0); - - if (1 == 1) { if (1 == 1) { From 4871e15e0fa3ceabfc7e82a0e78c7986fdff2525 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Tue, 14 Jul 2015 10:12:28 +0200 Subject: [PATCH 080/143] Interactions --- .../TestEQTLDatasetForInteractions.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 05f3049c1..d1fb5dcf3 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -74,7 +74,8 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String if (covariatesToCorrect != null) { primaryCovsToCorrect = covariatesToCorrect; } else { - primaryCovsToCorrect = new String[]{"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", 
"LLdeep", "RS", "CODAM", "LLS"}; + //primaryCovsToCorrect = new String[]{"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "LLdeep", "RS", "CODAM", "LLS"}; + primaryCovsToCorrect = new String[]{"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "CEU", "GBR", "FIN", "TSI", "YRI"}; } @@ -545,8 +546,8 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h double[] valsY = datasetExpression.rawData[snp]; regression.newSampleData(valsY, valsX); datasetExpression.rawData[snp] = regression.estimateResiduals(); + } } - } if (1 == 1) { From f533ab25bfe98fa93fb07dbd7f142ac34764f144 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Tue, 14 Jul 2015 10:59:28 +0200 Subject: [PATCH 081/143] Interactions --- .../EQTLInteractionAnalyser.java | 25 ++++++-- .../TestEQTLDatasetForInteractions.java | 58 +++++++++++++------ 2 files changed, 60 insertions(+), 23 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index 40bd6e3e8..5210a40af 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -69,15 +69,21 @@ public class EQTLInteractionAnalyser { OptionBuilder.withLongOpt("chi2sumDiff"); OPTIONS.addOption(OptionBuilder.create("dif")); - OptionBuilder.withArgName("string"); - OptionBuilder.hasArg(); - OptionBuilder.withDescription("covariates to correct for before running the interaction analysis"); + OptionBuilder.withArgName("strings"); + OptionBuilder.hasArgs(); + OptionBuilder.withDescription("covariates to correct for using an interaction term before running the 
interaction analysis"); OptionBuilder.withLongOpt("cov"); OPTIONS.addOption(OptionBuilder.create("c")); + + OptionBuilder.withArgName("strings"); + OptionBuilder.hasArgs(); + OptionBuilder.withDescription("Covariates to correct for without interaction term before running the interaction analysis"); + OptionBuilder.withLongOpt("cov2"); + OPTIONS.addOption(OptionBuilder.create("c2")); OptionBuilder.withArgName("path"); OptionBuilder.hasArg(); - OptionBuilder.withDescription("File containing the covariates to correct for before running the interaction analysis. No header, each covariate on a separate line"); + OptionBuilder.withDescription("File containing the covariates to correct for using an interaction term before running the interaction analysis. No header, each covariate on a separate line"); OptionBuilder.withLongOpt("covFile"); OPTIONS.addOption(OptionBuilder.create("cf")); @@ -88,7 +94,7 @@ public class EQTLInteractionAnalyser { OPTIONS.addOption(OptionBuilder.create("sw")); } - public static void main(String[] args) throws IOException { + public static void main(String[] args) throws IOException, Exception { System.out.println("Starting interaction analysis"); System.out.println("Current date and time: " + DATE_TIME_FORMAT.format(currentDataTime)); System.out.println(); @@ -99,6 +105,7 @@ public static void main(String[] args) throws IOException { final boolean interpret, chi2sumDiff; String[] covariates = null; + final String[] covariates2; try { final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); @@ -128,6 +135,12 @@ else if (commandLine.hasOption("c")){ covariates = commandLine.getOptionValues("c"); } + if (commandLine.hasOption("c2")){ + covariates2 = commandLine.getOptionValues("c2"); + } else { + covariates2 = new String[0]; + } + if (commandLine.hasOption("sw")){ snpsToSwapFile = new File(commandLine.getOptionValue("sw")); } else { @@ -152,7 +165,7 @@ else if (chi2sumDiff){ 
interactor.findChi2SumDifferences(maxNumCovariatesToRegress); } else { - new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, snpsToSwapFile); + new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile); } } diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 05f3049c1..3aa915c1d 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -47,7 +47,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws //preprocessData(); } - public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, File snpsToSwapFile) throws IOException { + public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File snpsToSwapFile) throws IOException, Exception { System.out.println("Input dir: " + inputDir); System.out.println("Output dir: " + outputDir); @@ -74,7 +74,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String if (covariatesToCorrect != null) { primaryCovsToCorrect = covariatesToCorrect; } else { - primaryCovsToCorrect = new String[]{"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "LLdeep", "RS", "CODAM", "LLS"}; + 
primaryCovsToCorrect = new String[]{"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS"}; } @@ -88,7 +88,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String String[] covsToCorrect = primaryCovsToCorrect; int cnt = 0; while (cnt < maxNumTopCovs) { - String topCov = performInteractionAnalysis(covsToCorrect, eqtlGenes, outputTopCovs, snpsToSwapFile); + String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, eqtlGenes, outputTopCovs, snpsToSwapFile); String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; for (int c = 0; c < covsToCorrect.length; c++) { covsToCorrectNew[c] = covsToCorrect[c]; @@ -233,7 +233,7 @@ public void preprocessData() { } - public final String performInteractionAnalysis(String[] covsToCorrect, HashMap hashEQTLs, TextFile outputTopCovs, File snpsToSwapFile) throws IOException { + public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, HashMap hashEQTLs, TextFile outputTopCovs, File snpsToSwapFile) throws IOException, Exception { HashMap hashSamples = new HashMap(); @@ -297,8 +297,20 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h System.out.println("Correcting gene expression data for cohort specific effects and top 25 components"); //String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; int nrCompsToCorrectFor = 25; - ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(nrCompsToCorrectFor, datasetGenotypes.nrSamples); + ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect2.length + nrCompsToCorrectFor, datasetGenotypes.nrSamples); datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; + + for (int i = 0; i < covsToCorrect2.length; ++i) { + String cov = covsToCorrect2[i]; + Integer c = datasetCovariatesToCorrectFor.hashProbes.get(cov); + if (c == null) { + throw new Exception("Covariate not found: " + cov); + } + for (int s = 0; s 
< datasetGenotypes.nrSamples; s++) { + datasetCovariatesToCorrectFor.rawData[i][s] = datasetCovariates.rawData[c][s]; + } + } + // for (int p = 0; p < cohorts.length; p++) { // for (int s = 0; s < datasetGenotypes.nrSamples; s++) { // if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { @@ -309,7 +321,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h if (nrCompsToCorrectFor > 0) { for (int comp = 0; comp < nrCompsToCorrectFor; comp++) { for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetCovariatesToCorrectFor.rawData[comp][s] = datasetCovariates.rawData[datasetCovariates.nrProbes - 51 + comp][s]; + datasetCovariatesToCorrectFor.rawData[covsToCorrect2.length + comp][s] = datasetCovariates.rawData[datasetCovariates.nrProbes - 51 + comp][s]; } } } @@ -337,14 +349,14 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h - + //double[] mainEQTLCorr = new double[datasetGenotypes.nrProbes]; if (snpsToSwapFile != null) { System.out.println("Enforcing for every eQTL that the genotype dosage is swapped based on: " + snpsToSwapFile.getAbsolutePath()); - + HashSet snpsToSwap = new HashSet(); BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(snpsToSwapFile), "UTF-8")); String line; @@ -352,24 +364,24 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h snpsToSwap.add(line); } reader.close(); - + for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { - + if (snpsToSwap.contains(datasetGenotypes.probeNames[snp])) { - + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { datasetGenotypes.rawData[snp][s] = 2 - datasetGenotypes.rawData[snp][s]; } - + } //mainEQTLCorr[snp] = corr; } - - + + } else { System.out.println("Enforcing for every eQTL that the genotype dosage positively correlated with gene expression levels:"); - + Writer writer = new BufferedWriter(new FileWriter(outputDir + "/swappedDosages.txt")); for (int snp = 
0; snp < datasetGenotypes.nrProbes; snp++) { double corr = JSci.maths.ArrayMath.correlation(datasetGenotypes.rawData[snp], datasetExpression.rawData[snp]); @@ -395,8 +407,20 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h if (1 == 1) { System.out.println("Correcting covariate data for cohort specific effects:"); // String[] cohorts = {"LLDeep","LLS","RS","CODAM"}; - ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect.length, datasetGenotypes.nrSamples); + ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect2.length + covsToCorrect.length, datasetGenotypes.nrSamples); datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; + + for (int i = 0; i < covsToCorrect2.length; ++i) { + String cov = covsToCorrect2[i]; + Integer c = datasetCovariatesToCorrectFor.hashProbes.get(cov); + if (c == null) { + throw new Exception("Covariate not found: " + cov); + } + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariatesToCorrectFor.rawData[i][s] = datasetCovariates.rawData[c][s]; + } + } + // for (int p=0; p Date: Tue, 14 Jul 2015 12:01:19 +0300 Subject: [PATCH 082/143] added an option for permutation --- .../EQTLInteractionAnalyser.java | 13 +++++++++++-- .../TestEQTLDatasetForInteractions.java | 8 ++++---- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index c5180fe75..ea73b0268 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java 
@@ -72,6 +72,12 @@ public class EQTLInteractionAnalyser { OptionBuilder.withLongOpt("chi2sumDiff"); OPTIONS.addOption(OptionBuilder.create("dif")); + OptionBuilder.withArgName("boolean"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Run interaction analysis on permuted genotype data"); + OptionBuilder.withLongOpt("perm"); + OPTIONS.addOption(OptionBuilder.create("perm")); + OptionBuilder.withArgName("string"); OptionBuilder.hasArg(); OptionBuilder.withDescription("covariates to correct for before running the interaction analysis"); @@ -92,7 +98,7 @@ public static void main(String[] args) throws IOException { String inputDir, outputDir, eqtlFile = null, annotationFile = null; int maxNumCovariatesToRegress = 20; - boolean interpret = false, chi2sumDiff = false; + boolean interpret = false, chi2sumDiff = false, permute = false; String[] covariates = null; try { final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); @@ -112,6 +118,9 @@ public static void main(String[] args) throws IOException { if (commandLine.hasOption("dif")) { chi2sumDiff = Boolean.parseBoolean(commandLine.getOptionValue("dif")); } + if (commandLine.hasOption("perm")) { + permute = Boolean.parseBoolean(commandLine.getOptionValue("perm")); + } if (commandLine.hasOption('a')) { annotationFile = commandLine.getOptionValue("a"); } @@ -143,7 +152,7 @@ else if (chi2sumDiff){ interactor.findChi2SumDifferences(maxNumCovariatesToRegress); } else { - new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates); + new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, permute); } } diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java 
b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index b32f7636e..6fa44d42b 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -42,7 +42,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws //preprocessData(); } - public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect) throws IOException { + public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, boolean permute) throws IOException { System.out.println("Input dir: " + inputDir); System.out.println("Output dir: " + outputDir); @@ -84,7 +84,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String String[] covsToCorrect = primaryCovsToCorrect; int cnt = 0; while (cnt < maxNumTopCovs) { - String topCov = performInteractionAnalysis(covsToCorrect, eqtlGenes, outputTopCovs); + String topCov = performInteractionAnalysis(covsToCorrect, eqtlGenes, outputTopCovs, permute); String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; for (int c = 0; c < covsToCorrect.length; c++) { covsToCorrectNew[c] = covsToCorrect[c]; @@ -229,7 +229,7 @@ public void preprocessData() { } - public final String performInteractionAnalysis(String[] covsToCorrect, HashMap hashEQTLs, TextFile outputTopCovs) throws IOException { + public final String performInteractionAnalysis(String[] covsToCorrect, HashMap hashEQTLs, TextFile outputTopCovs, boolean permute) throws IOException { HashMap hashSamples = new HashMap(); @@ -536,7 
+536,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, HashMap h } - if (1 == 2) { + if (permute) { System.out.println("WARNING: PERMUTING GENOTYPE DATA!!!!"); String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; int[] permSampleIDs = new int[datasetGenotypes.nrSamples]; From 3e5464314ddb7e69e3a028d03fb940ccf3f63bc6 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Tue, 14 Jul 2015 11:07:45 +0200 Subject: [PATCH 083/143] Interaction --- .../eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 3aa915c1d..98bd00819 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -53,6 +53,8 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String System.out.println("Output dir: " + outputDir); System.out.println("eQTL file: " + eQTLfileName); System.out.println("Maximum number of covariates to regress out: " + maxNumTopCovs); + System.out.println("Covariates to correct for with interaction: " + covariatesToCorrect.toString()); + System.out.println("Covariates to correct for without interaction: " + covariatesToCorrect2.toString()); this.inputDir = inputDir; this.outputDir = outputDir; From 321bc2370c5c3d5a99add328a5ef918b6f581594 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Tue, 14 Jul 2015 11:11:09 +0200 Subject: [PATCH 084/143] interactions --- .../eqtlinteractionanalyser/EQTLInteractionAnalyser.java | 6 ++++-- 
.../TestEQTLDatasetForInteractions.java | 7 ------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index 5210a40af..a4794f818 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -104,7 +104,7 @@ public static void main(String[] args) throws IOException, Exception { int maxNumCovariatesToRegress = 20; final boolean interpret, chi2sumDiff; - String[] covariates = null; + final String[] covariates; final String[] covariates2; try { final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); @@ -133,7 +133,9 @@ public static void main(String[] args) throws IOException, Exception { } else if (commandLine.hasOption("c")){ covariates = commandLine.getOptionValues("c"); - } + } else { + covariates = new String[]{"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS"}; + } if (commandLine.hasOption("c2")){ covariates2 = commandLine.getOptionValues("c2"); diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 98bd00819..38503f585 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ 
b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -73,13 +73,6 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String TextFile outputTopCovs = new TextFile(outputDir + "/outputTopCovariates.txt", true); - if (covariatesToCorrect != null) { - primaryCovsToCorrect = covariatesToCorrect; - } else { - primaryCovsToCorrect = new String[]{"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS"}; - } - - System.out.print("\nPrimary covariates to correct for before running interaction analysis: "); for (String cov : primaryCovsToCorrect) { System.out.print("\n\t" + cov); From 1ba6b26324f876477f2642f8853ae148cdb613e6 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Tue, 14 Jul 2015 11:11:56 +0200 Subject: [PATCH 085/143] Merge --- .../TestEQTLDatasetForInteractions.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 38503f585..6f3b3422c 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -72,7 +72,6 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String TextFile outputTopCovs = new TextFile(outputDir + "/outputTopCovariates.txt", true); - System.out.print("\nPrimary covariates to correct for before running interaction analysis: "); for (String cov : primaryCovsToCorrect) { System.out.print("\n\t" + cov); @@ -564,8 +563,8 @@ public final String 
performInteractionAnalysis(String[] covsToCorrect, String[] double[] valsY = datasetExpression.rawData[snp]; regression.newSampleData(valsY, valsX); datasetExpression.rawData[snp] = regression.estimateResiduals(); + } } - } if (1 == 1) { From f61fc2842accd134388349a1dfec3b2f02f4d522 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Tue, 14 Jul 2015 11:16:10 +0200 Subject: [PATCH 086/143] int --- .../eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java | 1 + 1 file changed, 1 insertion(+) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 6f3b3422c..8e814fd45 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -78,6 +78,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String } System.out.println(); + primaryCovsToCorrect = covariatesToCorrect; String[] covsToCorrect = primaryCovsToCorrect; int cnt = 0; From 27fd073c70647100b36e22acf3ba731f20f08de2 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Tue, 14 Jul 2015 12:14:26 +0200 Subject: [PATCH 087/143] Interactions --- .../TestEQTLDatasetForInteractions.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 8e814fd45..16a7a2a88 100644 --- 
a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -24,6 +24,7 @@ import java.util.logging.Level; import java.util.logging.Logger; import org.apache.commons.math3.stat.ranking.NaturalRanking; +import org.apache.mahout.math.Arrays; import umcg.genetica.genomicboundaries.GenomicBoundary; import umcg.genetica.io.Gpio; import umcg.genetica.io.text.TextFile; @@ -53,11 +54,12 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String System.out.println("Output dir: " + outputDir); System.out.println("eQTL file: " + eQTLfileName); System.out.println("Maximum number of covariates to regress out: " + maxNumTopCovs); - System.out.println("Covariates to correct for with interaction: " + covariatesToCorrect.toString()); - System.out.println("Covariates to correct for without interaction: " + covariatesToCorrect2.toString()); + System.out.println("Covariates to correct for with interaction: " + Arrays.toString(covariatesToCorrect)); + System.out.println("Covariates to correct for without interaction: " + Arrays.toString(covariatesToCorrect2)); this.inputDir = inputDir; this.outputDir = outputDir; + primaryCovsToCorrect = covariatesToCorrect; if (!Gpio.exists(outputDir)) { Gpio.createDir(outputDir); } @@ -78,7 +80,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String } System.out.println(); - primaryCovsToCorrect = covariatesToCorrect; + String[] covsToCorrect = primaryCovsToCorrect; int cnt = 0; @@ -297,7 +299,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] for (int i = 0; i < covsToCorrect2.length; ++i) { String cov = covsToCorrect2[i]; - Integer c = datasetCovariatesToCorrectFor.hashProbes.get(cov); + Integer c = 
datasetCovariates.hashProbes.get(cov); if (c == null) { throw new Exception("Covariate not found: " + cov); } @@ -407,7 +409,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] for (int i = 0; i < covsToCorrect2.length; ++i) { String cov = covsToCorrect2[i]; - Integer c = datasetCovariatesToCorrectFor.hashProbes.get(cov); + Integer c = datasetCovariates.hashProbes.get(cov); if (c == null) { throw new Exception("Covariate not found: " + cov); } From 1c3d66fea03c7de98f8eac78e2a1b28054f132d6 Mon Sep 17 00:00:00 2001 From: Dasha Zhernakova Date: Tue, 14 Jul 2015 23:04:16 +0300 Subject: [PATCH 088/143] added an option for running permutation --- .../EQTLInteractionAnalyser.java | 10 +++++++--- .../TestEQTLDatasetForInteractions.java | 8 ++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index 59d0e6528..619877ee6 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -65,6 +65,10 @@ public class EQTLInteractionAnalyser { OptionBuilder.withLongOpt("interpret"); OPTIONS.addOption(OptionBuilder.create("it")); + OptionBuilder.withDescription("Run permutation"); + OptionBuilder.withLongOpt("permute"); + OPTIONS.addOption(OptionBuilder.create("perm")); + OptionBuilder.withDescription("Find chi2sum differences for each covariate between 2 consequtive interaction runs"); OptionBuilder.withLongOpt("chi2sumDiff"); OPTIONS.addOption(OptionBuilder.create("dif")); @@ -102,7 +106,7 @@ public static void main(String[] args) throws 
IOException, Exception { String inputDir, outputDir, eqtlFile = null, annotationFile = null; final File snpsToSwapFile; int maxNumCovariatesToRegress = 20; - final boolean interpret, chi2sumDiff; + final boolean interpret, chi2sumDiff, permute; final String[] covariates; final String[] covariates2; @@ -121,6 +125,7 @@ public static void main(String[] args) throws IOException, Exception { interpret = commandLine.hasOption("t"); chi2sumDiff = commandLine.hasOption("dif"); + permute = commandLine.hasOption("perm"); if (commandLine.hasOption('a')) { annotationFile = commandLine.getOptionValue("a"); @@ -167,8 +172,7 @@ else if (chi2sumDiff){ interactor.findChi2SumDifferences(maxNumCovariatesToRegress); } else { - - new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile); + new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile, permute); } } diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 25594ead7..df0e95c31 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -49,7 +49,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws //preprocessData(); } - public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File 
snpsToSwapFile) throws IOException, Exception { + public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File snpsToSwapFile, boolean permute) throws IOException, Exception { System.out.println("Input dir: " + inputDir); System.out.println("Output dir: " + outputDir); @@ -86,7 +86,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String String[] covsToCorrect = primaryCovsToCorrect; int cnt = 0; while (cnt < maxNumTopCovs) { - String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, eqtlGenes, outputTopCovs, snpsToSwapFile); + String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, eqtlGenes, outputTopCovs, snpsToSwapFile, permute); String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; for (int c = 0; c < covsToCorrect.length; c++) { covsToCorrectNew[c] = covsToCorrect[c]; @@ -231,7 +231,7 @@ public void preprocessData() { } - public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, HashMap hashEQTLs, TextFile outputTopCovs, File snpsToSwapFile) throws IOException, Exception { + public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, HashMap hashEQTLs, TextFile outputTopCovs, File snpsToSwapFile, boolean permute) throws IOException, Exception { HashMap hashSamples = new HashMap(); @@ -598,7 +598,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] } - if (1 == 2) { + if (permute) { System.out.println("WARNING: PERMUTING GENOTYPE DATA!!!!"); String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; int[] permSampleIDs = new int[datasetGenotypes.nrSamples]; From 472d430c3d72790f344f2621985d69bf5cadf974 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 15 Jul 2015 10:12:16 +0200 Subject: [PATCH 089/143] Interactions --- 
.../TestEQTLDatasetForInteractions.java | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 8e814fd45..68e1ae70b 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -24,6 +24,7 @@ import java.util.logging.Level; import java.util.logging.Logger; import org.apache.commons.math3.stat.ranking.NaturalRanking; +import org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression; import umcg.genetica.genomicboundaries.GenomicBoundary; import umcg.genetica.io.Gpio; import umcg.genetica.io.text.TextFile; @@ -464,19 +465,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] } - - if (1 == 1) { - System.out.println("Correcting covariate data for cis-eQTL effects:"); - for (int p = 0; p < datasetCovariates.nrProbes; p++) { - if (datasetExpression.hashProbes.containsKey(datasetCovariates.probeNames[p])) { - int index = ((Integer) datasetExpression.hashProbes.get(datasetCovariates.probeNames[p])).intValue(); - double[] rc = getLinearRegressionCoefficients(datasetGenotypes.rawData[index], datasetCovariates.rawData[p]); - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetCovariates.rawData[p][s] -= rc[0] * datasetGenotypes.rawData[index][s]; - } - } - } - } + correctCovariatesForQtls(datasetCovariates, datasetExpression, datasetGenotypes); if (1 == 2) { datasetCovariates.save(inputDir + "/CovariatesCorrected.txt"); @@ -564,8 +553,8 @@ public final String 
performInteractionAnalysis(String[] covsToCorrect, String[] double[] valsY = datasetExpression.rawData[snp]; regression.newSampleData(valsY, valsX); datasetExpression.rawData[snp] = regression.estimateResiduals(); - } } + } if (1 == 1) { @@ -996,4 +985,19 @@ static public double[] getEigenVectorSVD(Jama.SingularValueDecomposition svd, do } return eigenVector; } + + private void correctCovariatesForQtls(ExpressionDataset datasetCovariates, ExpressionDataset datasetExpression, ExpressionDataset datasetGenotypes) { + + System.out.println("Correcting covariate data for cis-eQTL effects:"); + for (int p = 0; p < datasetCovariates.nrProbes; p++) { + if (datasetExpression.hashProbes.containsKey(datasetCovariates.probeNames[p])) { + int index = ((Integer) datasetExpression.hashProbes.get(datasetCovariates.probeNames[p])).intValue(); + double[] rc = getLinearRegressionCoefficients(datasetGenotypes.rawData[index], datasetCovariates.rawData[p]); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariates.rawData[p][s] -= rc[0] * datasetGenotypes.rawData[index][s]; + } + } + } + + } } \ No newline at end of file From 30540f54f75c2281d46f57ef11563c568adf6262 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 15 Jul 2015 10:13:39 +0200 Subject: [PATCH 090/143] Merge --- .../TestEQTLDatasetForInteractions.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 68e1ae70b..741fe590c 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ 
b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -14,6 +14,7 @@ import java.io.InputStreamReader; import java.io.Writer; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Vector; @@ -24,7 +25,6 @@ import java.util.logging.Level; import java.util.logging.Logger; import org.apache.commons.math3.stat.ranking.NaturalRanking; -import org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression; import umcg.genetica.genomicboundaries.GenomicBoundary; import umcg.genetica.io.Gpio; import umcg.genetica.io.text.TextFile; @@ -54,11 +54,12 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String System.out.println("Output dir: " + outputDir); System.out.println("eQTL file: " + eQTLfileName); System.out.println("Maximum number of covariates to regress out: " + maxNumTopCovs); - System.out.println("Covariates to correct for with interaction: " + covariatesToCorrect.toString()); - System.out.println("Covariates to correct for without interaction: " + covariatesToCorrect2.toString()); + System.out.println("Covariates to correct for with interaction: " + Arrays.toString(covariatesToCorrect)); + System.out.println("Covariates to correct for without interaction: " + Arrays.toString(covariatesToCorrect2)); this.inputDir = inputDir; this.outputDir = outputDir; + primaryCovsToCorrect = covariatesToCorrect; if (!Gpio.exists(outputDir)) { Gpio.createDir(outputDir); } @@ -79,7 +80,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String } System.out.println(); - primaryCovsToCorrect = covariatesToCorrect; + String[] covsToCorrect = primaryCovsToCorrect; int cnt = 0; @@ -298,7 +299,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] for (int i = 0; i < covsToCorrect2.length; ++i) { String cov = covsToCorrect2[i]; - Integer c = 
datasetCovariatesToCorrectFor.hashProbes.get(cov); + Integer c = datasetCovariates.hashProbes.get(cov); if (c == null) { throw new Exception("Covariate not found: " + cov); } @@ -408,7 +409,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] for (int i = 0; i < covsToCorrect2.length; ++i) { String cov = covsToCorrect2[i]; - Integer c = datasetCovariatesToCorrectFor.hashProbes.get(cov); + Integer c = datasetCovariates.hashProbes.get(cov); if (c == null) { throw new Exception("Covariate not found: " + cov); } From 10b5584b3a43d8f2df7ed81de36a5c8de25906f1 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 15 Jul 2015 10:14:30 +0200 Subject: [PATCH 091/143] Merge --- .../eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 741fe590c..6851abf27 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -14,7 +14,6 @@ import java.io.InputStreamReader; import java.io.Writer; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Vector; @@ -25,6 +24,7 @@ import java.util.logging.Level; import java.util.logging.Logger; import org.apache.commons.math3.stat.ranking.NaturalRanking; +import org.apache.mahout.math.Arrays; import umcg.genetica.genomicboundaries.GenomicBoundary; import umcg.genetica.io.Gpio; import umcg.genetica.io.text.TextFile; From 
aacb2f0a2ca5c343a6528b5f99f622b0be2bb4cf Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 15 Jul 2015 10:49:35 +0200 Subject: [PATCH 092/143] Clean up code --- .../TestEQTLDatasetForInteractions.java | 653 +++++++++--------- 1 file changed, 337 insertions(+), 316 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 6f358dcaa..c7ac458b8 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -23,7 +23,9 @@ import java.util.concurrent.Executors; import java.util.logging.Level; import java.util.logging.Logger; +import org.apache.commons.math3.exception.MathIllegalArgumentException; import org.apache.commons.math3.stat.ranking.NaturalRanking; +import org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression; import org.apache.mahout.math.Arrays; import umcg.genetica.genomicboundaries.GenomicBoundary; import umcg.genetica.io.Gpio; @@ -235,52 +237,8 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] HashMap hashSamples = new HashMap(); - if (1 == 1) { - - System.out.println("Removing outlier samples!!!"); - HashMap hashCovariates = new HashMap(); - hashCovariates.put("MEDIAN_5PRIME_BIAS", null); - hashCovariates.put("MEDIAN_3PRIME_BIAS", null); - ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", hashCovariates, null); - hashSamples = new HashMap(); - for (int s = 0; s < datasetCovariates.nrSamples; s++) { - if (datasetCovariates.rawData[0][s] != 0) 
{ - hashSamples.put(datasetCovariates.sampleNames[s], null); - } - } - datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", hashCovariates, hashSamples); - HashMap hashSamplesToExclude = new HashMap(); - if (1 == 1) { - int index = ((Integer) datasetCovariates.hashProbes.get("MEDIAN_5PRIME_BIAS")).intValue(); - double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[index]); - double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[index]); - for (int s = 0; s < datasetCovariates.nrSamples; s++) { - double z = (datasetCovariates.rawData[index][s] - mean) / stdev; - if (Math.abs(z) > 3) { - hashSamplesToExclude.put(datasetCovariates.sampleNames[s], null); - } - } - } - if (1 == 1) { - int index = ((Integer) datasetCovariates.hashProbes.get("MEDIAN_3PRIME_BIAS")).intValue(); - double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[index]); - double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[index]); - for (int s = 0; s < datasetCovariates.nrSamples; s++) { - double z = (datasetCovariates.rawData[index][s] - mean) / stdev; - if (Math.abs(z) > 3) { - hashSamplesToExclude.put(datasetCovariates.sampleNames[s], null); - } - } - } - hashSamples = new HashMap(); - for (int s = 0; s < datasetCovariates.nrSamples; s++) { - if (!hashSamplesToExclude.containsKey(datasetCovariates.sampleNames[s])) { - hashSamples.put(datasetCovariates.sampleNames[s], null); - hashSamples.put(datasetCovariates.sampleNames[s] + "_exp", null); - hashSamples.put(datasetCovariates.sampleNames[s] + "_dosage", null); - } - } - } + hashSamples = excludeOutliers(hashSamples); + ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", "\t", hashEQTLs, hashSamples); ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt.Expression.binary", "\t", hashEQTLs, hashSamples); @@ -290,238 +248,24 @@ 
public final String performInteractionAnalysis(String[] covsToCorrect, String[] int nrSamples = datasetGenotypes.nrSamples; - if (1 == 1) { - //Define a set of covariates that we want to use as correction: - System.out.println("Correcting gene expression data for cohort specific effects and top 25 components"); - //String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; - int nrCompsToCorrectFor = 25; - ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect2.length + nrCompsToCorrectFor, datasetGenotypes.nrSamples); - datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; - - for (int i = 0; i < covsToCorrect2.length; ++i) { - String cov = covsToCorrect2[i]; - Integer c = datasetCovariates.hashProbes.get(cov); - if (c == null) { - throw new Exception("Covariate not found: " + cov); - } - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetCovariatesToCorrectFor.rawData[i][s] = datasetCovariates.rawData[c][s]; - } - } - -// for (int p = 0; p < cohorts.length; p++) { -// for (int s = 0; s < datasetGenotypes.nrSamples; s++) { -// if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { -// datasetCovariatesToCorrectFor.rawData[p][s] = 1; -// } -// } -// } - if (nrCompsToCorrectFor > 0) { - for (int comp = 0; comp < nrCompsToCorrectFor; comp++) { - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetCovariatesToCorrectFor.rawData[covsToCorrect2.length + comp][s] = datasetCovariates.rawData[datasetCovariates.nrProbes - 51 + comp][s]; - } - } - } - - datasetCovariatesToCorrectFor.transposeDataset(); - - datasetCovariatesToCorrectFor.save(inputDir + "/CovariatesToCorrectFor.txt"); - orthogonalizeDataset(inputDir + "/CovariatesToCorrectFor.txt"); - datasetCovariatesToCorrectFor = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.PrincipalComponents.txt"); - datasetCovariatesToCorrectFor.transposeDataset(); - ExpressionDataset datasetCovariatesToCorrectForEigenvalues = new 
ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.Eigenvalues.txt"); - for (int snp = 0; snp < datasetExpression.nrProbes; snp++) { - for (int cov = 0; cov < datasetCovariatesToCorrectFor.nrProbes; cov++) { - if (datasetCovariatesToCorrectForEigenvalues.rawData[cov][0] > 1E-5) { - double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetExpression.rawData[snp]); - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetExpression.rawData[snp][s] -= rc[0] * datasetCovariatesToCorrectFor.rawData[cov][s]; - } - } - } - } - - - } - - - - - - //double[] mainEQTLCorr = new double[datasetGenotypes.nrProbes]; - - - if (snpsToSwapFile != null) { - System.out.println("Enforcing for every eQTL that the genotype dosage is swapped based on: " + snpsToSwapFile.getAbsolutePath()); - - HashSet snpsToSwap = new HashSet(); - BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(snpsToSwapFile), "UTF-8")); - String line; - while ((line = reader.readLine()) != null) { - snpsToSwap.add(line); - } - reader.close(); - - for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { - - if (snpsToSwap.contains(datasetGenotypes.probeNames[snp])) { - - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetGenotypes.rawData[snp][s] = 2 - datasetGenotypes.rawData[snp][s]; - } - - } - - //mainEQTLCorr[snp] = corr; - } - - - } else { - System.out.println("Enforcing for every eQTL that the genotype dosage positively correlated with gene expression levels:"); - - Writer writer = new BufferedWriter(new FileWriter(outputDir + "/swappedDosages.txt")); - for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { - double corr = JSci.maths.ArrayMath.correlation(datasetGenotypes.rawData[snp], datasetExpression.rawData[snp]); - //System.out.println(datasetExpression.probeNames[snp] + "\t" + snp + "\t" + corr); - - if (corr < 0) { - corr = -corr; - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - 
datasetGenotypes.rawData[snp][s] = 2 - datasetGenotypes.rawData[snp][s]; - } - writer.append(datasetGenotypes.probeNames[snp]); - writer.append('\n'); - } - - //mainEQTLCorr[snp] = corr; - } - writer.close(); - - } + correctExpressionData(covsToCorrect2, datasetGenotypes, datasetCovariates, datasetExpression); + + correctDosageDirectionForQtl(snpsToSwapFile, datasetGenotypes, datasetExpression); if (1 == 1) { - if (1 == 1) { - System.out.println("Correcting covariate data for cohort specific effects:"); -// String[] cohorts = {"LLDeep","LLS","RS","CODAM"}; - ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect2.length + covsToCorrect.length, datasetGenotypes.nrSamples); - datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; - - for (int i = 0; i < covsToCorrect2.length; ++i) { - String cov = covsToCorrect2[i]; - Integer c = datasetCovariates.hashProbes.get(cov); - if (c == null) { - throw new Exception("Covariate not found: " + cov); - } - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetCovariatesToCorrectFor.rawData[i][s] = datasetCovariates.rawData[c][s]; - } - } - -// for (int p=0; p 1E-5) { - double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetCovariates.rawData[p]); - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetCovariates.rawData[p][s] -= rc[0] * datasetCovariatesToCorrectFor.rawData[cov][s]; - } - } - } - double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[p]); - double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[p]); - if (stdev < 1E-5) { - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetCovariates.rawData[p][s] = mean; - } - } - } - } - + correctCovariateData(covsToCorrect2, covsToCorrect, datasetGenotypes, datasetCovariates); - } correctCovariatesForQtls(datasetCovariates, datasetExpression, datasetGenotypes); if (1 == 2) { - datasetCovariates.save(inputDir + 
"/CovariatesCorrected.txt"); - HashMap hashProbesToFilter = new HashMap(); - for (int p = 0; p < datasetCovariates.nrProbes; p++) { - if (datasetCovariates.probeNames[p].startsWith("ENSG")) { - hashProbesToFilter.put(datasetCovariates.probeNames[p], null); - } - } - ExpressionDataset datasetCovariatesCorrected = new ExpressionDataset(inputDir + "/CovariatesCorrected.txt", "\t", hashProbesToFilter, null); - datasetCovariatesCorrected.transposeDataset(); - datasetCovariatesCorrected.save(inputDir + "/CovariatesCorrected.txt"); - System.exit(0); + saveCorrectedCovariates(datasetCovariates); } if (1 == 2) { - ExpressionDataset datasetICA = new ExpressionDataset("/Users/lude/Documents/ICA/mixingmatrix.txt"); - //ExpressionDataset datasetICA = new ExpressionDataset("/Users/lude/Documents/ICA/signals.txt"); - datasetICA.transposeDataset(); - for (int p = 0; p < datasetICA.nrProbes; p++) { - datasetCovariates.rawData[p] = datasetICA.rawData[p]; - datasetCovariates.probeNames[p] = datasetICA.probeNames[p]; - if (p == 7) { - for (int q = 0; q < datasetCovariates.nrProbes; q++) { - double corr = JSci.maths.ArrayMath.correlation(datasetICA.rawData[p], datasetCovariates.rawData[q]); - System.out.println(p + "\t" + datasetICA.probeNames[p] + "\t" + q + "\t" + datasetCovariates.probeNames[q] + "\t" + corr + "\t" + corr * corr); - } - } - } - - orthogonalizeDataset("/Users/lude/Documents/ICA/mixingmatrix.txt"); - //System.exit(0); - } - - System.out.println("Enforcing normal distribution on covariates"); - - NaturalRanking ranker = new NaturalRanking(); - - for (int p = 0; p < datasetCovariates.nrProbes; p++) { - //Rank order the expression values: - double[] values = new double[datasetCovariates.nrSamples]; - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - values[s] = datasetCovariates.rawData[p][s]; - } - double[] rankedValues = ranker.rank(values); - //Replace the original expression value with the standard distribution enforce: - for (int s = 0; s < 
datasetGenotypes.nrSamples; s++) { - //Convert the rank to a proportion, with range <0, 1> - double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); - //Convert the pValue to a Z-Score: - double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); - datasetCovariates.rawData[p][s] = zScore; //Replace original expression value with the Z-Score - } + icaCovariates(datasetCovariates); } + forceNormalCovariates(datasetCovariates, datasetGenotypes); } @@ -534,57 +278,10 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] } } - if (1 == 1) { - System.out.println("Correcting expression data for predefined gene environment interaction effects (GC content, Gender, 5'Median Bias, 3'Median Bias):"); - int[] covsToCorrectIndex = new int[covsToCorrect.length]; - for (int c = 0; c < covsToCorrect.length; c++) { - covsToCorrectIndex[c] = ((Integer) datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); + correctExpressionDataForInteractions(covsToCorrect, datasetCovariates, datasetGenotypes, nrSamples, datasetExpression, regression); - } - for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { - double[][] valsX = new double[nrSamples][1 + covsToCorrect.length * 2]; //store genotypes, covariates, interactions - for (int s = 0; s < nrSamples; s++) { - valsX[s][0] = datasetGenotypes.rawData[snp][s]; //genotypes - } - for (int c = 0; c < covsToCorrect.length; c++) { - for (int s = 0; s < nrSamples; s++) { - valsX[s][c * 2 + 1] = datasetCovariates.rawData[covsToCorrectIndex[c]][s]; //covariate - valsX[s][c * 2 + 2] = valsX[s][0] * valsX[s][c * 2 + 1]; //interction - } - } - double[] valsY = datasetExpression.rawData[snp]; - regression.newSampleData(valsY, valsX); - datasetExpression.rawData[snp] = regression.estimateResiduals(); - } - } - - - if (1 == 1) { - System.out.println("Enforcing normal distribution on expression data:"); + forceNormalExpressionData(datasetExpression); - NaturalRanking ranker = 
new NaturalRanking(); - - for (int p = 0; p < datasetExpression.nrProbes; p++) { - //Rank order the expression values: - double[] values = new double[datasetExpression.nrSamples]; - for (int s = 0; s < datasetExpression.nrSamples; s++) { - values[s] = datasetExpression.rawData[p][s]; - } - - double[] rankedValues = ranker.rank(values); - //Replace the original expression value with the standard distribution enforce: - for (int s = 0; s < datasetExpression.nrSamples; s++) { - //Convert the rank to a proportion, with range <0, 1> - double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); - //Convert the pValue to a Z-Score: - double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); - datasetExpression.rawData[p][s] = zScore; //Replace original expression value with the Z-Score - } - } - - System.out.println("Expression data now force normal"); - - } if (permute) { System.out.println("WARNING: PERMUTING GENOTYPE DATA!!!!"); @@ -1002,4 +699,328 @@ private void correctCovariatesForQtls(ExpressionDataset datasetCovariates, Expre } } + + private HashMap excludeOutliers(HashMap hashSamples) { + System.out.println("Removing outlier samples!!!"); + HashMap hashCovariates = new HashMap(); + hashCovariates.put("MEDIAN_5PRIME_BIAS", null); + hashCovariates.put("MEDIAN_3PRIME_BIAS", null); + ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", hashCovariates, null); + hashSamples = new HashMap(); + for (int s = 0; s < datasetCovariates.nrSamples; s++) { + if (datasetCovariates.rawData[0][s] != 0) { + hashSamples.put(datasetCovariates.sampleNames[s], null); + } + } + datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", hashCovariates, hashSamples); + HashMap hashSamplesToExclude = new HashMap(); + if (1 == 1) { + int index = ((Integer) datasetCovariates.hashProbes.get("MEDIAN_5PRIME_BIAS")).intValue(); + double mean = 
JSci.maths.ArrayMath.mean(datasetCovariates.rawData[index]); + double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[index]); + for (int s = 0; s < datasetCovariates.nrSamples; s++) { + double z = (datasetCovariates.rawData[index][s] - mean) / stdev; + if (Math.abs(z) > 3) { + hashSamplesToExclude.put(datasetCovariates.sampleNames[s], null); + } + } + } + if (1 == 1) { + int index = ((Integer) datasetCovariates.hashProbes.get("MEDIAN_3PRIME_BIAS")).intValue(); + double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[index]); + double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[index]); + for (int s = 0; s < datasetCovariates.nrSamples; s++) { + double z = (datasetCovariates.rawData[index][s] - mean) / stdev; + if (Math.abs(z) > 3) { + hashSamplesToExclude.put(datasetCovariates.sampleNames[s], null); + } + } + } + hashSamples = new HashMap(); + for (int s = 0; s < datasetCovariates.nrSamples; s++) { + if (!hashSamplesToExclude.containsKey(datasetCovariates.sampleNames[s])) { + hashSamples.put(datasetCovariates.sampleNames[s], null); + hashSamples.put(datasetCovariates.sampleNames[s] + "_exp", null); + hashSamples.put(datasetCovariates.sampleNames[s] + "_dosage", null); + } + } + return hashSamples; + } + + private void correctCovariateData(String[] covsToCorrect2, String[] covsToCorrect, ExpressionDataset datasetGenotypes, ExpressionDataset datasetCovariates) throws Exception { + + System.out.println("Correcting covariate data for cohort specific effects:"); +// String[] cohorts = {"LLDeep","LLS","RS","CODAM"}; + ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect2.length + covsToCorrect.length, datasetGenotypes.nrSamples); + datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; + + for (int i = 0; i < covsToCorrect2.length; ++i) { + String cov = covsToCorrect2[i]; + Integer c = datasetCovariates.hashProbes.get(cov); + if (c == null) { + throw new 
Exception("Covariate not found: " + cov); + } + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariatesToCorrectFor.rawData[i][s] = datasetCovariates.rawData[c][s]; + } + } + +// for (int p=0; p 1E-5) { + double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetCovariates.rawData[p]); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariates.rawData[p][s] -= rc[0] * datasetCovariatesToCorrectFor.rawData[cov][s]; + } + } + } + double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[p]); + double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[p]); + if (stdev < 1E-5) { + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariates.rawData[p][s] = mean; + } + } + } + } + } + + private void correctExpressionData(String[] covsToCorrect2, ExpressionDataset datasetGenotypes, ExpressionDataset datasetCovariates, ExpressionDataset datasetExpression) throws Exception { + //Define a set of covariates that we want to use as correction: + System.out.println("Correcting gene expression data for cohort specific effects and top 25 components"); + //String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; + int nrCompsToCorrectFor = 25; + ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect2.length + nrCompsToCorrectFor, datasetGenotypes.nrSamples); + datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; + + for (int i = 0; i < covsToCorrect2.length; ++i) { + String cov = covsToCorrect2[i]; + Integer c = datasetCovariates.hashProbes.get(cov); + if (c == null) { + throw new Exception("Covariate not found: " + cov); + } + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariatesToCorrectFor.rawData[i][s] = datasetCovariates.rawData[c][s]; + } + } + +// for (int p = 0; p < cohorts.length; p++) { +// for (int s = 0; s < datasetGenotypes.nrSamples; s++) { +// if 
(datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { +// datasetCovariatesToCorrectFor.rawData[p][s] = 1; +// } +// } +// } + if (nrCompsToCorrectFor > 0) { + for (int comp = 0; comp < nrCompsToCorrectFor; comp++) { + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariatesToCorrectFor.rawData[covsToCorrect2.length + comp][s] = datasetCovariates.rawData[datasetCovariates.nrProbes - 51 + comp][s]; + } + } + } + + datasetCovariatesToCorrectFor.transposeDataset(); + + datasetCovariatesToCorrectFor.save(inputDir + "/CovariatesToCorrectFor.txt"); + orthogonalizeDataset(inputDir + "/CovariatesToCorrectFor.txt"); + datasetCovariatesToCorrectFor = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.PrincipalComponents.txt"); + datasetCovariatesToCorrectFor.transposeDataset(); + ExpressionDataset datasetCovariatesToCorrectForEigenvalues = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.Eigenvalues.txt"); + for (int snp = 0; snp < datasetExpression.nrProbes; snp++) { + for (int cov = 0; cov < datasetCovariatesToCorrectFor.nrProbes; cov++) { + if (datasetCovariatesToCorrectForEigenvalues.rawData[cov][0] > 1E-5) { + double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetExpression.rawData[snp]); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetExpression.rawData[snp][s] -= rc[0] * datasetCovariatesToCorrectFor.rawData[cov][s]; + } + } + } + } + } + + private void correctDosageDirectionForQtl(File snpsToSwapFile, ExpressionDataset datasetGenotypes, ExpressionDataset datasetExpression) throws IOException { + //double[] mainEQTLCorr = new double[datasetGenotypes.nrProbes]; + + + if (snpsToSwapFile != null) { + System.out.println("Enforcing for every eQTL that the genotype dosage is swapped based on: " + snpsToSwapFile.getAbsolutePath()); + + HashSet snpsToSwap = new HashSet(); + BufferedReader reader = new BufferedReader(new InputStreamReader(new 
FileInputStream(snpsToSwapFile), "UTF-8")); + String line; + while ((line = reader.readLine()) != null) { + snpsToSwap.add(line); + } + reader.close(); + + for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { + + if (snpsToSwap.contains(datasetGenotypes.probeNames[snp])) { + + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetGenotypes.rawData[snp][s] = 2 - datasetGenotypes.rawData[snp][s]; + } + + } + + //mainEQTLCorr[snp] = corr; + } + + + } else { + System.out.println("Enforcing for every eQTL that the genotype dosage positively correlated with gene expression levels:"); + + Writer writer = new BufferedWriter(new FileWriter(outputDir + "/swappedDosages.txt")); + for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { + double corr = JSci.maths.ArrayMath.correlation(datasetGenotypes.rawData[snp], datasetExpression.rawData[snp]); + //System.out.println(datasetExpression.probeNames[snp] + "\t" + snp + "\t" + corr); + + if (corr < 0) { + corr = -corr; + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetGenotypes.rawData[snp][s] = 2 - datasetGenotypes.rawData[snp][s]; + } + writer.append(datasetGenotypes.probeNames[snp]); + writer.append('\n'); + } + + //mainEQTLCorr[snp] = corr; + } + writer.close(); + + } + } + + private void saveCorrectedCovariates(ExpressionDataset datasetCovariates) { + datasetCovariates.save(inputDir + "/CovariatesCorrected.txt"); + HashMap hashProbesToFilter = new HashMap(); + for (int p = 0; p < datasetCovariates.nrProbes; p++) { + if (datasetCovariates.probeNames[p].startsWith("ENSG")) { + hashProbesToFilter.put(datasetCovariates.probeNames[p], null); + } + } + ExpressionDataset datasetCovariatesCorrected = new ExpressionDataset(inputDir + "/CovariatesCorrected.txt", "\t", hashProbesToFilter, null); + datasetCovariatesCorrected.transposeDataset(); + datasetCovariatesCorrected.save(inputDir + "/CovariatesCorrected.txt"); + System.exit(0); + } + + private void icaCovariates(ExpressionDataset 
datasetCovariates) { + ExpressionDataset datasetICA = new ExpressionDataset("/Users/lude/Documents/ICA/mixingmatrix.txt"); + //ExpressionDataset datasetICA = new ExpressionDataset("/Users/lude/Documents/ICA/signals.txt"); + datasetICA.transposeDataset(); + for (int p = 0; p < datasetICA.nrProbes; p++) { + datasetCovariates.rawData[p] = datasetICA.rawData[p]; + datasetCovariates.probeNames[p] = datasetICA.probeNames[p]; + if (p == 7) { + for (int q = 0; q < datasetCovariates.nrProbes; q++) { + double corr = JSci.maths.ArrayMath.correlation(datasetICA.rawData[p], datasetCovariates.rawData[q]); + System.out.println(p + "\t" + datasetICA.probeNames[p] + "\t" + q + "\t" + datasetCovariates.probeNames[q] + "\t" + corr + "\t" + corr * corr); + } + } + } + + orthogonalizeDataset("/Users/lude/Documents/ICA/mixingmatrix.txt"); + //System.exit(0); + } + + private void forceNormalCovariates(ExpressionDataset datasetCovariates, ExpressionDataset datasetGenotypes) throws ArithmeticException { + System.out.println("Enforcing normal distribution on covariates"); + + NaturalRanking ranker = new NaturalRanking(); + + for (int p = 0; p < datasetCovariates.nrProbes; p++) { + //Rank order the expression values: + double[] values = new double[datasetCovariates.nrSamples]; + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + values[s] = datasetCovariates.rawData[p][s]; + } + double[] rankedValues = ranker.rank(values); + //Replace the original expression value with the standard distribution enforce: + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + //Convert the rank to a proportion, with range <0, 1> + double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); + //Convert the pValue to a Z-Score: + double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); + datasetCovariates.rawData[p][s] = zScore; //Replace original expression value with the Z-Score + } + } + } + + private void correctExpressionDataForInteractions(String[] 
covsToCorrect, ExpressionDataset datasetCovariates, ExpressionDataset datasetGenotypes, int nrSamples, ExpressionDataset datasetExpression, OLSMultipleLinearRegression regression) throws MathIllegalArgumentException { + System.out.println("Correcting expression data for predefined gene environment interaction effects (GC content, Gender, 5'Median Bias, 3'Median Bias):"); + int[] covsToCorrectIndex = new int[covsToCorrect.length]; + for (int c = 0; c < covsToCorrect.length; c++) { + covsToCorrectIndex[c] = ((Integer) datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); + + } + for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { + double[][] valsX = new double[nrSamples][1 + covsToCorrect.length * 2]; //store genotypes, covariates, interactions + for (int s = 0; s < nrSamples; s++) { + valsX[s][0] = datasetGenotypes.rawData[snp][s]; //genotypes + } + for (int c = 0; c < covsToCorrect.length; c++) { + for (int s = 0; s < nrSamples; s++) { + valsX[s][c * 2 + 1] = datasetCovariates.rawData[covsToCorrectIndex[c]][s]; //covariate + valsX[s][c * 2 + 2] = valsX[s][0] * valsX[s][c * 2 + 1]; //interction + } + } + double[] valsY = datasetExpression.rawData[snp]; + regression.newSampleData(valsY, valsX); + datasetExpression.rawData[snp] = regression.estimateResiduals(); + } + } + + private void forceNormalExpressionData(ExpressionDataset datasetExpression) throws ArithmeticException { + System.out.println("Enforcing normal distribution on expression data:"); + + NaturalRanking ranker = new NaturalRanking(); + + for (int p = 0; p < datasetExpression.nrProbes; p++) { + //Rank order the expression values: + double[] values = new double[datasetExpression.nrSamples]; + for (int s = 0; s < datasetExpression.nrSamples; s++) { + values[s] = datasetExpression.rawData[p][s]; + } + + double[] rankedValues = ranker.rank(values); + //Replace the original expression value with the standard distribution enforce: + for (int s = 0; s < datasetExpression.nrSamples; s++) { + 
//Convert the rank to a proportion, with range <0, 1> + double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); + //Convert the pValue to a Z-Score: + double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); + datasetExpression.rawData[p][s] = zScore; //Replace original expression value with the Z-Score + } + } + + System.out.println("Expression data now force normal"); + } } \ No newline at end of file From a2c8be360a359777aef75f0e241d5970bd14826a Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 15 Jul 2015 16:26:57 +0200 Subject: [PATCH 093/143] Regress out all QTL effects from covariates --- .../TestEQTLDatasetForInteractions.java | 91 ++++++++++++++++--- 1 file changed, 78 insertions(+), 13 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index c7ac458b8..fefd9f82a 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -5,6 +5,7 @@ */ package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; +import com.google.common.collect.HashMultimap; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; @@ -16,6 +17,9 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; import java.util.Vector; import java.util.concurrent.CompletionService; import java.util.concurrent.ExecutionException; @@ -24,12 +28,16 @@ import java.util.logging.Level; import java.util.logging.Logger; import 
org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.stat.correlation.PearsonsCorrelation; +import org.apache.commons.math3.stat.correlation.SpearmansCorrelation; import org.apache.commons.math3.stat.ranking.NaturalRanking; import org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression; import org.apache.mahout.math.Arrays; import umcg.genetica.genomicboundaries.GenomicBoundary; import umcg.genetica.io.Gpio; import umcg.genetica.io.text.TextFile; +import umcg.genetica.io.trityper.EQTL; +import umcg.genetica.io.trityper.QTLTextFile; /** * @@ -68,6 +76,13 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String } HashMap eqtlGenes = getEqtls(eQTLfileName); + + HashMultimap qtlProbeSnpMultiMap = HashMultimap.create(); + final QTLTextFile eQtlFileReader = new QTLTextFile(eQTLfileName, false); + for (Iterator it = eQtlFileReader.getEQtlIterator(); it.hasNext();) { + EQTL qtl = it.next(); + qtlProbeSnpMultiMap.put(qtl.getProbe(), qtl.getRsName()); + } if (annotationFile != null) { createGeneDistanceMap(annotationFile); @@ -88,7 +103,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String String[] covsToCorrect = primaryCovsToCorrect; int cnt = 0; while (cnt < maxNumTopCovs) { - String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, eqtlGenes, outputTopCovs, snpsToSwapFile, permute); + String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, eqtlGenes, outputTopCovs, snpsToSwapFile, permute, qtlProbeSnpMultiMap); String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; for (int c = 0; c < covsToCorrect.length; c++) { covsToCorrectNew[c] = covsToCorrect[c]; @@ -233,15 +248,15 @@ public void preprocessData() { } - public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, HashMap hashEQTLs, TextFile outputTopCovs, File snpsToSwapFile, boolean permute) throws IOException, 
Exception { + public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, HashMap hashEQTLs, TextFile outputTopCovs, File snpsToSwapFile, boolean permute, HashMultimap qtlProbeSnpMultiMap) throws IOException, Exception { HashMap hashSamples = new HashMap(); hashSamples = excludeOutliers(hashSamples); - ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", "\t", hashEQTLs, hashSamples); - ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt.Expression.binary", "\t", hashEQTLs, hashSamples); + ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", "\t", null, hashSamples); + ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt.Expression.binary", "\t", null, hashSamples); ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", null, hashSamples); org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression regression = new org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression(); @@ -256,7 +271,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] correctCovariateData(covsToCorrect2, covsToCorrect, datasetGenotypes, datasetCovariates); - correctCovariatesForQtls(datasetCovariates, datasetExpression, datasetGenotypes); + correctCovariatesForQtls(datasetCovariates, datasetGenotypes, qtlProbeSnpMultiMap); if (1 == 2) { saveCorrectedCovariates(datasetCovariates); @@ -269,8 +284,6 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] } - cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = new cern.jet.random.tdouble.engine.DRand(); - ExpressionDataset datasetExpressionBeforeEQTLCorrection = new ExpressionDataset(datasetExpression.nrProbes, datasetExpression.nrSamples); for (int p = 0; p < 
datasetExpression.nrProbes; p++) { for (int s = 0; s < datasetExpression.nrSamples; s++) { @@ -685,18 +698,70 @@ static public double[] getEigenVectorSVD(Jama.SingularValueDecomposition svd, do return eigenVector; } - private void correctCovariatesForQtls(ExpressionDataset datasetCovariates, ExpressionDataset datasetExpression, ExpressionDataset datasetGenotypes) { + private void correctCovariatesForQtls(ExpressionDataset datasetCovariates, ExpressionDataset datasetGenotypes, HashMultimap qtlProbeSnpMultiMap) throws Exception { System.out.println("Correcting covariate data for cis-eQTL effects:"); + + OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression(); + + HashMap snpMap = new HashMap(datasetGenotypes.nrProbes); + for(Map.Entry snpEntry : datasetGenotypes.hashProbes.entrySet()){ + snpMap.put(snpEntry.getKey().substring(0, snpEntry.getKey().charAt('_')), snpEntry.getValue()); + } + for (int p = 0; p < datasetCovariates.nrProbes; p++) { - if (datasetExpression.hashProbes.containsKey(datasetCovariates.probeNames[p])) { - int index = ((Integer) datasetExpression.hashProbes.get(datasetCovariates.probeNames[p])).intValue(); - double[] rc = getLinearRegressionCoefficients(datasetGenotypes.rawData[index], datasetCovariates.rawData[p]); - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetCovariates.rawData[p][s] -= rc[0] * datasetGenotypes.rawData[index][s]; + + String probe = datasetCovariates.probeNames[p]; + Set probeQtls = qtlProbeSnpMultiMap.get(probe); + + System.out.println(""); + System.out.println("-------------------------------------"); + System.out.println(""); + System.out.println("probe"); + + if(!probeQtls.isEmpty()){ + + double[][] x = new double[datasetCovariates.nrSamples][probeQtls.size()]; + + int k = 0; + for(String snp : probeQtls){ + + Integer s = snpMap.get(snp); + if(s == null){ + throw new Exception("Snp " + snp + " not found"); + } + + x[k++] = datasetGenotypes.rawData[s]; + + } + + 
ols.newSampleData(datasetCovariates.rawData[p], x); + + PearsonsCorrelation cor = new PearsonsCorrelation(); + + datasetCovariates.rawData[p] = ols.estimateResiduals(); + + for(String snp : probeQtls){ + Integer s = snpMap.get(snp); + System.out.println(snp + " - " + cor.correlation(datasetCovariates.rawData[p], datasetGenotypes.rawData[s])); } + } + } + + + + +// for (int p = 0; p < datasetCovariates.nrProbes; p++) { +// if (datasetExpression.hashProbes.containsKey(datasetCovariates.probeNames[p])) { +// int index = ((Integer) datasetExpression.hashProbes.get(datasetCovariates.probeNames[p])).intValue(); +// double[] rc = getLinearRegressionCoefficients(datasetGenotypes.rawData[index], datasetCovariates.rawData[p]); +// for (int s = 0; s < datasetGenotypes.nrSamples; s++) { +// datasetCovariates.rawData[p][s] -= rc[0] * datasetGenotypes.rawData[index][s]; +// } +// } +// } } From b6ab1caa42295f7c4f27267d4f5e8262e015a5f1 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 15 Jul 2015 16:44:22 +0200 Subject: [PATCH 094/143] preprocess flag --- .../EQTLInteractionAnalyser.java | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index 619877ee6..eff0634f4 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -72,6 +72,10 @@ public class EQTLInteractionAnalyser { OptionBuilder.withDescription("Find chi2sum differences for each covariate between 2 consequtive interaction runs"); OptionBuilder.withLongOpt("chi2sumDiff"); 
OPTIONS.addOption(OptionBuilder.create("dif")); + + OptionBuilder.withDescription("Preprocess the data"); + OptionBuilder.withLongOpt("preprocess"); + OPTIONS.addOption(OptionBuilder.create("p")); OptionBuilder.withArgName("strings"); OptionBuilder.hasArgs(); @@ -106,7 +110,7 @@ public static void main(String[] args) throws IOException, Exception { String inputDir, outputDir, eqtlFile = null, annotationFile = null; final File snpsToSwapFile; int maxNumCovariatesToRegress = 20; - final boolean interpret, chi2sumDiff, permute; + final boolean interpret, chi2sumDiff, permute, preproces; final String[] covariates; final String[] covariates2; @@ -126,6 +130,7 @@ public static void main(String[] args) throws IOException, Exception { interpret = commandLine.hasOption("t"); chi2sumDiff = commandLine.hasOption("dif"); permute = commandLine.hasOption("perm"); + preproces = commandLine.hasOption("p"); if (commandLine.hasOption('a')) { annotationFile = commandLine.getOptionValue("a"); @@ -162,8 +167,11 @@ else if (commandLine.hasOption("c")){ System.exit(1); return; } - - if (interpret){ + + if(preproces){ + TestEQTLDatasetForInteractions interactor = new TestEQTLDatasetForInteractions(inputDir, outputDir); + interactor.preprocessData(); + } else if (interpret){ TestEQTLDatasetForInteractions interactor = new TestEQTLDatasetForInteractions(inputDir, outputDir); interactor.interpretInteractionZScoreMatrix(maxNumCovariatesToRegress); } From 2da49690e890208c1a4e8cc3eac00334c9648abe Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 15 Jul 2015 17:08:29 +0200 Subject: [PATCH 095/143] Fix preprocess --- .../TestEQTLDatasetForInteractions.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java 
b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index fefd9f82a..f41549b91 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -208,6 +208,9 @@ public void preprocessData() { HashMap hashGenotypes = new HashMap(); HashMap hashExpression = new HashMap(); HashMap hashEQTLs = new HashMap(); + ArrayList snps = new ArrayList(); + int countExcludedLines = 0; + try { java.io.BufferedReader in = new java.io.BufferedReader(new java.io.FileReader(new File(inputDir + "/bigTableLude.txt"))); String str = in.readLine(); @@ -226,10 +229,13 @@ public void preprocessData() { if (!str.contains("NA")) { data = str.split("\t"); hashEQTLs.put(data[0], null); + snps.add(data[1]); itr++; if (itr % 100 == 0) { System.out.println(itr); } + } else { + ++countExcludedLines; } } } catch (Exception e) { @@ -237,7 +243,12 @@ public void preprocessData() { e.printStackTrace(); } + System.out.println("EXCLUDED LINES: " + countExcludedLines); + ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt", "\t", hashEQTLs, hashGenotypes); + datasetGenotypes.probeNames = snps.toArray(new String[snps.size()]); + datasetGenotypes.recalculateHashMaps(); + ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt", "\t", hashEQTLs, hashExpression); datasetGenotypes.save(datasetGenotypes.fileName + ".Genotypes.binary"); datasetExpression.save(datasetGenotypes.fileName + ".Expression.binary"); From adde07a0fe789f2c893d8221c7d08b5bcd349cc8 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 15 Jul 2015 17:12:15 +0200 Subject: [PATCH 096/143] test --- .../TestEQTLDatasetForInteractions.java | 16 
++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index f41549b91..5df33f785 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -728,7 +728,12 @@ private void correctCovariatesForQtls(ExpressionDataset datasetCovariates, Expre System.out.println(""); System.out.println("-------------------------------------"); System.out.println(""); - System.out.println("probe"); + System.out.println(probe); + System.out.println(""); + + + + if(!probeQtls.isEmpty()){ @@ -749,9 +754,16 @@ private void correctCovariatesForQtls(ExpressionDataset datasetCovariates, Expre ols.newSampleData(datasetCovariates.rawData[p], x); PearsonsCorrelation cor = new PearsonsCorrelation(); - + + System.out.println("Before"); + for(String snp : probeQtls){ + Integer s = snpMap.get(snp); + System.out.println(snp + " - " + cor.correlation(datasetCovariates.rawData[p], datasetGenotypes.rawData[s])); + } + datasetCovariates.rawData[p] = ols.estimateResiduals(); + System.out.println("After"); for(String snp : probeQtls){ Integer s = snpMap.get(snp); System.out.println(snp + " - " + cor.correlation(datasetCovariates.rawData[p], datasetGenotypes.rawData[s])); From b9ae56850fb13e560416899790e516a42318db9f Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 15 Jul 2015 17:15:42 +0200 Subject: [PATCH 097/143] test --- .../TestEQTLDatasetForInteractions.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git 
a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 5df33f785..bfb21520a 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -717,7 +717,12 @@ private void correctCovariatesForQtls(ExpressionDataset datasetCovariates, Expre HashMap snpMap = new HashMap(datasetGenotypes.nrProbes); for(Map.Entry snpEntry : datasetGenotypes.hashProbes.entrySet()){ - snpMap.put(snpEntry.getKey().substring(0, snpEntry.getKey().charAt('_')), snpEntry.getValue()); + try{ + snpMap.put(snpEntry.getKey().substring(0, snpEntry.getKey().charAt('_')), snpEntry.getValue()); + } catch(Exception e){ + System.out.println(snpEntry.getKey()); + throw e; + } } for (int p = 0; p < datasetCovariates.nrProbes; p++) { From f8b0fbf84bf21f04fbf0f8d14eb436ce0b3b6af5 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 15 Jul 2015 17:18:51 +0200 Subject: [PATCH 098/143] oeps --- .../eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index bfb21520a..c13d1f78a 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ 
b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -718,7 +718,7 @@ private void correctCovariatesForQtls(ExpressionDataset datasetCovariates, Expre HashMap snpMap = new HashMap(datasetGenotypes.nrProbes); for(Map.Entry snpEntry : datasetGenotypes.hashProbes.entrySet()){ try{ - snpMap.put(snpEntry.getKey().substring(0, snpEntry.getKey().charAt('_')), snpEntry.getValue()); + snpMap.put(snpEntry.getKey().substring(0, snpEntry.getKey().indexOf('_')), snpEntry.getValue()); } catch(Exception e){ System.out.println(snpEntry.getKey()); throw e; From 3027fbd5221be27de7e5602b64796fd13514ae33 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 15 Jul 2015 17:21:21 +0200 Subject: [PATCH 099/143] ? --- .../eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index c13d1f78a..519d87fa7 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -733,7 +733,7 @@ private void correctCovariatesForQtls(ExpressionDataset datasetCovariates, Expre System.out.println(""); System.out.println("-------------------------------------"); System.out.println(""); - System.out.println(probe); + System.out.println(probe + " with " + probeQtls.size() + " SNPs"); System.out.println(""); From 206baa26c0582f8aea4553a1ebb34068a74f02eb Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 15 Jul 2015 17:30:13 
+0200 Subject: [PATCH 100/143] Fix --- .../TestEQTLDatasetForInteractions.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 519d87fa7..0bc3ff5e7 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -751,12 +751,15 @@ private void correctCovariatesForQtls(ExpressionDataset datasetCovariates, Expre if(s == null){ throw new Exception("Snp " + snp + " not found"); } - - x[k++] = datasetGenotypes.rawData[s]; - + double[] snpData = datasetGenotypes.rawData[s]; + for(int i = 0 ; i < datasetGenotypes.nrSamples ; ++i){ + x[i][k] = snpData[i]; + } + + k++; } - ols.newSampleData(datasetCovariates.rawData[p], x); + PearsonsCorrelation cor = new PearsonsCorrelation(); @@ -766,6 +769,7 @@ private void correctCovariatesForQtls(ExpressionDataset datasetCovariates, Expre System.out.println(snp + " - " + cor.correlation(datasetCovariates.rawData[p], datasetGenotypes.rawData[s])); } + ols.newSampleData(datasetCovariates.rawData[p], x); datasetCovariates.rawData[p] = ols.estimateResiduals(); System.out.println("After"); From e0900748fe97c0169fdaee3a2406366965d02941 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 15 Jul 2015 21:14:58 +0200 Subject: [PATCH 101/143] remove prints --- .../TestEQTLDatasetForInteractions.java | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git 
a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 0bc3ff5e7..25a4a6b5a 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -730,15 +730,12 @@ private void correctCovariatesForQtls(ExpressionDataset datasetCovariates, Expre String probe = datasetCovariates.probeNames[p]; Set probeQtls = qtlProbeSnpMultiMap.get(probe); - System.out.println(""); - System.out.println("-------------------------------------"); - System.out.println(""); - System.out.println(probe + " with " + probeQtls.size() + " SNPs"); - System.out.println(""); - - - - +// System.out.println(""); +// System.out.println("-------------------------------------"); +// System.out.println(""); +// System.out.println(probe + " with " + probeQtls.size() + " SNPs"); +// System.out.println(""); + if(!probeQtls.isEmpty()){ @@ -761,22 +758,22 @@ private void correctCovariatesForQtls(ExpressionDataset datasetCovariates, Expre - PearsonsCorrelation cor = new PearsonsCorrelation(); +// PearsonsCorrelation cor = new PearsonsCorrelation(); - System.out.println("Before"); - for(String snp : probeQtls){ - Integer s = snpMap.get(snp); - System.out.println(snp + " - " + cor.correlation(datasetCovariates.rawData[p], datasetGenotypes.rawData[s])); - } +// System.out.println("Before"); +// for(String snp : probeQtls){ +// Integer s = snpMap.get(snp); +// System.out.println(snp + " - " + cor.correlation(datasetCovariates.rawData[p], datasetGenotypes.rawData[s])); +// } ols.newSampleData(datasetCovariates.rawData[p], x); 
datasetCovariates.rawData[p] = ols.estimateResiduals(); - System.out.println("After"); - for(String snp : probeQtls){ - Integer s = snpMap.get(snp); - System.out.println(snp + " - " + cor.correlation(datasetCovariates.rawData[p], datasetGenotypes.rawData[s])); - } +// System.out.println("After"); +// for(String snp : probeQtls){ +// Integer s = snpMap.get(snp); +// System.out.println(snp + " - " + cor.correlation(datasetCovariates.rawData[p], datasetGenotypes.rawData[s])); +// } } From e24602eb11f3c6bf5786957a170a7a64474bd8d4 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 15 Jul 2015 21:26:48 +0200 Subject: [PATCH 102/143] SubsetCovariatesToTest --- .../TestEQTLDatasetForInteractions.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 25a4a6b5a..51d2656ef 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -264,11 +264,25 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] HashMap hashSamples = new HashMap(); hashSamples = excludeOutliers(hashSamples); + + String[] covariatesToTest = new String[]{"ENSG00000116701"}; + HashMap covariatesToLoad = new HashMap(); + if(covariatesToTest != null){ + for(String c : covariatesToTest){ + covariatesToLoad.put(c, null); + } + for(String c : covsToCorrect){ + covariatesToLoad.put(c, null); + } + for(String c : covsToCorrect2){ + covariatesToLoad.put(c, null); + } + } ExpressionDataset datasetGenotypes = new 
ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", "\t", null, hashSamples); ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt.Expression.binary", "\t", null, hashSamples); - ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", null, hashSamples); + ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", covariatesToLoad, hashSamples); org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression regression = new org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression(); int nrSamples = datasetGenotypes.nrSamples; From e85cbfb2bbf2113dfb962bd5b0f0fafc8adac76d Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 15 Jul 2015 21:30:40 +0200 Subject: [PATCH 103/143] Fix --- .../TestEQTLDatasetForInteractions.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 51d2656ef..ec6b461ae 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -278,6 +278,9 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] for(String c : covsToCorrect2){ covariatesToLoad.put(c, null); } + for(int i = 1 ; i <= 50 ; ++i){ + covariatesToLoad.put("Comp" + i, null); + } } ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", "\t", null, hashSamples); From 
23d8c211c1a8f58b1827899f5df8f85c10c5072e Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 15 Jul 2015 21:53:38 +0200 Subject: [PATCH 104/143] debug --- .../TestEQTLDatasetForInteractions.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index ec6b461ae..4a00a5eca 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -373,6 +373,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] for (int cov = 0; cov < datasetCovariates.nrProbes; cov++) { double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[cov]); if (stdev > 0) { + System.out.println("Starting thread for: " + datasetCovariates.probeNames[cov]); PerformInteractionAnalysisPermutationTask task = new PerformInteractionAnalysisPermutationTask(datasetGenotypes, datasetExpression, datasetCovariates, cov); pool.submit(task); nrTasks++; @@ -386,6 +387,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] if (geneDistanceMap != null) { for (int task = 0; task < nrTasks; task++) { try { + System.out.println("Waiting on thread for: " + datasetCovariates.probeNames[cov]); DoubleArrayIntegerObject result = pool.take().get(); int cov = result.intValue; double chi2Sum = 0; @@ -403,7 +405,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] maxChi2 = chi2Sum; maxChi2Cov = datasetCovariates.probeNames[cov]; } - //System.out.println(covsToCorrect.length + 
"\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); + System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); if ((task + 1) % 512 == 0) { System.out.println(task + 1 + " tasks processed"); } From 29470bd1dfb54aa0fe226b4043469e48b334a54e Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 15 Jul 2015 22:04:31 +0200 Subject: [PATCH 105/143] clean up --- .../TestEQTLDatasetForInteractions.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 4a00a5eca..667971e82 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -265,7 +265,8 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] hashSamples = excludeOutliers(hashSamples); - String[] covariatesToTest = new String[]{"ENSG00000116701"}; + //String[] covariatesToTest = new String[]{"ENSG00000116701"}; + String[] covariatesToTest = null; HashMap covariatesToLoad = new HashMap(); if(covariatesToTest != null){ @@ -373,7 +374,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] for (int cov = 0; cov < datasetCovariates.nrProbes; cov++) { double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[cov]); if (stdev > 0) { - System.out.println("Starting thread for: " + datasetCovariates.probeNames[cov]); + //System.out.println("Starting thread for: " + datasetCovariates.probeNames[cov]); 
PerformInteractionAnalysisPermutationTask task = new PerformInteractionAnalysisPermutationTask(datasetGenotypes, datasetExpression, datasetCovariates, cov); pool.submit(task); nrTasks++; @@ -387,7 +388,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] if (geneDistanceMap != null) { for (int task = 0; task < nrTasks; task++) { try { - System.out.println("Waiting on thread for: " + datasetCovariates.probeNames[cov]); + //System.out.println("Waiting on thread for: " + datasetCovariates.probeNames[cov]); DoubleArrayIntegerObject result = pool.take().get(); int cov = result.intValue; double chi2Sum = 0; @@ -405,7 +406,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] maxChi2 = chi2Sum; maxChi2Cov = datasetCovariates.probeNames[cov]; } - System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); + //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); if ((task + 1) % 512 == 0) { System.out.println(task + 1 + " tasks processed"); } From 9afb73bd0a8e694cfaecdeebe26363bfe85bf3ad Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 16 Jul 2015 09:27:02 +0200 Subject: [PATCH 106/143] Option to test specific covariates --- .../EQTLInteractionAnalyser.java | 15 ++++++++++++++- .../TestEQTLDatasetForInteractions.java | 7 +++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index eff0634f4..d6516f397 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ 
b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -88,6 +88,12 @@ public class EQTLInteractionAnalyser { OptionBuilder.withDescription("Covariates to correct for without interaction term before running the interaction analysis"); OptionBuilder.withLongOpt("cov2"); OPTIONS.addOption(OptionBuilder.create("c2")); + + OptionBuilder.withArgName("strings"); + OptionBuilder.hasArgs(); + OptionBuilder.withDescription("Covariates to to test in interaction analysis. Optinal, all are tested if not used"); + OptionBuilder.withLongOpt("covTest"); + OPTIONS.addOption(OptionBuilder.create("ct")); OptionBuilder.withArgName("path"); OptionBuilder.hasArg(); @@ -114,6 +120,7 @@ public static void main(String[] args) throws IOException, Exception { final String[] covariates; final String[] covariates2; + final String[] covariatesToTest; try { final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); @@ -153,6 +160,12 @@ else if (commandLine.hasOption("c")){ covariates2 = new String[0]; } + if (commandLine.hasOption("ct")){ + covariatesToTest = commandLine.getOptionValues("ct"); + } else { + covariatesToTest = null; + } + if (commandLine.hasOption("sw")){ snpsToSwapFile = new File(commandLine.getOptionValue("sw")); } else { @@ -180,7 +193,7 @@ else if (chi2sumDiff){ interactor.findChi2SumDifferences(maxNumCovariatesToRegress); } else { - new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile, permute); + new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile, permute, covariatesToTest); } } diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java 
b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 667971e82..d5395c830 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -59,7 +59,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws //preprocessData(); } - public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File snpsToSwapFile, boolean permute) throws IOException, Exception { + public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File snpsToSwapFile, boolean permute, String[] covariatesToTest) throws IOException, Exception { System.out.println("Input dir: " + inputDir); System.out.println("Output dir: " + outputDir); @@ -103,7 +103,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String String[] covsToCorrect = primaryCovsToCorrect; int cnt = 0; while (cnt < maxNumTopCovs) { - String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, eqtlGenes, outputTopCovs, snpsToSwapFile, permute, qtlProbeSnpMultiMap); + String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, eqtlGenes, outputTopCovs, snpsToSwapFile, permute, qtlProbeSnpMultiMap, covariatesToTest); String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; for (int c = 0; c < covsToCorrect.length; c++) { covsToCorrectNew[c] = covsToCorrect[c]; @@ -259,14 +259,13 @@ public void preprocessData() { } - public final String 
performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, HashMap hashEQTLs, TextFile outputTopCovs, File snpsToSwapFile, boolean permute, HashMultimap qtlProbeSnpMultiMap) throws IOException, Exception { + public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, HashMap hashEQTLs, TextFile outputTopCovs, File snpsToSwapFile, boolean permute, HashMultimap qtlProbeSnpMultiMap, String[] covariatesToTest) throws IOException, Exception { HashMap hashSamples = new HashMap(); hashSamples = excludeOutliers(hashSamples); //String[] covariatesToTest = new String[]{"ENSG00000116701"}; - String[] covariatesToTest = null; HashMap covariatesToLoad = new HashMap(); if(covariatesToTest != null){ From 3d80bc4690056520756654fe713ef915be921607 Mon Sep 17 00:00:00 2001 From: Dasha Zhernakova Date: Thu, 16 Jul 2015 11:21:38 +0300 Subject: [PATCH 107/143] skip covariates affected by eSNPs --- ...ormInteractionAnalysisPermutationTask.java | 82 ++++++++----- .../TestEQTLDatasetForInteractions.java | 111 ++++++++++++++++-- 2 files changed, 155 insertions(+), 38 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java index 46cd488d5..533a4faf6 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java @@ -8,6 +8,8 @@ import cern.jet.random.tdouble.engine.DoubleRandomEngine; import java.util.concurrent.Callable; import org.apache.commons.math3.linear.SingularMatrixException; +import 
org.apache.commons.math3.stat.correlation.PearsonsCorrelation; +import org.apache.commons.math3.stat.regression.SimpleRegression; /** * @@ -18,15 +20,17 @@ public class PerformInteractionAnalysisPermutationTask implements Callable corrPvalueThreshold) { // don't compute the interaction if the covariate expression is affected by theis SNP + try { - double[][] valsX = new double[nrSamples][3]; - for (int s = 0; s < nrSamples; s++) { - valsX[s][0] = datasetGenotypes.rawData[snp][s]; - valsX[s][1] = datasetCovariates.rawData[covToTest][s]; - valsX[s][2] = valsX[s][0] * valsX[s][1]; - } - double[] valsY = datasetExpression.rawData[snp]; - regression.newSampleData(valsY, valsX); - double betaInteraction = regression.estimateRegressionParameters()[3]; - double seInteraction = regression.estimateRegressionParametersStandardErrors()[3]; - double tInteraction = betaInteraction / seInteraction; - double pValueInteraction = 1; - double zScoreInteraction = 0; - if (tInteraction < 0) { - pValueInteraction = tDistColt.cdf(tInteraction); - if (pValueInteraction < 2.0E-323) { - pValueInteraction = 2.0E-323; + double[][] valsX = new double[nrSamples][3]; + for (int s = 0; s < nrSamples; s++) { + valsX[s][0] = datasetGenotypes.rawData[snp][s]; + valsX[s][1] = datasetCovariates.rawData[covToTest][s]; + valsX[s][2] = valsX[s][0] * valsX[s][1]; } - zScoreInteraction = cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction); - } else { - pValueInteraction = tDistColt.cdf(-tInteraction); - if (pValueInteraction < 2.0E-323) { - pValueInteraction = 2.0E-323; + double[] valsY = datasetExpression.rawData[snp]; + regression.newSampleData(valsY, valsX); + double betaInteraction = regression.estimateRegressionParameters()[3]; + double seInteraction = regression.estimateRegressionParametersStandardErrors()[3]; + double tInteraction = betaInteraction / seInteraction; + double pValueInteraction = 1; + double zScoreInteraction = 0; + if (tInteraction < 0) { + pValueInteraction = 
tDistColt.cdf(tInteraction); + if (pValueInteraction < 2.0E-323) { + pValueInteraction = 2.0E-323; + } + zScoreInteraction = cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction); + } else { + pValueInteraction = tDistColt.cdf(-tInteraction); + if (pValueInteraction < 2.0E-323) { + pValueInteraction = 2.0E-323; + } + zScoreInteraction = -cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction); } - zScoreInteraction = -cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction); + zScores[snp] = zScoreInteraction; + } catch (SingularMatrixException e) { + zScores[snp] = Double.NaN; } - zScores[snp] = zScoreInteraction; - } catch (SingularMatrixException e){ + } + else{ + System.out.println("Removing covariate because of eQTL effect! " + datasetCovariatesPCAForceNormal.probeNames[covToTest] + " : " + datasetGenotypes.probeNames[snp]); zScores[snp] = Double.NaN; } + } return new DoubleArrayIntegerObject(zScores, covToTest); } + + private double correlateCovariateWithGenotype(int snp){ + SimpleRegression simpleRegression = new SimpleRegression(); + double[] expression = datasetCovariatesPCAForceNormal.rawData[covToTest]; + double[] genotypes = datasetGenotypes.rawData[snp]; + for (int s = 0; s < expression.length; s++) { + simpleRegression.addData(expression[s], genotypes[s]); + } + if (datasetGenotypes.probeNames[snp].equals(datasetCovariatesPCAForceNormal.probeNames[covToTest])){ + System.out.println("Same gene! 
" + datasetGenotypes.probeNames[snp] + "\t" + datasetCovariatesPCAForceNormal.probeNames[covToTest] + "\t" + simpleRegression.getSignificance() + "\t" + simpleRegression.getR()); + } + return simpleRegression.getSignificance(); + } } diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index c7ac458b8..749a0174c 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -252,6 +252,8 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] correctDosageDirectionForQtl(snpsToSwapFile, datasetGenotypes, datasetExpression); + ExpressionDataset datasetCovariatesPCAForceNormal = correctCovariateDataPCA(covsToCorrect2,covsToCorrect,datasetGenotypes,datasetCovariates); + if (1 == 1) { correctCovariateData(covsToCorrect2, covsToCorrect, datasetGenotypes, datasetCovariates); @@ -282,7 +284,6 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] forceNormalExpressionData(datasetExpression); - if (permute) { System.out.println("WARNING: PERMUTING GENOTYPE DATA!!!!"); String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; @@ -325,14 +326,13 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] datasetZScores.recalculateHashMaps(); - java.util.concurrent.ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); CompletionService pool = new ExecutorCompletionService(threadPool); int nrTasks = 0; for (int cov = 0; cov < datasetCovariates.nrProbes; cov++) { double stdev = 
JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[cov]); if (stdev > 0) { - PerformInteractionAnalysisPermutationTask task = new PerformInteractionAnalysisPermutationTask(datasetGenotypes, datasetExpression, datasetCovariates, cov); + PerformInteractionAnalysisPermutationTask task = new PerformInteractionAnalysisPermutationTask(datasetGenotypes, datasetExpression, datasetCovariates, datasetCovariatesPCAForceNormal, cov); pool.submit(task); nrTasks++; } @@ -342,7 +342,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] double maxChi2 = 0; try { // If gene annotation provided, for chi2sum calculation use only genes that are 1mb apart - if (geneDistanceMap != null) { + //if (geneDistanceMap != null) { for (int task = 0; task < nrTasks; task++) { try { DoubleArrayIntegerObject result = pool.take().get(); @@ -350,13 +350,13 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] double chi2Sum = 0; double[] covZ = datasetZScores.rawData[cov]; for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { - if (genesFarAway(datasetZScores.sampleNames[snp], datasetZScores.probeNames[cov])) { + //if (genesFarAway(datasetZScores.sampleNames[snp], datasetZScores.probeNames[cov])) { double z = result.doubleArray[snp]; covZ[snp] = z; if (!Double.isNaN(z)) { chi2Sum += z * z; } - } + //} } if (chi2Sum > maxChi2) { maxChi2 = chi2Sum; @@ -370,7 +370,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); } } - } //If gene annotation not provided, use all gene pairs + /*} //If gene annotation not provided, use all gene pairs else { for (int task = 0; task < nrTasks; task++) { try { @@ -397,7 +397,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, 
ex); } } - } + }*/ threadPool.shutdown(); } catch (Exception e) { e.printStackTrace(); @@ -446,7 +446,7 @@ private void createGeneDistanceMap(String annotFname) throws IOException { * @param gene2 * @return true if the genes are more than 1mb apart */ - private boolean genesFarAway(String gene1, String gene2) { + public boolean genesFarAway(String gene1, String gene2) { // if one of the covariates is a technical bias or a cell count etc if ((!gene1.startsWith("ENS")) || (!gene2.startsWith("ENS"))) { return true; @@ -812,6 +812,99 @@ private void correctCovariateData(String[] covsToCorrect2, String[] covsToCorrec } } + private ExpressionDataset correctCovariateDataPCA(String[] covsToCorrect2, String[] covsToCorrect, ExpressionDataset datasetGenotypes, ExpressionDataset datasetCovariates) throws Exception { + + int nrCompsToCorrectFor = 25; + + System.out.println("Preparing data for testing eQTL effects of SNPs on covariate data:"); + System.out.println("Correcting covariate data for cohort specific effects:"); + ExpressionDataset datasetCovariatesPCAForceNormal = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", null, datasetCovariates.hashSamples); + ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect2.length + covsToCorrect.length + nrCompsToCorrectFor, datasetGenotypes.nrSamples); + datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; + + // add covariates from the second list + for (int i = 0; i < covsToCorrect2.length; ++i) { + String cov = covsToCorrect2[i]; + Integer c = datasetCovariates.hashProbes.get(cov); + if (c == null) { + throw new Exception("Covariate not found: " + cov); + } + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariatesToCorrectFor.rawData[i][s] = datasetCovariates.rawData[c][s]; + } + } + + // add covariates from the first list + HashMap hashCovsToCorrect = new HashMap(); + int[] covsToCorrectIndex = new int[covsToCorrect.length]; 
+ for (int c = 0; c < covsToCorrect.length; c++) { + hashCovsToCorrect.put(covsToCorrect[c], null); + covsToCorrectIndex[c] = ((Integer) datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariatesToCorrectFor.rawData[covsToCorrect2.length + c][s] = datasetCovariates.rawData[covsToCorrectIndex[c]][s]; + } + } + + // add PCs + if (nrCompsToCorrectFor > 0) { + for (int comp = 0; comp < nrCompsToCorrectFor; comp++) { + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariatesToCorrectFor.rawData[covsToCorrect2.length + covsToCorrect.length + comp][s] = datasetCovariates.rawData[datasetCovariates.nrProbes - 51 + comp][s]; + } + } + } + + datasetCovariatesToCorrectFor.transposeDataset(); + + datasetCovariatesToCorrectFor.save(inputDir + "/CovariatesToCorrectFor.txt"); + orthogonalizeDataset(inputDir + "/CovariatesToCorrectFor.txt"); + datasetCovariatesToCorrectFor = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.PrincipalComponents.txt"); + datasetCovariatesToCorrectFor.transposeDataset(); + ExpressionDataset datasetCovariatesToCorrectForEigenvalues = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.Eigenvalues.txt"); + + for (int p = 0; p < datasetCovariatesPCAForceNormal.nrProbes; p++) { + if (!hashCovsToCorrect.containsKey(datasetCovariatesPCAForceNormal.probeNames[p])) { + for (int cov = 0; cov < datasetCovariatesToCorrectFor.nrProbes; cov++) { + if (datasetCovariatesToCorrectForEigenvalues.rawData[cov][0] > 1E-5) { + double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetCovariatesPCAForceNormal.rawData[p]); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariatesPCAForceNormal.rawData[p][s] -= rc[0] * datasetCovariatesToCorrectFor.rawData[cov][s]; + } + } + } + /*double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[p]); + double mean = 
JSci.maths.ArrayMath.mean(datasetCovariates.rawData[p]); + if (stdev < 1E-5) { + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariatesPCAForceNormal.rawData[p][s] = mean; + } + }*/ + } + } + + System.out.println("Enforcing normal distribution on covariates"); + + NaturalRanking ranker = new NaturalRanking(); + + for (int p = 0; p < datasetCovariatesPCAForceNormal.nrProbes; p++) { + //Rank order the expression values: + double[] values = new double[datasetCovariatesPCAForceNormal.nrSamples]; + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + values[s] = datasetCovariatesPCAForceNormal.rawData[p][s]; + } + double[] rankedValues = ranker.rank(values); + //Replace the original expression value with the standard distribution enforce: + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + //Convert the rank to a proportion, with range <0, 1> + double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); + //Convert the pValue to a Z-Score: + double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); + datasetCovariatesPCAForceNormal.rawData[p][s] = zScore; //Replace original expression value with the Z-Score + } + } + return datasetCovariatesPCAForceNormal; + } + private void correctExpressionData(String[] covsToCorrect2, ExpressionDataset datasetGenotypes, ExpressionDataset datasetCovariates, ExpressionDataset datasetExpression) throws Exception { //Define a set of covariates that we want to use as correction: System.out.println("Correcting gene expression data for cohort specific effects and top 25 components"); From b4d8caf882138c7832de65b169042158fd1736ac Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 16 Jul 2015 12:11:03 +0200 Subject: [PATCH 108/143] Bugfix --- .../PerformInteractionAnalysisPermutationTask.java | 7 ++++--- .../TestEQTLDatasetForInteractions.java | 5 ++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git 
a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java index 533a4faf6..7a861f016 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java @@ -97,9 +97,10 @@ private double correlateCovariateWithGenotype(int snp){ for (int s = 0; s < expression.length; s++) { simpleRegression.addData(expression[s], genotypes[s]); } - if (datasetGenotypes.probeNames[snp].equals(datasetCovariatesPCAForceNormal.probeNames[covToTest])){ - System.out.println("Same gene! " + datasetGenotypes.probeNames[snp] + "\t" + datasetCovariatesPCAForceNormal.probeNames[covToTest] + "\t" + simpleRegression.getSignificance() + "\t" + simpleRegression.getR()); - } + //This is not working now that we have the _rs next to the gene names +// if (datasetGenotypes.probeNames[snp].equals(datasetCovariatesPCAForceNormal.probeNames[covToTest])){ +// System.out.println("Same gene! 
" + datasetGenotypes.probeNames[snp] + "\t" + datasetCovariatesPCAForceNormal.probeNames[covToTest] + "\t" + simpleRegression.getSignificance() + "\t" + simpleRegression.getR()); +// } return simpleRegression.getSignificance(); } } diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 26dd4c28f..a99a5852d 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -866,8 +866,11 @@ private void correctCovariateData(String[] covsToCorrect2, String[] covsToCorrec ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect2.length + covsToCorrect.length, datasetGenotypes.nrSamples); datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; + HashMap hashCovsToCorrect = new HashMap(); + for (int i = 0; i < covsToCorrect2.length; ++i) { String cov = covsToCorrect2[i]; + hashCovsToCorrect.put(cov, null); Integer c = datasetCovariates.hashProbes.get(cov); if (c == null) { throw new Exception("Covariate not found: " + cov); @@ -885,7 +888,7 @@ private void correctCovariateData(String[] covsToCorrect2, String[] covsToCorrec // } // } - HashMap hashCovsToCorrect = new HashMap(); + int[] covsToCorrectIndex = new int[covsToCorrect.length]; for (int c = 0; c < covsToCorrect.length; c++) { hashCovsToCorrect.put(covsToCorrect[c], null); From e2b037162abe5925a9153531e9b3daaeaee86885 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 16 Jul 2015 12:50:15 +0200 Subject: [PATCH 109/143] oeps --- 
.../eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index a99a5852d..26f9a40d7 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -281,6 +281,8 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] for(int i = 1 ; i <= 50 ; ++i){ covariatesToLoad.put("Comp" + i, null); } + } else { + covariatesToLoad = null; } ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", "\t", null, hashSamples); From d4ef7a7b54237c0a923bf4c63d74c40814cc42d6 Mon Sep 17 00:00:00 2001 From: Dasha Zhernakova Date: Thu, 16 Jul 2015 14:01:10 +0300 Subject: [PATCH 110/143] sample names fix --- .../EQTLInteractionAnalyser.java | 4 +- .../TestEQTLDatasetForInteractions.java | 82 ++++++++++--------- 2 files changed, 46 insertions(+), 40 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index d6516f397..017b9b392 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ 
b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -91,7 +91,7 @@ public class EQTLInteractionAnalyser { OptionBuilder.withArgName("strings"); OptionBuilder.hasArgs(); - OptionBuilder.withDescription("Covariates to to test in interaction analysis. Optinal, all are tested if not used"); + OptionBuilder.withDescription("Covariates to to test in interaction analysis. Optional, all are tested if not used"); OptionBuilder.withLongOpt("covTest"); OPTIONS.addOption(OptionBuilder.create("ct")); @@ -117,7 +117,7 @@ public static void main(String[] args) throws IOException, Exception { final File snpsToSwapFile; int maxNumCovariatesToRegress = 20; final boolean interpret, chi2sumDiff, permute, preproces; - + final String[] covariates; final String[] covariates2; final String[] covariatesToTest; diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 26dd4c28f..e7cc5a1c6 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -27,6 +27,8 @@ import java.util.concurrent.Executors; import java.util.logging.Level; import java.util.logging.Logger; + +import org.apache.commons.math3.analysis.function.Exp; import org.apache.commons.math3.exception.MathIllegalArgumentException; import org.apache.commons.math3.stat.correlation.PearsonsCorrelation; import org.apache.commons.math3.stat.correlation.SpearmansCorrelation; @@ -265,7 +267,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, 
String[] hashSamples = excludeOutliers(hashSamples); - //String[] covariatesToTest = new String[]{"ENSG00000116701"}; + //covariatesToTest = new String[]{"ENSG00000116701"}; HashMap covariatesToLoad = new HashMap(); if(covariatesToTest != null){ @@ -286,7 +288,6 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", "\t", null, hashSamples); ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt.Expression.binary", "\t", null, hashSamples); ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", covariatesToLoad, hashSamples); - org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression regression = new org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression(); int nrSamples = datasetGenotypes.nrSamples; @@ -295,7 +296,8 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] correctDosageDirectionForQtl(snpsToSwapFile, datasetGenotypes, datasetExpression); - ExpressionDataset datasetCovariatesPCAForceNormal = correctCovariateDataPCA(covsToCorrect2,covsToCorrect,datasetGenotypes,datasetCovariates); + ExpressionDataset datasetCovariatesPCAForceNormal = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", covariatesToLoad, hashSamples); + correctCovariateDataPCA(covsToCorrect2,covsToCorrect,datasetGenotypes,datasetCovariatesPCAForceNormal); if (1 == 1) { @@ -326,34 +328,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] forceNormalExpressionData(datasetExpression); if (permute) { - System.out.println("WARNING: PERMUTING GENOTYPE DATA!!!!"); - String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; - int[] permSampleIDs = new int[datasetGenotypes.nrSamples]; - for (int p = 0; p < cohorts.length; p++) { - Vector vecSamples = 
new Vector(); - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { - vecSamples.add(s); - } - } - int nrSamplesThisCohort = vecSamples.size(); - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { - int randomSample = ((Integer) vecSamples.remove((int) ((double) vecSamples.size() * Math.random()))).intValue(); - permSampleIDs[s] = randomSample; - } - } - } - ExpressionDataset datasetGenotypes2 = new ExpressionDataset(datasetGenotypes.nrProbes, datasetGenotypes.nrSamples); - datasetGenotypes2.probeNames = datasetGenotypes.probeNames; - datasetGenotypes2.sampleNames = datasetGenotypes.sampleNames; - datasetGenotypes2.recalculateHashMaps(); - for (int p = 0; p < datasetGenotypes2.nrProbes; p++) { - for (int s = 0; s < datasetGenotypes2.nrSamples; s++) { - datasetGenotypes2.rawData[p][s] = datasetGenotypes.rawData[p][permSampleIDs[s]]; - } - } - datasetGenotypes = datasetGenotypes2; + datasetGenotypes = permuteGenotypeData(datasetGenotypes); } @@ -924,25 +899,25 @@ private void correctCovariateData(String[] covsToCorrect2, String[] covsToCorrec } } - private ExpressionDataset correctCovariateDataPCA(String[] covsToCorrect2, String[] covsToCorrect, ExpressionDataset datasetGenotypes, ExpressionDataset datasetCovariates) throws Exception { + private ExpressionDataset correctCovariateDataPCA(String[] covsToCorrect2, String[] covsToCorrect, ExpressionDataset datasetGenotypes, ExpressionDataset datasetCovariatesPCAForceNormal) throws Exception { int nrCompsToCorrectFor = 25; System.out.println("Preparing data for testing eQTL effects of SNPs on covariate data:"); System.out.println("Correcting covariate data for cohort specific effects:"); - ExpressionDataset datasetCovariatesPCAForceNormal = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", null, datasetCovariates.hashSamples); + ExpressionDataset 
datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect2.length + covsToCorrect.length + nrCompsToCorrectFor, datasetGenotypes.nrSamples); datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; // add covariates from the second list for (int i = 0; i < covsToCorrect2.length; ++i) { String cov = covsToCorrect2[i]; - Integer c = datasetCovariates.hashProbes.get(cov); + Integer c = datasetCovariatesPCAForceNormal.hashProbes.get(cov); if (c == null) { throw new Exception("Covariate not found: " + cov); } for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetCovariatesToCorrectFor.rawData[i][s] = datasetCovariates.rawData[c][s]; + datasetCovariatesToCorrectFor.rawData[i][s] = datasetCovariatesPCAForceNormal.rawData[c][s]; } } @@ -951,9 +926,9 @@ private ExpressionDataset correctCovariateDataPCA(String[] covsToCorrect2, Strin int[] covsToCorrectIndex = new int[covsToCorrect.length]; for (int c = 0; c < covsToCorrect.length; c++) { hashCovsToCorrect.put(covsToCorrect[c], null); - covsToCorrectIndex[c] = ((Integer) datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); + covsToCorrectIndex[c] = ((Integer) datasetCovariatesPCAForceNormal.hashProbes.get(covsToCorrect[c])).intValue(); for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetCovariatesToCorrectFor.rawData[covsToCorrect2.length + c][s] = datasetCovariates.rawData[covsToCorrectIndex[c]][s]; + datasetCovariatesToCorrectFor.rawData[covsToCorrect2.length + c][s] = datasetCovariatesPCAForceNormal.rawData[covsToCorrectIndex[c]][s]; } } @@ -961,7 +936,7 @@ private ExpressionDataset correctCovariateDataPCA(String[] covsToCorrect2, Strin if (nrCompsToCorrectFor > 0) { for (int comp = 0; comp < nrCompsToCorrectFor; comp++) { for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetCovariatesToCorrectFor.rawData[covsToCorrect2.length + covsToCorrect.length + comp][s] = datasetCovariates.rawData[datasetCovariates.nrProbes - 51 + comp][s]; + 
datasetCovariatesToCorrectFor.rawData[covsToCorrect2.length + covsToCorrect.length + comp][s] = datasetCovariatesPCAForceNormal.rawData[datasetCovariatesPCAForceNormal.nrProbes - 51 + comp][s]; } } } @@ -1228,4 +1203,35 @@ private void forceNormalExpressionData(ExpressionDataset datasetExpression) thro System.out.println("Expression data now force normal"); } + + private ExpressionDataset permuteGenotypeData(ExpressionDataset datasetGenotypes){ + System.out.println("WARNING: PERMUTING GENOTYPE DATA!!!!"); + String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; + int[] permSampleIDs = new int[datasetGenotypes.nrSamples]; + for (int p = 0; p < cohorts.length; p++) { + Vector vecSamples = new Vector(); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { + vecSamples.add(s); + } + } + int nrSamplesThisCohort = vecSamples.size(); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { + int randomSample = ((Integer) vecSamples.remove((int) ((double) vecSamples.size() * Math.random()))).intValue(); + permSampleIDs[s] = randomSample; + } + } + } + ExpressionDataset datasetGenotypes2 = new ExpressionDataset(datasetGenotypes.nrProbes, datasetGenotypes.nrSamples); + datasetGenotypes2.probeNames = datasetGenotypes.probeNames; + datasetGenotypes2.sampleNames = datasetGenotypes.sampleNames; + datasetGenotypes2.recalculateHashMaps(); + for (int p = 0; p < datasetGenotypes2.nrProbes; p++) { + for (int s = 0; s < datasetGenotypes2.nrSamples; s++) { + datasetGenotypes2.rawData[p][s] = datasetGenotypes.rawData[p][permSampleIDs[s]]; + } + } + return datasetGenotypes2; + } } \ No newline at end of file From 844393ff4235355e692193b33781799e41161d0e Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 16 Jul 2015 13:21:16 +0200 Subject: [PATCH 111/143] Included samples --- .../EQTLInteractionAnalyser.java | 15 +- .../TestEQTLDatasetForInteractions.java 
| 235 +++++++++--------- 2 files changed, 137 insertions(+), 113 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index d6516f397..065b010cc 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -106,6 +106,12 @@ public class EQTLInteractionAnalyser { OptionBuilder.withDescription("File containing the SNPs to swap"); OptionBuilder.withLongOpt("swap"); OPTIONS.addOption(OptionBuilder.create("sw")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Included samples"); + OptionBuilder.withLongOpt("includedSamples"); + OPTIONS.addOption(OptionBuilder.create("is")); } public static void main(String[] args) throws IOException, Exception { @@ -121,6 +127,7 @@ public static void main(String[] args) throws IOException, Exception { final String[] covariates; final String[] covariates2; final String[] covariatesToTest; + final File samplesToInculudeFile; try { final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); @@ -171,6 +178,12 @@ else if (commandLine.hasOption("c")){ } else { snpsToSwapFile = null; } + + if (commandLine.hasOption("is")){ + samplesToInculudeFile = new File(commandLine.getOptionValue("is")); + } else { + samplesToInculudeFile = null; + } } catch (ParseException ex) { System.err.println("Invalid command line arguments: "); @@ -193,7 +206,7 @@ else if (chi2sumDiff){ interactor.findChi2SumDifferences(maxNumCovariatesToRegress); } else { - new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, 
maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile, permute, covariatesToTest); + new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile, permute, covariatesToTest, samplesToInculudeFile); } } diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 26f9a40d7..a9303142b 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -45,7 +45,6 @@ */ public class TestEQTLDatasetForInteractions { - String inputDir = null; String outputDir = null; HashMap> geneDistanceMap = null; @@ -59,7 +58,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws //preprocessData(); } - public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File snpsToSwapFile, boolean permute, String[] covariatesToTest) throws IOException, Exception { + public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File snpsToSwapFile, boolean permute, String[] covariatesToTest, File samplesToInculudeFile) throws IOException, Exception { System.out.println("Input dir: " + inputDir); System.out.println("Output dir: " + outputDir); @@ -76,13 +75,13 @@ public TestEQTLDatasetForInteractions(String 
inputDir, String outputDir, String } HashMap eqtlGenes = getEqtls(eQTLfileName); - + HashMultimap qtlProbeSnpMultiMap = HashMultimap.create(); final QTLTextFile eQtlFileReader = new QTLTextFile(eQTLfileName, false); for (Iterator it = eQtlFileReader.getEQtlIterator(); it.hasNext();) { - EQTL qtl = it.next(); + EQTL qtl = it.next(); qtlProbeSnpMultiMap.put(qtl.getProbe(), qtl.getRsName()); - } + } if (annotationFile != null) { createGeneDistanceMap(annotationFile); @@ -98,12 +97,12 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String } System.out.println(); - + String[] covsToCorrect = primaryCovsToCorrect; int cnt = 0; while (cnt < maxNumTopCovs) { - String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, eqtlGenes, outputTopCovs, snpsToSwapFile, permute, qtlProbeSnpMultiMap, covariatesToTest); + String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, eqtlGenes, outputTopCovs, snpsToSwapFile, permute, qtlProbeSnpMultiMap, covariatesToTest, samplesToInculudeFile); String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; for (int c = 0; c < covsToCorrect.length; c++) { covsToCorrectNew[c] = covsToCorrect[c]; @@ -210,7 +209,7 @@ public void preprocessData() { HashMap hashEQTLs = new HashMap(); ArrayList snps = new ArrayList(); int countExcludedLines = 0; - + try { java.io.BufferedReader in = new java.io.BufferedReader(new java.io.FileReader(new File(inputDir + "/bigTableLude.txt"))); String str = in.readLine(); @@ -244,11 +243,11 @@ public void preprocessData() { } System.out.println("EXCLUDED LINES: " + countExcludedLines); - + ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt", "\t", hashEQTLs, hashGenotypes); datasetGenotypes.probeNames = snps.toArray(new String[snps.size()]); datasetGenotypes.recalculateHashMaps(); - + ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt", "\t", hashEQTLs, 
hashExpression); datasetGenotypes.save(datasetGenotypes.fileName + ".Genotypes.binary"); datasetExpression.save(datasetGenotypes.fileName + ".Expression.binary"); @@ -259,26 +258,38 @@ public void preprocessData() { } - public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, HashMap hashEQTLs, TextFile outputTopCovs, File snpsToSwapFile, boolean permute, HashMultimap qtlProbeSnpMultiMap, String[] covariatesToTest) throws IOException, Exception { + public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, HashMap hashEQTLs, TextFile outputTopCovs, File snpsToSwapFile, boolean permute, HashMultimap qtlProbeSnpMultiMap, String[] covariatesToTest, File samplesToInculudeFile) throws IOException, Exception { - HashMap hashSamples = new HashMap(); + HashMap hashSamples; + if (samplesToInculudeFile != null) { + hashSamples = new HashMap(); + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(samplesToInculudeFile), "UTF-8")); + String line; + while ((line = reader.readLine()) != null) { + hashSamples.put(line, null); + hashSamples.put(line + "_exp", null); + hashSamples.put(line + "_dosage", null); + } + } else{ + hashSamples = null; + } - hashSamples = excludeOutliers(hashSamples); + //hashSamples = excludeOutliers(hashSamples); //String[] covariatesToTest = new String[]{"ENSG00000116701"}; - + HashMap covariatesToLoad = new HashMap(); - if(covariatesToTest != null){ - for(String c : covariatesToTest){ + if (covariatesToTest != null) { + for (String c : covariatesToTest) { covariatesToLoad.put(c, null); } - for(String c : covsToCorrect){ + for (String c : covsToCorrect) { covariatesToLoad.put(c, null); } - for(String c : covsToCorrect2){ + for (String c : covsToCorrect2) { covariatesToLoad.put(c, null); } - for(int i = 1 ; i <= 50 ; ++i){ + for (int i = 1; i <= 50; ++i) { covariatesToLoad.put("Comp" + i, null); } } else { @@ -294,10 +305,10 @@ public final 
String performInteractionAnalysis(String[] covsToCorrect, String[] correctExpressionData(covsToCorrect2, datasetGenotypes, datasetCovariates, datasetExpression); - + correctDosageDirectionForQtl(snpsToSwapFile, datasetGenotypes, datasetExpression); - ExpressionDataset datasetCovariatesPCAForceNormal = correctCovariateDataPCA(covsToCorrect2,covsToCorrect,datasetGenotypes,datasetCovariates); + ExpressionDataset datasetCovariatesPCAForceNormal = correctCovariateDataPCA(covsToCorrect2, covsToCorrect, datasetGenotypes, datasetCovariates); if (1 == 1) { @@ -386,62 +397,62 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] try { // If gene annotation provided, for chi2sum calculation use only genes that are 1mb apart //if (geneDistanceMap != null) { - for (int task = 0; task < nrTasks; task++) { - try { - //System.out.println("Waiting on thread for: " + datasetCovariates.probeNames[cov]); - DoubleArrayIntegerObject result = pool.take().get(); - int cov = result.intValue; - double chi2Sum = 0; - double[] covZ = datasetZScores.rawData[cov]; - for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { - //if (genesFarAway(datasetZScores.sampleNames[snp], datasetZScores.probeNames[cov])) { - double z = result.doubleArray[snp]; - covZ[snp] = z; - if (!Double.isNaN(z)) { - chi2Sum += z * z; - } - //} - } - if (chi2Sum > maxChi2) { - maxChi2 = chi2Sum; - maxChi2Cov = datasetCovariates.probeNames[cov]; + for (int task = 0; task < nrTasks; task++) { + try { + //System.out.println("Waiting on thread for: " + datasetCovariates.probeNames[cov]); + DoubleArrayIntegerObject result = pool.take().get(); + int cov = result.intValue; + double chi2Sum = 0; + double[] covZ = datasetZScores.rawData[cov]; + for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { + //if (genesFarAway(datasetZScores.sampleNames[snp], datasetZScores.probeNames[cov])) { + double z = result.doubleArray[snp]; + covZ[snp] = z; + if (!Double.isNaN(z)) { + chi2Sum += z * z; } - 
//System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); - if ((task + 1) % 512 == 0) { - System.out.println(task + 1 + " tasks processed"); - } - } catch (ExecutionException ex) { - Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); + //} } - } - /*} //If gene annotation not provided, use all gene pairs - else { - for (int task = 0; task < nrTasks; task++) { - try { - DoubleArrayIntegerObject result = pool.take().get(); - int cov = result.intValue; - double chi2Sum = 0; - double[] covZ = datasetZScores.rawData[cov]; - for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { - double z = result.doubleArray[snp]; - covZ[snp] = z; - if (!Double.isNaN(z)) { - chi2Sum += z * z; - } - } - if (chi2Sum > maxChi2) { - maxChi2 = chi2Sum; - maxChi2Cov = datasetCovariates.probeNames[cov]; - } - //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); - if ((task + 1) % 512 == 0) { - System.out.println(task + 1 + " tasks processed"); - } - } catch (ExecutionException ex) { - Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); + if (chi2Sum > maxChi2) { + maxChi2 = chi2Sum; + maxChi2Cov = datasetCovariates.probeNames[cov]; + } + //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); + if ((task + 1) % 512 == 0) { + System.out.println(task + 1 + " tasks processed"); } + } catch (ExecutionException ex) { + Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); } - }*/ + } + /*} //If gene annotation not provided, use all gene pairs + else { + for (int task = 0; task < nrTasks; task++) { + try { + DoubleArrayIntegerObject result = pool.take().get(); + int cov = result.intValue; + double chi2Sum = 0; + double[] covZ = datasetZScores.rawData[cov]; 
+ for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { + double z = result.doubleArray[snp]; + covZ[snp] = z; + if (!Double.isNaN(z)) { + chi2Sum += z * z; + } + } + if (chi2Sum > maxChi2) { + maxChi2 = chi2Sum; + maxChi2Cov = datasetCovariates.probeNames[cov]; + } + //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); + if ((task + 1) % 512 == 0) { + System.out.println(task + 1 + " tasks processed"); + } + } catch (ExecutionException ex) { + Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); + } + } + }*/ threadPool.shutdown(); } catch (Exception e) { e.printStackTrace(); @@ -732,76 +743,76 @@ static public double[] getEigenVectorSVD(Jama.SingularValueDecomposition svd, do private void correctCovariatesForQtls(ExpressionDataset datasetCovariates, ExpressionDataset datasetGenotypes, HashMultimap qtlProbeSnpMultiMap) throws Exception { System.out.println("Correcting covariate data for cis-eQTL effects:"); - + OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression(); - + HashMap snpMap = new HashMap(datasetGenotypes.nrProbes); - for(Map.Entry snpEntry : datasetGenotypes.hashProbes.entrySet()){ - try{ + for (Map.Entry snpEntry : datasetGenotypes.hashProbes.entrySet()) { + try { snpMap.put(snpEntry.getKey().substring(0, snpEntry.getKey().indexOf('_')), snpEntry.getValue()); - } catch(Exception e){ + } catch (Exception e) { System.out.println(snpEntry.getKey()); throw e; } } - + for (int p = 0; p < datasetCovariates.nrProbes; p++) { - + String probe = datasetCovariates.probeNames[p]; Set probeQtls = qtlProbeSnpMultiMap.get(probe); - + // System.out.println(""); // System.out.println("-------------------------------------"); // System.out.println(""); // System.out.println(probe + " with " + probeQtls.size() + " SNPs"); // System.out.println(""); - - if(!probeQtls.isEmpty()){ - + + if (!probeQtls.isEmpty()) { + double[][] x = new 
double[datasetCovariates.nrSamples][probeQtls.size()]; - + int k = 0; - for(String snp : probeQtls){ - + for (String snp : probeQtls) { + Integer s = snpMap.get(snp); - if(s == null){ + if (s == null) { throw new Exception("Snp " + snp + " not found"); } double[] snpData = datasetGenotypes.rawData[s]; - for(int i = 0 ; i < datasetGenotypes.nrSamples ; ++i){ + for (int i = 0; i < datasetGenotypes.nrSamples; ++i) { x[i][k] = snpData[i]; } - + k++; - } - - - + } + + + // PearsonsCorrelation cor = new PearsonsCorrelation(); - + // System.out.println("Before"); // for(String snp : probeQtls){ // Integer s = snpMap.get(snp); // System.out.println(snp + " - " + cor.correlation(datasetCovariates.rawData[p], datasetGenotypes.rawData[s])); // } - + ols.newSampleData(datasetCovariates.rawData[p], x); datasetCovariates.rawData[p] = ols.estimateResiduals(); - + // System.out.println("After"); // for(String snp : probeQtls){ // Integer s = snpMap.get(snp); // System.out.println(snp + " - " + cor.correlation(datasetCovariates.rawData[p], datasetGenotypes.rawData[s])); // } - + } - + } - - - - + + + + // for (int p = 0; p < datasetCovariates.nrProbes; p++) { // if (datasetExpression.hashProbes.containsKey(datasetCovariates.probeNames[p])) { // int index = ((Integer) datasetExpression.hashProbes.get(datasetCovariates.probeNames[p])).intValue(); @@ -862,14 +873,14 @@ private HashMap excludeOutliers(HashMap hashSamples) { } private void correctCovariateData(String[] covsToCorrect2, String[] covsToCorrect, ExpressionDataset datasetGenotypes, ExpressionDataset datasetCovariates) throws Exception { - + System.out.println("Correcting covariate data for cohort specific effects:"); // String[] cohorts = {"LLDeep","LLS","RS","CODAM"}; ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect2.length + covsToCorrect.length, datasetGenotypes.nrSamples); datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; HashMap hashCovsToCorrect = new 
HashMap(); - + for (int i = 0; i < covsToCorrect2.length; ++i) { String cov = covsToCorrect2[i]; hashCovsToCorrect.put(cov, null); @@ -890,7 +901,7 @@ private void correctCovariateData(String[] covsToCorrect2, String[] covsToCorrec // } // } - + int[] covsToCorrectIndex = new int[covsToCorrect.length]; for (int c = 0; c < covsToCorrect.length; c++) { hashCovsToCorrect.put(covsToCorrect[c], null); @@ -990,12 +1001,12 @@ private ExpressionDataset correctCovariateDataPCA(String[] covsToCorrect2, Strin } } /*double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[p]); - double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[p]); - if (stdev < 1E-5) { - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - datasetCovariatesPCAForceNormal.rawData[p][s] = mean; - } - }*/ + double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[p]); + if (stdev < 1E-5) { + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + datasetCovariatesPCAForceNormal.rawData[p][s] = mean; + } + }*/ } } From 378a90c0844d5e040adcc224b50d9d4ea640f413 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 16 Jul 2015 13:24:54 +0200 Subject: [PATCH 112/143] Default no covariates --- .../eqtlinteractionanalyser/EQTLInteractionAnalyser.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index d6516f397..b50e711fc 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -151,7 +151,7 @@ public static void main(String[] args) throws 
IOException, Exception { else if (commandLine.hasOption("c")){ covariates = commandLine.getOptionValues("c"); } else { - covariates = new String[]{"gender", "GC", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS"}; + covariates = new String[0]; } if (commandLine.hasOption("c2")){ From d8080a01aed9d2693d8b5094097e4f6e044dbe53 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 16 Jul 2015 13:25:14 +0200 Subject: [PATCH 113/143] tmp --- .../TestEQTLDatasetForInteractions.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index d5395c830..d461f970e 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -428,11 +428,11 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] chi2Sum += z * z; } } - if (chi2Sum > maxChi2) { + if (chi2Sum > maxChi2 && !datasetCovariates.probeNames[cov].startsWith("Comp")) { maxChi2 = chi2Sum; maxChi2Cov = datasetCovariates.probeNames[cov]; } - //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); + System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); if ((task + 1) % 512 == 0) { System.out.println(task + 1 + " tasks processed"); } From 14b3cad888ab895bfae8264d545722efe3101a6a Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 16 Jul 2015 13:34:19 +0200 Subject: [PATCH 114/143] Merge --- .../TestEQTLDatasetForInteractions.java | 32 
++----------------- 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 68972f56d..7ff4d483a 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -413,42 +413,14 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] } //} } -<<<<<<< HEAD - } - } //If gene annotation not provided, use all gene pairs - else { - for (int task = 0; task < nrTasks; task++) { - try { - DoubleArrayIntegerObject result = pool.take().get(); - int cov = result.intValue; - double chi2Sum = 0; - double[] covZ = datasetZScores.rawData[cov]; - for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { - double z = result.doubleArray[snp]; - covZ[snp] = z; - if (!Double.isNaN(z)) { - chi2Sum += z * z; - } - } - if (chi2Sum > maxChi2 && !datasetCovariates.probeNames[cov].startsWith("Comp")) { - maxChi2 = chi2Sum; - maxChi2Cov = datasetCovariates.probeNames[cov]; - } - System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); - if ((task + 1) % 512 == 0) { - System.out.println(task + 1 + " tasks processed"); - } - } catch (ExecutionException ex) { - Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); -======= + if (chi2Sum > maxChi2) { maxChi2 = chi2Sum; maxChi2Cov = datasetCovariates.probeNames[cov]; } - //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); 
+ System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); if ((task + 1) % 512 == 0) { System.out.println(task + 1 + " tasks processed"); ->>>>>>> FETCH_HEAD } } catch (ExecutionException ex) { Logger.getLogger(PerformInteractionAnalysisPermutationTask.class.getName()).log(Level.SEVERE, null, ex); From e4c06b9aaecb305ef293f07c353ab1dff9be5496 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 16 Jul 2015 13:37:49 +0200 Subject: [PATCH 115/143] tmp --- .../PerformInteractionAnalysisPermutationTask.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java index 7a861f016..a29394c2d 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java @@ -46,7 +46,8 @@ public DoubleArrayIntegerObject call() throws Exception { double[] zScores = new double[datasetGenotypes.nrProbes]; for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { - double corrPvalue = correlateCovariateWithGenotype(snp); + //double corrPvalue = correlateCovariateWithGenotype(snp); + double corrPvalue = 0; if (corrPvalue > corrPvalueThreshold) { // don't compute the interaction if the covariate expression is affected by theis SNP try { From 40da2e55896421571436fde8880121455eb583d7 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 16 Jul 2015 14:23:42 +0200 Subject: [PATCH 116/143] interactions --- 
.../eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 7ff4d483a..4db4ccb53 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -414,7 +414,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] //} } - if (chi2Sum > maxChi2) { + if (chi2Sum > maxChi2 && !datasetCovariates.probeNames[cov].startsWith("Comp") && !datasetCovariates.probeNames[cov].equals("LLS") && !datasetCovariates.probeNames[cov].equals("LLdeep") && !datasetCovariates.probeNames[cov].equals("RS") && !datasetCovariates.probeNames[cov].equals("CODAM")) { maxChi2 = chi2Sum; maxChi2Cov = datasetCovariates.probeNames[cov]; } From e83bff081658aba776299cf378900f0d460c3dd3 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 16 Jul 2015 14:24:10 +0200 Subject: [PATCH 117/143] tmp --- .../PerformInteractionAnalysisPermutationTask.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java index a29394c2d..fa9cd214f 100644 --- 
a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java @@ -47,7 +47,7 @@ public DoubleArrayIntegerObject call() throws Exception { for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { //double corrPvalue = correlateCovariateWithGenotype(snp); - double corrPvalue = 0; + double corrPvalue = 1; if (corrPvalue > corrPvalueThreshold) { // don't compute the interaction if the covariate expression is affected by theis SNP try { From 4b78c6f32bdeabfef2ac70aa359f4307a7527360 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 16 Jul 2015 17:00:52 +0200 Subject: [PATCH 118/143] Interactions --- ...ormInteractionAnalysisPermutationTask.java | 18 +++-- .../SkippedInteractionTracker.java | 40 ++++++++++++ .../SkippedInteractionWriter.java | 65 +++++++++++++++++++ .../TestEQTLDatasetForInteractions.java | 10 ++- 4 files changed, 125 insertions(+), 8 deletions(-) create mode 100644 eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/SkippedInteractionTracker.java create mode 100644 eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/SkippedInteractionWriter.java diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java index fa9cd214f..1a1de9ba3 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java +++ 
b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java @@ -25,14 +25,18 @@ public class PerformInteractionAnalysisPermutationTask implements Callable corrPvalueThreshold) { // don't compute the interaction if the covariate expression is affected by theis SNP try { @@ -79,15 +82,18 @@ public DoubleArrayIntegerObject call() throws Exception { } zScores[snp] = zScoreInteraction; } catch (SingularMatrixException e) { - zScores[snp] = Double.NaN; + zScores[snp] = 0; + skippedTracker.addSkipped(SkippedInteractionTracker.Reason.SINGULAR, datasetGenotypes.probeNames[snp]); } } else{ - System.out.println("Removing covariate because of eQTL effect! " + datasetCovariatesPCAForceNormal.probeNames[covToTest] + " : " + datasetGenotypes.probeNames[snp]); - zScores[snp] = Double.NaN; + //System.out.println("Removing covariate because of eQTL effect! " + datasetCovariatesPCAForceNormal.probeNames[covToTest] + " : " + datasetGenotypes.probeNames[snp]); + skippedTracker.addSkipped(SkippedInteractionTracker.Reason.SHARED_QTL, datasetGenotypes.probeNames[snp]); + zScores[snp] = 0; } } + skippedWriter.add(skippedTracker); return new DoubleArrayIntegerObject(zScores, covToTest); } diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/SkippedInteractionTracker.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/SkippedInteractionTracker.java new file mode 100644 index 000000000..6c55604df --- /dev/null +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/SkippedInteractionTracker.java @@ -0,0 +1,40 @@ +package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.EnumMap; + +/** + * + * @author Patrick Deelen + */ +public 
class SkippedInteractionTracker { + + public static enum Reason { + SINGULAR, SHARED_QTL + } + + private final String covariate; + EnumMap> skipped; + + public SkippedInteractionTracker(String covariate) { + this.covariate = covariate; + skipped = new EnumMap>(Reason.class); + for(Reason r : Reason.values()){ + skipped.put(r, new ArrayList()); + } + } + + public void addSkipped(Reason r, String qtl){ + skipped.get(r).add(qtl); + } + + public String getCovariate() { + return covariate; + } + + public EnumMap> getSkipped() { + return skipped; + } + +} diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/SkippedInteractionWriter.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/SkippedInteractionWriter.java new file mode 100644 index 000000000..abaa52563 --- /dev/null +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/SkippedInteractionWriter.java @@ -0,0 +1,65 @@ +package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; + +import au.com.bytecode.opencsv.CSVWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; + +/** + * + * @author Patrick Deelen + */ +public class SkippedInteractionWriter { + + private final CSVWriter writer; + private final String[] row = new String[5]; + private int c; + private StringBuilder tmp; + + public SkippedInteractionWriter(File skippedInteractionsFile) throws IOException { + writer = new CSVWriter(new FileWriter(skippedInteractionsFile), '\t', CSVWriter.NO_QUOTE_CHARACTER); + + c = 0; + row[c++] = "Covariate"; + row[c++] = "CountSingular"; + row[c++] = "CountSharedQtl"; + row[c++] = "SingularQtls"; + row[c++] = "SharedQtls"; + + writer.writeNext(row); + } + + public void close() throws IOException{ + writer.close(); + } + + synchronized void add(SkippedInteractionTracker 
skipped){ + + ArrayList singular = skipped.getSkipped().get(SkippedInteractionTracker.Reason.SINGULAR); + ArrayList sharedQtl = skipped.getSkipped().get(SkippedInteractionTracker.Reason.SHARED_QTL); + + c = 0; + row[c++] = skipped.getCovariate(); + row[c++] = String.valueOf(singular.size()); + row[c++] = String.valueOf(sharedQtl.size()); + + tmp = new StringBuilder(); + for(String qtl : singular){ + tmp.append(qtl); + tmp.append(';'); + } + row[c++] = tmp.toString(); + + tmp = new StringBuilder(); + for(String qtl : sharedQtl){ + tmp.append(qtl); + tmp.append(';'); + } + row[c++] = tmp.toString(); + + writer.writeNext(row); + + } + +} diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 32f0a536d..4595b3db6 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -351,9 +351,15 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] ExpressionDataset datasetZScores = new ExpressionDataset(datasetCovariates.nrProbes, datasetExpression.nrProbes); datasetZScores.probeNames = datasetCovariates.probeNames; - datasetZScores.sampleNames = datasetGenotypes.probeNames; + + datasetZScores.sampleNames = new String[datasetGenotypes.probeNames.length]; + for(int i = 0 ; i < datasetGenotypes.probeNames.length ; ++i){ + datasetZScores.sampleNames[i] = datasetGenotypes.probeNames[i] + datasetExpression.probeNames[i].substring(datasetExpression.probeNames[i].lastIndexOf('_')); + } + datasetZScores.recalculateHashMaps(); + SkippedInteractionWriter skippedWriter = new 
SkippedInteractionWriter(null); java.util.concurrent.ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); CompletionService pool = new ExecutorCompletionService(threadPool); @@ -361,7 +367,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] for (int cov = 0; cov < datasetCovariates.nrProbes; cov++) { double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[cov]); if (stdev > 0) { - PerformInteractionAnalysisPermutationTask task = new PerformInteractionAnalysisPermutationTask(datasetGenotypes, datasetExpression, datasetCovariates, datasetCovariatesPCAForceNormal, cov); + PerformInteractionAnalysisPermutationTask task = new PerformInteractionAnalysisPermutationTask(datasetGenotypes, datasetExpression, datasetCovariates, datasetCovariatesPCAForceNormal, cov, skippedWriter); pool.submit(task); nrTasks++; } From 410a88316296a87f830bac29bdda51b2b3eaa252 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Fri, 17 Jul 2015 09:36:25 +0200 Subject: [PATCH 119/143] Interactions --- .../TestEQTLDatasetForInteractions.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 4595b3db6..f55b9150b 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -359,7 +359,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] datasetZScores.recalculateHashMaps(); - SkippedInteractionWriter skippedWriter = new 
SkippedInteractionWriter(null); + SkippedInteractionWriter skippedWriter = new SkippedInteractionWriter(new File(outputDir + "/skippedInteractionsRound_" + covsToCorrect.length + ".txt")); java.util.concurrent.ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); CompletionService pool = new ExecutorCompletionService(threadPool); @@ -444,6 +444,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] System.out.println("Top covariate:\t" + maxChi2 + "\t" + maxChi2Cov); outputTopCovs.writeln("Top covariate:\t" + maxChi2 + "\t" + maxChi2Cov); outputTopCovs.flush(); + skippedWriter.close(); datasetZScores.save(outputDir + "/InteractionZScoresMatrix-" + covsToCorrect.length + "Covariates.txt"); return maxChi2Cov; From 5bd08710084835955db6cc3443f4addd41970032 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Tue, 21 Jul 2015 10:05:15 +0200 Subject: [PATCH 120/143] Interactions --- .../EQTLInteractionAnalyser.java | 34 ++++- .../ExpressionDataset.java | 20 +-- .../GeneAnnotation.java | 43 ++++++ .../InteractionPlotter.java | 12 +- .../TestEQTLDatasetForInteractions.java | 132 ++++++++++++++---- 5 files changed, 199 insertions(+), 42 deletions(-) create mode 100644 eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/GeneAnnotation.java diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index 4ac7ac1a1..1cf74d774 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -73,6 +73,12 @@ 
public class EQTLInteractionAnalyser { OptionBuilder.withLongOpt("chi2sumDiff"); OPTIONS.addOption(OptionBuilder.create("dif")); + OptionBuilder.withArgName("int"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Start round for chi2sumDiff option"); + OptionBuilder.withLongOpt("start"); + OPTIONS.addOption(OptionBuilder.create("s")); + OptionBuilder.withDescription("Preprocess the data"); OptionBuilder.withLongOpt("preprocess"); OPTIONS.addOption(OptionBuilder.create("p")); @@ -112,6 +118,13 @@ public class EQTLInteractionAnalyser { OptionBuilder.withDescription("Included samples"); OptionBuilder.withLongOpt("includedSamples"); OPTIONS.addOption(OptionBuilder.create("is")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Gene annotation file"); + OptionBuilder.withLongOpt("geneAnnotation"); + OPTIONS.addOption(OptionBuilder.create("ga")); + } public static void main(String[] args) throws IOException, Exception { @@ -123,11 +136,13 @@ public static void main(String[] args) throws IOException, Exception { final File snpsToSwapFile; int maxNumCovariatesToRegress = 20; final boolean interpret, chi2sumDiff, permute, preproces; + final int startRoundCompareChi2; final String[] covariates; final String[] covariates2; final String[] covariatesToTest; final File samplesToInculudeFile; + final File ensgAnnotationFile; try { final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); @@ -140,11 +155,21 @@ public static void main(String[] args) throws IOException, Exception { if (commandLine.hasOption('n')) { maxNumCovariatesToRegress = Integer.parseInt(commandLine.getOptionValue("n")); } + + interpret = commandLine.hasOption("t"); chi2sumDiff = commandLine.hasOption("dif"); permute = commandLine.hasOption("perm"); preproces = commandLine.hasOption("p"); + + if (commandLine.hasOption('s')) { + startRoundCompareChi2 = Integer.parseInt(commandLine.getOptionValue("s")); + } else if(chi2sumDiff){ + 
throw new Exception("Set -s"); + } else { + startRoundCompareChi2 = 0; + } if (commandLine.hasOption('a')) { annotationFile = commandLine.getOptionValue("a"); @@ -184,6 +209,13 @@ else if (commandLine.hasOption("c")){ } else { samplesToInculudeFile = null; } + + + if (commandLine.hasOption("ga")){ + ensgAnnotationFile = new File(commandLine.getOptionValue("ga")); + } else { + ensgAnnotationFile = null; + } } catch (ParseException ex) { System.err.println("Invalid command line arguments: "); @@ -203,7 +235,7 @@ else if (commandLine.hasOption("c")){ } else if (chi2sumDiff){ TestEQTLDatasetForInteractions interactor = new TestEQTLDatasetForInteractions(inputDir, outputDir); - interactor.findChi2SumDifferences(maxNumCovariatesToRegress); + interactor.findChi2SumDifferences(maxNumCovariatesToRegress, startRoundCompareChi2, ensgAnnotationFile); } else { new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile, permute, covariatesToTest, samplesToInculudeFile); diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/ExpressionDataset.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/ExpressionDataset.java index ccb212160..b446ebe19 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/ExpressionDataset.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/ExpressionDataset.java @@ -14,6 +14,7 @@ import java.awt.geom.*; import java.lang.Math; import javax.imageio.*; +import org.apache.commons.lang3.StringUtils; /** * @@ -36,11 +37,11 @@ public ExpressionDataset(String fileName) { if (fileName.endsWith(".binary")) { loadExpressionDataInBinaryFormat(fileName); } else { - loadExpressionData(fileName, "\t"); + loadExpressionData(fileName, 
'\t'); } } - public ExpressionDataset(String fileName, String delimiter) { + public ExpressionDataset(String fileName, char delimiter) { if (fileName.endsWith(".binary")) { loadExpressionDataInBinaryFormat(fileName); } else { @@ -48,7 +49,7 @@ public ExpressionDataset(String fileName, String delimiter) { } } - public ExpressionDataset(String fileName, String delimiter, HashMap hashProbesToInclude) { + public ExpressionDataset(String fileName, char delimiter, HashMap hashProbesToInclude) { this.hashProbesToInclude = hashProbesToInclude; if (fileName.endsWith(".binary")) { loadExpressionDataInBinaryFormat(fileName); @@ -57,7 +58,7 @@ public ExpressionDataset(String fileName, String delimiter, HashMap hashProbesTo } } - public ExpressionDataset(String fileName, String delimiter, HashMap hashProbesToInclude, HashMap hashSamplesToInclude) { + public ExpressionDataset(String fileName, char delimiter, HashMap hashProbesToInclude, HashMap hashSamplesToInclude) { this.hashProbesToInclude = hashProbesToInclude; this.hashSamplesToInclude = hashSamplesToInclude; if (fileName.endsWith(".binary")) { @@ -312,7 +313,7 @@ public void loadExpressionDataInBinaryFormat(String fileName) { System.out.println("Binary file:\t" + fileName + "\thas been loaded, nrProbes:\t" + nrProbes + "\tnrSamples:\t" + nrSamples); } - public void loadExpressionData(String fileName, String delimiter) { + public void loadExpressionData(String fileName, char delimiter) { this.fileName = fileName; boolean dataIsInTriTyperFormat = false; File file = new File(fileName); @@ -325,7 +326,7 @@ public void loadExpressionData(String fileName, String delimiter) { try { java.io.BufferedReader in = new java.io.BufferedReader(new java.io.FileReader(file)); String str = in.readLine(); - String[] data = str.split(delimiter); + String[] data = StringUtils.splitPreserveAllTokens(str, delimiter); if (data.length>2 && data[1].length() > 0 && data[1].equals("MultipleHits")) { dataIsInTriTyperFormat = true; sampleOffset = 9; @@ 
-367,7 +368,7 @@ public void loadExpressionData(String fileName, String delimiter) { nrProbes++; //if (nrProbes%1000==0) System.out.println(nrProbes); } else { - data = str.split(delimiter); + data = StringUtils.splitPreserveAllTokens(str, delimiter); if (hashProbesToInclude.containsKey(data[0])) { nrProbes++; //if (nrProbes%1000==0) System.out.println(nrProbes); @@ -386,12 +387,13 @@ public void loadExpressionData(String fileName, String delimiter) { String str = in.readLine(); nrProbes = 0; while ((str = in.readLine()) != null) { - String[] data = str.split(delimiter); + String[] data = StringUtils.splitPreserveAllTokens(str, delimiter); if (hashProbesToInclude==null || hashProbesToInclude.containsKey(data[0])) { probeNames[nrProbes] = new String(data[0].getBytes()); hashProbes.put(probeNames[nrProbes], nrProbes); + double[] row = rawData[nrProbes]; for (int s=0; s ensgAnnotations; + if(ensgAnnotationFile == null) { + ensgAnnotations = Collections.emptyMap(); + } else { + ensgAnnotations = readEnsgAnnotations(ensgAnnotationFile); + } + System.out.println("Interpreting the z-score matrix"); System.out.println("Preparing the data"); for (int nrCovsRemoved = numPrimaryCovsToCorrect; nrCovsRemoved < numPrimaryCovsToCorrect + maxNumRegressedCovariates; nrCovsRemoved++) { - ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt"); + + if(new File(inputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary.dat").exists()){ + System.out.println(""); + System.out.println("USING EXISTING BINARY FILE!!!!"); + System.out.println(""); + continue; + } + + ExpressionDataset dataset = new ExpressionDataset(inputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt"); dataset.save(dataset.fileName + ".binary"); } System.out.println("Comparing chi2sums"); + + double[] previousChi2 = null; + String[][] output = null; + boolean firstDataset = true; + String[] header = null; + 
double topCovChi2; + String topCov = "Technical"; + for (int nrCovsRemoved = numPrimaryCovsToCorrect; nrCovsRemoved < numPrimaryCovsToCorrect + maxNumRegressedCovariates; nrCovsRemoved++) { - ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); - ExpressionDataset dataset2 = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + (nrCovsRemoved + 1) + "Covariates.txt.binary"); - + ExpressionDataset dataset = new ExpressionDataset(inputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); + + if(firstDataset){ + previousChi2 = new double[dataset.nrProbes]; + output = new String[dataset.nrProbes][(maxNumRegressedCovariates * 2) + 5]; + for (int covariate = 0; covariate < dataset.nrProbes; covariate++) { + output[covariate][0] = dataset.probeNames[covariate]; + GeneAnnotation geneAnnotation = ensgAnnotations.get(dataset.probeNames[covariate]); + output[covariate][1] = geneAnnotation.getHuho(); + output[covariate][2] = geneAnnotation.getChr(); + output[covariate][3] = String.valueOf(geneAnnotation.getStart()); + output[covariate][4] = String.valueOf(geneAnnotation.getEnd()); + } + header = new String[(maxNumRegressedCovariates * 2) + 1]; + header[0] = "Covariate gene"; + header[1] = "Gene symbol"; + header[2] = "Chr"; + header[3] = "Start"; + header[4] = "End"; + } + + int outputColOffset = 5 + (nrCovsRemoved - numPrimaryCovsToCorrect) * 2; + + header[outputColOffset] = "Chi2sum"; + header[1 + outputColOffset] = topCov + "_removed"; + + topCovChi2 = 0; + for (int covariate = 0; covariate < dataset.nrProbes; covariate++) { - double chi2Sum1 = 0, chi2Sum2 = 0; + double chi2Sum = 0; + double[] covariateData = dataset.rawData[covariate]; for (int gene = 0; gene < dataset.nrSamples; gene++) { - double z_before = dataset.rawData[covariate][gene]; - chi2Sum1 += z_before * z_before; - double z_after = dataset2.rawData[covariate][gene]; - chi2Sum2 += z_after * 
z_after; - + chi2Sum += covariateData[gene] * covariateData[gene]; + } + + if(chi2Sum > topCovChi2){ + topCovChi2 = chi2Sum; + topCov = dataset.probeNames[covariate]; } - System.out.println(nrCovsRemoved + "\t" + dataset.probeNames[covariate] + "\t" + chi2Sum1 + "\t" + chi2Sum2 + "\t" + (chi2Sum1 - chi2Sum2)); + + output[covariate][outputColOffset] = String.valueOf(chi2Sum); + output[covariate][1 + outputColOffset] = firstDataset ? "0" : String.valueOf(previousChi2[covariate] - chi2Sum); + previousChi2[covariate] = chi2Sum; + + + //System.out.println(nrCovsRemoved + "\t" + dataset.probeNames[covariate] + "\t" + chi2Sum1 + "\t" + chi2Sum2 + "\t" + (chi2Sum1 - chi2Sum2)); } + + firstDataset = false; + + } + + CSVWriter writer = new CSVWriter(new FileWriter(outputDir + "/chi2diff.txt"), '\t', CSVWriter.NO_QUOTE_CHARACTER); + writer.writeNext(header); + for(String[] row : output){ + writer.writeNext(row); } + writer.close(); + } public void preprocessData() { @@ -246,11 +314,11 @@ public void preprocessData() { System.out.println("EXCLUDED LINES: " + countExcludedLines); - ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt", "\t", hashEQTLs, hashGenotypes); + ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt", '\t', hashEQTLs, hashGenotypes); datasetGenotypes.probeNames = snps.toArray(new String[snps.size()]); datasetGenotypes.recalculateHashMaps(); - ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt", "\t", hashEQTLs, hashExpression); + ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt", '\t', hashEQTLs, hashExpression); datasetGenotypes.save(datasetGenotypes.fileName + ".Genotypes.binary"); datasetExpression.save(datasetGenotypes.fileName + ".Expression.binary"); @@ -296,9 +364,10 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] covariatesToLoad = null; } - ExpressionDataset 
datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", "\t", null, hashSamples); - ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt.Expression.binary", "\t", null, hashSamples); - ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", covariatesToLoad, hashSamples); + ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", '\t', null, hashSamples); + ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt.Expression.binary", '\t', null, hashSamples); + ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", '\t', covariatesToLoad, hashSamples); + org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression regression = new org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression(); int nrSamples = datasetGenotypes.nrSamples; @@ -308,7 +377,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] correctDosageDirectionForQtl(snpsToSwapFile, datasetGenotypes, datasetExpression); - ExpressionDataset datasetCovariatesPCAForceNormal = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", covariatesToLoad, hashSamples); + ExpressionDataset datasetCovariatesPCAForceNormal = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", '\t', covariatesToLoad, hashSamples); correctCovariateDataPCA(covsToCorrect2,covsToCorrect,datasetGenotypes,datasetCovariatesPCAForceNormal); @@ -813,14 +882,14 @@ private HashMap excludeOutliers(HashMap hashSamples) { HashMap hashCovariates = new HashMap(); hashCovariates.put("MEDIAN_5PRIME_BIAS", null); hashCovariates.put("MEDIAN_3PRIME_BIAS", null); - ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + 
"/covariateTableLude.txt.Covariates.binary", "\t", hashCovariates, null); + ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", '\t', hashCovariates, null); hashSamples = new HashMap(); for (int s = 0; s < datasetCovariates.nrSamples; s++) { if (datasetCovariates.rawData[0][s] != 0) { hashSamples.put(datasetCovariates.sampleNames[s], null); } } - datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", "\t", hashCovariates, hashSamples); + datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", '\t', hashCovariates, hashSamples); HashMap hashSamplesToExclude = new HashMap(); if (1 == 1) { int index = ((Integer) datasetCovariates.hashProbes.get("MEDIAN_5PRIME_BIAS")).intValue(); @@ -1130,7 +1199,7 @@ private void saveCorrectedCovariates(ExpressionDataset datasetCovariates) { hashProbesToFilter.put(datasetCovariates.probeNames[p], null); } } - ExpressionDataset datasetCovariatesCorrected = new ExpressionDataset(inputDir + "/CovariatesCorrected.txt", "\t", hashProbesToFilter, null); + ExpressionDataset datasetCovariatesCorrected = new ExpressionDataset(inputDir + "/CovariatesCorrected.txt", '\t', hashProbesToFilter, null); datasetCovariatesCorrected.transposeDataset(); datasetCovariatesCorrected.save(inputDir + "/CovariatesCorrected.txt"); System.exit(0); @@ -1258,4 +1327,15 @@ private ExpressionDataset permuteGenotypeData(ExpressionDataset datasetGenotypes } return datasetGenotypes2; } + + private HashMap readEnsgAnnotations(File ensgAnnotationFile) throws FileNotFoundException, IOException { + final HashMap ensgAnnotations = new HashMap(); + CSVReader refReader = new CSVReader(new FileReader(ensgAnnotationFile), '\t', '\0', '\0'); + refReader.readNext(); + String[] nextLine; + while ((nextLine = refReader.readNext()) != null) { + ensgAnnotations.put(nextLine[0], new GeneAnnotation(nextLine[0], nextLine[1], nextLine[2], 
Integer.valueOf(nextLine[3]), Integer.valueOf(nextLine[4]))); + } + return ensgAnnotations; + } } \ No newline at end of file From fa6c7e12145adb342a70f3c599add7b59ef9048b Mon Sep 17 00:00:00 2001 From: Dasha Zhernakova Date: Tue, 21 Jul 2015 13:23:49 +0300 Subject: [PATCH 121/143] added an option for the number of threads --- .../EQTLInteractionAnalyser.java | 15 +++++++++++++-- .../TestEQTLDatasetForInteractions.java | 10 +++++----- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index 1cf74d774..7a530959e 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -124,7 +124,12 @@ public class EQTLInteractionAnalyser { OptionBuilder.withDescription("Gene annotation file"); OptionBuilder.withLongOpt("geneAnnotation"); OPTIONS.addOption(OptionBuilder.create("ga")); - + + OptionBuilder.withArgName("int"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Number of threads"); + OptionBuilder.withLongOpt("threads"); + OPTIONS.addOption(OptionBuilder.create("nt")); } public static void main(String[] args) throws IOException, Exception { @@ -135,6 +140,7 @@ public static void main(String[] args) throws IOException, Exception { String inputDir, outputDir, eqtlFile = null, annotationFile = null; final File snpsToSwapFile; int maxNumCovariatesToRegress = 20; + int numThreads; final boolean interpret, chi2sumDiff, permute, preproces; final int startRoundCompareChi2; @@ -216,6 +222,11 @@ else if (commandLine.hasOption("c")){ } else { ensgAnnotationFile = 
null; } + if (commandLine.hasOption("nt")) { + numThreads = Integer.parseInt(commandLine.getOptionValue("nt")); + } else { + numThreads = Runtime.getRuntime().availableProcessors(); + } } catch (ParseException ex) { System.err.println("Invalid command line arguments: "); @@ -238,7 +249,7 @@ else if (chi2sumDiff){ interactor.findChi2SumDifferences(maxNumCovariatesToRegress, startRoundCompareChi2, ensgAnnotationFile); } else { - new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile, permute, covariatesToTest, samplesToInculudeFile); + new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile, permute, covariatesToTest, samplesToInculudeFile, numThreads); } } diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 38c23ad68..38445d584 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -62,7 +62,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws //preprocessData(); } - public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File snpsToSwapFile, boolean permute, String[] covariatesToTest, File samplesToInculudeFile) throws IOException, Exception { + public TestEQTLDatasetForInteractions(String inputDir, String outputDir, 
String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File snpsToSwapFile, boolean permute, String[] covariatesToTest, File samplesToInculudeFile, int numThreads) throws IOException, Exception { System.out.println("Input dir: " + inputDir); System.out.println("Output dir: " + outputDir); @@ -106,7 +106,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String String[] covsToCorrect = primaryCovsToCorrect; int cnt = 0; while (cnt < maxNumTopCovs) { - String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, eqtlGenes, outputTopCovs, snpsToSwapFile, permute, qtlProbeSnpMultiMap, covariatesToTest, samplesToInculudeFile); + String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, eqtlGenes, outputTopCovs, snpsToSwapFile, permute, qtlProbeSnpMultiMap, covariatesToTest, samplesToInculudeFile, numThreads); String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; for (int c = 0; c < covsToCorrect.length; c++) { covsToCorrectNew[c] = covsToCorrect[c]; @@ -328,7 +328,7 @@ public void preprocessData() { } - public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, HashMap hashEQTLs, TextFile outputTopCovs, File snpsToSwapFile, boolean permute, HashMultimap qtlProbeSnpMultiMap, String[] covariatesToTest, File samplesToInculudeFile) throws IOException, Exception { + public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, HashMap hashEQTLs, TextFile outputTopCovs, File snpsToSwapFile, boolean permute, HashMultimap qtlProbeSnpMultiMap, String[] covariatesToTest, File samplesToInculudeFile, int numThreads) throws IOException, Exception { HashMap hashSamples; if (samplesToInculudeFile != null) { @@ -378,7 +378,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] ExpressionDataset datasetCovariatesPCAForceNormal = new 
ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", '\t', covariatesToLoad, hashSamples); - correctCovariateDataPCA(covsToCorrect2,covsToCorrect,datasetGenotypes,datasetCovariatesPCAForceNormal); + correctCovariateDataPCA(covsToCorrect2, covsToCorrect, datasetGenotypes, datasetCovariatesPCAForceNormal); if (1 == 1) { @@ -430,7 +430,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] SkippedInteractionWriter skippedWriter = new SkippedInteractionWriter(new File(outputDir + "/skippedInteractionsRound_" + covsToCorrect.length + ".txt")); - java.util.concurrent.ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + java.util.concurrent.ExecutorService threadPool = Executors.newFixedThreadPool(numThreads); CompletionService pool = new ExecutorCompletionService(threadPool); int nrTasks = 0; for (int cov = 0; cov < datasetCovariates.nrProbes; cov++) { From 52f3cad3d783d145bd558df44bbe5086fb517d44 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 22 Jul 2015 10:48:45 +0200 Subject: [PATCH 122/143] Interactions --- .../TestEQTLDatasetForInteractions.java | 114 +++++++++++------- 1 file changed, 72 insertions(+), 42 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 38c23ad68..990bee081 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -42,6 +42,7 @@ import umcg.genetica.io.text.TextFile; import umcg.genetica.io.trityper.EQTL; import 
umcg.genetica.io.trityper.QTLTextFile; +import umcg.genetica.math.matrix2.DoubleMatrixDataset; /** * @@ -177,99 +178,128 @@ public void interpretInteractionZScoreMatrix(int maxNumRegressedCovariates) { } public void findChi2SumDifferences(int maxNumRegressedCovariates, int numPrimaryCovsToCorrect, File ensgAnnotationFile) throws IOException { - + Map ensgAnnotations; - if(ensgAnnotationFile == null) { - ensgAnnotations = Collections.emptyMap(); + if (ensgAnnotationFile == null) { + ensgAnnotations = Collections.emptyMap(); } else { - ensgAnnotations = readEnsgAnnotations(ensgAnnotationFile); + ensgAnnotations = readEnsgAnnotations(ensgAnnotationFile); } - + + double[][] topCovZscores = null; + String[] topCovs = new String[maxNumRegressedCovariates]; + String[] genes = null; + System.out.println("Interpreting the z-score matrix"); System.out.println("Preparing the data"); for (int nrCovsRemoved = numPrimaryCovsToCorrect; nrCovsRemoved < numPrimaryCovsToCorrect + maxNumRegressedCovariates; nrCovsRemoved++) { - - if(new File(inputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary.dat").exists()){ + + if (new File(inputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary.dat").exists()) { System.out.println(""); System.out.println("USING EXISTING BINARY FILE!!!!"); System.out.println(""); continue; } - + ExpressionDataset dataset = new ExpressionDataset(inputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt"); dataset.save(dataset.fileName + ".binary"); } System.out.println("Comparing chi2sums"); - + double[] previousChi2 = null; String[][] output = null; boolean firstDataset = true; String[] header = null; double topCovChi2; String topCov = "Technical"; - + int topCovI = -1; + for (int nrCovsRemoved = numPrimaryCovsToCorrect; nrCovsRemoved < numPrimaryCovsToCorrect + maxNumRegressedCovariates; nrCovsRemoved++) { ExpressionDataset dataset = new ExpressionDataset(inputDir + "/InteractionZScoresMatrix-" 
+ nrCovsRemoved + "Covariates.txt.binary"); - - if(firstDataset){ + + if (firstDataset) { previousChi2 = new double[dataset.nrProbes]; output = new String[dataset.nrProbes][(maxNumRegressedCovariates * 2) + 5]; for (int covariate = 0; covariate < dataset.nrProbes; covariate++) { output[covariate][0] = dataset.probeNames[covariate]; GeneAnnotation geneAnnotation = ensgAnnotations.get(dataset.probeNames[covariate]); - output[covariate][1] = geneAnnotation.getHuho(); - output[covariate][2] = geneAnnotation.getChr(); - output[covariate][3] = String.valueOf(geneAnnotation.getStart()); - output[covariate][4] = String.valueOf(geneAnnotation.getEnd()); + if (geneAnnotation == null) { + output[covariate][1] = ""; + output[covariate][2] = ""; + output[covariate][3] = ""; + output[covariate][4] = ""; + } else { + output[covariate][1] = geneAnnotation.getHuho(); + output[covariate][2] = geneAnnotation.getChr(); + output[covariate][3] = String.valueOf(geneAnnotation.getStart()); + output[covariate][4] = String.valueOf(geneAnnotation.getEnd()); + } + } - header = new String[(maxNumRegressedCovariates * 2) + 1]; + header = new String[(maxNumRegressedCovariates * 2) + 5]; header[0] = "Covariate gene"; header[1] = "Gene symbol"; header[2] = "Chr"; header[3] = "Start"; header[4] = "End"; + + genes = dataset.sampleNames; + topCovZscores = new double[genes.length][maxNumRegressedCovariates]; } - + int outputColOffset = 5 + (nrCovsRemoved - numPrimaryCovsToCorrect) * 2; - - header[outputColOffset] = "Chi2sum"; - header[1 + outputColOffset] = topCov + "_removed"; - + + header[outputColOffset] = topCov + "_removed_chi2sum"; + header[1 + outputColOffset] = "Difference"; + topCovChi2 = 0; - + for (int covariate = 0; covariate < dataset.nrProbes; covariate++) { double chi2Sum = 0; double[] covariateData = dataset.rawData[covariate]; for (int gene = 0; gene < dataset.nrSamples; gene++) { - chi2Sum += covariateData[gene] * covariateData[gene]; + chi2Sum += covariateData[gene] * 
covariateData[gene]; } - - if(chi2Sum > topCovChi2){ + + if (chi2Sum > topCovChi2) { topCovChi2 = chi2Sum; topCov = dataset.probeNames[covariate]; + topCovI = covariate; } - + output[covariate][outputColOffset] = String.valueOf(chi2Sum); output[covariate][1 + outputColOffset] = firstDataset ? "0" : String.valueOf(previousChi2[covariate] - chi2Sum); previousChi2[covariate] = chi2Sum; - - + //System.out.println(nrCovsRemoved + "\t" + dataset.probeNames[covariate] + "\t" + chi2Sum1 + "\t" + chi2Sum2 + "\t" + (chi2Sum1 - chi2Sum2)); } - + + topCovs[nrCovsRemoved - numPrimaryCovsToCorrect] = topCov; + double[] covariateData = dataset.rawData[topCovI]; + for (int gene = 0; gene < dataset.nrSamples; gene++) { + topCovZscores[gene][nrCovsRemoved - numPrimaryCovsToCorrect] = covariateData[gene]; + } + firstDataset = false; - + } - + CSVWriter writer = new CSVWriter(new FileWriter(outputDir + "/chi2diff.txt"), '\t', CSVWriter.NO_QUOTE_CHARACTER); writer.writeNext(header); - for(String[] row : output){ + for (String[] row : output) { writer.writeNext(row); } writer.close(); - + + ExpressionDataset topCovZDataset = new ExpressionDataset(genes.length, topCovs.length); + topCovZDataset.rawData = topCovZscores; + topCovZDataset.probeNames = genes; + topCovZDataset.sampleNames = topCovs; + topCovZDataset.save(outputDir + "/topCovZ.txt"); + + } public void preprocessData() { @@ -340,7 +370,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] hashSamples.put(line + "_exp", null); hashSamples.put(line + "_dosage", null); } - } else{ + } else { hashSamples = null; } @@ -367,7 +397,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", '\t', null, hashSamples); ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt.Expression.binary", '\t', null, hashSamples); ExpressionDataset 
datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", '\t', covariatesToLoad, hashSamples); - + org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression regression = new org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression(); int nrSamples = datasetGenotypes.nrSamples; @@ -378,7 +408,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] ExpressionDataset datasetCovariatesPCAForceNormal = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", '\t', covariatesToLoad, hashSamples); - correctCovariateDataPCA(covsToCorrect2,covsToCorrect,datasetGenotypes,datasetCovariatesPCAForceNormal); + correctCovariateDataPCA(covsToCorrect2, covsToCorrect, datasetGenotypes, datasetCovariatesPCAForceNormal); if (1 == 1) { @@ -420,12 +450,12 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] ExpressionDataset datasetZScores = new ExpressionDataset(datasetCovariates.nrProbes, datasetExpression.nrProbes); datasetZScores.probeNames = datasetCovariates.probeNames; - + datasetZScores.sampleNames = new String[datasetGenotypes.probeNames.length]; - for(int i = 0 ; i < datasetGenotypes.probeNames.length ; ++i){ + for (int i = 0; i < datasetGenotypes.probeNames.length; ++i) { datasetZScores.sampleNames[i] = datasetGenotypes.probeNames[i] + datasetExpression.probeNames[i].substring(datasetExpression.probeNames[i].lastIndexOf('_')); - } - + } + datasetZScores.recalculateHashMaps(); SkippedInteractionWriter skippedWriter = new SkippedInteractionWriter(new File(outputDir + "/skippedInteractionsRound_" + covsToCorrect.length + ".txt")); @@ -1297,7 +1327,7 @@ private void forceNormalExpressionData(ExpressionDataset datasetExpression) thro System.out.println("Expression data now force normal"); } - private ExpressionDataset permuteGenotypeData(ExpressionDataset datasetGenotypes){ + private ExpressionDataset permuteGenotypeData(ExpressionDataset 
datasetGenotypes) { System.out.println("WARNING: PERMUTING GENOTYPE DATA!!!!"); String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; int[] permSampleIDs = new int[datasetGenotypes.nrSamples]; From 266e4e9e74ca4f62664aace75857503c5c59bedc Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 22 Jul 2015 11:53:28 +0200 Subject: [PATCH 123/143] output --- .../eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java | 1 + 1 file changed, 1 insertion(+) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 990bee081..ddeb3d832 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -71,6 +71,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String System.out.println("Maximum number of covariates to regress out: " + maxNumTopCovs); System.out.println("Covariates to correct for with interaction: " + Arrays.toString(covariatesToCorrect)); System.out.println("Covariates to correct for without interaction: " + Arrays.toString(covariatesToCorrect2)); + System.out.println("Samples to include file: " + samplesToInculudeFile.getAbsolutePath()); this.inputDir = inputDir; this.outputDir = outputDir; From c84003bfb387c15a6d4a1b39ebac34687604ec21 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 22 Jul 2015 11:45:37 +0200 Subject: [PATCH 124/143] printing --- .../eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index ddeb3d832..73ff1386f 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -499,7 +499,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] maxChi2 = chi2Sum; maxChi2Cov = datasetCovariates.probeNames[cov]; } - System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); + //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); if ((task + 1) % 512 == 0) { System.out.println(task + 1 + " tasks processed"); } From 5a73ba51d8c306ce41e3a844356b915bbbe83243 Mon Sep 17 00:00:00 2001 From: Dasha Zhernakova Date: Wed, 22 Jul 2015 16:08:59 +0300 Subject: [PATCH 125/143] fixed permutations --- .../EQTLInteractionAnalyser.java | 43 ++++++-- .../TestEQTLDatasetForInteractions.java | 99 +++++++++---------- 2 files changed, 78 insertions(+), 64 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index 7a530959e..7f55d72c6 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ 
b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -6,14 +6,15 @@ package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; -import java.io.File; +import java.io.*; + import org.apache.commons.cli.*; import umcg.genetica.io.text.TextFile; -import java.io.IOException; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Date; +import java.util.HashMap; /** * @@ -94,8 +95,14 @@ public class EQTLInteractionAnalyser { OptionBuilder.withDescription("Covariates to correct for without interaction term before running the interaction analysis"); OptionBuilder.withLongOpt("cov2"); OPTIONS.addOption(OptionBuilder.create("c2")); - - OptionBuilder.withArgName("strings"); + + OptionBuilder.withArgName("strings"); + OptionBuilder.hasArgs(); + OptionBuilder.withDescription("Covariates to correct for without interaction term before running the interaction analysis"); + OptionBuilder.withLongOpt("cohorts"); + OPTIONS.addOption(OptionBuilder.create("ch")); + + OptionBuilder.withArgName("strings"); OptionBuilder.hasArgs(); OptionBuilder.withDescription("Covariates to to test in interaction analysis. 
Optional, all are tested if not used"); OptionBuilder.withLongOpt("covTest"); @@ -143,11 +150,13 @@ public static void main(String[] args) throws IOException, Exception { int numThreads; final boolean interpret, chi2sumDiff, permute, preproces; final int startRoundCompareChi2; - + + HashMap hashSamples; + final String[] covariates; final String[] covariates2; + final String[] cohorts; final String[] covariatesToTest; - final File samplesToInculudeFile; final File ensgAnnotationFile; try { final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); @@ -197,7 +206,13 @@ else if (commandLine.hasOption("c")){ } else { covariates2 = new String[0]; } - + + if (commandLine.hasOption("ch")){ + cohorts = commandLine.getOptionValues("ch"); + } else { + cohorts = null; + } + if (commandLine.hasOption("ct")){ covariatesToTest = commandLine.getOptionValues("ct"); } else { @@ -211,9 +226,17 @@ else if (commandLine.hasOption("c")){ } if (commandLine.hasOption("is")){ - samplesToInculudeFile = new File(commandLine.getOptionValue("is")); + File samplesToIncludeFile = new File(commandLine.getOptionValue("is")); + hashSamples = new HashMap(); + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(samplesToIncludeFile), "UTF-8")); + String line; + while ((line = reader.readLine()) != null) { + hashSamples.put(line, null); + hashSamples.put(line + "_exp", null); + hashSamples.put(line + "_dosage", null); + } } else { - samplesToInculudeFile = null; + hashSamples = null; } @@ -249,7 +272,7 @@ else if (chi2sumDiff){ interactor.findChi2SumDifferences(maxNumCovariatesToRegress, startRoundCompareChi2, ensgAnnotationFile); } else { - new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile, permute, covariatesToTest, samplesToInculudeFile, numThreads); + new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, 
annotationFile, covariates, covariates2, snpsToSwapFile, permute, covariatesToTest, hashSamples, numThreads, cohorts); } } diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 38445d584..b6eb489e6 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -53,6 +53,7 @@ public class TestEQTLDatasetForInteractions { String outputDir = null; HashMap> geneDistanceMap = null; String[] primaryCovsToCorrect; + ExpressionDataset datasetGenotypes; public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws IOException { @@ -62,7 +63,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws //preprocessData(); } - public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File snpsToSwapFile, boolean permute, String[] covariatesToTest, File samplesToInculudeFile, int numThreads) throws IOException, Exception { + public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File snpsToSwapFile, boolean permute, String[] covariatesToTest, HashMap hashSamples, int numThreads, String[] cohorts) throws IOException, Exception { System.out.println("Input dir: " + inputDir); System.out.println("Output dir: " + outputDir); @@ -78,6 +79,8 @@ public 
TestEQTLDatasetForInteractions(String inputDir, String outputDir, String Gpio.createDir(outputDir); } + initGenotypes(permute, hashSamples, cohorts); + HashMap eqtlGenes = getEqtls(eQTLfileName); HashMultimap qtlProbeSnpMultiMap = HashMultimap.create(); @@ -106,7 +109,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String String[] covsToCorrect = primaryCovsToCorrect; int cnt = 0; while (cnt < maxNumTopCovs) { - String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, eqtlGenes, outputTopCovs, snpsToSwapFile, permute, qtlProbeSnpMultiMap, covariatesToTest, samplesToInculudeFile, numThreads); + String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, eqtlGenes, outputTopCovs, snpsToSwapFile, permute, qtlProbeSnpMultiMap, covariatesToTest, hashSamples, numThreads); String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; for (int c = 0; c < covsToCorrect.length; c++) { covsToCorrectNew[c] = covsToCorrect[c]; @@ -118,6 +121,45 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String outputTopCovs.close(); } + private void initGenotypes(boolean permute, HashMap hashSamples, String[] cohorts){ + datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", '\t', null, hashSamples); + + if (permute){ + System.out.println("WARNING: PERMUTING GENOTYPE DATA!!!!"); + if (cohorts == null) { + cohorts = new String[] {"LLDeep", "LLS", "RS", "CODAM"}; + } + int[] permSampleIDs = new int[datasetGenotypes.nrSamples]; + for (int p = 0; p < cohorts.length; p++) { + Vector vecSamples = new Vector(); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { + vecSamples.add(s); + } + } + int nrSamplesThisCohort = vecSamples.size(); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { + if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { + int randomSample = ((Integer) 
vecSamples.remove((int) ((double) vecSamples.size() * Math.random()))).intValue(); + permSampleIDs[s] = randomSample; + } + } + } + + ExpressionDataset datasetGenotypes2 = new ExpressionDataset(datasetGenotypes.nrProbes, datasetGenotypes.nrSamples); + datasetGenotypes2.probeNames = datasetGenotypes.probeNames; + datasetGenotypes2.sampleNames = datasetGenotypes.sampleNames; + datasetGenotypes2.recalculateHashMaps(); + for (int p = 0; p < datasetGenotypes2.nrProbes; p++) { + for (int s = 0; s < datasetGenotypes2.nrSamples; s++) { + datasetGenotypes2.rawData[p][s] = datasetGenotypes.rawData[p][permSampleIDs[s]]; + } + } + datasetGenotypes = datasetGenotypes2; + } + + } + /** * Extracts eQTL gene names * @@ -328,21 +370,7 @@ public void preprocessData() { } - public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, HashMap hashEQTLs, TextFile outputTopCovs, File snpsToSwapFile, boolean permute, HashMultimap qtlProbeSnpMultiMap, String[] covariatesToTest, File samplesToInculudeFile, int numThreads) throws IOException, Exception { - - HashMap hashSamples; - if (samplesToInculudeFile != null) { - hashSamples = new HashMap(); - BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(samplesToInculudeFile), "UTF-8")); - String line; - while ((line = reader.readLine()) != null) { - hashSamples.put(line, null); - hashSamples.put(line + "_exp", null); - hashSamples.put(line + "_dosage", null); - } - } else{ - hashSamples = null; - } + public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, HashMap hashEQTLs, TextFile outputTopCovs, File snpsToSwapFile, boolean permute, HashMultimap qtlProbeSnpMultiMap, String[] covariatesToTest, HashMap hashSamples, int numThreads) throws IOException, Exception { //hashSamples = excludeOutliers(hashSamples); @@ -364,7 +392,6 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] covariatesToLoad = null; } - 
ExpressionDataset datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", '\t', null, hashSamples); ExpressionDataset datasetExpression = new ExpressionDataset(inputDir + "/bigTableLude.txt.Expression.binary", '\t', null, hashSamples); ExpressionDataset datasetCovariates = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", '\t', covariatesToLoad, hashSamples); @@ -376,7 +403,6 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] correctDosageDirectionForQtl(snpsToSwapFile, datasetGenotypes, datasetExpression); - ExpressionDataset datasetCovariatesPCAForceNormal = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", '\t', covariatesToLoad, hashSamples); correctCovariateDataPCA(covsToCorrect2, covsToCorrect, datasetGenotypes, datasetCovariatesPCAForceNormal); @@ -409,10 +435,6 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] forceNormalExpressionData(datasetExpression); - if (permute) { - datasetGenotypes = permuteGenotypeData(datasetGenotypes); - } - if (1 == 1) { @@ -1297,37 +1319,6 @@ private void forceNormalExpressionData(ExpressionDataset datasetExpression) thro System.out.println("Expression data now force normal"); } - private ExpressionDataset permuteGenotypeData(ExpressionDataset datasetGenotypes){ - System.out.println("WARNING: PERMUTING GENOTYPE DATA!!!!"); - String[] cohorts = {"LLDeep", "LLS", "RS", "CODAM"}; - int[] permSampleIDs = new int[datasetGenotypes.nrSamples]; - for (int p = 0; p < cohorts.length; p++) { - Vector vecSamples = new Vector(); - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { - vecSamples.add(s); - } - } - int nrSamplesThisCohort = vecSamples.size(); - for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { - int randomSample = ((Integer) 
vecSamples.remove((int) ((double) vecSamples.size() * Math.random()))).intValue(); - permSampleIDs[s] = randomSample; - } - } - } - ExpressionDataset datasetGenotypes2 = new ExpressionDataset(datasetGenotypes.nrProbes, datasetGenotypes.nrSamples); - datasetGenotypes2.probeNames = datasetGenotypes.probeNames; - datasetGenotypes2.sampleNames = datasetGenotypes.sampleNames; - datasetGenotypes2.recalculateHashMaps(); - for (int p = 0; p < datasetGenotypes2.nrProbes; p++) { - for (int s = 0; s < datasetGenotypes2.nrSamples; s++) { - datasetGenotypes2.rawData[p][s] = datasetGenotypes.rawData[p][permSampleIDs[s]]; - } - } - return datasetGenotypes2; - } - private HashMap readEnsgAnnotations(File ensgAnnotationFile) throws FileNotFoundException, IOException { final HashMap ensgAnnotations = new HashMap(); CSVReader refReader = new CSVReader(new FileReader(ensgAnnotationFile), '\t', '\0', '\0'); From 99db6870e8d3b8dbb8f608fcdc4d377544185218 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 22 Jul 2015 21:20:03 +0200 Subject: [PATCH 126/143] Possible jenkins fix --- genetica-libraries/pom.xml | 226 +++++++++--------- .../umcg/genetica/graphics/ForestPlot.java | 16 +- .../java/umcg/genetica/graphics/Heatmap.java | 32 +-- .../umcg/genetica/graphics/ScatterPlot.java | 16 +- .../umcg/genetica/graphics/ViolinBoxPlot.java | 16 +- 5 files changed, 153 insertions(+), 153 deletions(-) diff --git a/genetica-libraries/pom.xml b/genetica-libraries/pom.xml index e2ae6e6c0..0525d9a67 100644 --- a/genetica-libraries/pom.xml +++ b/genetica-libraries/pom.xml @@ -1,121 +1,121 @@ - - nl.systemsgenetics - systemsgenetics - 1.0.2-SNAPSHOT - - genetica-libraries - 1.0.7-SNAPSHOT - jar - 4.0.0 - - - - org.apache.maven.plugins - maven-compiler-plugin - 2.3.2 - - 1.7 - 1.7 - - - - - - - commons-primitives - commons-primitives - 1.0 - - - nl.systemsgenetics - Genotype-IO - 1.0.1 - - - commons-collections - commons-collections - 3.2.1 - + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd"> + + nl.systemsgenetics + systemsgenetics + 1.0.2-SNAPSHOT + + genetica-libraries + 1.0.7-SNAPSHOT + jar + 4.0.0 + + + + org.apache.maven.plugins + maven-compiler-plugin + 2.3.2 + + 1.7 + 1.7 + + + + + + + commons-primitives + commons-primitives + 1.0 + + + nl.systemsgenetics + Genotype-IO + 1.0.1 + + + commons-collections + commons-collections + 3.2.1 + log4j log4j 1.2.17 - - commons-configuration - commons-configuration - 1.6 - - - commons-lang - commons-lang - 2.5 - - - commons-logging - commons-logging - 1.1.1 - - - org.apache.commons - commons-math3 - 3.2 - - - net.sourceforge.parallelcolt - parallelcolt - 0.10.0 - - - net.sf.jsci - jsci - 1.2 - - - com.google.code.gson - gson - 2.1 - - - com.lowagie - itext - 4.2.1 - - - gov.nist.math - jama - 1.0.2 - - - ca.umontreal.iro - ssj - 2.5 - - - colt - colt - - - - - commons-jxpath - commons-jxpath - 1.3 - - - org.testng - testng - 6.5.2 - test - - - net.sf.trove4j - trove4j - 3.0.3 - jar - - + + commons-configuration + commons-configuration + 1.6 + + + commons-lang + commons-lang + 2.5 + + + commons-logging + commons-logging + 1.1.1 + + + org.apache.commons + commons-math3 + 3.2 + + + net.sourceforge.parallelcolt + parallelcolt + 0.10.0 + + + net.sf.jsci + jsci + 1.2 + + + com.google.code.gson + gson + 2.1 + + + com.itextpdf + itextpdf + 5.5.6 + + + gov.nist.math + jama + 1.0.2 + + + ca.umontreal.iro + ssj + 2.5 + + + colt + colt + + + + + commons-jxpath + commons-jxpath + 1.3 + + + org.testng + testng + 6.5.2 + test + + + net.sf.trove4j + trove4j + 3.0.3 + jar + + \ No newline at end of file diff --git a/genetica-libraries/src/main/java/umcg/genetica/graphics/ForestPlot.java b/genetica-libraries/src/main/java/umcg/genetica/graphics/ForestPlot.java index 947a674d1..e5e0784b0 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/graphics/ForestPlot.java +++ b/genetica-libraries/src/main/java/umcg/genetica/graphics/ForestPlot.java @@ -4,7 +4,7 @@ */ package 
umcg.genetica.graphics; -import com.lowagie.text.DocumentException; +import com.itextpdf.text.DocumentException; import java.awt.BasicStroke; import java.awt.Color; import java.awt.Font; @@ -85,8 +85,8 @@ public void drawMultiForrestPlot(String xAxisName, String[] yAxisNames, Double[] Locale.setDefault(Locale.US); // set up Graphics2D depending on required format using iText in case PDF Graphics2D g2d = null; - com.lowagie.text.Document document = null; - com.lowagie.text.pdf.PdfWriter writer = null; + com.itextpdf.text.Document document = null; + com.itextpdf.text.pdf.PdfWriter writer = null; BufferedImage bi = null; int width = 1; int height = 1; @@ -130,16 +130,16 @@ public void drawMultiForrestPlot(String xAxisName, String[] yAxisNames, Double[] height = (yAxisNames.length * textpadding) + (2 * textpadding) + (fontheight * yAxisNames.length) + (topMargin * 2) + geneNameMargin + fontheight + topMargin; System.out.println(height); // initialize plot - com.lowagie.text.pdf.PdfContentByte cb = null; + com.itextpdf.text.pdf.PdfContentByte cb = null; if (output == ForestPlot.Output.PDF) { - com.lowagie.text.Rectangle rectangle = new com.lowagie.text.Rectangle(width, height); - document = new com.lowagie.text.Document(rectangle); - writer = com.lowagie.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(filename)); + com.itextpdf.text.Rectangle rectangle = new com.itextpdf.text.Rectangle(width, height); + document = new com.itextpdf.text.Document(rectangle); + writer = com.itextpdf.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(filename)); document.open(); cb = writer.getDirectContent(); cb.saveState(); - //com.lowagie.text.pdf.DefaultFontMapper fontMap = new com.lowagie.text.pdf.DefaultFontMapper(); + //com.itextpdf.text.pdf.DefaultFontMapper fontMap = new com.itextpdf.text.pdf.DefaultFontMapper(); g2d = cb.createGraphics(width, height); } else { bi = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB); diff --git 
a/genetica-libraries/src/main/java/umcg/genetica/graphics/Heatmap.java b/genetica-libraries/src/main/java/umcg/genetica/graphics/Heatmap.java index 9fcf3b8f6..9549ebdb7 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/graphics/Heatmap.java +++ b/genetica-libraries/src/main/java/umcg/genetica/graphics/Heatmap.java @@ -5,8 +5,8 @@ package umcg.genetica.graphics; import JSci.maths.ArrayMath; -import com.lowagie.text.DocumentException; -import com.lowagie.text.Rectangle; +import com.itextpdf.text.DocumentException; +import com.itextpdf.text.Rectangle; import java.awt.Color; import java.awt.Font; import java.awt.FontMetrics; @@ -62,18 +62,18 @@ public static void drawHeatmap(double[][] values, String[] rowHeaders, String[] } // set up Graphics2D depending on required format using iText in case PDF Graphics2D g2d = null; - com.lowagie.text.Document document = null; - com.lowagie.text.pdf.PdfWriter writer = null; - com.lowagie.text.pdf.PdfContentByte cb = null; + com.itextpdf.text.Document document = null; + com.itextpdf.text.pdf.PdfWriter writer = null; + com.itextpdf.text.pdf.PdfContentByte cb = null; BufferedImage bi = null; if (output == Output.PDF) { Rectangle rectangle = new Rectangle(width, height); - document = new com.lowagie.text.Document(rectangle); - writer = com.lowagie.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(filename)); + document = new com.itextpdf.text.Document(rectangle); + writer = com.itextpdf.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(filename)); document.open(); cb = writer.getDirectContent(); - //com.lowagie.text.pdf.DefaultFontMapper fontMap = new com.lowagie.text.pdf.DefaultFontMapper(); + //com.itextpdf.text.pdf.DefaultFontMapper fontMap = new com.itextpdf.text.pdf.DefaultFontMapper(); cb.saveState(); g2d = cb.createGraphics(width, height); } else { @@ -203,19 +203,19 @@ public static void drawCorrelationHeatmap(double[][] values, String[] rowHeaders // set up Graphics2D depending 
on required format using iText in case PDF Graphics2D g2d = null; - com.lowagie.text.Document document = null; - com.lowagie.text.pdf.PdfWriter writer = null; - com.lowagie.text.pdf.PdfContentByte cb = null; + com.itextpdf.text.Document document = null; + com.itextpdf.text.pdf.PdfWriter writer = null; + com.itextpdf.text.pdf.PdfContentByte cb = null; BufferedImage bi = null; if (output == Output.PDF) { - com.lowagie.text.Rectangle rectangle = new com.lowagie.text.Rectangle(width, height); - document = new com.lowagie.text.Document(rectangle); - writer = com.lowagie.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(filename)); + com.itextpdf.text.Rectangle rectangle = new com.itextpdf.text.Rectangle(width, height); + document = new com.itextpdf.text.Document(rectangle); + writer = com.itextpdf.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(filename)); document.open(); cb = writer.getDirectContent(); cb.saveState(); - //com.lowagie.text.pdf.DefaultFontMapper fontMap = new com.lowagie.text.pdf.DefaultFontMapper(); + //com.itextpdf.text.pdf.DefaultFontMapper fontMap = new com.itextpdf.text.pdf.DefaultFontMapper(); g2d = cb.createGraphics(width, height); } else { bi = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB); @@ -358,7 +358,7 @@ private static void normalizeCorrelations(double[][] values) { } private Rectangle getScaleGradient(int width, int height) { - com.lowagie.text.Rectangle r = new com.lowagie.text.Rectangle(width, height); + com.itextpdf.text.Rectangle r = new com.itextpdf.text.Rectangle(width, height); return r; } diff --git a/genetica-libraries/src/main/java/umcg/genetica/graphics/ScatterPlot.java b/genetica-libraries/src/main/java/umcg/genetica/graphics/ScatterPlot.java index 2499bbe31..ed5a87597 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/graphics/ScatterPlot.java +++ b/genetica-libraries/src/main/java/umcg/genetica/graphics/ScatterPlot.java @@ -4,9 +4,9 @@ */ package 
umcg.genetica.graphics; -import com.lowagie.text.DocumentException; -import com.lowagie.text.Rectangle; -import com.lowagie.text.pdf.PdfContentByte; +import com.itextpdf.text.DocumentException; +import com.itextpdf.text.Rectangle; +import com.itextpdf.text.pdf.PdfContentByte; import java.awt.Color; import java.awt.Font; import java.awt.FontMetrics; @@ -48,8 +48,8 @@ public class ScatterPlot { private int fontheight; private OUTPUTFORMAT format; private String outfilename; - private com.lowagie.text.Document document = null; - private com.lowagie.text.pdf.PdfWriter writer = null; + private com.itextpdf.text.Document document = null; + private com.itextpdf.text.pdf.PdfWriter writer = null; private PdfContentByte cb; private int[] category; private Color[] colors; @@ -141,13 +141,13 @@ private void init() { if (format == OUTPUTFORMAT.PDF) { Rectangle rectangle = new Rectangle(graphWidth, graphHeight); - document = new com.lowagie.text.Document(rectangle); + document = new com.itextpdf.text.Document(rectangle); if (!outfilename.toLowerCase().endsWith(".pdf")) { outfilename += ".pdf"; } try { - writer = com.lowagie.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(outfilename)); + writer = com.itextpdf.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(outfilename)); } catch (DocumentException e) { e.printStackTrace(); @@ -158,7 +158,7 @@ private void init() { cb = writer.getDirectContent(); cb.saveState(); -// com.lowagie.text.pdf.DefaultFontMapper fontMap = new com.lowagie.text.pdf.DefaultFontMapper(); +// com.itextpdf.text.pdf.DefaultFontMapper fontMap = new com.itextpdf.text.pdf.DefaultFontMapper(); g2d = cb.createGraphics(graphWidth, graphHeight); } else { bi = new java.awt.image.BufferedImage(graphWidth, graphHeight, java.awt.image.BufferedImage.TYPE_INT_RGB); diff --git a/genetica-libraries/src/main/java/umcg/genetica/graphics/ViolinBoxPlot.java b/genetica-libraries/src/main/java/umcg/genetica/graphics/ViolinBoxPlot.java 
index a16b32e41..0db332f0e 100644 --- a/genetica-libraries/src/main/java/umcg/genetica/graphics/ViolinBoxPlot.java +++ b/genetica-libraries/src/main/java/umcg/genetica/graphics/ViolinBoxPlot.java @@ -4,7 +4,7 @@ */ package umcg.genetica.graphics; -import com.lowagie.text.DocumentException; +import com.itextpdf.text.DocumentException; import java.awt.BasicStroke; import java.awt.Color; import java.awt.Graphics2D; @@ -54,9 +54,9 @@ public void draw(double[][][] vals, String[] datasetNames, String[][] xLabels, S Locale.setDefault(Locale.US); // set up Graphics2D depending on required format using iText in case PDF Graphics2D g2d = null; - com.lowagie.text.Document document = null; - com.lowagie.text.pdf.PdfWriter writer = null; - com.lowagie.text.pdf.PdfContentByte cb = null; + com.itextpdf.text.Document document = null; + com.itextpdf.text.pdf.PdfWriter writer = null; + com.itextpdf.text.pdf.PdfContentByte cb = null; BufferedImage bi = null; @@ -109,17 +109,17 @@ public void draw(double[][][] vals, String[] datasetNames, String[][] xLabels, S } if (output == Output.PDF) { - com.lowagie.text.Rectangle rectangle = new com.lowagie.text.Rectangle(docWidth, docHeight); - document = new com.lowagie.text.Document(rectangle); + com.itextpdf.text.Rectangle rectangle = new com.itextpdf.text.Rectangle(docWidth, docHeight); + document = new com.itextpdf.text.Document(rectangle); try { - writer = com.lowagie.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(outputFileName)); + writer = com.itextpdf.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(outputFileName)); } catch (DocumentException e) { throw new IOException(e.fillInStackTrace()); } document.open(); cb = writer.getDirectContent(); cb.saveState(); - //com.lowagie.text.pdf.DefaultFontMapper fontMap = new com.lowagie.text.pdf.DefaultFontMapper(); + //com.itextpdf.text.pdf.DefaultFontMapper fontMap = new com.itextpdf.text.pdf.DefaultFontMapper(); g2d = cb.createGraphics(docWidth, 
docHeight); } else { bi = new BufferedImage(docWidth, docHeight, BufferedImage.TYPE_INT_RGB); From b206b133d6790df09938b9d4152ebef37a2972a3 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 22 Jul 2015 21:25:38 +0200 Subject: [PATCH 127/143] Fix part 2 --- .../binarymeta/meta/cis/CisAnalysis.java | 2 +- .../binarymeta/meta/graphics/ZScorePlot.java | 12 ++++++------ .../binarymeta/util/Filter.java | 2 +- .../eqtlmappingpipeline/graphics/Graphics.java | 12 ++++++------ .../eqtlmappingpipeline/metaqtl3/MetaQTL3.java | 2 +- .../metaqtl3/graphics/EQTLDotPlot.java | 16 ++++++++-------- .../metaqtl3/graphics/EQTLPlotter.java | 12 ++++++------ .../metaqtl3/graphics/QQPlot.java | 12 ++++++------ .../eqtlmappingpipeline/util/QTLDotPlotter.java | 2 +- 9 files changed, 36 insertions(+), 36 deletions(-) diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/cis/CisAnalysis.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/cis/CisAnalysis.java index 538175b4e..c96e96d28 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/cis/CisAnalysis.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/cis/CisAnalysis.java @@ -4,7 +4,7 @@ */ package eqtlmappingpipeline.binarymeta.meta.cis; -import com.lowagie.text.DocumentException; +import com.itextpdf.text.DocumentException; import eqtlmappingpipeline.metaqtl3.FDR; import eqtlmappingpipeline.metaqtl3.graphics.EQTLDotPlot; import eqtlmappingpipeline.binarymeta.meta.MetaAnalyze; diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/graphics/ZScorePlot.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/graphics/ZScorePlot.java index 3b1ac4b5f..140b91105 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/graphics/ZScorePlot.java +++ 
b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/meta/graphics/ZScorePlot.java @@ -4,8 +4,8 @@ */ package eqtlmappingpipeline.binarymeta.meta.graphics; -import com.lowagie.text.Document; -import com.lowagie.text.pdf.PdfContentByte; +import com.itextpdf.text.Document; +import com.itextpdf.text.pdf.PdfContentByte; import java.awt.Color; import java.awt.Font; import java.awt.Graphics2D; @@ -37,7 +37,7 @@ public class ZScorePlot { private String outfilename = ""; private Document document; private PdfContentByte cb; - private com.lowagie.text.pdf.PdfWriter writer; + private com.itextpdf.text.pdf.PdfWriter writer; public void init(int numdatasets, String[] datasets, boolean pdf, String filename) { @@ -58,11 +58,11 @@ public void init(int numdatasets, String[] datasets, boolean pdf, String filenam height = (plotsize * numDatasets) + ((numdatasets + 1) * spacer) - (plotsize + spacer); if (pdfOutput) { - com.lowagie.text.Rectangle rectangle = new com.lowagie.text.Rectangle(width, height); - document = new com.lowagie.text.Document(rectangle); + com.itextpdf.text.Rectangle rectangle = new com.itextpdf.text.Rectangle(width, height); + document = new com.itextpdf.text.Document(rectangle); writer = null; try { - writer = com.lowagie.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(filename)); + writer = com.itextpdf.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(filename)); document.open(); cb = writer.getDirectContent(); cb.saveState(); diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/util/Filter.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/util/Filter.java index ed2f3f9bb..628fa188f 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/util/Filter.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/binarymeta/util/Filter.java @@ -4,7 +4,7 @@ */ package eqtlmappingpipeline.binarymeta.util; -import 
com.lowagie.text.DocumentException; +import com.itextpdf.text.DocumentException; import eqtlmappingpipeline.metaqtl3.FDR; import eqtlmappingpipeline.metaqtl3.FDR.FDRMethod; import eqtlmappingpipeline.metaqtl3.graphics.EQTLDotPlot; diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/graphics/Graphics.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/graphics/Graphics.java index 1f21e3200..0cfc0972d 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/graphics/Graphics.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/graphics/Graphics.java @@ -18,7 +18,7 @@ */ public class Graphics { - private com.lowagie.text.Document document; + private com.itextpdf.text.Document document; private boolean usePDF = false; protected BufferedImage bi; protected Graphics2D g2d; @@ -29,9 +29,9 @@ public class Graphics { protected int marginTop, marginBottom, marginLeft, marginRight; protected double scalingX, scalingY; protected int FILE_TYPE; - protected com.lowagie.text.pdf.PdfContentByte cb; + protected com.itextpdf.text.pdf.PdfContentByte cb; protected String outputLoc = ""; - protected com.lowagie.text.pdf.PdfWriter writer; + protected com.itextpdf.text.pdf.PdfWriter writer; public Graphics() { bi = new java.awt.image.BufferedImage(100, 100, java.awt.image.BufferedImage.TYPE_INT_RGB); @@ -51,11 +51,11 @@ public Graphics(int width, int height) { protected void init(int width, int height) { if (usePDF) { - com.lowagie.text.Rectangle rectangle = new com.lowagie.text.Rectangle(width, height); - document = new com.lowagie.text.Document(rectangle); + com.itextpdf.text.Rectangle rectangle = new com.itextpdf.text.Rectangle(width, height); + document = new com.itextpdf.text.Document(rectangle); writer = null; try { - writer = com.lowagie.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(outputLoc)); + writer = com.itextpdf.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(outputLoc)); 
document.open(); cb = writer.getDirectContent(); cb.saveState(); diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/MetaQTL3.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/MetaQTL3.java index 6770c0be7..54e335ad6 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/MetaQTL3.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/MetaQTL3.java @@ -8,7 +8,7 @@ import cern.colt.matrix.tint.IntMatrix2D; import cern.colt.matrix.tint.impl.DenseIntMatrix2D; import cern.colt.matrix.tint.impl.DenseLargeIntMatrix2D; -import com.lowagie.text.DocumentException; +import com.itextpdf.text.DocumentException; import eqtlmappingpipeline.metaqtl3.containers.WorkPackage; import eqtlmappingpipeline.metaqtl3.containers.Result; import umcg.genetica.math.stats.Descriptives; diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/graphics/EQTLDotPlot.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/graphics/EQTLDotPlot.java index a03f468ab..e154c3008 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/graphics/EQTLDotPlot.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/graphics/EQTLDotPlot.java @@ -4,7 +4,7 @@ */ package eqtlmappingpipeline.metaqtl3.graphics; -import com.lowagie.text.DocumentException; +import com.itextpdf.text.DocumentException; import java.awt.Color; import java.awt.Graphics2D; import java.awt.RenderingHints; @@ -61,19 +61,19 @@ public void draw(String inputFile, String outputFile, Output output) throws IOEx int innerHeight = y1 - y0; Graphics2D g2d = null; - com.lowagie.text.Document document = null; - com.lowagie.text.pdf.PdfWriter writer = null; - com.lowagie.text.pdf.PdfContentByte cb = null; + com.itextpdf.text.Document document = null; + com.itextpdf.text.pdf.PdfWriter writer = null; + com.itextpdf.text.pdf.PdfContentByte cb = null; BufferedImage bi = null; if (output == 
Output.PDF) { - com.lowagie.text.Rectangle rectangle = new com.lowagie.text.Rectangle(width, height); - document = new com.lowagie.text.Document(rectangle); - writer = com.lowagie.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(outputFile)); + com.itextpdf.text.Rectangle rectangle = new com.itextpdf.text.Rectangle(width, height); + document = new com.itextpdf.text.Document(rectangle); + writer = com.itextpdf.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(outputFile)); document.open(); cb = writer.getDirectContent(); cb.saveState(); - //com.lowagie.text.pdf.DefaultFontMapper fontMap = new com.lowagie.text.pdf.DefaultFontMapper(); + //com.itextpdf.text.pdf.DefaultFontMapper fontMap = new com.itextpdf.text.pdf.DefaultFontMapper(); g2d = cb.createGraphics(width, height); } else { bi = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB); diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/graphics/EQTLPlotter.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/graphics/EQTLPlotter.java index 63a52c1ee..5326dc27d 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/graphics/EQTLPlotter.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/graphics/EQTLPlotter.java @@ -164,18 +164,18 @@ public void draw(WorkPackage wp, int pid) { Graphics2D g2d = null; BufferedImage bi = null; - com.lowagie.text.Document document = null; - com.lowagie.text.pdf.PdfContentByte cb = null; - com.lowagie.text.pdf.PdfWriter writer = null; + com.itextpdf.text.Document document = null; + com.itextpdf.text.pdf.PdfContentByte cb = null; + com.itextpdf.text.pdf.PdfWriter writer = null; if (outputPlotsFileType == FILE_TYPE_PNG) { bi = new java.awt.image.BufferedImage(width, height, java.awt.image.BufferedImage.TYPE_INT_RGB); g2d = bi.createGraphics(); } else { - com.lowagie.text.Rectangle rectangle = new com.lowagie.text.Rectangle(width, height); - 
document = new com.lowagie.text.Document(rectangle); + com.itextpdf.text.Rectangle rectangle = new com.itextpdf.text.Rectangle(width, height); + document = new com.itextpdf.text.Document(rectangle); try { - writer = com.lowagie.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(file)); + writer = com.itextpdf.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(file)); document.open(); cb = writer.getDirectContent(); cb.saveState(); diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/graphics/QQPlot.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/graphics/QQPlot.java index 6c2bf34bd..ee0af840f 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/graphics/QQPlot.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/metaqtl3/graphics/QQPlot.java @@ -44,19 +44,19 @@ public void draw(String fileName, double fdrCutOff, int nrPermutationsFDR, int m } Graphics2D g2d = null; BufferedImage bi = null; - com.lowagie.text.Document document = null; - com.lowagie.text.pdf.PdfContentByte cb = null; - com.lowagie.text.pdf.PdfWriter writer = null; + com.itextpdf.text.Document document = null; + com.itextpdf.text.pdf.PdfContentByte cb = null; + com.itextpdf.text.pdf.PdfWriter writer = null; if (outputPlotsFileType == FILE_TYPE_PNG) { bi = new java.awt.image.BufferedImage(width, height, java.awt.image.BufferedImage.TYPE_INT_RGB); g2d = bi.createGraphics(); } else { - com.lowagie.text.Rectangle rectangle = new com.lowagie.text.Rectangle(width, height); + com.itextpdf.text.Rectangle rectangle = new com.itextpdf.text.Rectangle(width, height); - document = new com.lowagie.text.Document(rectangle); + document = new com.itextpdf.text.Document(rectangle); try { - writer = com.lowagie.text.pdf.PdfWriter.getInstance(document, new java.io.FileOutputStream(fileQQPlot)); + writer = com.itextpdf.text.pdf.PdfWriter.getInstance(document, new 
java.io.FileOutputStream(fileQQPlot)); document.open(); cb = writer.getDirectContent(); cb.saveState(); diff --git a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLDotPlotter.java b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLDotPlotter.java index 92183e4ac..f7865fb65 100644 --- a/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLDotPlotter.java +++ b/eqtl-mapping-pipeline/src/main/java/eqtlmappingpipeline/util/QTLDotPlotter.java @@ -4,7 +4,7 @@ */ package eqtlmappingpipeline.util; -import com.lowagie.text.DocumentException; +import com.itextpdf.text.DocumentException; import eqtlmappingpipeline.metaqtl3.graphics.EQTLDotPlot; import java.io.IOException; import java.util.logging.Level; From afa8eea45b1e10cca02f9580f7fdd97cfbbf19e3 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 22 Jul 2015 21:28:49 +0200 Subject: [PATCH 128/143] Fix --- .../eqtlpermutationtranscriptionfactoranalysis/Testing.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eqtl-functional-enrichment/src/main/java/nl/systemsgenetics/eqtlpermutationtranscriptionfactoranalysis/Testing.java b/eqtl-functional-enrichment/src/main/java/nl/systemsgenetics/eqtlpermutationtranscriptionfactoranalysis/Testing.java index 17591ce62..cf168a21d 100644 --- a/eqtl-functional-enrichment/src/main/java/nl/systemsgenetics/eqtlpermutationtranscriptionfactoranalysis/Testing.java +++ b/eqtl-functional-enrichment/src/main/java/nl/systemsgenetics/eqtlpermutationtranscriptionfactoranalysis/Testing.java @@ -8,7 +8,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.regex.Pattern; -import org.bouncycastle.util.Arrays; +//import org.bouncycastle.util.Arrays; import umcg.genetica.genomicboundaries.GenomicBoundaries; import umcg.genetica.graphics.ViolinBoxPlot; import umcg.genetica.io.text.TextFile; From 471ec6f540a96775fac409c5e478e00ababbc8e8 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 22 Jul 2015 21:34:35 +0200 
Subject: [PATCH 129/143] Fix --- BinaryMetaAnalyzer/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BinaryMetaAnalyzer/pom.xml b/BinaryMetaAnalyzer/pom.xml index a01baf7c4..d9314c876 100644 --- a/BinaryMetaAnalyzer/pom.xml +++ b/BinaryMetaAnalyzer/pom.xml @@ -13,7 +13,7 @@ ${project.groupId} genetica-libraries - 1.0.6-SNAPSHOT + 1.0.7-SNAPSHOT BinaryMetaAnalyzer From aa1c81b12fd9276971bbab507a4377977ff40cd7 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 23 Jul 2015 16:56:39 +0200 Subject: [PATCH 130/143] Minor fix --- .../TestEQTLDatasetForInteractions.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 85ab10715..ee5b505e1 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -1033,9 +1033,13 @@ private ExpressionDataset correctCovariateDataPCA(String[] covsToCorrect2, Strin ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect2.length + covsToCorrect.length + nrCompsToCorrectFor, datasetGenotypes.nrSamples); datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; + // add covariates from the first list + HashMap hashCovsToCorrect = new HashMap(); + // add covariates from the second list for (int i = 0; i < covsToCorrect2.length; ++i) { String cov = covsToCorrect2[i]; + hashCovsToCorrect.put(cov, null); Integer c = datasetCovariatesPCAForceNormal.hashProbes.get(cov); if (c == null) { throw new Exception("Covariate not found: 
" + cov); @@ -1045,8 +1049,6 @@ private ExpressionDataset correctCovariateDataPCA(String[] covsToCorrect2, Strin } } - // add covariates from the first list - HashMap hashCovsToCorrect = new HashMap(); int[] covsToCorrectIndex = new int[covsToCorrect.length]; for (int c = 0; c < covsToCorrect.length; c++) { hashCovsToCorrect.put(covsToCorrect[c], null); From 71fd3fb1c550d091c2730032df9e959eb7573a4e Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 23 Jul 2015 22:18:15 +0200 Subject: [PATCH 131/143] Interaction regress out all SNP effects --- .../TestEQTLDatasetForInteractions.java | 70 ++++++++++++++++--- 1 file changed, 59 insertions(+), 11 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index ee5b505e1..3cb2c3761 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -436,7 +436,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] } } - correctExpressionDataForInteractions(covsToCorrect, datasetCovariates, datasetGenotypes, nrSamples, datasetExpression, regression); + correctExpressionDataForInteractions(covsToCorrect, datasetCovariates, datasetGenotypes, nrSamples, datasetExpression, regression, qtlProbeSnpMultiMap); forceNormalExpressionData(datasetExpression); @@ -1035,7 +1035,7 @@ private ExpressionDataset correctCovariateDataPCA(String[] covsToCorrect2, Strin // add covariates from the first list HashMap hashCovsToCorrect = new HashMap(); - + // add covariates from the second list for (int i = 0; i < 
covsToCorrect2.length; ++i) { String cov = covsToCorrect2[i]; @@ -1280,27 +1280,75 @@ private void forceNormalCovariates(ExpressionDataset datasetCovariates, Expressi } } - private void correctExpressionDataForInteractions(String[] covsToCorrect, ExpressionDataset datasetCovariates, ExpressionDataset datasetGenotypes, int nrSamples, ExpressionDataset datasetExpression, OLSMultipleLinearRegression regression) throws MathIllegalArgumentException { + private void correctExpressionDataForInteractions(String[] covsToCorrect, ExpressionDataset datasetCovariates, ExpressionDataset datasetGenotypes, int nrSamples, ExpressionDataset datasetExpression, OLSMultipleLinearRegression regression, HashMultimap qtlProbeSnpMultiMap) throws MathIllegalArgumentException, Exception { + System.out.println("Correcting expression data for predefined gene environment interaction effects (GC content, Gender, 5'Median Bias, 3'Median Bias):"); int[] covsToCorrectIndex = new int[covsToCorrect.length]; for (int c = 0; c < covsToCorrect.length; c++) { covsToCorrectIndex[c] = ((Integer) datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); } - for (int snp = 0; snp < datasetGenotypes.nrProbes; snp++) { - double[][] valsX = new double[nrSamples][1 + covsToCorrect.length * 2]; //store genotypes, covariates, interactions - for (int s = 0; s < nrSamples; s++) { - valsX[s][0] = datasetGenotypes.rawData[snp][s]; //genotypes + + HashMap snpMap = new HashMap(datasetGenotypes.nrProbes); + for (Map.Entry snpEntry : datasetGenotypes.hashProbes.entrySet()) { + try { + snpMap.put(snpEntry.getKey().substring(0, snpEntry.getKey().indexOf('_')), snpEntry.getValue()); + } catch (Exception e) { + System.out.println(snpEntry.getKey()); + throw e; + } + } + + for (int p = 0; p < datasetExpression.nrProbes; p++) { + + String probe = datasetExpression.probeNames[p].substring(0, datasetExpression.probeNames[p].indexOf('_')); + Set probeQtls = qtlProbeSnpMultiMap.get(probe); + + if (probeQtls.isEmpty()) { + 
throw new Exception("Error 1"); } + + //boolean foundPisS = false; + double[][] valsX = new double[nrSamples][probeQtls.size() + covsToCorrect.length * 2]; //store genotypes, covariates, interactions + int k = 0; + for (String snp : probeQtls) { + + Integer s = snpMap.get(snp); + if (s == null) { + throw new Exception("Snp " + snp + " not found"); + } +// if(s.intValue() == p){ +// foundPisS = true; +// } + double[] snpData = datasetGenotypes.rawData[s]; + for (int i = 0; i < datasetGenotypes.nrSamples; ++i) { + valsX[i][k] = snpData[i]; + } + + k++; + } +// if(!foundPisS){ +// +// System.out.println("Expected snp: " + datasetGenotypes.probeNames[p] + " at index: " + p); +// +// for(String qtlSnp : probeQtls = qtlProbeSnpMultiMap.get(probe)){ +// System.out.println("QTL snp: " + qtlSnp + " found at index: " + snpMap.get(qtlSnp)); +// } +// +// throw new Exception("Error 2"); +// } for (int c = 0; c < covsToCorrect.length; c++) { + double[] covData = datasetCovariates.rawData[covsToCorrectIndex[c]]; + double[] snpData = datasetGenotypes.rawData[p]; + for (int s = 0; s < nrSamples; s++) { - valsX[s][c * 2 + 1] = datasetCovariates.rawData[covsToCorrectIndex[c]][s]; //covariate - valsX[s][c * 2 + 2] = valsX[s][0] * valsX[s][c * 2 + 1]; //interction + valsX[s][c * 2 + probeQtls.size()] = covData[s]; //covariate + valsX[s][c * 2 + probeQtls.size() + 1] = snpData[s] * covData[s]; //interction } } - double[] valsY = datasetExpression.rawData[snp]; + double[] valsY = datasetExpression.rawData[p]; regression.newSampleData(valsY, valsX); - datasetExpression.rawData[snp] = regression.estimateResiduals(); + datasetExpression.rawData[p] = regression.estimateResiduals(); } } From bfe42c6572274c408deb24e1bed50b53e882759c Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Fri, 24 Jul 2015 18:01:16 +0200 Subject: [PATCH 132/143] Fix in genotype IO table writer --- .../org/molgenis/genotype/table/TableGenotypeWriter.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) 
diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/table/TableGenotypeWriter.java b/Genotype-IO/src/main/java/org/molgenis/genotype/table/TableGenotypeWriter.java index bd99b0e99..ce13c2c99 100644 --- a/Genotype-IO/src/main/java/org/molgenis/genotype/table/TableGenotypeWriter.java +++ b/Genotype-IO/src/main/java/org/molgenis/genotype/table/TableGenotypeWriter.java @@ -42,8 +42,13 @@ public void write(String path) { for (GeneticVariant variant : genotypeData) { - dosageWriter.append(variant.getPrimaryVariantId()); - genotypeWriter.append(variant.getPrimaryVariantId()); + String variantId = variant.getPrimaryVariantId(); + if(variantId == null){ + variantId = variant.getSequenceName() + ":" + variant.getStartPos(); + } + + dosageWriter.append(variantId); + genotypeWriter.append(variantId); for (float dosage : variant.getSampleDosages()) { dosageWriter.append('\t'); From 8a8e0d74f0e23d7caed98a0f2068296a74083417 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Sun, 26 Jul 2015 17:00:39 +0200 Subject: [PATCH 133/143] interaciton --- .../TestEQTLDatasetForInteractions.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 3cb2c3761..ea79406a7 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -1154,11 +1154,11 @@ private void correctExpressionData(String[] covsToCorrect2, ExpressionDataset da datasetCovariatesToCorrectFor.transposeDataset(); - datasetCovariatesToCorrectFor.save(inputDir + 
"/CovariatesToCorrectFor.txt"); - orthogonalizeDataset(inputDir + "/CovariatesToCorrectFor.txt"); - datasetCovariatesToCorrectFor = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.PrincipalComponents.txt"); + datasetCovariatesToCorrectFor.save(outputDir + "/CovariatesToCorrectFor.txt"); + orthogonalizeDataset(outputDir + "/CovariatesToCorrectFor.txt"); + datasetCovariatesToCorrectFor = new ExpressionDataset(outputDir + "/CovariatesToCorrectFor.txt.PrincipalComponents.txt"); datasetCovariatesToCorrectFor.transposeDataset(); - ExpressionDataset datasetCovariatesToCorrectForEigenvalues = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.Eigenvalues.txt"); + ExpressionDataset datasetCovariatesToCorrectForEigenvalues = new ExpressionDataset(outputDir + "/CovariatesToCorrectFor.txt.Eigenvalues.txt"); for (int snp = 0; snp < datasetExpression.nrProbes; snp++) { for (int cov = 0; cov < datasetCovariatesToCorrectFor.nrProbes; cov++) { if (datasetCovariatesToCorrectForEigenvalues.rawData[cov][0] > 1E-5) { From f91d5ce921b571ade77aecc218a2453787b95dfa Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Tue, 28 Jul 2015 09:41:40 +0200 Subject: [PATCH 134/143] Interactions SNP selection --- eQTLInteractionAnalyser/pom.xml | 2 +- .../EQTLInteractionAnalyser.java | 16 ++++++- ...ormInteractionAnalysisPermutationTask.java | 11 +++-- .../TestEQTLDatasetForInteractions.java | 42 +++++++++++++++---- 4 files changed, 58 insertions(+), 13 deletions(-) diff --git a/eQTLInteractionAnalyser/pom.xml b/eQTLInteractionAnalyser/pom.xml index c93865306..b2f231ebb 100644 --- a/eQTLInteractionAnalyser/pom.xml +++ b/eQTLInteractionAnalyser/pom.xml @@ -9,7 +9,7 @@ nl.systemsgenetics eQTLInteractionAnalyser - 1.0-SNAPSHOT + 1.1-SNAPSHOT eQTLInteractionAnalyser http://maven.apache.org diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java 
b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index 63063f42b..0eaf9aa63 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -137,6 +137,12 @@ public class EQTLInteractionAnalyser { OptionBuilder.withDescription("Number of threads"); OptionBuilder.withLongOpt("threads"); OPTIONS.addOption(OptionBuilder.create("nt")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("SNPs to test"); + OptionBuilder.withLongOpt("snpsToTest"); + OPTIONS.addOption(OptionBuilder.create("snps")); } public static void main(String[] args) throws IOException, Exception { @@ -158,6 +164,8 @@ public static void main(String[] args) throws IOException, Exception { final String[] cohorts; final String[] covariatesToTest; final File ensgAnnotationFile; + final File snpsToTestFile; + try { final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); @@ -225,6 +233,12 @@ else if (commandLine.hasOption("c")){ snpsToSwapFile = null; } + if (commandLine.hasOption("snps")){ + snpsToTestFile = new File(commandLine.getOptionValue("snps")); + } else { + snpsToTestFile = null; + } + if (commandLine.hasOption("is")){ File samplesToIncludeFile = new File(commandLine.getOptionValue("is")); System.out.println("Samples to include file: " + samplesToIncludeFile.getAbsolutePath()); @@ -273,7 +287,7 @@ else if (chi2sumDiff){ interactor.findChi2SumDifferences(maxNumCovariatesToRegress, startRoundCompareChi2, ensgAnnotationFile); } else { - new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile, permute, covariatesToTest, hashSamples, 
numThreads, cohorts); + new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile, permute, covariatesToTest, hashSamples, numThreads, cohorts, snpsToTestFile); } } diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java index 1a1de9ba3..f31b5e6f2 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/PerformInteractionAnalysisPermutationTask.java @@ -5,10 +5,9 @@ */ package nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser; -import cern.jet.random.tdouble.engine.DoubleRandomEngine; +import gnu.trove.set.hash.TIntHashSet; import java.util.concurrent.Callable; import org.apache.commons.math3.linear.SingularMatrixException; -import org.apache.commons.math3.stat.correlation.PearsonsCorrelation; import org.apache.commons.math3.stat.regression.SimpleRegression; /** @@ -27,8 +26,9 @@ public class PerformInteractionAnalysisPermutationTask implements Callable corrPvalueThreshold) { // don't compute the interaction if the covariate expression is affected by theis SNP try { diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 4e910eeff..e4af277a4 100644 --- 
a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -8,6 +8,7 @@ import au.com.bytecode.opencsv.CSVReader; import au.com.bytecode.opencsv.CSVWriter; import com.google.common.collect.HashMultimap; +import gnu.trove.set.hash.TIntHashSet; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; @@ -42,7 +43,6 @@ import umcg.genetica.io.text.TextFile; import umcg.genetica.io.trityper.EQTL; import umcg.genetica.io.trityper.QTLTextFile; -import umcg.genetica.math.matrix2.DoubleMatrixDataset; /** * @@ -64,7 +64,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws //preprocessData(); } - public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File snpsToSwapFile, boolean permute, String[] covariatesToTest, HashMap hashSamples, int numThreads, String[] cohorts) throws IOException, Exception { + public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File snpsToSwapFile, boolean permute, String[] covariatesToTest, HashMap hashSamples, int numThreads, String[] cohorts, File snpsToTestFile) throws IOException, Exception { System.out.println("Input dir: " + inputDir); System.out.println("Output dir: " + outputDir); @@ -82,8 +82,6 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String initGenotypes(permute, hashSamples, cohorts); - HashMap eqtlGenes = getEqtls(eQTLfileName); - HashMultimap qtlProbeSnpMultiMap = HashMultimap.create(); final QTLTextFile eQtlFileReader = new 
QTLTextFile(eQTLfileName, false); for (Iterator it = eQtlFileReader.getEQtlIterator(); it.hasNext();) { @@ -94,6 +92,33 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String if (annotationFile != null) { createGeneDistanceMap(annotationFile); } + + final TIntHashSet snpsToTest; + if(snpsToTestFile != null){ + + snpsToTest = new TIntHashSet(); + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(snpsToTestFile), "UTF-8")); + + String line; + while ((line = reader.readLine()) != null) { + Integer genotypeI = datasetGenotypes.hashProbes.get(line); + + if(genotypeI == null){ + System.out.println("SNP " + line + " not found in genotype data"); + continue; + } + + if(!snpsToTest.add(genotypeI)){ + System.out.println("Warning including SNP twice: " + line); + } + + } + + System.out.println("Confining testing to: " + snpsToTest.size() + " SNPs from: " + snpsToTestFile.getAbsolutePath()); + + } else { + snpsToTest = null; + } //preprocessData(); @@ -110,7 +135,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String String[] covsToCorrect = primaryCovsToCorrect; int cnt = 0; while (cnt < maxNumTopCovs) { - String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, eqtlGenes, outputTopCovs, snpsToSwapFile, qtlProbeSnpMultiMap, covariatesToTest, hashSamples, numThreads); + String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, outputTopCovs, snpsToSwapFile, qtlProbeSnpMultiMap, covariatesToTest, hashSamples, numThreads, snpsToTest); String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; for (int c = 0; c < covsToCorrect.length; c++) { covsToCorrectNew[c] = covsToCorrect[c]; @@ -123,6 +148,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String } private void initGenotypes(boolean permute, HashMap hashSamples, String[] cohorts){ + datasetGenotypes = new ExpressionDataset(inputDir + 
"/bigTableLude.txt.Genotypes.binary", '\t', null, hashSamples); if (permute){ @@ -400,7 +426,7 @@ public void preprocessData() { } - public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, HashMap hashEQTLs, TextFile outputTopCovs, File snpsToSwapFile, HashMultimap qtlProbeSnpMultiMap, String[] covariatesToTest, HashMap hashSamples, int numThreads) throws IOException, Exception { + public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, TextFile outputTopCovs, File snpsToSwapFile, HashMultimap qtlProbeSnpMultiMap, String[] covariatesToTest, HashMap hashSamples, int numThreads, final TIntHashSet snpsToTest) throws IOException, Exception { //hashSamples = excludeOutliers(hashSamples); @@ -489,7 +515,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] for (int cov = 0; cov < datasetCovariates.nrProbes; cov++) { double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[cov]); if (stdev > 0) { - PerformInteractionAnalysisPermutationTask task = new PerformInteractionAnalysisPermutationTask(datasetGenotypes, datasetExpression, datasetCovariates, datasetCovariatesPCAForceNormal, cov, skippedWriter); + PerformInteractionAnalysisPermutationTask task = new PerformInteractionAnalysisPermutationTask(datasetGenotypes, datasetExpression, datasetCovariates, datasetCovariatesPCAForceNormal, cov, skippedWriter, snpsToTest); pool.submit(task); nrTasks++; } @@ -1327,7 +1353,7 @@ private void correctExpressionDataForInteractions(String[] covsToCorrect, Expres Set probeQtls = qtlProbeSnpMultiMap.get(probe); if (probeQtls.isEmpty()) { - throw new Exception("Error 1"); + throw new Exception("No eQTLs found for: " + probe); } //boolean foundPisS = false; From 5d2ef16c50677bc84d70e8c9381bfbc530f5d3fe Mon Sep 17 00:00:00 2001 From: Dasha Zhernakova Date: Wed, 29 Jul 2015 12:18:35 +0300 Subject: [PATCH 135/143] edits to interpretInteractionZScoreMatrix --- 
.../EQTLInteractionAnalyser.java | 17 +++++++++++--- .../TestEQTLDatasetForInteractions.java | 22 +++++++++++-------- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index 0eaf9aa63..c848d3614 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -137,6 +137,12 @@ public class EQTLInteractionAnalyser { OptionBuilder.withDescription("Number of threads"); OptionBuilder.withLongOpt("threads"); OPTIONS.addOption(OptionBuilder.create("nt")); + + OptionBuilder.withArgName("int"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Z-score difference threshold for interpretation"); + OptionBuilder.withLongOpt("threshold"); + OPTIONS.addOption(OptionBuilder.create("thr")); OptionBuilder.withArgName("path"); OptionBuilder.hasArg(); @@ -155,7 +161,7 @@ public static void main(String[] args) throws IOException, Exception { int maxNumCovariatesToRegress = 20; int numThreads; final boolean interpret, chi2sumDiff, permute, preproces; - final int startRoundCompareChi2; + final int startRoundCompareChi2, threshold; HashMap hashSamples; @@ -178,7 +184,12 @@ public static void main(String[] args) throws IOException, Exception { if (commandLine.hasOption('n')) { maxNumCovariatesToRegress = Integer.parseInt(commandLine.getOptionValue("n")); } - + if (commandLine.hasOption("thr")) { + threshold = Integer.parseInt(commandLine.getOptionValue("thr")); + } + else { + threshold = 3; + } interpret = commandLine.hasOption("t"); @@ -280,7 +291,7 @@ else if 
(commandLine.hasOption("c")){ interactor.preprocessData(); } else if (interpret){ TestEQTLDatasetForInteractions interactor = new TestEQTLDatasetForInteractions(inputDir, outputDir); - interactor.interpretInteractionZScoreMatrix(maxNumCovariatesToRegress); + interactor.interpretInteractionZScoreMatrix(maxNumCovariatesToRegress, startRoundCompareChi2, threshold); } else if (chi2sumDiff){ TestEQTLDatasetForInteractions interactor = new TestEQTLDatasetForInteractions(inputDir, outputDir); diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index e4af277a4..84f45e256 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -209,17 +209,22 @@ public static HashMap getEqtls(String fname) throws IOException } - public void interpretInteractionZScoreMatrix(int maxNumRegressedCovariates) { + public void interpretInteractionZScoreMatrix(int maxNumRegressedCovariates, int numPrimaryCovsToCorrect, int zscoreDiffThreshold) throws IOException { System.out.println("Interpreting the z-score matrix"); - int numPrimaryCovsToCorrect = primaryCovsToCorrect.length; for (int nrCovsRemoved = numPrimaryCovsToCorrect; nrCovsRemoved < numPrimaryCovsToCorrect + maxNumRegressedCovariates; nrCovsRemoved++) { - ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt"); - dataset.save(dataset.fileName + ".binary"); + if (! 
new File(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary.dat").exists()) { + ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt"); + dataset.save(dataset.fileName + ".binary"); + } + else { + System.out.println("Binary z-score matrix already exists, not overwriting it: " + outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary.dat"); + } } - + TextFile out = new TextFile(outputDir + "zscoreDiff.txt", true); + out.writeln("numCovsRemoved\tcovariate\teQTL\tz-score_before\tz-score_after\tdifference"); for (int nrCovsRemoved = numPrimaryCovsToCorrect; nrCovsRemoved < numPrimaryCovsToCorrect + maxNumRegressedCovariates; nrCovsRemoved++) { ExpressionDataset dataset = new ExpressionDataset(outputDir + "/InteractionZScoresMatrix-" + nrCovsRemoved + "Covariates.txt.binary"); @@ -233,16 +238,15 @@ public void interpretInteractionZScoreMatrix(int maxNumRegressedCovariates) { double absZDiff = Math.abs(zDiff); if (absZDiff > 2 && absZDiff > maxAbsZDiff) { maxAbsZDiff = absZDiff; - output = nrCovsRemoved + "\t" + p + "\t" + dataset.probeNames[p] + "\t" + q + "\t" + dataset.sampleNames[q] + "\t" + dataset.rawData[p][q] + "\t" + dataset2.rawData[p][q] + "\t" + zDiff; + output = nrCovsRemoved + "\t" + dataset.probeNames[p] + "\t" + dataset.sampleNames[q] + "\t" + dataset.rawData[p][q] + "\t" + dataset2.rawData[p][q] + "\t" + zDiff; } } - if (maxAbsZDiff > 2) { + if (maxAbsZDiff > zscoreDiffThreshold) { System.out.println(output); + out.writeln(output); } } } - - System.exit(0); } public void findChi2SumDifferences(int maxNumRegressedCovariates, int numPrimaryCovsToCorrect, File ensgAnnotationFile) throws IOException { From 8b0ad9b1680e14dc0ed83f90fa91937f8cce912a Mon Sep 17 00:00:00 2001 From: Dasha Zhernakova Date: Wed, 29 Jul 2015 13:10:17 +0300 Subject: [PATCH 136/143] small bug fix --- .../eqtlinteractionanalyser/EQTLInteractionAnalyser.java | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index c848d3614..c6b9ec686 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -192,7 +192,7 @@ public static void main(String[] args) throws IOException, Exception { } - interpret = commandLine.hasOption("t"); + interpret = commandLine.hasOption("it"); chi2sumDiff = commandLine.hasOption("dif"); permute = commandLine.hasOption("perm"); preproces = commandLine.hasOption("p"); From f8861185519270300d1cdcca6907f4392c2c39f9 Mon Sep 17 00:00:00 2001 From: Dasha Zhernakova Date: Wed, 29 Jul 2015 13:44:04 +0300 Subject: [PATCH 137/143] small bug fix --- .../eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java | 1 + 1 file changed, 1 insertion(+) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 84f45e256..1a1d532bc 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -247,6 +247,7 @@ public void interpretInteractionZScoreMatrix(int maxNumRegressedCovariates, int } } } + out.close(); } public void 
findChi2SumDifferences(int maxNumRegressedCovariates, int numPrimaryCovsToCorrect, File ensgAnnotationFile) throws IOException { From 5c8ae6d49a2c2cd079a8600be8bb0df632479d6c Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Thu, 30 Jul 2015 17:34:06 +0200 Subject: [PATCH 138/143] Improvements to trityper --- .../trityper/TriTyperGenotypeData.java | 2 +- .../trityper/TriTyperGenotypeWriter.java | 31 +++++++++++++------ 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/trityper/TriTyperGenotypeData.java b/Genotype-IO/src/main/java/org/molgenis/genotype/trityper/TriTyperGenotypeData.java index 6653e6e10..00178a2e9 100644 --- a/Genotype-IO/src/main/java/org/molgenis/genotype/trityper/TriTyperGenotypeData.java +++ b/Genotype-IO/src/main/java/org/molgenis/genotype/trityper/TriTyperGenotypeData.java @@ -120,7 +120,7 @@ public TriTyperGenotypeData(File location, int cacheSize, VariantFilter variantF } public TriTyperGenotypeData(File location, int cacheSize, VariantFilter variantFilter, SampleFilter sampleFilter) throws IOException { - this(new File(location, "GenotypeMatrix.dat"), new File(location, "ImputedDosageMatrix.dat").exists() ? new File(location, "ImputedDosageMatrix.dat") : null, new File(location, "SNPs.txt.gz").exists() ? new File(location, "SNPs.txt.gz") : new File(location, "SNPs.txt"), new File(location, "SNPMappings.txt.gz").exists() ? new File(location, "SNPMappings.txt.gz") : new File(location, "SNPMappings.txt"), new File(location, "Individuals.txt.gz").exists() ? new File(location, "Individuals.txt.gz") : new File(location, "Individuals.txt"), new File(location, "PhenotypeInformation.txt.gz").exists() ? new File(location, "PhenotypeInformation.txt.gz") : new File(location, "PhenotypeInformation.txt"), cacheSize, variantFilter, sampleFilter, new File(location, "allelRecodingInformation.txt").exists() ? 
new File(location, "allelRecodingInformation.txt") : null); + this(new File(location, "GenotypeMatrix.dat"), new File(location, "ImputedDosageMatrix.dat").exists() ? new File(location, "ImputedDosageMatrix.dat") : null, new File(location, "SNPs.txt.gz").exists() ? new File(location, "SNPs.txt.gz") : new File(location, "SNPs.txt"), new File(location, "SNPMappings.txt.gz").exists() ? new File(location, "SNPMappings.txt.gz") : new File(location, "SNPMappings.txt"), new File(location, "Individuals.txt.gz").exists() ? new File(location, "Individuals.txt.gz") : new File(location, "Individuals.txt"), new File(location, "PhenotypeInformation.txt.gz").exists() ? new File(location, "PhenotypeInformation.txt.gz") : new File(location, "PhenotypeInformation.txt"), cacheSize, variantFilter, sampleFilter, new File(location, "AlleleRecodingInformation.txt").exists() ? new File(location, "AlleleRecodingInformation.txt") : null); } public TriTyperGenotypeData(File genotypeDataFile, File imputedDosageDataFile, File snpFile, File snpMapFile, File individualFile, File phenotypeAnnotationFile, int cacheSize, VariantFilter variantFilter, SampleFilter sampleFilter, File allelRecoding) throws IOException { diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/trityper/TriTyperGenotypeWriter.java b/Genotype-IO/src/main/java/org/molgenis/genotype/trityper/TriTyperGenotypeWriter.java index 46323161f..f52fabcc6 100644 --- a/Genotype-IO/src/main/java/org/molgenis/genotype/trityper/TriTyperGenotypeWriter.java +++ b/Genotype-IO/src/main/java/org/molgenis/genotype/trityper/TriTyperGenotypeWriter.java @@ -49,7 +49,7 @@ public void write(File folder) throws IOException { File snpMapFile = new File(folder, "SNPMappings.txt"); File individualFile = new File(folder, "Individuals.txt"); File phenotypeAnnotationFile = new File(folder, "PhenotypeInformation.txt"); - File allelRecodingFile = new File(folder, "allelRecodingInformation.txt"); + File allelRecodingFile = new File(folder, 
"AlleleRecodingInformation.txt"); writeSnps(snpFile, snpMapFile); writeSamples(individualFile, phenotypeAnnotationFile); @@ -71,13 +71,7 @@ private void writeSnps(File snpFile, File snpMapFile) throws IOException { // continue; // } - final GeneticVariantId snpId = variant.getVariantId(); - final String snpName; - if(snpId.containsId()){ - snpName = snpId.getPrimairyId(); - } else { - snpName = variant.getSequenceName() + ':' + String.valueOf(variant.getStartPos()); - } + final String snpName = createTriTyperVariantId(variant); snpFileWriter.append(snpName); snpFileWriter.append('\n'); @@ -156,7 +150,7 @@ private void writeGenotypes(File genotypeDataFile, File imputedDosageDataFile, F a = sampleAlleles.get(0).isSnpAllele() && sampleAlleles.get(0) != Allele.ZERO ? (byte) sampleAlleles.get(0).getAlleleAsSnp() : 0; b = sampleAlleles.get(1).isSnpAllele() && sampleAlleles.get(1) != Allele.ZERO ? (byte) sampleAlleles.get(1).getAlleleAsSnp() : 0; } else { - snpRecodingInfo.add(variant.getPrimaryVariantId()+"\t"+variant.getSequenceName()+"\t"+variant.getStartPos()+"\t"+variant.getVariantAlleles().get(0)+"\t"+variant.getVariantAlleles().get(1)); + snpRecodingInfo.add(createTriTyperVariantId(variant)+"\t"+variant.getSequenceName()+"\t"+variant.getStartPos()+"\t"+variant.getVariantAlleles().get(0)+"\t"+variant.getVariantAlleles().get(1)); if(sampleAlleles.get(0).equals(variant.getVariantAlleles().get(0))){ a = (byte) 'A'; @@ -205,7 +199,7 @@ private void writeGenotypes(File genotypeDataFile, File imputedDosageDataFile, F if(!snpRecodingInfo.isEmpty()){ BufferedWriter allelRecodingFileWriter = new BufferedWriter(new FileWriter(allelRecodingFile)); - allelRecodingFileWriter.write("Variant_ID\tchr\tpos\tAllel1\tAllel2\n"); + allelRecodingFileWriter.write("Variant_ID\tChr\tPos\tAllele1\tAllele2\n"); for(String s : snpRecodingInfo){ allelRecodingFileWriter.write(s+"\n"); } @@ -213,4 +207,21 @@ private void writeGenotypes(File genotypeDataFile, File imputedDosageDataFile, F 
allelRecodingFileWriter.close(); } } + + private String createTriTyperVariantId(GeneticVariant variant) { + final GeneticVariantId snpId = variant.getVariantId(); + String snpName; + if(snpId.containsId()){ + snpName = snpId.getPrimairyId(); + } else { + snpName = variant.getSequenceName() + ':' + String.valueOf(variant.getStartPos()); + if(!variant.isSnp()){ + for(Allele allele : variant.getVariantAlleles()){ + snpName = snpName + "_" + allele.getAlleleAsString(); + } + + } + } + return snpName; + } } From 7072cb98e50ad80e603d00575083fe534bef1834 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 2 Sep 2015 22:27:34 -0400 Subject: [PATCH 139/143] Many changes to interaction software --- .../EQTLInteractionAnalyser.java | 15 +- .../TestEQTLDatasetForInteractions.java | 161 +++++++++++++----- 2 files changed, 134 insertions(+), 42 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index 0eaf9aa63..aee80493e 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -83,6 +83,10 @@ public class EQTLInteractionAnalyser { OptionBuilder.withDescription("Preprocess the data"); OptionBuilder.withLongOpt("preprocess"); OPTIONS.addOption(OptionBuilder.create("p")); + + OptionBuilder.withDescription("Skip all normalization step. 
n must be 1"); + OptionBuilder.withLongOpt("noNormalization"); + OPTIONS.addOption(OptionBuilder.create("nn")); OptionBuilder.withArgName("strings"); OptionBuilder.hasArgs(); @@ -165,6 +169,7 @@ public static void main(String[] args) throws IOException, Exception { final String[] covariatesToTest; final File ensgAnnotationFile; final File snpsToTestFile; + final boolean skipNormalization; try { final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); @@ -197,7 +202,7 @@ public static void main(String[] args) throws IOException, Exception { if (commandLine.hasOption('a')) { annotationFile = commandLine.getOptionValue("a"); } - + if (commandLine.hasOption("cf")) { TextFile covFile = new TextFile(commandLine.getOptionValue("cf"), false); covariates = covFile.readAsArray(); @@ -239,6 +244,12 @@ else if (commandLine.hasOption("c")){ snpsToTestFile = null; } + skipNormalization = commandLine.hasOption("nn"); + if(skipNormalization && maxNumCovariatesToRegress != 1){ + System.err.println("n must be one if normalization is turned off"); + System.exit(-1); + } + if (commandLine.hasOption("is")){ File samplesToIncludeFile = new File(commandLine.getOptionValue("is")); System.out.println("Samples to include file: " + samplesToIncludeFile.getAbsolutePath()); @@ -287,7 +298,7 @@ else if (chi2sumDiff){ interactor.findChi2SumDifferences(maxNumCovariatesToRegress, startRoundCompareChi2, ensgAnnotationFile); } else { - new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile, permute, covariatesToTest, hashSamples, numThreads, cohorts, snpsToTestFile); + new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile, permute, covariatesToTest, hashSamples, numThreads, cohorts, snpsToTestFile, skipNormalization); } } diff --git 
a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index e4af277a4..29e438da2 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -35,6 +35,7 @@ import java.util.logging.Logger; import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.stat.descriptive.moment.Variance; import org.apache.commons.math3.stat.ranking.NaturalRanking; import org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression; import org.apache.mahout.math.Arrays; @@ -64,7 +65,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws //preprocessData(); } - public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File snpsToSwapFile, boolean permute, String[] covariatesToTest, HashMap hashSamples, int numThreads, String[] cohorts, File snpsToTestFile) throws IOException, Exception { + public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File snpsToSwapFile, boolean permute, String[] covariatesToTest, HashMap hashSamples, int numThreads, String[] cohorts, File snpsToTestFile, boolean skipNormalization) throws IOException, Exception { System.out.println("Input dir: " + inputDir); System.out.println("Output dir: " + outputDir); @@ -83,39 +84,41 @@ public 
TestEQTLDatasetForInteractions(String inputDir, String outputDir, String initGenotypes(permute, hashSamples, cohorts); HashMultimap qtlProbeSnpMultiMap = HashMultimap.create(); - final QTLTextFile eQtlFileReader = new QTLTextFile(eQTLfileName, false); - for (Iterator it = eQtlFileReader.getEQtlIterator(); it.hasNext();) { - EQTL qtl = it.next(); - qtlProbeSnpMultiMap.put(qtl.getProbe(), qtl.getRsName()); + if (eQTLfileName != null) { + final QTLTextFile eQtlFileReader = new QTLTextFile(eQTLfileName, false); + for (Iterator it = eQtlFileReader.getEQtlIterator(); it.hasNext();) { + EQTL qtl = it.next(); + qtlProbeSnpMultiMap.put(qtl.getProbe(), qtl.getRsName()); + } } if (annotationFile != null) { createGeneDistanceMap(annotationFile); } - + final TIntHashSet snpsToTest; - if(snpsToTestFile != null){ - + if (snpsToTestFile != null) { + snpsToTest = new TIntHashSet(); BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(snpsToTestFile), "UTF-8")); - - String line; + + String line; while ((line = reader.readLine()) != null) { Integer genotypeI = datasetGenotypes.hashProbes.get(line); - - if(genotypeI == null){ + + if (genotypeI == null) { System.out.println("SNP " + line + " not found in genotype data"); continue; } - - if(!snpsToTest.add(genotypeI)){ + + if (!snpsToTest.add(genotypeI)) { System.out.println("Warning including SNP twice: " + line); } - + } System.out.println("Confining testing to: " + snpsToTest.size() + " SNPs from: " + snpsToTestFile.getAbsolutePath()); - + } else { snpsToTest = null; } @@ -135,7 +138,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String String[] covsToCorrect = primaryCovsToCorrect; int cnt = 0; while (cnt < maxNumTopCovs) { - String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, outputTopCovs, snpsToSwapFile, qtlProbeSnpMultiMap, covariatesToTest, hashSamples, numThreads, snpsToTest); + String topCov = performInteractionAnalysis(covsToCorrect, 
covariatesToCorrect2, outputTopCovs, snpsToSwapFile, qtlProbeSnpMultiMap, covariatesToTest, hashSamples, numThreads, snpsToTest, skipNormalization); String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; for (int c = 0; c < covsToCorrect.length; c++) { covsToCorrectNew[c] = covsToCorrect[c]; @@ -147,14 +150,14 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String outputTopCovs.close(); } - private void initGenotypes(boolean permute, HashMap hashSamples, String[] cohorts){ - + private void initGenotypes(boolean permute, HashMap hashSamples, String[] cohorts) { + datasetGenotypes = new ExpressionDataset(inputDir + "/bigTableLude.txt.Genotypes.binary", '\t', null, hashSamples); - if (permute){ + if (permute) { System.out.println("WARNING: PERMUTING GENOTYPE DATA!!!!"); if (cohorts == null) { - cohorts = new String[] {"LLDeep", "LLS", "RS", "CODAM"}; + cohorts = new String[]{"LLDeep", "LLS", "RS", "CODAM"}; } int[] permSampleIDs = new int[datasetGenotypes.nrSamples]; for (int p = 0; p < cohorts.length; p++) { @@ -426,7 +429,7 @@ public void preprocessData() { } - public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, TextFile outputTopCovs, File snpsToSwapFile, HashMultimap qtlProbeSnpMultiMap, String[] covariatesToTest, HashMap hashSamples, int numThreads, final TIntHashSet snpsToTest) throws IOException, Exception { + public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, TextFile outputTopCovs, File snpsToSwapFile, HashMultimap qtlProbeSnpMultiMap, String[] covariatesToTest, HashMap hashSamples, int numThreads, final TIntHashSet snpsToTest, boolean skipNormalization) throws IOException, Exception { //hashSamples = excludeOutliers(hashSamples); @@ -455,20 +458,31 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression regression = new 
org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression(); int nrSamples = datasetGenotypes.nrSamples; - - correctExpressionData(covsToCorrect2, datasetGenotypes, datasetCovariates, datasetExpression); - correctDosageDirectionForQtl(snpsToSwapFile, datasetGenotypes, datasetExpression); + + if(skipNormalization){ + correctExpressionData(covsToCorrect2, datasetGenotypes, datasetCovariates, datasetExpression); + } + + ExpressionDataset datasetCovariatesPCAForceNormal = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", '\t', covariatesToLoad, hashSamples); - correctCovariateDataPCA(covsToCorrect2, covsToCorrect, datasetGenotypes, datasetCovariatesPCAForceNormal); + + if(skipNormalization){ + correctCovariateDataPCA(covsToCorrect2, covsToCorrect, datasetGenotypes, datasetCovariatesPCAForceNormal); + } if (1 == 1) { - correctCovariateData(covsToCorrect2, covsToCorrect, datasetGenotypes, datasetCovariates); + if (!skipNormalization && covsToCorrect2.length != 0 && covsToCorrect.length != 0) { + correctCovariateData(covsToCorrect2, covsToCorrect, datasetGenotypes, datasetCovariates); + } + + if (!skipNormalization && !qtlProbeSnpMultiMap.isEmpty()) { + correctCovariatesForQtls(datasetCovariates, datasetGenotypes, qtlProbeSnpMultiMap); + } - correctCovariatesForQtls(datasetCovariates, datasetGenotypes, qtlProbeSnpMultiMap); if (1 == 2) { saveCorrectedCovariates(datasetCovariates); @@ -477,7 +491,9 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] if (1 == 2) { icaCovariates(datasetCovariates); } - forceNormalCovariates(datasetCovariates, datasetGenotypes); + if(!skipNormalization){ + forceNormalCovariates(datasetCovariates, datasetGenotypes); + } } @@ -488,9 +504,19 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] } } - correctExpressionDataForInteractions(covsToCorrect, datasetCovariates, datasetGenotypes, nrSamples, datasetExpression, regression, 
qtlProbeSnpMultiMap); + if(!skipNormalization && covsToCorrect.length != 0){ + correctExpressionDataForInteractions(covsToCorrect, datasetCovariates, datasetGenotypes, nrSamples, datasetExpression, regression, qtlProbeSnpMultiMap); + } + + if(!skipNormalization){ + forceNormalExpressionData(datasetExpression); + } - forceNormalExpressionData(datasetExpression); + datasetExpression.save(outputDir + "/expressionDataRound_" + covsToCorrect.length + ".txt"); + datasetExpression.save(outputDir + "/expressionDataRound_" + covsToCorrect.length + ".binary"); + datasetCovariates.save(outputDir + "/covariateData_" + covsToCorrect.length + ".binary"); + + if (1 == 1) { @@ -522,6 +548,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] } String maxChi2Cov = ""; + int maxChi2CovI = 0; double maxChi2 = 0; try { // If gene annotation provided, for chi2sum calculation use only genes that are 1mb apart @@ -545,6 +572,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] if (chi2Sum > maxChi2 && !datasetCovariates.probeNames[cov].startsWith("Comp") && !datasetCovariates.probeNames[cov].equals("LLS") && !datasetCovariates.probeNames[cov].equals("LLdeep") && !datasetCovariates.probeNames[cov].equals("RS") && !datasetCovariates.probeNames[cov].equals("CODAM")) { maxChi2 = chi2Sum; + maxChi2CovI = cov; maxChi2Cov = datasetCovariates.probeNames[cov]; } //System.out.println(covsToCorrect.length + "\t" + cov + "\t" + datasetCovariates.probeNames[cov] + "\t" + chi2Sum); @@ -595,6 +623,16 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] skippedWriter.close(); datasetZScores.save(outputDir + "/InteractionZScoresMatrix-" + covsToCorrect.length + "Covariates.txt"); + BufferedWriter writer = new BufferedWriter(new FileWriter(outputDir + "/" + "topCov" + maxChi2Cov + "_expression.txt")); + double[] topCovExpression = datasetCovariates.rawData[maxChi2CovI]; + for (int i = 0; i < 
topCovExpression.length; ++i) { + writer.append(datasetCovariates.sampleNames[i]); + writer.append('\t'); + writer.append(String.valueOf(topCovExpression[i])); + writer.append('\n'); + } + writer.close(); + return maxChi2Cov; } @@ -901,14 +939,25 @@ private void correctCovariatesForQtls(ExpressionDataset datasetCovariates, Expre if (!probeQtls.isEmpty()) { - double[][] x = new double[datasetCovariates.nrSamples][probeQtls.size()]; + int snpsInData = 0; + for (String snp : probeQtls) { + + Integer s = snpMap.get(snp); + if (s != null) { + ++snpsInData; + } + + } + + double[][] x = new double[datasetCovariates.nrSamples][snpsInData]; int k = 0; for (String snp : probeQtls) { Integer s = snpMap.get(snp); if (s == null) { - throw new Exception("Snp " + snp + " not found"); + continue; + //throw new Exception("Snp " + snp + " not found"); } double[] snpData = datasetGenotypes.rawData[s]; for (int i = 0; i < datasetGenotypes.nrSamples; ++i) { @@ -1330,7 +1379,7 @@ private void forceNormalCovariates(ExpressionDataset datasetCovariates, Expressi private void correctExpressionDataForInteractions(String[] covsToCorrect, ExpressionDataset datasetCovariates, ExpressionDataset datasetGenotypes, int nrSamples, ExpressionDataset datasetExpression, OLSMultipleLinearRegression regression, HashMultimap qtlProbeSnpMultiMap) throws MathIllegalArgumentException, Exception { - System.out.println("Correcting expression data for predefined gene environment interaction effects (GC content, Gender, 5'Median Bias, 3'Median Bias):"); + System.out.println("Correcting expression data for predefined gene environment interaction effects: " + Arrays.toString(covsToCorrect)); int[] covsToCorrectIndex = new int[covsToCorrect.length]; for (int c = 0; c < covsToCorrect.length; c++) { covsToCorrectIndex[c] = ((Integer) datasetCovariates.hashProbes.get(covsToCorrect[c])).intValue(); @@ -1347,6 +1396,8 @@ private void correctExpressionDataForInteractions(String[] covsToCorrect, Expres } } + Variance v = 
new Variance(); + for (int p = 0; p < datasetExpression.nrProbes; p++) { String probe = datasetExpression.probeNames[p].substring(0, datasetExpression.probeNames[p].indexOf('_')); @@ -1356,14 +1407,40 @@ private void correctExpressionDataForInteractions(String[] covsToCorrect, Expres throw new Exception("No eQTLs found for: " + probe); } + int snpsInData = 0; + HashSet excludedSnps = new HashSet(); + for (String snp : probeQtls) { + + Integer s = snpMap.get(snp); + if (s != null) { + + + + if (v.evaluate(datasetGenotypes.rawData[s]) > 0) { + ++snpsInData; + } else { + excludedSnps.add(snp); + } + + } + + + + } + //boolean foundPisS = false; - double[][] valsX = new double[nrSamples][probeQtls.size() + covsToCorrect.length * 2]; //store genotypes, covariates, interactions + double[][] valsX = new double[nrSamples][snpsInData + covsToCorrect.length * 2]; //store genotypes, covariates, interactions int k = 0; for (String snp : probeQtls) { + if (excludedSnps.contains(snp)) { + continue; + } + Integer s = snpMap.get(snp); if (s == null) { - throw new Exception("Snp " + snp + " not found"); + //throw new Exception("Snp " + snp + " not found"); + continue; } // if(s.intValue() == p){ // foundPisS = true; @@ -1388,15 +1465,19 @@ private void correctExpressionDataForInteractions(String[] covsToCorrect, Expres for (int c = 0; c < covsToCorrect.length; c++) { double[] covData = datasetCovariates.rawData[covsToCorrectIndex[c]]; double[] snpData = datasetGenotypes.rawData[p]; - + for (int s = 0; s < nrSamples; s++) { - valsX[s][c * 2 + probeQtls.size()] = covData[s]; //covariate - valsX[s][c * 2 + probeQtls.size() + 1] = snpData[s] * covData[s]; //interction + valsX[s][c * 2 + snpsInData] = covData[s]; //covariate + valsX[s][c * 2 + snpsInData + 1] = snpData[s] * covData[s]; //interction } } double[] valsY = datasetExpression.rawData[p]; regression.newSampleData(valsY, valsX); - datasetExpression.rawData[p] = regression.estimateResiduals(); + try { + 
datasetExpression.rawData[p] = regression.estimateResiduals(); + } catch (Exception up) { + System.err.println("Error correcting for interactions: " + probe + " - " + datasetGenotypes.probeNames[p]); + } } } From 0d6800113f742f1f0241b8f35279f356c496a417 Mon Sep 17 00:00:00 2001 From: Niek de Klein Date: Fri, 18 Sep 2015 14:00:50 +0200 Subject: [PATCH 140/143] add example genotypeHarmonizer tritype conversion --- cellTypeSpecificAlleleSpecificExpression/README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cellTypeSpecificAlleleSpecificExpression/README.md b/cellTypeSpecificAlleleSpecificExpression/README.md index 3bb337da1..fcccf5c8c 100644 --- a/cellTypeSpecificAlleleSpecificExpression/README.md +++ b/cellTypeSpecificAlleleSpecificExpression/README.md @@ -103,10 +103,16 @@ The sub-module ASreads accepts two types of file formats: 2. VCF The TriTyper format is considerably faster to read than the VCF at this moment, therefore this guide will continue with the TriTyper format. -Conversion of the genotype format into TriTyper can be done using the [Genotype Harmonizer](https://github.com/molgenis/systemsgenetics/wiki/Genotype-Harmonizer) +Conversion of the genotype format into TriTyper can be done using the [Genotype Harmonizer](https://github.com/molgenis/systemsgenetics/wiki/Genotype-Harmonizer). This guide refers to their wiki for the conversion of your genotype format into TriTyper format. 
For example purposes, the directory containing TriTyper information will be set to the following: `Suzie-Peter_Genotype/` +``` +bgzip -c Suzie-RNAseq1.vcf > Suzie-RNAseq1.vcf.gz +tabix -p vcf Suzie-RNAseq1.vcf.gz +sh GenotypeHarmonizer.sh --input Suzie-RNAseq1.vcf.gz --outputType TRITYPER --output Suzie-Peter_Genotype/ +``` + **Creating a coupling file** From 2478a1775216abd9e3aa82e38a2bd40b9c0c2884 Mon Sep 17 00:00:00 2001 From: Niek de Klein Date: Fri, 18 Sep 2015 14:13:45 +0200 Subject: [PATCH 141/143] samtools index output to .bam.bai instead of .bam.idx --- cellTypeSpecificAlleleSpecificExpression/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellTypeSpecificAlleleSpecificExpression/README.md b/cellTypeSpecificAlleleSpecificExpression/README.md index fcccf5c8c..202f4d646 100644 --- a/cellTypeSpecificAlleleSpecificExpression/README.md +++ b/cellTypeSpecificAlleleSpecificExpression/README.md @@ -89,7 +89,7 @@ you can then index the bam using the following command in your terminal: ``` samtools index Suzie-RNAseq1.bam ``` -A file named: `Suzie-RNAseq1.bam.idx` will be add in the working directory, +A file named: `Suzie-RNAseq1.bam.bai` will be added in the working directory, this contains the index for the specific bam. Now the bam file can be used for reading by the ASreads sub-module. Please index all the bam files that you want to use for analysis. 
From f5ea2958273f6be2d91ad9109ccab26234238dc5 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Tue, 22 Sep 2015 10:19:13 -0400 Subject: [PATCH 142/143] Interactions --- .../EQTLInteractionAnalyser.java | 29 +++++++++++- .../TestEQTLDatasetForInteractions.java | 45 ++++++++++++------- 2 files changed, 56 insertions(+), 18 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index aee80493e..8c3b4fc6e 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -49,6 +49,12 @@ public class EQTLInteractionAnalyser { OptionBuilder.withDescription("Path to the eQTL file to test for interactions"); OptionBuilder.withLongOpt("eqtls"); OPTIONS.addOption(OptionBuilder.create("e")); + + OptionBuilder.withArgName("path"); + OptionBuilder.hasArg(); + OptionBuilder.withDescription("Path to the eQTL file to correct covariates"); + OptionBuilder.withLongOpt("eqtlsCovariates"); + OPTIONS.addOption(OptionBuilder.create("ec")); OptionBuilder.withArgName("path"); OptionBuilder.hasArg(); @@ -84,6 +90,10 @@ public class EQTLInteractionAnalyser { OptionBuilder.withLongOpt("preprocess"); OPTIONS.addOption(OptionBuilder.create("p")); + OptionBuilder.withDescription("Convert matrix"); + OptionBuilder.withLongOpt("convertMatrix"); + OPTIONS.addOption(OptionBuilder.create("cm")); + OptionBuilder.withDescription("Skip all normalization step. 
n must be 1"); OptionBuilder.withLongOpt("noNormalization"); OPTIONS.addOption(OptionBuilder.create("nn")); @@ -170,6 +180,8 @@ public static void main(String[] args) throws IOException, Exception { final File ensgAnnotationFile; final File snpsToTestFile; final boolean skipNormalization; + final boolean convertMatrix; + final String eqtlFileCovariates; try { final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false); @@ -180,6 +192,10 @@ public static void main(String[] args) throws IOException, Exception { if (commandLine.hasOption('e')) { eqtlFile = commandLine.getOptionValue("e"); } + + + eqtlFileCovariates = commandLine.getOptionValue("ec", null); + if (commandLine.hasOption('n')) { maxNumCovariatesToRegress = Integer.parseInt(commandLine.getOptionValue("n")); } @@ -190,6 +206,7 @@ public static void main(String[] args) throws IOException, Exception { chi2sumDiff = commandLine.hasOption("dif"); permute = commandLine.hasOption("perm"); preproces = commandLine.hasOption("p"); + convertMatrix = commandLine.hasOption("cm"); if (commandLine.hasOption('s')) { startRoundCompareChi2 = Integer.parseInt(commandLine.getOptionValue("s")); @@ -296,9 +313,17 @@ else if (commandLine.hasOption("c")){ else if (chi2sumDiff){ TestEQTLDatasetForInteractions interactor = new TestEQTLDatasetForInteractions(inputDir, outputDir); interactor.findChi2SumDifferences(maxNumCovariatesToRegress, startRoundCompareChi2, ensgAnnotationFile); - } + } else if (convertMatrix){ + System.out.println("input file: " + inputDir); + System.out.println("output file: " + outputDir); + if(inputDir.equals(outputDir)){ + System.err.println("input == output"); + System.exit(1); + } + new ExpressionDataset(inputDir).save(outputDir); + } else { - new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile, permute, covariatesToTest, hashSamples, numThreads, cohorts, snpsToTestFile, skipNormalization); + 
new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile, permute, covariatesToTest, hashSamples, numThreads, cohorts, snpsToTestFile, skipNormalization, eqtlFileCovariates); } } diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 29e438da2..139ab55a4 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -65,14 +65,16 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws //preprocessData(); } - public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File snpsToSwapFile, boolean permute, String[] covariatesToTest, HashMap hashSamples, int numThreads, String[] cohorts, File snpsToTestFile, boolean skipNormalization) throws IOException, Exception { + public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File snpsToSwapFile, boolean permute, String[] covariatesToTest, HashMap hashSamples, int numThreads, String[] cohorts, File snpsToTestFile, boolean skipNormalization, String eQTLfileNameCovariates) throws IOException, Exception { System.out.println("Input dir: " + inputDir); System.out.println("Output dir: " + outputDir); System.out.println("eQTL file: " + eQTLfileName); + 
System.out.println("eQTL file covariates: " + eQTLfileNameCovariates); System.out.println("Maximum number of covariates to regress out: " + maxNumTopCovs); System.out.println("Covariates to correct for with interaction: " + Arrays.toString(covariatesToCorrect)); System.out.println("Covariates to correct for without interaction: " + Arrays.toString(covariatesToCorrect2)); + System.out.println("Covariates to test: " + Arrays.toString(covariatesToTest)); this.inputDir = inputDir; this.outputDir = outputDir; @@ -83,7 +85,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String initGenotypes(permute, hashSamples, cohorts); - HashMultimap qtlProbeSnpMultiMap = HashMultimap.create(); + final HashMultimap qtlProbeSnpMultiMap = HashMultimap.create(); if (eQTLfileName != null) { final QTLTextFile eQtlFileReader = new QTLTextFile(eQTLfileName, false); for (Iterator it = eQtlFileReader.getEQtlIterator(); it.hasNext();) { @@ -91,6 +93,18 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String qtlProbeSnpMultiMap.put(qtl.getProbe(), qtl.getRsName()); } } + + final HashMultimap qtlProbeSnpMultiMapCovariates; + if(eQTLfileNameCovariates != null){ + qtlProbeSnpMultiMapCovariates = HashMultimap.create(); + final QTLTextFile eQtlFileReader = new QTLTextFile(eQTLfileNameCovariates, false); + for (Iterator it = eQtlFileReader.getEQtlIterator(); it.hasNext();) { + EQTL qtl = it.next(); + qtlProbeSnpMultiMapCovariates.put(qtl.getProbe(), qtl.getRsName()); + } + } else { + qtlProbeSnpMultiMapCovariates = qtlProbeSnpMultiMap; + } if (annotationFile != null) { createGeneDistanceMap(annotationFile); @@ -138,7 +152,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String String[] covsToCorrect = primaryCovsToCorrect; int cnt = 0; while (cnt < maxNumTopCovs) { - String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, outputTopCovs, snpsToSwapFile, qtlProbeSnpMultiMap, covariatesToTest, 
hashSamples, numThreads, snpsToTest, skipNormalization); + String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, outputTopCovs, snpsToSwapFile, qtlProbeSnpMultiMap, covariatesToTest, hashSamples, numThreads, snpsToTest, skipNormalization, qtlProbeSnpMultiMapCovariates); String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; for (int c = 0; c < covsToCorrect.length; c++) { covsToCorrectNew[c] = covsToCorrect[c]; @@ -163,16 +177,16 @@ private void initGenotypes(boolean permute, HashMap hashSamples, String[] cohort for (int p = 0; p < cohorts.length; p++) { Vector vecSamples = new Vector(); for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { + //if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { vecSamples.add(s); - } + //} } - int nrSamplesThisCohort = vecSamples.size(); + for (int s = 0; s < datasetGenotypes.nrSamples; s++) { - if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { + //if (datasetGenotypes.sampleNames[s].startsWith(cohorts[p])) { int randomSample = ((Integer) vecSamples.remove((int) ((double) vecSamples.size() * Math.random()))).intValue(); permSampleIDs[s] = randomSample; - } + //} } } @@ -334,7 +348,7 @@ public void findChi2SumDifferences(int maxNumRegressedCovariates, int numPrimary chi2Sum += covariateData[gene] * covariateData[gene]; } - if (chi2Sum > topCovChi2) { + if (chi2Sum > topCovChi2 && !dataset.probeNames[covariate].startsWith("Comp") && !dataset.probeNames[covariate].equals("LLS") && !dataset.probeNames[covariate].equals("LLdeep") && !dataset.probeNames[covariate].equals("RS") && !dataset.probeNames[covariate].equals("CODAM")) { topCovChi2 = chi2Sum; topCov = dataset.probeNames[covariate]; topCovI = covariate; @@ -429,8 +443,7 @@ public void preprocessData() { } - public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, TextFile outputTopCovs, File snpsToSwapFile, HashMultimap 
qtlProbeSnpMultiMap, String[] covariatesToTest, HashMap hashSamples, int numThreads, final TIntHashSet snpsToTest, boolean skipNormalization) throws IOException, Exception { - + public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, TextFile outputTopCovs, File snpsToSwapFile, HashMultimap qtlProbeSnpMultiMap, String[] covariatesToTest, HashMap hashSamples, int numThreads, final TIntHashSet snpsToTest, boolean skipNormalization, HashMultimap qtlProbeSnpMultiMapCovariates) throws IOException, Exception { //hashSamples = excludeOutliers(hashSamples); @@ -460,7 +473,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] correctDosageDirectionForQtl(snpsToSwapFile, datasetGenotypes, datasetExpression); - if(skipNormalization){ + if(!skipNormalization){ correctExpressionData(covsToCorrect2, datasetGenotypes, datasetCovariates, datasetExpression); } @@ -468,7 +481,7 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] ExpressionDataset datasetCovariatesPCAForceNormal = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", '\t', covariatesToLoad, hashSamples); - if(skipNormalization){ + if(!skipNormalization){ correctCovariateDataPCA(covsToCorrect2, covsToCorrect, datasetGenotypes, datasetCovariatesPCAForceNormal); } @@ -479,8 +492,8 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] correctCovariateData(covsToCorrect2, covsToCorrect, datasetGenotypes, datasetCovariates); } - if (!skipNormalization && !qtlProbeSnpMultiMap.isEmpty()) { - correctCovariatesForQtls(datasetCovariates, datasetGenotypes, qtlProbeSnpMultiMap); + if (!skipNormalization && !qtlProbeSnpMultiMapCovariates.isEmpty()) { + correctCovariatesForQtls(datasetCovariates, datasetGenotypes, qtlProbeSnpMultiMapCovariates); } @@ -1400,7 +1413,7 @@ private void correctExpressionDataForInteractions(String[] covsToCorrect, Expres for (int p = 0; p < 
datasetExpression.nrProbes; p++) { - String probe = datasetExpression.probeNames[p].substring(0, datasetExpression.probeNames[p].indexOf('_')); + String probe = datasetExpression.probeNames[p].substring(0, datasetExpression.probeNames[p].lastIndexOf('_')); Set probeQtls = qtlProbeSnpMultiMap.get(probe); if (probeQtls.isEmpty()) { From 1dfabae8699096db433f91c610e0a6685f69401a Mon Sep 17 00:00:00 2001 From: Dasha Zhernakova Date: Fri, 25 Sep 2015 13:23:55 +0200 Subject: [PATCH 143/143] added an option to skip covariate normalization --- .../EQTLInteractionAnalyser.java | 13 ++++++++++++- .../TestEQTLDatasetForInteractions.java | 16 +++++++++------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java index 2c19883f4..aa7e62ad3 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/EQTLInteractionAnalyser.java @@ -98,6 +98,10 @@ public class EQTLInteractionAnalyser { OptionBuilder.withLongOpt("noNormalization"); OPTIONS.addOption(OptionBuilder.create("nn")); + OptionBuilder.withDescription("Skip covariate normalization step. 
n must be 1"); + OptionBuilder.withLongOpt("noCovNormalization"); + OPTIONS.addOption(OptionBuilder.create("ncn")); + OptionBuilder.withArgName("strings"); OptionBuilder.hasArgs(); OptionBuilder.withDescription("covariates to correct for using an interaction term before running the interaction analysis"); @@ -186,6 +190,7 @@ public static void main(String[] args) throws IOException, Exception { final File ensgAnnotationFile; final File snpsToTestFile; final boolean skipNormalization; + final boolean skipCovariateNormalization; final boolean convertMatrix; final String eqtlFileCovariates; @@ -277,6 +282,12 @@ else if (commandLine.hasOption("c")){ System.err.println("n must be one if normalization is turned off"); System.exit(-1); } + + skipCovariateNormalization = commandLine.hasOption("ncn"); + if(skipCovariateNormalization && maxNumCovariatesToRegress != 1){ + System.err.println("n must be one if covariate normalization is turned off"); + System.exit(-1); + } if (commandLine.hasOption("is")){ File samplesToIncludeFile = new File(commandLine.getOptionValue("is")); @@ -334,7 +345,7 @@ else if (chi2sumDiff){ new ExpressionDataset(inputDir).save(outputDir); } else { - new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile, permute, covariatesToTest, hashSamples, numThreads, cohorts, snpsToTestFile, skipNormalization, eqtlFileCovariates); + new TestEQTLDatasetForInteractions(inputDir, outputDir, eqtlFile, maxNumCovariatesToRegress, annotationFile, covariates, covariates2, snpsToSwapFile, permute, covariatesToTest, hashSamples, numThreads, cohorts, snpsToTestFile, skipNormalization, skipCovariateNormalization, eqtlFileCovariates); } } diff --git a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java 
b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java index 3f5444214..9611fa474 100644 --- a/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java +++ b/eQTLInteractionAnalyser/src/main/java/nl/systemsgenetics/eqtlinteractionanalyser/eqtlinteractionanalyser/TestEQTLDatasetForInteractions.java @@ -65,7 +65,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir) throws //preprocessData(); } - public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File snpsToSwapFile, boolean permute, String[] covariatesToTest, HashMap hashSamples, int numThreads, String[] cohorts, File snpsToTestFile, boolean skipNormalization, String eQTLfileNameCovariates) throws IOException, Exception { + public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String eQTLfileName, int maxNumTopCovs, String annotationFile, String[] covariatesToCorrect, String[] covariatesToCorrect2, File snpsToSwapFile, boolean permute, String[] covariatesToTest, HashMap hashSamples, int numThreads, String[] cohorts, File snpsToTestFile, boolean skipNormalization, boolean skipCovariateNormalization, String eQTLfileNameCovariates) throws IOException, Exception { System.out.println("Input dir: " + inputDir); System.out.println("Output dir: " + outputDir); @@ -74,7 +74,9 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String System.out.println("Maximum number of covariates to regress out: " + maxNumTopCovs); System.out.println("Covariates to correct for with interaction: " + Arrays.toString(covariatesToCorrect)); System.out.println("Covariates to correct for without interaction: " + Arrays.toString(covariatesToCorrect2)); - 
System.out.println("Covariates to test: " + Arrays.toString(covariatesToTest)); + if (covariatesToTest != null) { + System.out.println("Covariates to test: " + Arrays.toString(covariatesToTest)); + } this.inputDir = inputDir; this.outputDir = outputDir; @@ -152,7 +154,7 @@ public TestEQTLDatasetForInteractions(String inputDir, String outputDir, String String[] covsToCorrect = primaryCovsToCorrect; int cnt = 0; while (cnt < maxNumTopCovs) { - String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, outputTopCovs, snpsToSwapFile, qtlProbeSnpMultiMap, covariatesToTest, hashSamples, numThreads, snpsToTest, skipNormalization, qtlProbeSnpMultiMapCovariates); + String topCov = performInteractionAnalysis(covsToCorrect, covariatesToCorrect2, outputTopCovs, snpsToSwapFile, qtlProbeSnpMultiMap, covariatesToTest, hashSamples, numThreads, snpsToTest, skipNormalization, skipCovariateNormalization, qtlProbeSnpMultiMapCovariates); String[] covsToCorrectNew = new String[covsToCorrect.length + 1]; for (int c = 0; c < covsToCorrect.length; c++) { covsToCorrectNew[c] = covsToCorrect[c]; @@ -448,7 +450,7 @@ public void preprocessData() { } - public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, TextFile outputTopCovs, File snpsToSwapFile, HashMultimap qtlProbeSnpMultiMap, String[] covariatesToTest, HashMap hashSamples, int numThreads, final TIntHashSet snpsToTest, boolean skipNormalization, HashMultimap qtlProbeSnpMultiMapCovariates) throws IOException, Exception { + public final String performInteractionAnalysis(String[] covsToCorrect, String[] covsToCorrect2, TextFile outputTopCovs, File snpsToSwapFile, HashMultimap qtlProbeSnpMultiMap, String[] covariatesToTest, HashMap hashSamples, int numThreads, final TIntHashSet snpsToTest, boolean skipNormalization, boolean skipCovariateNormalization, HashMultimap qtlProbeSnpMultiMapCovariates) throws IOException, Exception { //hashSamples = excludeOutliers(hashSamples); @@ 
-486,18 +488,18 @@ public final String performInteractionAnalysis(String[] covsToCorrect, String[] ExpressionDataset datasetCovariatesPCAForceNormal = new ExpressionDataset(inputDir + "/covariateTableLude.txt.Covariates.binary", '\t', covariatesToLoad, hashSamples); - if(!skipNormalization){ + if(!skipNormalization && !skipCovariateNormalization){ correctCovariateDataPCA(covsToCorrect2, covsToCorrect, datasetGenotypes, datasetCovariatesPCAForceNormal); } if (1 == 1) { - if (!skipNormalization && covsToCorrect2.length != 0 && covsToCorrect.length != 0) { + if (!skipNormalization && !skipCovariateNormalization && covsToCorrect2.length != 0 && covsToCorrect.length != 0) { correctCovariateData(covsToCorrect2, covsToCorrect, datasetGenotypes, datasetCovariates); } - if (!skipNormalization && !qtlProbeSnpMultiMapCovariates.isEmpty()) { + if (!skipNormalization && !skipCovariateNormalization && !qtlProbeSnpMultiMapCovariates.isEmpty()) { correctCovariatesForQtls(datasetCovariates, datasetGenotypes, qtlProbeSnpMultiMapCovariates); }