Do not pass pandas object to count script as divergent pandas versions between host process and script might cause problems with deserialization (#12)

* Do not pass a pandas object to the count script, as divergent pandas versions between the host process and the script might cause problems with deserialization.
* Do not check names when reading the sample sheet.
* Pin the STAR version when creating the index.
* Use plain Python lists and strings.
* Fixes.
* Remove debug output.
snakemake-workflows · Mar 4, 2019 · c0a1740 · c0a1740
1 parent badcaa9
commit c0a1740
Show file tree

Hide file tree

Showing 4 changed files with 5 additions and 7 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -16,7 +16,7 @@ install:
   - conda config --add channels conda-forge
   - conda config --add channels bioconda
   - conda create -q -n snakemake snakemake>=5.1.2 python=$TRAVIS_PYTHON_VERSION
-  - conda create -q -n star star
+  - conda create -q -n star star=2.5.3a
   - source activate star
   # create STAR index (too big to live in git)
   - mkdir .test/data/ref/index

diff --git a/rules/diffexp.smk b/rules/diffexp.smk
@@ -4,7 +4,7 @@ rule count_matrix:
     output:
         "counts/all.tsv"
     params:
-        units=units
+        samples=units["sample"].tolist()
     conda:
         "../envs/pandas.yaml"
     script:

diff --git a/scripts/count-matrix.py b/scripts/count-matrix.py
@@ -3,12 +3,11 @@
 counts = [pd.read_table(f, index_col=0, usecols=[0, 1], header=None, skiprows=4)
           for f in snakemake.input]
 
-for t, (sample, unit) in zip(counts, snakemake.params.units.index):
+for t, sample in zip(counts, snakemake.params.samples):
     t.columns = [sample]
 
 matrix = pd.concat(counts, axis=1)
 matrix.index.name = "gene"
 # collapse technical replicates
 matrix = matrix.groupby(matrix.columns, axis=1).sum()
-print(matrix)
 matrix.to_csv(snakemake.output[0], sep="\t")
diff --git a/scripts/deseq2-init.R b/scripts/deseq2-init.R
@@ -14,9 +14,8 @@ if (snakemake@threads > 1) {
 
 # colData and countData must have the same sample order, but this is ensured
 # by the way we create the count matrix
-cts <- read.table(snakemake@input[["counts"]], header=TRUE, row.names="gene")
-coldata <- read.table(snakemake@params[["samples"]], header=TRUE, row.names="sample")
-print(cts)
+cts <- read.table(snakemake@input[["counts"]], header=TRUE, row.names="gene", check.names=FALSE)
+coldata <- read.table(snakemake@params[["samples"]], header=TRUE, row.names="sample", check.names=FALSE)
 
 dds <- DESeqDataSetFromMatrix(countData=cts,
                               colData=coldata,