Skip to content

Commit

Permalink
Upgrade clip, remove cython dependancy from treemaker, update setup s…
Browse files Browse the repository at this point in the history
…cript
  • Loading branch information
pfh committed Jun 11, 2012
1 parent 87bf3c5 commit 4788d8f
Show file tree
Hide file tree
Showing 24 changed files with 832 additions and 824 deletions.
4 changes: 4 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@

0.73 - heatmap: slightly smarter
clip: converted to new style
treemaker no longer depends on Cython

0.72 - Tools now log command line invocation
import: can import BAM files
shrimp: shrimp-options: -h now works again
Expand Down
4 changes: 2 additions & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
global-include *.py *.pyx
exclude release.py
exclude nesoni2
exclude nesoni_vbc
include nesoni-script
include nesoni_scripts/*
include README
include CHANGES
include COPYING
Expand Down
11 changes: 6 additions & 5 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,14 @@ suitable for phylogenetic analysis in SplitsTree4.
Python 2.6 or higher. Use of PyPy where possible is highly recommended
for performance.

Required Python libraries:
* BioPython (compiled modules may need to be disabled when installing in PyPy)
Recommended Python libraries:
* BioPython
- for reading GenBank files
- compiled modules may need to be disabled when installing in PyPy

Optional Python libraries (used by non-core nesoni tools)
* matplotlib
* Cython
* Numpy
* numpy

External programs:
* SHRiMP
Expand Down Expand Up @@ -69,7 +70,7 @@ for usage instructions.

nesoni can also be used without installing:

./nesoni_scripts/nesoni
./nesoni-script


~ Consensus policy
Expand Down
35 changes: 35 additions & 0 deletions nesoni-r/R/basic.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@

basic.seq <- function(n) {
# Count from 1 to n inclusive without being clever

if (n < 1)
numeric(0)
else
1:n
}

basic.subset <- function(object, rows) {
# Get rows by boolean vector

stopifnot(is.logical(rows))

# Don't tile selection
stopifnot( length(rows) == nrow(object) )

# Don't drop down to vector if single row selected
object[rows,,drop=FALSE]
}

basic.take <- function(object, rows) {
#Get rows by index

stopifnot(is.numeric(rows))
stopifnot(all(rows >= 1))
stopifnot(all(rows <= nrow(object)))

object[rows,,drop=FALSE]
}

basic.image <- function(x,y,z, col, breaks) {
image(x=x, y=y, z=z,col=col,breaks=breaks,xaxt="n",yaxt="n",xlab='',ylab='')
}
189 changes: 131 additions & 58 deletions nesoni-r/R/plotting.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@ put.plot <- function(px1,px2,py1,py2) {
)
}

basic.image <- function(x,y,z, col, breaks) {
image(x=x, y=y, z=z,col=col,breaks=breaks,xaxt="n",yaxt="n",xlab='',ylab='')
}

color.legend <- function(col, breaks, title='') {
basic.image(x=breaks,y=c(0),z=matrix(breaks,ncol=1), col=col,breaks=breaks)
axis(1, las=2)
Expand All @@ -35,24 +31,14 @@ dendrogram.paths <- function(dend) {
}
}

basic.seq <- function(n) {
# Count from 1 to n inclusive without being clever

if (n < 1)
numeric(0)
else
1:n
}

do.dendrogram <- function(mat) {
if (nrow(mat) < 3) {
do.dendrogram <- function(mat, enable=TRUE) {
if (nrow(mat) < 3 || !enable) {
list(
dendrogram = NA,
order = basic.seq(nrow(mat)),
paths = rep('',nrow(mat))
)
} else {

dist.mat <- dist(mat)
control <- list(hclust = hclust(dist.mat))
dend.mat <- as.dendrogram(
Expand All @@ -66,53 +52,136 @@ do.dendrogram <- function(mat) {
}
}

trim.labels <- function(labels) {
labels <- as.character(labels)
n <- max(10, ceiling( mean(nchar(labels)) * 3.0 ))

for(i in basic.seq(length(labels)))
if (nchar(labels[i]) > n) {
m <- n
for(j in 1:n)
if (any(substr(labels[i],j,j) == c(' ',',',';')))
m <- j
labels[i] <- sprintf("%s...",substr(labels[i],1,m))
}

labels
}



nesoni.heatmap <- function(x, reorder.columns=FALSE,dist.row=NA,dist.col=NA, ...) {
if(!is.matrix(x)) x <- as.matrix(x)

if (all(is.na(dist.row)))
dist.row <- dist( t(scale(t(x))) )
nesoni.heatmap <- function(mat, labels, reorder.columns=FALSE) {
n.rows <- nrow(mat)
n.cols <- ncol(mat)

if (all(is.na(dist.col)) && reorder.columns)
dist.col <- dist( scale(t(x)) )
dend.row <- do.dendrogram( t(scale(t(mat))) )
dend.col <- do.dendrogram( scale(t(mat)), enable=reorder.columns )

method <- 'OLO'

if (reorder.columns) {
control <- list(hclust = hclust(dist.col))
dend.col <- as.dendrogram(
seriation::seriate(dist.col,
method = method, control = control)[[1]])
dend <- 'both'
} else {
dend.col <- NA
dend <- 'row'
if (n.rows < 2 || n.cols < 2) {
plot.new()
title(sprintf('Can\'t plot %d x %d heatmap', n.rows, n.cols))
} else {
dend.row.x1 <- 1/90
dend.row.x2 <- 1/9

dend.col.y1 <- 1.0 - dend.row.x2 * aspect.ratio()
dend.col.y2 <- 1.0 - (1.0-dend.col.y1)*0.1

y1 <- 4 / par()$fin[2]
y2 <- dend.col.y1

x1 <- dend.row.x2
x2 <- 1/3

legend.y2 <- y1*0.75
legend.y1 <- legend.y2 - 0.03*aspect.ratio()
legend.x1 <- 15/30
legend.x2 <- 18/30

col <- signed.col
extreme <- max(0.0,abs(mat),na.rm=TRUE)
breaks <- seq(-extreme,extreme, length=length(col)+1)

plot.new()

if (!all(is.na(dend.col$dendrogram))) {
put.plot(x1,x2, dend.col.y1,dend.col.y2)
plot(dend.col$dendrogram, horiz=FALSE, axes=FALSE, yaxs="i", leaflab="none")
}

if (!all(is.na(dend.row$dendrogram))) {
put.plot(dend.row.x1,dend.row.x2, y1,y2)
plot(dend.row$dendrogram, horiz=TRUE, axes=FALSE, yaxs="i", leaflab="none")
}

put.plot(x1,x2, y1,y2)
basic.image(1:n.cols,1:n.rows, t(mat[dend.row$order,dend.col$order,drop=FALSE]), col=col, breaks=breaks)
axis(1, at=1:n.cols, labels=colnames(mat), las=2, tick=FALSE)

line <- 0
for(i in basic.seq(length(labels))) {
if (i < length(labels))
l <- trim.labels(labels[[i]])
else
l <- as.character(labels[[i]])
axis(4, at=1:n.rows, labels=l[dend.row$order], las=2, tick=FALSE, line=line)
line <- line + 0.45 * max(0,nchar(l))
}

put.plot(legend.x1,legend.x2, legend.y1,legend.y2)
color.legend(col, breaks, 'log2 expression\ndifference from row average\n')
}

control <- list(hclust = hclust(dist.row))
dend.row <- as.dendrogram(
seriation::seriate(dist.row,
method = method, control = control)[[1]])

result <- gplots::heatmap.2(x, Colv = dend.col, Rowv = dend.row, dendrogram = dend,
scale = "none", trace='none', density.info='none',
lwid=c(2,par("din")[1]-2), lhei=c(1.5,par("din")[2]-1.5),
cexCol=1.0,
...)

invisible(result)
}
list(
dend.row = dend.row,
dend.col = dend.col
)
}

#nesoni.heatmap <- function(x, reorder.columns=FALSE,dist.row=NA,dist.col=NA, ...) {
#if(!is.matrix(x)) x <- as.matrix(x)
#
#if (all(is.na(dist.row)))
# dist.row <- dist( t(scale(t(x))) )
#
#if (all(is.na(dist.col)) && reorder.columns)
# dist.col <- dist( scale(t(x)) )
#
#method <- 'OLO'
#
#if (reorder.columns) {
# control <- list(hclust = hclust(dist.col))
# dend.col <- as.dendrogram(
# seriation::seriate(dist.col,
# method = method, control = control)[[1]])
# dend <- 'both'
#} else {
# dend.col <- NA
# dend <- 'row'
#}
#
#control <- list(hclust = hclust(dist.row))
#dend.row <- as.dendrogram(
# seriation::seriate(dist.row,
# method = method, control = control)[[1]])
#
#result <- gplots::heatmap.2(x, Colv = dend.col, Rowv = dend.row, dendrogram = dend,
# scale = "none", trace='none', density.info='none',
# lwid=c(2,par("din")[1]-2), lhei=c(1.5,par("din")[2]-1.5),
# cexCol=1.0,
# ...)
#
#invisible(result)
#}

unsigned.col <- hsv(h=seq(0.95,1.15, length.out=256)%%1.0, v=seq(0,1, length.out=256)**0.5,s=seq(1,0,length.out=256)**0.5)
signed.col <- hsv(h=(sign(seq(-1.0,1.0, length.out=256))*0.2+0.8)%%1.0, v=1,s=abs(seq(-1,1,length.out=256)))

hmap.elist <- function(filename.prefix, elist,
min.sd=0.0, min.span=0.0, min.svd=0.0, svd.rank=NULL,
annotation=c('gene', 'product'),
res=150, row.labels=NA, margins=c(20,20),
main='log2 expression\ndifference from\nrow average', ...) {
res=150, row.labels=NA,
reorder.columns = FALSE) {
keep <- rep(TRUE, nrow(elist$E))

if (min.sd > 0.0) {
Expand All @@ -130,30 +199,33 @@ hmap.elist <- function(filename.prefix, elist,
svd.rank <- ncol(elist$E)-1
s <- svd(t(scale(t(elist$E), center=TRUE,scale=FALSE)), nu=svd.rank,nv=svd.rank)
cat('SVD d diagonal:\n')
print(s$d[1:svd.rank])
print(s$d[basic.seq(svd.rank)])
mag <- sqrt( rowSums(s$u*s$u) * nrow(s$u) / ncol(s$u) )
keep <- (keep & mag >= min.svd)
}

elist <- elist[keep,]

if (is.na(row.labels))
row.labels <- (nrow(elist) <= 500)
row.labels <- (nrow(elist) <= 300)

data <- t(scale(t(elist$E), center=TRUE,scale=FALSE))

labels <- list(rownames(data))

for(colname in annotation)
if (!all(is.na(elist$gene[,colname])))
rownames(data) <- paste(rownames(data), elist$gene[,colname])

height <- if(row.labels) (16*nrow(data)+400)*res/150 else 2500*res/150
png(sprintf('%s.png',filename.prefix), width=1500*res/150, height=height, res=res)
labels[[ length(labels)+1 ]] <- elist$gene[,colname]
height <- if(row.labels) (25*nrow(data)+800)*res/150 else 2500*res/150
png(sprintf('%s.png',filename.prefix), width=2000*res/150, height=height, res=res)

heatmap <- nesoni.heatmap(data, col=signed.col, symkey=TRUE,symbreaks=TRUE, labRow=(if(row.labels) NULL else NA), margins=margins, main=main, ...)
#heatmap <- nesoni.heatmap(data, col=signed.col, symkey=TRUE,symbreaks=TRUE, labRow=(if(row.labels) NULL else NA), margins=margins, main=main, ...)
heatmap <- nesoni.heatmap(data, labels=if(row.labels) labels else list(), reorder.columns=reorder.columns)

dev.off()

shuffled.elist <- elist[rev(heatmap$rowInd),]
shuffled.elist <- elist[rev(heatmap$dend.row$order),]

table.filename <- sprintf('%s.csv', filename.prefix)

Expand All @@ -169,7 +241,8 @@ hmap.elist <- function(filename.prefix, elist,
for(colname in annotation) {
frame[,colname] <- shuffled.elist$gene[,colname]
}
frame[,'cluster hierarchy'] <- rev(dendrogram.paths(heatmap$rowDendrogram))
#frame[,'cluster hierarchy'] <- rev(dendrogram.paths(heatmap$rowDendrogram))
frame[,'cluster hierarchy'] <- rev(heatmap$dend.row$paths)
frame <- data.frame(frame, shuffled.elist$E, check.names=FALSE)

write.csv(frame, row.names=FALSE)
Expand Down
3 changes: 3 additions & 0 deletions nesoni-script
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/env python

import nesoni.__main__
9 changes: 5 additions & 4 deletions nesoni/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import sys

VERSION='0.72'
VERSION='0.73'

BOLD = '\x1b[1m'
END = '\x1b[m'
Expand Down Expand Up @@ -129,6 +129,7 @@
""" % { 'BOLD' : '\x1b[1m', 'END' : '\x1b[m', 'VERSION' : VERSION }

from reference_directory import Make_reference
from clip import Clip
from samimport import Import
from samshrimp import Shrimp
from samconsensus import Filter, Reconsensus, Consensus
Expand Down Expand Up @@ -289,9 +290,9 @@ def fill_scaffolds(args):
def pastiche(args):
grace.load('pastiche').pastiche(args)

@add
def clip(args):
grace.load('clip').clip(args)
#@add
#def clip(args):
# grace.load('clip').clip(args)

@add
def plot_counts(args):
Expand Down
15 changes: 15 additions & 0 deletions nesoni/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@

import sys

import nesoni
from nesoni import grace, config

try:
sys.exit(nesoni.main(sys.argv[1:]))

except grace.Help_shown:
sys.exit(1)

except Exception:
config.report_exception()
sys.exit(1)
Loading

0 comments on commit 4788d8f

Please sign in to comment.