Mercurial > repos > jeremyjliu > region_motif_enrichment

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/region_motif_compare.r	Wed Aug 06 15:36:46 2014 -0400
@@ -0,0 +1,189 @@
+# Name: region_motif_compare.r
+# Description: Reads in two count files and determines enriched and depleted
+# motifs (or any location based feature) based on poisson tests and gc
+# corrections. All enrichment ratios relative to overall count / gc ratios.
+# Author: Jeremy liu
+# Email: jeremy.liu@yale.edu
+# Date: 14/07/03
+# Note: This script is meant to be invoked with the following command
+# R --slave --vanilla -f ./region_motif_compare.r --args <workingdir> <db> <intab1> <intab2>
+#   <enriched_tab> <depleted_tab> <plots_png>
+# <workingdir> is working directory of galaxy installation
+# <db> types: "t" test, "p" pouya, "j" jaspar jolma, "m" mouse, "c" combined
+# Dependencies: plotting.r
+
+# Auxiliary function to concatenate multiple strings
+concat <- function(...) {
+	input_list <- list(...)
+	return(paste(input_list, sep="", collapse=""))
+}
+
+# Supress all warning messages to prevent Galaxy treating warnings as errors
+options(warn=-1)
+
+# Set common and data directories
+args <- commandArgs()
+workingDir = args[7]
+commonDir = concat(workingDir, "/tools/my_tools")
+dbCode = args[8]
+# dbCode "c" implemented when pwmFile is loaded
+if (dbCode == "t" | dbCode == "p") {
+	pwmFile = concat(commonDir, "/region_motif_db/pwms/pouya.pwms.from.seq.RData")
+} else if (dbCode == "j") {
+	pwmFile = concat(commonDir, "/region_motif_db/pwms/jaspar.jolma.pwms.from.seq.RData")
+} else if (dbCode == "m") {
+	pwmFile = concat(commonDir, "/region_motif_db/pwms/mm9.pwms.from.seq.RData")
+} else if (dbCode == "c") { # rest of dbCode "c" implemeted when pwmFile loaded
+	pwmFile = concat(commonDir, "/region_motif_db/pwms/pouya.pwms.from.seq.RData")
+	pwmFile2 = concat(commonDir, "/region_motif_db/pwms/jaspar.jolma.pwms.from.seq.RData")
+} else {
+	pwmFile = concat(commonDir, "/region_motif_db/pwms/pouya.pwms.from.seq.RData")
+}
+
+# Set input and reference files
+inTab1 = args[9]
+inTab2 = args[10]
+enrichTab = args[11]
+depleteTab = args[12]
+plotsPng = args[13]
+
+# Load dependencies
+source(concat(commonDir, "/region_motif_lib/plotting.r"))
+
+# Auxiliary function to read in tab file and prepare the data
+read_tsv <- function(file) {
+	data = read.table(file, sep="\t", stringsAsFactors=FALSE)
+	names(data)[names(data) == "V1"] = "motif"
+	names(data)[names(data) == "V2"] = "counts"
+	return(data)
+}
+
+startTime = Sys.time()
+cat("Running ... Started at:", format(startTime, "%a %b %d %X %Y"), "...\n")
+
+# Loading motif position weight matrix (pwm) file and input tab file
+#cat("Loading and reading input region motif count files...\n")
+load(pwmFile) # pwms data structure
+if (dbCode == "c") { # Remaining implementation of dbCode "c" combined
+	temp = pwms
+	load(pwmFile2)
+	pwms = append(temp, pwms)
+}
+region1DF = read_tsv(inTab1)
+region2DF = read_tsv(inTab2)
+region1Counts = region1DF$counts
+region2Counts = region2DF$counts
+names(region1Counts) = region1DF$motif
+names(region2Counts) = region2DF$motif
+
+# Processing count vectors to account for missing 0 count motifs, then sorting
+#cat("Performing 0 count correction and sorting...\n")
+allNames = union(names(region1Counts), names(region2Counts))
+region1Diff = setdiff(allNames, names(region1Counts))
+region2Diff = setdiff(allNames, names(region2Counts))
+addCounts1 = rep(0, length(region1Diff))
+addCounts2 = rep(0, length(region2Diff))
+names(addCounts1) = region1Diff
+names(addCounts2) = region2Diff
+newCounts1 = append(region1Counts, addCounts1)
+newCounts2 = append(region2Counts, addCounts2)
+region1Counts = newCounts1[sort.int(names(newCounts1), index.return=TRUE)$ix]
+region2Counts = newCounts2[sort.int(names(newCounts2), index.return=TRUE)$ix]
+
+# Generate gc content matrix
+gc = sapply(pwms, function(i) mean(i[2:3,3:18]))
+
+# Apply poisson test, calculate p and q values, and filter significant results
+#cat("Applying poisson test...\n")
+rValue = sum(region2Counts) / sum(region1Counts)
+pValue = sapply(seq(along=region1Counts), function(i) {
+	poisson.test(c(region1Counts[i], region2Counts[i]), r=1/rValue)$p.value
+})
+qValue = p.adjust(pValue, "fdr")
+indices = which(qValue<0.1 & abs(log2(region1Counts/region2Counts/rValue))>log2(1.5))
+
+# Setting up output diagnostic plots, 4 in 1 png image
+png(plotsPng, width=800, height=800)
+xlab = "region1_count"
+ylab = "region2_count"
+lim = c(0.5, 5000)
+layout(matrix(1:4, ncol=2))
+par(mar=c(5, 5, 5, 1))
+
+# Plot all motif counts along the linear correlation coefficient
+plot.scatter(region1Counts+0.5, region2Counts+0.5, log="xy", xlab=xlab, ylab=ylab,
+						 cex.lab=2.2, cex.axis=1.8, xlim=lim, ylim=lim*rValue)
+abline(0, rValue, untf=T)
+abline(0, rValue*2, untf=T, lty=2)
+abline(0, rValue/2, untf=T, lty=2)
+
+# Plot enriched and depleted motifs in red, housed in second plot
+plot.scatter(region1Counts+0.5, region2Counts+0.5, log="xy", xlab=xlab, ylab=ylab,
+						 cex.lab=2.2, cex.axis=1.8, xlim=lim, ylim=lim*rValue)
+points(region1Counts[indices]+0.5, region2Counts[indices]+0.5, col="red")
+abline(0, rValue, untf=T)
+abline(0, rValue*2, untf=T, lty=2)
+abline(0, rValue/2, untf=T, lty=2)
+
+# Apply and plot gc correction and loess curve
+#cat("Applying gc correction, rerunning poisson test...\n")
+ind = which(region1Counts>5)
+gc = gc[names(region2Counts)] # Reorder the indices of pwms to match input data
+lo = plot.scatter(gc,log2(region2Counts/region1Counts),draw.loess=T,
+								xlab="gc content of motif",ylab=paste("log2(",ylab,"/",xlab,")"),
+								cex.lab=2.2,cex.axis=1.8,ind=ind) # This function is in plotting.r
+gcCorrection = 2^approx(lo$loess,xout=gc,rule=2)$y
+save(gc, file="gc.RData")
+
+# Recalculate p and q values, and filter for significant entries
+pValueGC = sapply(seq(along=region1Counts),function(i) {
+	poisson.test(c(region1Counts[i],region2Counts[i]),r=1/gcCorrection[i])$p.value
+})
+qValueGC=p.adjust(pValueGC,"fdr")
+indicesGC = which(qValueGC<0.1 & abs(log2(region1Counts/region2Counts*gcCorrection))>log2(1.5))
+
+# Plot gc corrected motif counts
+plot.scatter(region1Counts+0.5, (region2Counts+0.5)/gcCorrection, log="xy",
+						 xlab=xlab, ylab=paste(ylab,"(normalized)"), cex.lab=2.2, cex.axis=1.8,
+						 xlim=lim, ylim=lim)
+points(region1Counts[indicesGC]+0.5,
+			 (region2Counts[indicesGC]+0.5)/gcCorrection[indicesGC], col="red")
+abline(0,1)
+abline(0,1*2,untf=T,lty=2)
+abline(0,1/2,untf=T,lty=2)
+
+# Trim results, compile statistics and output to file
+# Only does so if significant results are computed
+if(length(indicesGC) > 0) {
+	# Calculate expected counts and enrichment ratios
+	#cat("Calculating statistics...\n")
+	nullExpect = region1Counts * gcCorrection
+	enrichment = region2Counts / nullExpect
+
+	# Reorder selected indices in ascending pvalue
+	#cat("Reordering by ascending pvalue...\n")
+	indicesReorder = indicesGC[order(pValueGC[indicesGC])]
+
+	# Combine data into one data frame and output to two files
+	#cat("Splitting and outputting data...\n")
+	outDF = data.frame(motif=names(pValueGC), p=as.numeric(pValueGC), q=qValueGC,
+										 stringsAsFactors=F, region_1_count=region1Counts,
+										 null_expectation=round(nullExpect,2), region_2_count=region2Counts,
+										 enrichment=enrichment)[indicesReorder,]
+	names(outDF)[which(names(outDF)=="region_1_count")]=xlab
+	names(outDF)[which(names(outDF)=="region_2_count")]=ylab
+	indicesEnrich = which(outDF$enrichment>1)
+	indicesDeplete = which(outDF$enrichment<1)
+	outDF$enrichment = ifelse(outDF$enrichment>1,
+														round(outDF$enrichment,3),
+														paste("1/",round(1/outDF$enrichment,3)))
+	write.table(outDF[indicesEnrich,], file=enrichTab, quote=FALSE,
+							sep="\t", append=FALSE, row.names=FALSE, col.names=TRUE)
+	write.table(outDF[indicesDeplete,], file=depleteTab, quote=FALSE,
+							sep="\t", append=FALSE, row.names=FALSE, col.names=TRUE)
+}
+
+# Catch display messages and output timing information
+catchMessage = dev.off()
+cat("Done. Job started at:", format(startTime, "%a %b %d %X %Y."),
+		"Job ended at:", format(Sys.time(), "%a %b %d %X %Y."), "\n")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/region_motif_compare.xml	Wed Aug 06 15:36:46 2014 -0400
@@ -0,0 +1,29 @@
+<tool id="region_motif_compare" name="Region Motif Count Compare">
+  <description>for comparing the motif counts in different region sets</description>
+  <command interpreter="bash">
+    /usr/bin/R --slave --vanilla -f $GALAXY_ROOT_DIR/tools/my_tools/region_motif_compare.r --args $GALAXY_ROOT_DIR $db_type $in_tab_1 $in_tab_2 $out_enriched $out_depleted $out_plots
+  </command>
+  <inputs>
+    <param name="in_tab_1" type="data" format="tabular" label="Region Set 1 Motif Count File"/>
+    <param name="in_tab_2" type="data" format="tabular" label="Region Set 2 Motif Count File"/>
+    <param name="db_type" type="select" label="Select Motif Database" >
+      <option value="t">Test Pouya Subset (hg19)</option>
+      <option value="p">Pouya Encode Motifs (hg19)</option>
+      <option value="j">Jaspar and Jolma Motifs (hg19)</option>
+      <option value="m">Mouse Motifs (mm9)</option>
+      <option value="c">Pouya, Jaspar, and Jolma Combined (hg19)</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data name="out_enriched" format="tabular" label="Enriched Motifs"/>
+    <data name="out_depleted" format="tabular" label="Depleted Motifs"/>
+    <data name="out_plots" format="png" label="Motif Count Comparison Plots"/>
+  </outputs>
+
+  <help>
+    This tools reads in two counts file and determines enriched and depleted
+    motifs in two different region sets based on poisson calculation with
+    gc correction.
+  </help>
+
+</tool>
\ No newline at end of file
Binary file region_motif_db/pwms/jaspar.jolma.pwms.from.seq.RData has changed
Binary file region_motif_db/pwms/mm9.pwms.from.seq.RData has changed
Binary file region_motif_db/pwms/pouya.pwms.from.seq.RData has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/region_motif_intersect.r	Wed Aug 06 15:36:46 2014 -0400
@@ -0,0 +1,102 @@
+# Name: region_motif_intersect.r
+# Description: Takes a bed file of target regions and counts intersections
+# of each motif (built in rdata database) and target regions.
+# Author: Jeremy liu
+# Email: jeremy.liu@yale.edu
+# Date: 14/07/02
+# Note: This script is meant to be invoked with the following command
+# R --slave --vanilla -f ./region_motif_intersect.r --args <workingdir> <db> <inbed> <outtab>
+# <workingdir> is working directory of galaxy installation
+# <db> types: "t" test, "p" pouya, "j" jaspar jolma, "m" mouse
+# Dependencies: none
+
+# Auxiliary function to concatenate multiple strings
+concat <- function(...) {
+  input_list <- list(...)
+  return(paste(input_list, sep="", collapse=""))
+}
+
+# Set common and data directories
+args <- commandArgs()
+workingDir = args[7]
+commonDir = concat(workingDir, "/tools/my_tools")
+dbCode = args[8]
+if (dbCode == "t") {
+  motifDB = concat(commonDir, "/region_motif_db/pouya_test_motifs.bed.bgz")
+} else if (dbCode == "p") {
+  motifDB = concat(commonDir, "/region_motif_db/pouya_motifs.bed.bgz")
+} else if (dbCode == "j") {
+  motifDB = concat(commonDir, "/region_motif_db/jaspar_jolma_motifs.bed.bgz")
+} else if (dbCode == "m") {
+  motifDB = concat(commonDir, "/region_motif_db/mm9_motifs_split.bed.bgz")
+} else {
+  motifDB = concat(commonDir, "/region_motif_db/pouya_motifs.bed.bgz")
+}
+
+# Set input and reference files, comment to toggle commmand line arguments
+inBed = args[9]
+outTab = args[10]
+
+# Auxiliary function to read in BED file
+read_bed <- function(file) {
+  return(read.table(file, sep="\t", stringsAsFactors=FALSE))
+}
+
+startTime = Sys.time()
+cat("Running ... Started at:", format(startTime, "%a %b %d %X %Y"), "...\n")
+
+# Load dependencies
+#cat("Loading dependencies...\n")
+suppressPackageStartupMessages(library(Rsamtools, quietly=TRUE))
+
+# Initializing hash table (as env) with motif names and loading tabix file
+#cat("Loading motif database and initializing hash table...\n")
+motifTable = new.env()
+motifTbx <- TabixFile(motifDB)
+
+# Loading input bed file, convert integer columns to numeric, name columns
+#cat("Loading region file...\n")
+regionsDF = read_bed(inBed)
+dfTemp = sapply(regionsDF, is.integer)
+regionsDF[dfTemp] = lapply(regionsDF[dfTemp], as.numeric)
+names(regionsDF)[names(regionsDF) == "V1"] = "chr"
+names(regionsDF)[names(regionsDF) == "V2"] = "start"
+names(regionsDF)[names(regionsDF) == "V3"] = "end"
+
+# Filtering regions to exclude chromosomes not in motif database
+#cat("Determining intersection counts...\n")
+motifTbxChrs = seqnamesTabix(motifTbx)
+regionsDFFilter = subset(regionsDF, chr %in% motifTbxChrs)
+
+# Loading regions into GRanges object and scanning motif tabix database
+# Region end is incremented by 1 since scanTabix querying is inclusive for
+# position start but exclusive for position end.
+param = GRanges(regionsDFFilter$chr, IRanges(regionsDFFilter$start,
+                end=regionsDFFilter$end + 1))
+regionsIntersects = scanTabix(motifTbx, param=param)
+
+# Parsing result list and updating motif count hash table
+#cat("Parsing result list...\n")
+for(regionIntersects in regionsIntersects) {
+  for(regionIntersect in strsplit(regionIntersects, " ")) {
+    intersectMotif = strsplit(regionIntersect, "\t")[[1]][4]
+    if(is.null(motifTable[[intersectMotif]])) {
+      motifTable[[intersectMotif]] = 1
+    } else {
+      motifTable[[intersectMotif]] = motifTable[[intersectMotif]] + 1
+    }
+  }
+}
+
+# Converting motif count hash table to an integer vector for output
+counts = integer(length = length(ls(motifTable)))
+names(counts) = ls(motifTable)
+for(motifName in ls(motifTable)) {
+  counts[motifName] = as.integer(motifTable[[motifName]])
+}
+
+# Outputting intersection counts to tab delineated file
+#cat("Outputting to file...\n")
+write.table(counts, outTab, quote=FALSE, sep="\t", row.names=TRUE, col.names=FALSE)
+cat("Done. Job started at:", format(startTime, "%a %b %d %X %Y."),
+    "Job ended at:", format(Sys.time(), "%a %b %d %X %Y."), "\n")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/region_motif_intersect.xml	Wed Aug 06 15:36:46 2014 -0400
@@ -0,0 +1,24 @@
+<tool id="region_motif_intersect" name="Region Motif Intersect">
+  <description>for computing the motifs that lie inside a region set</description>
+  <command interpreter="bash">
+    /usr/bin/R --slave --vanilla -f $GALAXY_ROOT_DIR/tools/my_tools/region_motif_intersect.r --args $GALAXY_ROOT_DIR $db_type $in_bed $out_tab
+  </command>
+  <inputs>
+    <param name="in_bed" type="data" format="bed" label="Input BED File" />
+    <param name="db_type" type="select" label="Select Motif Database" >
+      <option value="t">Test Pouya Subset (hg19)</option>
+      <option value="p">Pouya Encode Motifs (hg19)</option>
+      <option value="j">Jaspar and Jolma Motifs (hg19)</option>
+      <option value="m">Mouse Motifs (mm9)</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data name="out_tab" format="tabular" />
+  </outputs>
+
+  <help>
+    This tool computes the motifs and the number of motifs that intersect
+    any region in a input set of regions.
+  </help>
+
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/region_motif_lib/plotting.r	Wed Aug 06 15:36:46 2014 -0400
@@ -0,0 +1,815 @@
+library(graphics, quietly=TRUE)
+
+plot.verbose=F
+name.cleaner<-function(...,sep="",replace="_") {
+  plot.name=gsub(" ",replace,paste(...,sep=sep))
+  plot.name=gsub("/",replace,plot.name)
+  plot.name=gsub(",",replace,plot.name)
+  plot.name=gsub("'",replace,plot.name)
+  plot.name=gsub("\\+","plus",plot.name)
+  plot.name=gsub("\\(","",plot.name)
+  plot.name=gsub("\\)","",plot.name)
+  return(plot.name)
+}
+plot.namer <- function(..., date=0, fig.dir=0, format="png",sep="") {
+  plot.name=name.cleaner(...,sep=sep)
+  if(date==0) date=gsub("-","",as.character(Sys.Date()))
+  if(fig.dir==0) fig.dir="/Users/alver/allplots"
+  plot.name=paste(fig.dir,"/",date,plot.name,".",format,sep="")
+  if(plot.verbose) cat("  saving figure: ",plot.name,"\n")
+  return(plot.name)
+}
+
+plot.scatter <- function(x,y=NULL,f=0.1,same=FALSE,n.points=-1,draw.lowess=FALSE,write.r=TRUE,cex.r=1,col=NULL,col.line=NULL,lwd.line=1,
+                         draw.loess=FALSE,span=0.5,bandwidth=bandwidth,draw.prof=FALSE,xlog=FALSE,ylog=FALSE,cor.method="pearson",log="",ind=NULL,
+                         draw.spread=FALSE,...) {
+
+  ## if col is the same length as x, use col for each point matching x.
+  ## if col is the same length as ind, use col for each point matching x[ind].
+  ## else use densCols function based on col.
+  ## if col is null, densCols is used with bluetone for first plot and redtone for same=T.
+
+    #print(length(x))
+    #print(length(y))
+
+    xy <- xy.coords(x, y)
+    x=xy$x
+    y=xy$y
+
+    output=list()
+    col.use = col
+
+    if(!is.null(ind)) {
+        if(length(col.use)==length(x)) {
+            col.use=col.use[ind]
+        }
+        x=x[ind]
+        y=y[ind]
+    }
+
+    if(length(col.use)!=length(x)) {
+        col.use=rep(NA,length(x))
+    }
+
+
+  take=which(is.finite(x) & is.finite(y))
+  x=x[take]
+  y=y[take]
+  col.use=col.use[take]
+
+  if(grepl("x",log)) xlog=TRUE
+  if(grepl("y",log)) ylog=TRUE
+  if(xlog) log="x"
+  if(ylog) log=paste(log,"y",sep="")
+
+  if(draw.lowess) {
+    lo = lowess(x,y,f)
+    output$lowess=lo
+  }
+  if(draw.loess | draw.spread) {
+      px=x;py=y
+      if(xlog)  px=log(x)
+      if(ylog)  py=log(y)
+      ind = which(is.finite(px+py))
+      px=px[ind]
+      py=py[ind]
+      lo = loess(py ~ px,span=span,iterations=5)
+      lo.y=as.numeric(lo$fitted)
+      lo.x=as.numeric(lo$x)
+      if(draw.spread) lo.sd = loess((lo.y-py)^2 ~ lo.x,span=span*1.5,iterations=5)
+      if(xlog) lo.x=exp(lo.x)
+      if(ylog) lo.y=exp(lo.y)
+      lo =data.frame(x=lo.x,y=lo.y)
+      if(draw.spread) {
+          lo.sd=lo.sd$fitted
+          if(ylog) lo.sd=lo.sd*lo.y*lo.y
+          lo$sd=sqrt(pmax(0,lo.sd))
+      }
+      lo=unique(lo)
+      lo = lo[order(lo$x),]
+      output$loess=lo
+  }
+
+  if(draw.prof) {
+    px=x;py=y
+    if(xlog)  px=log(x)
+    p=prof(px,py,50)
+    if(xlog)  p$x=exp(p$x)
+    output$prof=p
+  }
+
+  r=cor(x,y,method=cor.method)
+  output$cor=r
+  output$cor.method=cor.method
+
+  len=length(x)
+  if(n.points>0 & n.points<len) {
+    take=sample(1:len,n.points)
+    x=x[take]
+    y=y[take]
+    col.use=col.use[take]
+  }
+
+  if(xlog) {
+    ind = which(x>0)
+    x=x[ind]
+    y=y[ind]
+    col.use=col.use[ind]
+  }
+  xcol=x
+  if(xlog) xcol=log(xcol)
+  if(ylog) {
+    ind = which(y>0)
+    x=x[ind]
+    xcol=xcol[ind]
+    y=y[ind]
+    col.use=col.use[ind]
+  }
+  ycol=y
+  if(ylog) ycol=log(ycol)
+
+  if(is.null(col)) {
+    if(!same) {
+      col=colorRampPalette(blues9[-(1:3)])
+    } else {
+      col=colorRampPalette(c("lightpink","red","darkred"))
+    }
+  }
+  if(!is.na(col.use[1])) {
+    col=col.use
+  } else {
+    col= suppressPackageStartupMessages(densCols(xcol,ycol,col =col,bandwidth=bandwidth,nbin=500))
+  }
+  if(!same) {
+    plot(x,y,col=col,log=log,...)
+  } else {
+    points(x,y,col=col,...)
+  }
+
+  if(is.null(col.line)) {
+    col.line="darkblue"
+    if(same) col.line="darkred"
+  }
+  if(draw.lowess | draw.loess) lines(lo,col=col.line,lwd=lwd.line)
+  if(draw.spread) {
+      lines(lo$x,lo$y+lo$sd,col=col.line,lwd=lwd.line)
+      lines(lo$x,lo$y-lo$sd,col=col.line,lwd=lwd.line)
+  }
+  if(draw.prof) {
+    points(p)
+    plot.prof(p)
+  }
+  if(write.r & !same) mtext(paste("r=",round(r,3),sep=""),cex=cex.r)
+  return(invisible(output))
+}
+
+#color.int=c(144,586,465,257,490,100,74,24)
+#coli=1
+#cols = integer()
+colramp.bwr = vector()
+colramp.byr = vector()
+colramp.bw = vector()
+colramp.bw2 = vector()
+
+plot.save=F
+
+setup.plotting <- function() {
+  pdf.options(useDingbats = FALSE)
+#  cols<<-colors()[color.int]
+#  cols<<-rep(cols,100)
+  colramp.bwr <<- colorRampPalette(c("blue","white","red"),space="Lab")(100);
+  colramp.byr <<- colorRampPalette(c("blue","yellow","red"),space="Lab")(100);
+  colramp.bw  <<- colorRampPalette(c("white","black"),space="Lab")(100)
+  colramp.bw2  <<- colorRampPalette(c("grey92","grey5"),space="Lab")(100)
+}
+
+
+plot.cluster <- function(x,k, max.points.cl=-1, image.sep=-1,col=NULL, reorder=FALSE) {
+    x[which(is.na(x))]=0
+    if(reorder) {
+        o=hclust(dist(t(x)))$order
+        x=x[,o]
+    }
+    if(image.sep<0) {
+        if(max.points.cl>0) {
+            image.sep=ceiling(0.2*max.points.cl)
+        }    else {
+            image.sep=ceiling(0.2 * nrow(x) / nrow(k$centers))
+        }
+    }
+
+    distances<-dist(k$centers)
+    hcl=hclust(distances)
+
+    adjust.branch.sep <-function(ddr,lengths) {
+        assign.branch.sep <- function(d,i.leaf) {
+            if(is.leaf(d)) {
+                attr(d,"members")<-lengths[i.leaf]
+                i.leaf=i.leaf+1
+                output=list(d=d,i.leaf=i.leaf)
+                return(output)
+            }
+            else{
+                input=assign.branch.sep(d[[1]],i.leaf)
+                i.leaf=input$i.leaf
+                d[[1]]=input$d
+
+                input=assign.branch.sep(d[[2]],i.leaf)
+                i.leaf=input$i.leaf
+                d[[2]]=input$d
+
+                attr(d,"members")<-attr(d[[1]],"members")+attr(d[[2]],"members")
+                output=list(d=d,i.leaf=i.leaf)
+                return(output)
+            }
+        }
+        ddr<-as.dendrogram(ddr)
+        ddr=assign.branch.sep(ddr,1)$d
+        return(ddr)
+    }
+
+    n.points.actual=k$size
+    if(max.points.cl>0) {
+    k$size[which(k$size>max.points.cl)] = max.points.cl
+}
+
+    ddr<-adjust.branch.sep(hcl,k$size[hcl$order]+image.sep)
+    centers=length(hcl$order)
+
+    n.points=sum(k$size)
+    n.dims=ncol(x)
+    z=matrix(numeric((n.points+(centers-1)*image.sep)*n.dims),ncol=n.dims)
+
+
+  last.row=0
+  cluster.y.pos=numeric(centers)
+  for(i.c in hcl$order) {
+    n.p=k$size[i.c]
+    z[last.row+1:n.p,] = x[which(k$cluster==i.c)[1:n.p],]
+    cluster.y.pos[i.c]=last.row+n.p/2
+    last.row=last.row+n.p+image.sep
+  }
+
+  zlim=c(0,max(z))
+  if(min(z)<0) {
+    m=max(c(z,-z))
+    zlim=c(-m,m)
+  }
+  if(is.null(col)) {
+    if(min(z)>=0) {
+      col= colramp.bw
+    } else {
+      col= colorRampPalette(c("blue","yellow","red"),space="Lab")(100);
+    }
+  }
+  x.pl=seq1(n.dims+1)-0.5
+  y.pl=seq1(nrow(z)+1)-0.5
+  l <- layout(matrix(1:2,ncol=2),widths=c(1,5))
+  par(mar = c(6,0.5,6,0))
+  my.plot.dendrogram(ddr,horiz=T,axes=F,yaxs="i",xaxs="i",leaflab="none",center=T,lwd=10)
+  par(mar = c(6,0.1,6,2.1))
+  image(x=x.pl,y=y.pl,z=t(z),zlim=zlim,axes=FALSE,xlab="",col=col)
+  mtext("cluster",side=4,adj=1.1)
+  mtext("points",side=4,adj=1.1,line=1)
+  mtext(seq1(centers),side=4,at=cluster.y.pos)
+  mtext(n.points.actual,side=4,at=cluster.y.pos,line=1)
+
+  if(!is.null(dimnames(x)[[2]])) {
+    mtext(dimnames(x)[[2]],side=1,at=seq1(n.dims),las=2)
+  }
+}
+
+plot.cluster2 <- function(k, n.clusters=-1, n.clusters.per.panel=4, cols=c("black","red","blue","darkgreen","orange"),f=0,xshift=0,...) {
+  if(n.clusters<=0) n.clusters=nrow(k$centers)
+
+  n.elements=as.numeric(unlist(lapply(seq1(n.clusters), function(cl) length(which(abs(k$cluster)==cl)))))
+
+  distances<-dist(k$centers)
+  n.panels = ceiling(n.clusters/n.clusters.per.panel)
+  n.rows=ceiling(sqrt(n.panels))
+  n.cols=ceiling(n.panels/n.rows)
+  n.panels.layout=n.rows*n.cols
+
+  layout(matrix(seq1(n.panels.layout),nrow=n.rows,byrow=TRUE))
+
+  min=min(k$centers)
+  max=max(k$centers)
+
+  if(f>0) {
+    for(i.cluster in seq1(n.clusters)) {
+      k$centers[i.cluster,]=lowess(k$centers[i.cluster,],f=f)$y
+    }
+  }
+
+  ##  hcl=hclust(distances)
+  hcl=list()
+  hcl$order=1:n.clusters
+
+  for(i.cluster in seq1(n.clusters)) {
+    if(i.cluster %% n.clusters.per.panel == 1 ) {
+      clusters.of.panel=i.cluster:(i.cluster+n.clusters.per.panel-1)
+      clusters.of.panel=clusters.of.panel[which(clusters.of.panel<=n.clusters)]
+      clusters.of.panel=hcl$order[clusters.of.panel]
+      plot(c(0,length(k$centers[1,]))+xshift,c(min,max),type="n",...)
+      mtext(paste(clusters.of.panel," (",n.elements[clusters.of.panel],")",sep=""),line=length(clusters.of.panel)-seq1(length(clusters.of.panel)),col=cols[seq1(length(clusters.of.panel)) %% n.clusters.per.panel+1] )
+    }
+   # lines(k$centers[hcl$order[i.cluster],],col=cols[i.cluster %% n.clusters.per.panel+1])
+     lines(seq1(length(k$centers[1,]))+xshift,k$centers[hcl$order[i.cluster],],col=cols[i.cluster %% n.clusters.per.panel+1])
+  }
+}
+
+my.colors <- function(n) {
+  few.colors=c("black","red","blue","green3","mediumorchid3","gold2","darkcyan","sienna2")
+  if(n<=length(few.colors)) return(few.colors [seq1(n)])
+  col=integer(n)
+  n.families=7
+  n.members=ceiling(n/n.families)
+  for(i in seq1(n)) {
+    member=ceiling(i/n.families)
+    ratio=(member-1)/(n.members-1)
+    c2=0+0.8*ratio
+    if(member %% 2 == 1) ratio=-ratio
+    c1=0.8-0.2*ratio
+    c3=0.75-0.2*ratio
+    if(i %% n.families == 1) {col[i]=rgb(c2,c2,c2)}
+    if(i %% n.families == 2) {col[i]=rgb(c1,c1/2,c1/2)}
+    if(i %% n.families == 3) {col[i]=rgb(c1/2,0.9*c1,c1/2)}
+    if(i %% n.families == 4) {col[i]=rgb(c1/2,c1/2,c1)}
+    if(i %% n.families == 5) {col[i]=rgb(c3,c3,c3/2)}
+    if(i %% n.families == 6) {col[i]=rgb(c3,c3/2,c3)}
+    if(i %% n.families == 0) {col[i]=rgb(c3/2,c3,c3)}
+  }
+  return(col)
+}
+
+plot.my.colors <-function(n) {
+  x11()
+  col=my.colors(n)
+  plot(x=c(0,n),y=c(0,1),type="n")
+  segments(seq1(n)-1,runif(n),seq1(n),runif(n),col=col)
+}
+
+
+plot.colors <-function() {
+  x11(width=10,height=10)
+  plot(c(0,26),c(0,26),type="n")
+  c=colors()
+  n=length(c)
+  i=1:n
+  x=i%%26
+  y=floor(i/26)
+  rect(x,y,x+1,y+1,col=c[i],border=c[i])
+  text(x+0.5,y+0.5,i)
+}
+
+
+adjust.branch.sep <-function(ddr,lengths) {
+  assign.branch.sep <- function(d,i.leaf) {
+    if(is.leaf(d)) {
+      attr(d,"members")<-lengths[i.leaf]
+      i.leaf=i.leaf+1
+      output=list(d=d,i.leaf=i.leaf)
+      return(output)
+    }
+    else{
+      input=assign.branch.sep(d[[1]],i.leaf)
+      i.leaf=input$i.leaf
+      d[[1]]=input$d
+
+      input=assign.branch.sep(d[[2]],i.leaf)
+      i.leaf=input$i.leaf
+      d[[2]]=input$d
+
+      attr(d,"members")<-attr(d[[1]],"members")+attr(d[[2]],"members")
+      output=list(d=d,i.leaf=i.leaf)
+      return(output)
+    }
+  }
+  ddr<-as.dendrogram(ddr)
+  ddr=assign.branch.sep(ddr,1)$d
+  return(ddr)
+}
+t.dhcol <- function(dr,h,cols=c(1)) {
+                                        # check child heights
+  if(attr(dr[[1]],"height")<h) {
+                                        # color
+    ecol <- cols[coli];
+    coli <<- coli+1;
+    dr[[1]] <- dendrapply(dr[[1]],function(e) { attr(e,"edgePar") <- list(col=ecol); e});
+    attr(dr[[1]],"edgePar") <- list(col=ecol,p.border=NA,p.col=NA,t.col=1,t.cex=1.3);
+  } else {
+    dr[[1]] <- t.dhcol(dr[[1]],h,cols);
+  }
+
+  if(attr(dr[[2]],"height")<h) {
+                                        # color
+    ecol <- cols[coli];
+    coli <<- coli+1;
+    dr[[2]] <- dendrapply(dr[[2]],function(e) { attr(e,"edgePar") <- list(col=ecol); e});
+    attr(dr[[2]],"edgePar") <- list(col=ecol,p.border=NA,p.col=NA,t.col=1,t.cex=1.3);
+  } else {
+    dr[[2]] <- t.dhcol(dr[[2]],h,cols);
+  }
+  return(dr);
+}
+
+
+
+### The rest is PeterK's my.plot.dendogram
+
+## FIXME: need larger par("mar")[1] or [4] for longish labels !
+## {probably don't change, just print a warning ..}
+my.plot.dendrogram <-
+    function (x, type = c("rectangle", "triangle"), center = FALSE,
+          edge.root = is.leaf(x) || !is.null(attr(x, "edgetext")),
+          nodePar = NULL, edgePar = list(),
+          leaflab = c("perpendicular", "textlike", "none"), dLeaf = NULL,
+          xlab = "", ylab = "", xaxt="n", yaxt="s",
+          horiz = FALSE, frame.plot = FALSE, ...)
+{
+    type <- match.arg(type)
+    leaflab <- match.arg(leaflab)
+    hgt <- attr(x, "height")
+    if (edge.root && is.logical(edge.root))
+    edge.root <- 0.0625 * if(is.leaf(x)) 1 else hgt
+    mem.x <- .my.memberDend(x)
+    yTop <- hgt + edge.root
+    if(center) { x1 <- 0.5 ; x2 <- mem.x + 0.5 }
+    else       { x1 <- 1   ; x2 <- mem.x }
+    xlim <- c(x1 - 1/2, x2 + 1/2)
+    ylim <- c(0, yTop)
+    if (horiz) {## swap and reverse direction on `x':
+    xl <- xlim; xlim <- rev(ylim); ylim <- xl
+    tmp <- xaxt; xaxt <- yaxt; yaxt <- tmp
+    }
+    plot(0, xlim = xlim, ylim = ylim, type = "n", xlab = xlab, ylab = ylab,
+     xaxt = xaxt, yaxt = yaxt, frame.plot = frame.plot, ...)
+    if(is.null(dLeaf))
+        dLeaf <- .75*(if(horiz) strwidth("w") else strheight("x"))
+
+    if (edge.root) {
+### FIXME: the first edge + edgetext is drawn here, all others in plotNode()
+### -----  maybe use trick with adding a single parent node to the top ?
+    x0 <- my.plotNodeLimit(x1, x2, x, center)$x
+    if (horiz)
+        segments(hgt, x0, yTop, x0)
+    else segments(x0, hgt, x0, yTop)
+    if (!is.null(et <- attr(x, "edgetext"))) {
+        my <- mean(hgt, yTop)
+        if (horiz)
+        text(my, x0, et)
+        else text(x0, my, et)
+    }
+    }
+    my.plotNode(x1, x2, x, type = type, center = center, leaflab = leaflab,
+             dLeaf = dLeaf, nodePar = nodePar, edgePar = edgePar, horiz = horiz)
+}
+
+### the work horse: plot node (if pch) and lines to all children
+my.plotNode <-
+    function(x1, x2, subtree, type, center, leaflab, dLeaf,
+         nodePar, edgePar, horiz = FALSE)
+{
+    inner <- !is.leaf(subtree) && x1 != x2
+    yTop <- attr(subtree, "height")
+    bx <- my.plotNodeLimit(x1, x2, subtree, center)
+    xTop <- bx$x
+    usrpar <- par("usr");
+
+    ## handle node specific parameters in "nodePar":
+    hasP <- !is.null(nPar <- attr(subtree, "nodePar"))
+    if(!hasP) nPar <- nodePar
+
+    if(getOption("verbose")) {
+    cat(if(inner)"inner node" else "leaf", ":")
+    if(!is.null(nPar)) { cat(" with node pars\n"); str(nPar) }
+    cat(if(inner)paste(" height", formatC(yTop),"; "),
+        "(x1,x2)= (",formatC(x1,wid=4),",",formatC(x2,wid=4),")",
+        "--> xTop=", formatC(xTop, wid=8),"\n", sep="")
+    }
+
+    Xtract <- function(nam, L, default, indx)
+    rep(if(nam %in% names(L)) L[[nam]] else default,
+        length.out = indx)[indx]
+    asTxt <- function(x) # to allow 'plotmath' labels:
+    if(is.character(x) || is.expression(x) || is.null(x)) x else as.character(x)
+
+    i <- if(inner || hasP) 1 else 2 # only 1 node specific par
+
+    if(!is.null(nPar)) { ## draw this node
+    pch <- Xtract("pch", nPar, default = 1:2,    i)
+    cex <- Xtract("cex", nPar, default = c(1,1),     i)
+    col <- Xtract("col", nPar, default = par("col"), i)
+    bg <- Xtract("bg", nPar, default = par("bg"), i)
+    points(if (horiz) cbind(yTop, xTop) else cbind(xTop, yTop),
+           pch = pch, bg = bg, col = col, cex = cex)
+    }
+
+    if(leaflab == "textlike")
+        p.col <- Xtract("p.col", nPar, default = "white", i)
+    lab.col <- Xtract("lab.col", nPar, default = par("col"), i)
+    lab.cex <- Xtract("lab.cex", nPar, default = c(1,1), i)
+    lab.font <- Xtract("lab.font", nPar, default = par("font"), i)
+    if (is.leaf(subtree)) {
+    ## label leaf
+    if (leaflab == "perpendicular") { # somewhat like plot.hclust
+        if(horiz) {
+                X <- yTop + dLeaf * lab.cex
+                Y <- xTop; srt <- 0; adj <- c(0, 0.5)
+        }
+        else {
+                Y <- yTop - dLeaf * lab.cex
+                X <- xTop; srt <- 90; adj <- 1
+        }
+            nodeText <- asTxt(attr(subtree,"label"))
+        text(X, Y, nodeText, xpd = TRUE, srt = srt, adj = adj,
+                 cex = lab.cex, col = lab.col, font = lab.font)
+    }
+    }
+    else if (inner) {
+    segmentsHV <- function(x0, y0, x1, y1) {
+        if (horiz)
+        segments(y0, x0, y1, x1, col = col, lty = lty, lwd = lwd)
+        else segments(x0, y0, x1, y1, col = col, lty = lty, lwd = lwd)
+    }
+    for (k in 1:length(subtree)) {
+        child <- subtree[[k]]
+        ## draw lines to the children and draw them recursively
+        yBot <- attr(child, "height")
+        if (getOption("verbose")) cat("ch.", k, "@ h=", yBot, "; ")
+        if (is.null(yBot))
+        yBot <- 0
+        xBot <-
+        if (center) mean(bx$limit[k:(k + 1)])
+        else bx$limit[k] + .my.midDend(child)
+
+        hasE <- !is.null(ePar <- attr(child, "edgePar"))
+        if (!hasE)
+        ePar <- edgePar
+        i <- if (!is.leaf(child) || hasE) 1 else 2
+        ## define line attributes for segmentsHV():
+        col <- Xtract("col", ePar, default = par("col"), i)
+        lty <- Xtract("lty", ePar, default = par("lty"), i)
+        lwd <- Xtract("lwd", ePar, default = par("lwd"), i)
+        if (type == "triangle") {
+        segmentsHV(xTop, yTop, xBot, yBot)
+        }
+        else { # rectangle
+        segmentsHV(xTop,yTop, xBot,yTop)# h
+        segmentsHV(xBot,yTop, xBot,yBot)# v
+        }
+        vln <- NULL
+        if (is.leaf(child) && leaflab == "textlike") {
+        nodeText <- asTxt(attr(child,"label"))
+        if(getOption("verbose"))
+            cat('-- with "label"',format(nodeText))
+        hln <- 0.6 * strwidth(nodeText, cex = lab.cex)/2
+        vln <- 1.5 * strheight(nodeText, cex = lab.cex)/2
+        rect(xBot - hln, yBot,
+             xBot + hln, yBot + 2 * vln, col = p.col)
+        text(xBot, yBot + vln, nodeText, xpd = TRUE,
+                     cex = lab.cex, col = lab.col, font = lab.font)
+        }
+        if (!is.null(attr(child, "edgetext"))) {
+        edgeText <- asTxt(attr(child, "edgetext"))
+        if(getOption("verbose"))
+            cat('-- with "edgetext"',format(edgeText))
+        if (!is.null(vln)) {
+            mx <-
+            if(type == "triangle")
+                (xTop+ xBot+ ((xTop - xBot)/(yTop - yBot)) * vln)/2
+            else xBot
+            my <- (yTop + yBot + 2 * vln)/2
+        }
+        else {
+            mx <- if(type == "triangle") (xTop + xBot)/2 else xBot
+            my <- (yTop + yBot)/2
+        }
+        ## Both for "triangle" and "rectangle" : Diamond + Text
+
+                p.col <- Xtract("p.col", ePar, default = "white", i)
+                p.border <- Xtract("p.border", ePar, default = par("fg"), i)
+                ## edge label pars: defaults from the segments pars
+                p.lwd <- Xtract("p.lwd", ePar, default = lwd, i)
+                p.lty <- Xtract("p.lty", ePar, default = lty, i)
+                t.col <- Xtract("t.col", ePar, default = col, i)
+                t.cex <- Xtract("t.cex", ePar, default =  1,  i)
+                t.font<- Xtract("t.font",ePar, default= par("font"), i)
+                t.shift <- Xtract("t.shift", ePar, default =  0.01,  i)
+
+        vlm <- strheight(c(edgeText,"h"), cex = t.cex)/2
+        hlm <- strwidth (c(edgeText,"m"), cex = t.cex)/2
+        hl3 <- c(hlm[1], hlm[1] + hlm[2], hlm[1])
+                #polygon(mx+ c(-hl3, hl3), my + sum(vlm)*c(-1:1,1:-1),
+                #        col = p.col, border= p.border, lty = p.lty, lwd = p.lwd)
+        #text(mx, my, edgeText, cex = t.cex, col = t.col, font = t.font)
+                if(horiz) {
+                  text(my, mx+t.shift*abs(usrpar[3]-usrpar[4]), edgeText, cex = t.cex, col = t.col, font = t.font)
+                } else {
+                  text(mx+t.shift*abs(usrpar[2]-usrpar[1]), my, edgeText, cex = t.cex, col = t.col, font = t.font)
+                }
+        }
+        my.plotNode(bx$limit[k], bx$limit[k + 1], subtree = child,
+             type, center, leaflab, dLeaf, nodePar, edgePar, horiz)
+    }
+    }
+}
+
+my.plotNodeLimit <- function(x1, x2, subtree, center)
+{
+    ## get the left borders limit[k] of all children k=1..K, and
+    ## the handle point `x' for the edge connecting to the parent.
+    inner <- !is.leaf(subtree) && x1 != x2
+    if(inner) {
+    K <- length(subtree)
+    mTop <- .my.memberDend(subtree)
+    limit <- integer(K)
+    xx1 <- x1
+    for(k in 1:K) {
+        m <- .my.memberDend(subtree[[k]])
+        ##if(is.null(m)) m <- 1
+        xx1 <- xx1 + (if(center) (x2-x1) * m/mTop else m)
+        limit[k] <- xx1
+    }
+    limit <- c(x1, limit)
+    } else { ## leaf
+    limit <- c(x1, x2)
+    }
+    mid <- attr(subtree, "midpoint")
+    center <- center || (inner && !is.numeric(mid))
+    x <- if(center) mean(c(x1,x2)) else x1 + (if(inner) mid else 0)
+    list(x = x, limit = limit)
+}
+
+.my.memberDend <- function(x) {
+    r <- attr(x,"x.member")
+    if(is.null(r)) {
+        r <- attr(x,"members")
+        if(is.null(r)) r <- 1:1
+    }
+    r
+}
+
+.my.midDend <- function(x)
+    if(is.null(mp <- attr(x, "midpoint"))) 0 else mp
+
+
+## original Andy Liaw; modified RG, MM :
+my.heatmap <- function (x, Rowv=NULL, Colv=if(symm)"Rowv" else NULL,
+          distfun = dist, hclustfun = hclust,
+          reorderfun = function(d,w) reorder(d,w),
+          add.expr, symm = FALSE, revC = identical(Colv, "Rowv"),
+          scale = c("row", "column", "none"), na.rm=TRUE,
+          margins = c(5, 5), ColSideColors, RowSideColors,
+          cexRow = 0.2 + 1/log10(nr), cexCol = 0.2 + 1/log10(nc),
+          labRow = NULL, labCol = NULL, main = NULL, xlab = NULL, ylab = NULL,
+          keep.dendro = FALSE,
+          verbose = getOption("verbose"), imageSize=4, imageVSize=imageSize,imageHSize=imageSize,lasCol=2, lasRow=2, respect=F, ...)
+{
+    scale <- if(symm && missing(scale)) "none" else match.arg(scale)
+    if(length(di <- dim(x)) != 2 || !is.numeric(x))
+        stop("'x' must be a numeric matrix")
+    nr <- di[1]
+    nc <- di[2]
+    if(nr <= 1 || nc <= 1)
+        stop("'x' must have at least 2 rows and 2 columns")
+    if(!is.numeric(margins) || length(margins) != 2)
+        stop("'margins' must be a numeric vector of length 2")
+
+    doRdend <- !identical(Rowv,NA)
+    doCdend <- !identical(Colv,NA)
+    ## by default order by row/col means
+    if(is.null(Rowv)) Rowv <- rowMeans(x, na.rm = na.rm)
+    if(is.null(Colv)) Colv <- colMeans(x, na.rm = na.rm)
+
+    ## get the dendrograms and reordering indices
+
+    if(doRdend) {
+        if(inherits(Rowv, "dendrogram"))
+            ddr <- Rowv
+        else {
+            hcr <- hclustfun(distfun(x))
+            ddr <- as.dendrogram(hcr)
+            if(!is.logical(Rowv) || Rowv)
+                ddr <- reorderfun(ddr, Rowv)
+        }
+        if(nr != length(rowInd <- order.dendrogram(ddr)))
+            stop("row dendrogram ordering gave index of wrong length")
+    }
+    else rowInd <- 1:nr
+
+    if(doCdend) {
+        if(inherits(Colv, "dendrogram"))
+            ddc <- Colv
+        else if(identical(Colv, "Rowv")) {
+            if(nr != nc)
+                stop('Colv = "Rowv" but nrow(x) != ncol(x)')
+            ddc <- ddr
+        }
+        else {
+            hcc <- hclustfun(distfun(if(symm)x else t(x)))
+            ddc <- as.dendrogram(hcc)
+            if(!is.logical(Colv) || Colv)
+                ddc <- reorderfun(ddc, Colv)
+        }
+        if(nc != length(colInd <- order.dendrogram(ddc)))
+            stop("column dendrogram ordering gave index of wrong length")
+    }
+    else colInd <- 1:nc
+
+    ## reorder x
+    x <- x[rowInd, colInd]
+
+    labRow <-
+        if(is.null(labRow))
+            if(is.null(rownames(x))) (1:nr)[rowInd] else rownames(x)
+        else labRow[rowInd]
+    labCol <-
+        if(is.null(labCol))
+            if(is.null(colnames(x))) (1:nc)[colInd] else colnames(x)
+        else labCol[colInd]
+
+    if(scale == "row") {
+        x <- sweep(x, 1, rowMeans(x, na.rm = na.rm))
+        sx <- apply(x, 1, sd, na.rm = na.rm)
+        x <- sweep(x, 1, sx, "/")
+    }
+    else if(scale == "column") {
+        x <- sweep(x, 2, colMeans(x, na.rm = na.rm))
+        sx <- apply(x, 2, sd, na.rm = na.rm)
+        x <- sweep(x, 2, sx, "/")
+    }
+
+    ## Calculate the plot layout
+    lmat <- rbind(c(NA, 3), 2:1)
+    lwid <- c(if(doRdend) 1 else 0.05, imageHSize)
+    lhei <- c((if(doCdend) 1 else 0.05) + if(!is.null(main)) 0.2 else 0, imageVSize)
+    if(!missing(ColSideColors)) { ## add middle row to layout
+        if(!is.character(ColSideColors) || length(ColSideColors) != nc)
+            stop("'ColSideColors' must be a character vector of length ncol(x)")
+        lmat <- rbind(lmat[1,]+1, c(NA,1), lmat[2,]+1)
+        lhei <- c(lhei[1], 0.2, lhei[2])
+    }
+    if(!missing(RowSideColors)) { ## add middle column to layout
+        if(!is.character(RowSideColors) || length(RowSideColors) != nr)
+            stop("'RowSideColors' must be a character vector of length nrow(x)")
+        lmat <- cbind(lmat[,1]+1, c(rep(NA, nrow(lmat)-1), 1), lmat[,2]+1)
+        lwid <- c(lwid[1], 0.2, lwid[2])
+    }
+    lmat[is.na(lmat)] <- 0
+    if(verbose) {
+        cat("layout: widths = ", lwid, ", heights = ", lhei,"; lmat=\n")
+        print(lmat)
+    }
+
+    ## Graphics `output' -----------------------
+
+    op <- par(no.readonly = TRUE)
+    on.exit(par(op))
+    layout(lmat, widths = lwid, heights = lhei, respect = respect)
+    ## draw the side bars
+    if(!missing(RowSideColors)) {
+        par(mar = c(margins[1],0, 0,0.5))
+        image(rbind(1:nr), col = RowSideColors[rowInd], axes = FALSE)
+    }
+    if(!missing(ColSideColors)) {
+        par(mar = c(0.5,0, 0,margins[2]))
+        image(cbind(1:nc), col = ColSideColors[colInd], axes = FALSE)
+    }
+    ## draw the main carpet
+    par(mar = c(margins[1], 0, 0, margins[2]))
+    if(!symm || scale != "none")
+        x <- t(x)
+    if(revC) { # x columns reversed
+        iy <- nr:1
+        ddr <- rev(ddr)
+        x <- x[,iy]
+    } else iy <- 1:nr
+
+    image(1:nc, 1:nr, x, xlim = 0.5+ c(0, nc), ylim = 0.5+ c(0, nr),
+          axes = FALSE, xlab = "", ylab = "", ...)
+    axis(1, 1:nc, labels= labCol, las= lasCol, line= -0.5, tick= 0, cex.axis= cexCol)
+    if(!is.null(xlab)) mtext(xlab, side = 1, line = margins[1] - 1.25)
+    axis(4, iy, labels= labRow, las= lasRow, line= -0.5, tick= 0, cex.axis= cexRow)
+    if(!is.null(ylab)) mtext(ylab, side = 4, line = margins[2] - 1.25,las=lasRow)
+    if (!missing(add.expr))
+        eval(substitute(add.expr))
+
+    ## the two dendrograms :
+    par(mar = c(margins[1], 0, 0, 0))
+    if(doRdend)
+        my.plot.dendrogram(ddr, horiz = TRUE, axes = FALSE, yaxs = "i", leaflab = "none")
+    else frame()
+
+    par(mar = c(0, 0, if(!is.null(main)) 1 else 0, margins[2]))
+    if(doCdend)
+        my.plot.dendrogram(ddc,               axes = FALSE, xaxs = "i", leaflab = "none")
+    else if(!is.null(main)) frame()
+
+    ## title
+    if(!is.null(main)) title(main, cex.main = 1.5*op[["cex.main"]])
+
+    invisible(list(rowInd = rowInd, colInd = colInd,
+                   Rowv = if(keep.dendro && doRdend) ddr,
+                   Colv = if(keep.dendro && doCdend) ddc ))
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/region_motif_lib/regions.cpp	Wed Aug 06 15:36:46 2014 -0400
@@ -0,0 +1,123 @@
+#include <iostream>
+#include <vector>
+#include <algorithm>
+using namespace std;
+
+extern "C"  {
+   typedef pair <int,int> Se_t;
+   bool se_lt (const Se_t &l,const Se_t &r) { return l.first < r.first; }
+
+   void merge_regions(int *regions, int*nregionsR,int *merge_sepR) {
+      int nregs=nregionsR[0];
+      if(nregs==0) return;
+      int sep=merge_sepR[0];
+      if(sep<1) sep=1;
+      vector<Se_t> reg(nregs);
+      for(int ireg=0;ireg<nregs;ireg++) {
+	 reg[ireg]=make_pair(regions[ireg*2],regions[ireg*2+1]);
+      }
+      sort(reg.begin(),reg.end(),se_lt);
+      int *reg_index = new int[nregs];
+      for(int ireg=1;ireg<nregs;ireg++) reg_index[ireg]=-1;
+      reg_index[0]=0;
+      int last_ireg=0;
+      int counter=1;
+      for(int ireg=1;ireg<nregs;ireg++) {
+	 if(reg[ireg].first<=reg[last_ireg].second+sep) {
+	    if(reg[ireg].second>reg[last_ireg].second) reg[last_ireg].second=reg[ireg].second;
+	 } else {
+	    last_ireg=ireg;
+	    reg_index[counter]=ireg;
+	    counter++;
+	 }
+      }
+      for(int ireg=0;ireg<counter;ireg++) {
+	 regions[ireg*2] = reg[reg_index[ireg]].first;
+	 regions[ireg*2+1] = reg[reg_index[ireg]].second;
+      }
+      nregionsR[0]=counter;
+      delete [] reg_index;
+   }
+   void region_minus_region(int *regions, int*nregionsR,int *region2s, int*nregion2sR,int *updatedregions) {
+      int sep=1;
+      merge_regions(regions,nregionsR,&sep);
+      merge_regions(region2s,nregion2sR,&sep);
+      int nregs=nregionsR[0];
+      int nreg2s=nregion2sR[0];
+      for(int i=0;i<2*(nregs+nreg2s);i++) updatedregions[i]=-1;
+      if(nregs==0) return;
+      int ireg = 0;
+      int iregout = 0;
+      for(int ireg2=0; ireg2<nreg2s;ireg2++) {
+	 if(ireg==nregs) break;
+	 if(region2s[ireg2*2+1] < regions[2*ireg]) continue;
+	 if(region2s[ireg2*2] > regions[2*ireg+1]) {
+	    updatedregions[2*iregout] = regions[2*ireg];
+	    updatedregions[2*iregout+1] = regions[2*ireg+1];
+	    ireg++;
+	    ireg2--;
+	    iregout++;
+	    continue;
+	 }
+	 int s = regions[ireg*2];
+	 int e = regions[ireg*2+1];
+	 int s2 = region2s[ireg2*2];
+	 int e2 = region2s[ireg2*2+1];
+	 if(s2<=s && e2>=e) {
+	    ireg++;
+	    ireg2--;
+	 }
+	 else if(s2<=s) {
+	    regions[ireg*2] = e2+1;
+	    continue;
+	 } else if(e2>=e) {
+	    updatedregions[2*iregout] = s;
+	    updatedregions[2*iregout+1] = s2-1;
+	    ireg2--;
+	    iregout++;
+	    ireg++;
+	 } else {
+	    updatedregions[2*iregout] = s;
+	    updatedregions[2*iregout+1] = s2-1;
+	    regions[ireg*2] = e2+1;
+	    iregout++;
+	    ireg2--;
+	 }
+      }
+      while(ireg<nregs) {
+	 updatedregions[2*iregout] = regions[2*ireg];
+	 updatedregions[2*iregout+1] = regions[2*ireg+1];
+	 ireg++;
+	 iregout++;
+      }
+   }
+   void intersection_of_regions(int *regions, int*nregionsR,int *region2s, int*nregion2sR,int *updatedregions) {
+      int sep=1;
+      merge_regions(regions,nregionsR,&sep);
+      merge_regions(region2s,nregion2sR,&sep);
+      int nregs=nregionsR[0];
+      int nreg2s=nregion2sR[0];
+      for(int i=0;i<2*(nregs+nreg2s);i++) updatedregions[i]=-1;
+      if(nregs==0) return;
+      if(nreg2s==0) return;
+      int ireg2 = 0;
+      int iregout = 0;
+      for(int ireg=0; ireg<nregs;ireg++) {
+	 if(ireg2==nreg2s) return;
+	 if(regions[ireg*2+1] < region2s[2*ireg2]) continue;
+	 if(regions[ireg*2] > region2s[2*ireg2+1]) {ireg2++; ireg--; continue;}
+
+	 int s = regions[ireg*2];
+	 if(s<region2s[ireg2*2]) s = region2s[ireg2*2];
+	 int e = regions[ireg*2+1];
+	 if(e>region2s[ireg2*2+1]) {
+	    e = region2s[ireg2*2+1];
+	    ireg2++;
+	    ireg--;
+	 }
+	 updatedregions[2*iregout] = s;
+	 updatedregions[2*iregout+1] = e;
+	 iregout++;
+      }
+   }
+}
Binary file region_motif_lib/regions.o has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/region_motif_lib/regions.r	Wed Aug 06 15:36:46 2014 -0400
@@ -0,0 +1,313 @@
+
+# SHOULD ONLY OCCUR IN ONE FILE
+#common.dir = "/Users/jeremyliu1/galaxy-dist/tools/my_tools"
+
+# commonDir from region_motif_intersect.r sourcing file
+dyn.load(paste(commonDir, "/region_motif_lib/regions.so",sep=""))
+
+##reg = matrix(cbind(from,to)) from<to
+##region[[chr]] = reg
+##pos = unique(integer())
+##poslist = list(chr,pos, optional(strand=c(-1,0,+1)))
+
+# USED
+merge.reg <- function(...,sep=1) {
+  ##This function returns union of regs.
+  reg = rbind(...)
+  x=.C("merge_regions",as.integer(t(reg)),as.integer(nrow(reg)),as.integer(sep))
+  reg=matrix(x[[1]][1:(x[[2]]*2)],ncol=2,byrow=TRUE)
+  reg = matrix(reg[which(reg[,2]>reg[,1]),],ncol=2)
+  reg[which(reg==0)]=1
+  return(reg)
+}
+
+merge.regions<-function(...,sep=1) {
+  ##This function returns union of regions.
+    regions=list(...)
+    chrs = unique(unlist(lapply(regions,names),use.names=F))
+    region = list()
+    for(chr in chrs) {
+        region[[chr]] = do.call("merge.reg",c(lapply(regions,function(i) i[[chr]]),sep=sep))
+  }
+  return(region)
+}
+
+plot.reg<-function(reg,xlim=NULL,y=NULL,vertical=FALSE,...) {
+  ##This function does not stack if reg is overlapping.
+  ##new plot is made unless y is specified.
+  if(nrow(reg)==0) return()
+  if(is.null(xlim)) xlim=range(reg)
+  if(is.null(y)) {
+    plot(xlim,c(0,1),type="n",axes=FALSE,xlab=" ",ylab=" ")
+    y=0.5
+  }
+  segments(reg[,1],y,reg[,2],...)
+  if(vertical) abline(v=reg)
+}
+
+distance.to.closest.reg.of.reg <- function(reg,reg2) {
+  ##for each element of reg, what is the closest distance to any element of reg2?
+  reg2 = merge.reg(reg2)
+  reg2 = c(-Inf,t(reg2),Inf)
+  s=reg[,1]
+  e=reg[,2]
+  sbin = as.integer(cut(s,reg2))
+  ebin = as.integer(cut(e,reg2))
+  d = pmin(s-reg2[sbin], reg2[sbin+1]-s, e-reg2[ebin], reg2[ebin+1]-e)
+  d[which(sbin!=ebin | sbin%%2==0)] = 0
+  return(d)
+}
+
+# USED
+distance.to.closest.reg.of.pos <- function(pos,reg) {
+  ##for each element of pos, what is the closest distance to any element of reg?
+  reg = merge.reg(reg)
+  reg = c(-Inf,t(reg),Inf)
+  pbin = as.integer(cut(pos,reg))
+  d = pmin(pos-reg[pbin], reg[pbin+1]-pos)
+  d[which(pbin%%2==0)] = 0
+  return(d)
+}
+
+distance.to.closest.pos.of.reg <- function(reg,pos,pos.strand=NULL,index.return=FALSE) {
+  ##for each element of reg, what is the closest distance to any element of pos?
+  ##if strand is provided, distance is along strand
+  o = order(pos)
+  pos =  c(-Inf,pos[o],Inf)
+  o = c(o[1],o,o[length(o)])
+
+  s=reg[,1]
+  e=reg[,2]
+  sbin = as.integer(cut(s,pos))
+  ebin = as.integer(cut(e,pos))
+
+  d=integer(nrow(reg))
+  s.is.closer = s-pos[sbin] < pos[sbin+1]-e
+  if(index.return) {
+    return(ifelse(s.is.closer,o[sbin],o[sbin+1]))
+  }
+  d = ifelse(s.is.closer, s-pos[sbin], e-pos[sbin+1])
+  d[which(sbin!=ebin)] = 0
+  if(!is.null(pos.strand)) {
+    reg.strand = ifelse(s.is.closer,pos.strand[o][sbin],pos.strand[o][sbin+1])
+    d = d * reg.strand
+  }
+  return(d)
+}
+
+if(F) {
+    pos = sample(seq(0,1000,200))
+    pos2 = sample(seq(10,1010,100))
+    pos.strand = sample(c(1,-1),6,replace = T)
+    pos2.strand = sample(c(1,-1),11,replace = T)
+}
+
+distance.to.closest.pos.of.pos <- function(pos,pos2,pos.strand=NULL,pos2.strand=NULL, ignore.pos.strand=TRUE,index.return=FALSE) {
+  ##for each element of pos, what is the closest distance to any element of pos2?
+  ##if index.return==TRUE, index of pos2 closest to pos is returned
+  ##else if strand2 is provided, distance is along strand2
+  ##if strand and strand2 are both provided and !ignore.pos.strand
+  ##  then output is a list giving plus.up, plus.down, minus.up, minus.down
+  ##    plus.up: distance to closest upstream on the same same strand etc. etc.
+  o = order(pos2)
+  pos2 =  c(-Inf,pos2[o],Inf)
+  if(!is.null(pos2.strand))   pos2.strand = c(-Inf,pos2.strand[o],Inf)
+
+  if(is.null(pos2.strand) | is.null(pos.strand) | ignore.pos.strand) {
+    pbin = as.integer(cut(pos,pos2))
+
+    pbin = ifelse(pos-pos2[pbin] < pos2[pbin+1]-pos,pbin,pbin+1)
+    d = pos-pos2[pbin]
+    if(!is.null(pos2.strand)) d = d * pos2.strand[pbin]
+
+    if(index.return) return(o[pbin-1])
+    return(d)
+  }
+  strands = list(plus=1,minus=-1)
+  relcoords = list(up=0,down=1)
+  ind = lapply(strands,function(strand) {
+    ind.p = c(1,which(pos2.strand==strand),length(pos2))
+    pbin.p = cut(pos,pos2[ind.p],labels=FALSE)
+    as.data.frame(lapply(relcoords,function(i) ind.p[pbin.p+i]))
+  })
+  ind.temp = ind
+  ind.minus = which(pos.strand==-1)
+  if(length(ind.minus)>0) {
+      ind[[1]][ind.minus,]=ind.temp[[2]][ind.minus,2:1]
+      ind[[2]][ind.minus,]=ind.temp[[1]][ind.minus,2:1]
+  }
+  ind = unlist(ind,recursive=FALSE)
+  if(index.return) {
+    return( lapply(ind,function(i) {
+      i[which(i==1)]=NA
+      i[which(i==length(pos2))]=NA
+      o[i-1]
+    }) )
+  }
+  return(lapply(ind,function(i) pos.strand*(pos2[i]-pos)))
+}
+
+distance.to.closest.region.of.region <- function(region,region2) {
+  ##for each element of region[[chr]], what is the closest distance to any element of region2[[chr]]?
+  ##returns d[[chr]]
+  chrs = names(region)
+  d=list()
+  for(chr in chrs) {
+    if(is.null(region2[[chr]])) {
+      d[[chr]] = rep(Inf,nrow(region[[chr]]))
+    } else {
+      d[[chr]] = distance.to.closest.reg.of.reg(region[[chr]],region2[[chr]])
+    }
+  }
+  return(d)
+}
+
+# USED
+distance.to.closest.region.of.poslist <- function(poslist,region) {
+  ##for each element of poslist, what is the closest distance to any element of region?
+  chrs = names(table(poslist$chr))
+  d=integer()
+  for(chr in chrs) {
+    ind = which(poslist$chr==chr)
+    pos=poslist$pos[ind]
+    if(is.null(region[[chr]])) {
+      d[ind] = Inf
+    } else {
+      d[ind] = distance.to.closest.reg.of.pos(pos,region[[chr]])
+    }
+  }
+  return(d)
+}
+distance.to.closest.poslist.of.region <- function(region,poslist,index.return=FALSE) {
+  ##for each element of region, what is the closest distance to any element of poslist?
+  chrs = names(region)
+  d=list()
+  for(chr in chrs) {
+    ind = which(poslist$chr==chr)
+    pos=poslist$pos[ind]
+    pos.strand=poslist$strand[ind]
+    d[[chr]] = distance.to.closest.pos.of.reg(region[[chr]],pos,pos.strand,index.return=index.return)
+    if(index.return) d[[chr]] = ind[d[[chr]]]
+  }
+  return(d)
+}
+
+distance.to.closest.poslist.of.poslist <- function(poslist,poslist2,ignore.poslist.strand=TRUE,index.return=FALSE) {
+  ##for each element of poslist, what is the closest distance to any element of poslist2?
+  ##if poslist2$strand is provided, distance is along strand2
+  ##if strand and strand2 are provided and no ignore.poslist.strand
+  ##  then output is a list giving plus.up, plus.down, minus.up, minus.down
+  ##    plus.up: distance to closest upstream on the same same strand etc. etc.
+  ##if index.return==TRUE, index of pos2 closest to pos is returned
+
+  chrs = names(table(poslist$chr))
+
+  d=integer()
+  stranded = !(is.null(poslist2$strand) | is.null(poslist$strand) | ignore.poslist.strand)
+  if(stranded) {
+    brs = c("plus.up","plus.down","minus.up","minus.down")
+    d=list()
+    for(br in brs) d[[br]]=integer()
+  }
+
+  for(chr in chrs) {
+    ind = which(poslist$chr==chr)
+    ind2 = which(poslist2$chr==chr)
+    pos=poslist$pos[ind]
+    pos2=poslist2$pos[ind2]
+    pos.strand=poslist$strand[ind]
+    pos2.strand=poslist2$strand[ind2]
+    if(!stranded) {
+      d[ind] = distance.to.closest.pos.of.pos(pos,pos2,pos.strand,pos2.strand,ignore.poslist.strand,index.return=index.return)
+      if(index.return) d[ind] = ind2[d[ind]]
+    } else {
+      x =  distance.to.closest.pos.of.pos(pos,pos2,pos.strand,pos2.strand,ignore.poslist.strand)
+      for(br in brs) {
+        d[[br]][ind] = x[[br]]
+        if(index.return) d[[br]][ind] = ind2[d[[br]][ind]]
+      }
+    }
+  }
+  return(d)
+}
+
+
+reg.minus.reg <- function(reg,reg2) {
+  x = .C("region_minus_region",as.integer(t(reg)),as.integer(nrow(reg)),as.integer(t(reg2)),as.integer(nrow(reg2)),integer((nrow(reg)+nrow(reg2))*2))[[5]]
+  x=x[which(x>=0)]
+  return(matrix(x,ncol=2,byrow=TRUE))
+}
+
+intersection.of.regs <- function(reg,reg2) {
+  x = .C("intersection_of_regions",as.integer(t(reg)),as.integer(nrow(reg)),as.integer(t(reg2)),as.integer(nrow(reg2)),integer((nrow(reg)+nrow(reg2))*2))[[5]]
+  x=x[which(x>=0)]
+  return(matrix(x,ncol=2,byrow=TRUE))
+}
+
+region.minus.region<-function(region,region2) {
+  chrs = names(region)
+  for(chr in chrs) {
+    if(is.null(region[[chr]])) next
+    if(!is.null(region2[[chr]])) {
+      region[[chr]] = reg.minus.reg(region[[chr]],region2[[chr]])
+    }
+  }
+  return(region)
+}
+
+intersection.of.regions<-function(region,region2) {
+  chrs = names(region)
+  for(chr in chrs) {
+    if(is.null(region2[[chr]])) {
+      region[[chr]]<-NULL
+    } else {
+      region[[chr]] = intersection.of.regs(region[[chr]],region2[[chr]])
+    }
+  }
+  return(region)
+}
+
+reg.around.pos <-function(pos,range=500,strand=NULL) {
+  if(length(range)==1) range=c(range,range)
+  if(is.null(strand)) strand = 1;
+  reg = cbind(pos-range[1]*strand,pos+range[2]*strand);
+  ind = which(reg[,2]<reg[,1])
+  reg[ind,] =  reg[ind,2:1]
+  ind = which(reg<=0)
+  reg[ind] = 1
+  return(reg)
+}
+
+
+region.around.poslist <-function(poslist,range=500) {
+  chrs = names(table(poslist$chr))
+  region=list()
+  for(chr in chrs) {
+    ind = which(poslist$chr==chr)
+    pos=poslist$pos[ind]
+    strand = 1
+    if(!is.null(poslist$strand)) {
+      strand = poslist$strand[ind]
+    }
+    region[[chr]] =  reg.around.pos(pos,range,strand)
+  }
+  return(region)
+}
+
+
+poslist.of.region.centers <-function(region) {
+  chrs = names(region)
+  n=sapply(region,nrow)
+  return(data.frame(chr=rep(chrs,n),pos=unlist(lapply(region,function(chr)(chr[,1]+chr[,2])/2),use.names = FALSE)))
+}
+
+write.gff.region<-function(region,outfname) {
+  region = lapply(region,function(chr) list(s=chr[,1],e=chr[,2]))
+  out=unlist.chr(region)
+  out$chr=rep(names(region),sapply(region,function(i) length(i$s)))
+  empty=rep(".",length(out$chr))
+  write.table(data.frame(out$chr,empty,empty,out$s,out$e,empty,empty,empty,empty),quote=FALSE,sep="\t",file=outfname,col.names=FALSE,row.names=FALSE)
+}
+
+number.of.regions = function(region)sum(sapply(region,nrow))
+size.of.regions = function(region) sum(sapply(merge.regions(region),function(reg) sum(reg[,2]-reg[,1])))
Binary file region_motif_lib/regions.so has changed