changeset 8:a58527c632b7 draft

Uploaded
author peter-waltman
date Mon, 11 Mar 2013 16:31:29 -0400
parents 2efa1a284546
children a3c03541fe6f
files cluster.tools/cluster.tab.2.rdata.R cluster.tools/cluster.tab.2.rdata.py cluster.tools/cluster.tab.2.rdata.xml cluster.tools/consensus.clustering.xml cluster.tools/format.raw.TCGA.RNASeq.data.xml cluster.tools/hclust.R cluster.tools/hclust.xml cluster.tools/heatmap.from.cluster.result.xml cluster.tools/new.ccplus.R cluster.tools/normalize.matrix.by.other.R cluster.tools/normalize.matrix.by.other.py cluster.tools/normalize.matrix.xml cluster.tools/partition.R cluster.tools/partition.xml cluster.tools/rdata.2.out.R cluster.tools/rnaseq.feature.selection.xml cluster.tools/select.k.from.consensus.cluster.R cluster.tools/tab.2.cdt.xml
diffstat 16 files changed, 187 insertions(+), 802 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cluster.tools/cluster.tab.2.rdata.R	Mon Mar 11 16:31:29 2013 -0400
@@ -0,0 +1,35 @@
+#!/usr/bin/env Rscript
+## Reads a tab-delimited table of cluster assignments and saves its first data column as a named vector `cl` (RData)
+argspec <- c("cluster.tab.2.rdata.R converts tab-delimited cluster assignments to an RData file
+
+        Usage: 
+                cluster.tab.2.rdata.R -d <data.file> 
+        Optional:
+                            -o <output_file>
+                \n\n")
+args <- commandArgs(TRUE)
+if ( length( args ) == 1 && args =="--help") { 
+  write(argspec, stderr())
+  q();
+}
+
+## load a package (given as a bare name) without printing its startup messages
+lib.load.quiet <- function( package ) {
+   package <- as.character(substitute(package))
+   suppressPackageStartupMessages( do.call( "library", list( package=package ) ) )
+}
+lib.load.quiet(getopt)
+
+## columns: long flag, short flag, argument mask (1=required, 2=optional), type
+spec <- matrix( c( "data.fname",      "d", 1, "character",
+                   "output.fname",    "o", 2, "character" ),
+                ncol=4, byrow=TRUE )
+opt <- getopt( spec=spec )
+if ( is.null( opt$data.fname ) ) stop( "no data file given (-d)\n", argspec, call.=FALSE )
+## default output: input name with any .tab/.csv extension replaced by .rdata (so it can never be the input path)
+if ( is.null( opt$output.fname ) ) opt$output.fname <- paste0( sub( "[.](tab|csv)$", "", opt$data.fname ), ".rdata" )
+
+cl <- as.matrix( read.delim( opt$data.fname, row.names=1, check.names=FALSE ) )
+## re-attach the row names explicitly: [,1] drops them when the table has a single row
+cl <- setNames( cl[,1], rownames( cl ) )
+save( file=opt$output.fname, cl )
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cluster.tools/cluster.tab.2.rdata.py	Mon Mar 11 16:31:29 2013 -0400
@@ -0,0 +1,14 @@
+#!/usr/bin/env python
+"""Run cluster.tab.2.rdata.R (located next to this file) with Rscript, forwarding all command-line arguments."""
+import os
+import sys
+import subprocess
+
+select_script_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "cluster.tab.2.rdata.R")
+cmd_args = [ "Rscript", select_script_path ] + sys.argv[1:] 
+proc = subprocess.Popen( cmd_args, stderr=subprocess.PIPE, stdout=subprocess.PIPE )
+(stdoutdata, stderrdata) = proc.communicate()
+if proc.returncode:  # R's stderr is relayed only on failure
+    getattr(sys.stderr, "buffer", sys.stderr).write(stderrdata)  # pipes yield bytes on Python 3
+getattr(sys.stdout, "buffer", sys.stdout).write(stdoutdata)
+sys.exit(proc.returncode)  # propagate Rscript's exit status instead of always exiting 0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cluster.tools/cluster.tab.2.rdata.xml	Mon Mar 11 16:31:29 2013 -0400
@@ -0,0 +1,27 @@
+<tool id="cluster_tab_2_rdata" name="Convert tab-delimited Cluster Assignments to RData" force_history_refresh="True">
+    <command interpreter="python">cluster.tab.2.rdata.py
+-d $dataset 
+-o ${output}
+
+</command>
+    <inputs>
+    	<param name="dataset" type="data" label="Tab-delimited List of Cluster Assignments"/>
+    </inputs>
+    <outputs>
+        <data format="rdata" name="output" label="Cluster RData File"/>
+    </outputs>
+<help>
+.. class:: infomark
+     
+**Convert tab-delimited cluster assignments to RData file** - Tool to convert cluster assignments in tab-delimited format into an RData file that can be read by the other tools in the Cluster-Tools Suite, e.g. the Kaplan-Meier Survival Plotting Tools
+
+**OUTPUT:**  A new RData file, with the cluster assignments stored as a named numeric vector
+
+----
+
+**Parameters**
+
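+- **Cluster Assignment Matrix in tab-delimited format** A tab-delimited file, which MUST have a header line! The first column holds the item names and the second column holds the cluster assignments; any further columns are ignored.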
+
+</help>
+</tool>
--- a/cluster.tools/consensus.clustering.xml	Mon Mar 04 04:11:28 2013 -0500
+++ b/cluster.tools/consensus.clustering.xml	Mon Mar 11 16:31:29 2013 -0400
@@ -13,7 +13,13 @@
 -k ${kmax} 
 -r ${reps} 
 -f ${finalLinkage}
--o ${output} 
+#if str($direction) == "rows":
+-o ${rdata_output_rows}
+#end if
+
+#if str($direction) == "cols":
+-o ${rdata_output_cols}
+#end if
 -h $report 
 -p ${report.files_path}
 
@@ -89,7 +95,12 @@
     </inputs>
     <outputs>
       <data format="html" name="report" label="Consensus Clustering Report (HTML)"/>
-      <data format="rdata" name="output" label="Consensus Clustering Data (RData)"/>
+      <data format="rdata" name="rdata_output_rows" label="Consensus Clustering Results; Gene Clusters (RData)">
+        <filter>(direction)=="rows"</filter>
+      </data>
+      <data format="rdata" name="rdata_output_cols" label="Consensus Clustering Results; Sample Clusters (RData)">
+        <filter>(direction)=="cols"</filter>
+      </data>
     </outputs>
 <help>
 .. class:: infomark
--- a/cluster.tools/format.raw.TCGA.RNASeq.data.xml	Mon Mar 04 04:11:28 2013 -0500
+++ b/cluster.tools/format.raw.TCGA.RNASeq.data.xml	Mon Mar 11 16:31:29 2013 -0400
@@ -26,7 +26,7 @@
      
 **Format Raw TCGA RNASeq Data** - Tool to convert a raw RNASeq TCGA data file (a file from either Synapse or Firehose) into a the format expected by the Survival Analysis tools
 
-**Log-transform data?** -Specify whether or not to log-transform the data matrix.  To avoid numeric underflow, this will use log(x+1), where x is the value of the RNASeq data
+**Log-transform data?** - Specify whether or not to log-transform the data matrix.  To avoid taking the logarithm of zero, this will use log(x+1), where x is the value of the RNASeq data
 
 **OUTPUT:**  A new tab-delimited file containing the log-transformed RNASeq data
 
--- a/cluster.tools/hclust.R	Mon Mar 04 04:11:28 2013 -0500
+++ b/cluster.tools/hclust.R	Mon Mar 11 16:31:29 2013 -0400
@@ -39,14 +39,25 @@
 
 opt <- getopt( spec=spec )
 
+data <- as.matrix( read.delim( opt$data.fname, header=T, row.names=1 , check.names=FALSE ) )
 if ( is.null( opt$distance.metric ) ) { opt$distance.metric <- "euclidean" }
 if ( is.null( opt$dist.obj ) ) { opt$dist.obj <- FALSE }
 if ( is.null( opt$direction ) ) { opt$direction <- "cols"  }
 if ( is.null( opt$linkage ) ) { opt$linkage <- "average" }
-if ( is.null( opt$num.k ) ) { opt$num.k <- 10 }
 if ( is.null( opt$output.name ) ) { opt$output.name <- "hclust.result.rda" }
+if ( is.null( opt$num.k ) || ( opt$num.k == -1 )) {  ## no usable -k given: derive a default
+  if ( opt$direction == 'cols' ) {
+    opt$num.k <- 5
+  } else if ( opt$direction == 'rows' ) {
+    opt$num.k <- nrow( data ) / 30  ## we use an estimated average size of gene clusters to be 30
+    ## round down to a multiple of 10 (when above 1000) or of 5 (otherwise)
+    k.step <- if ( opt$num.k > 1000 ) 10 else 5
+    opt$num.k <- ( opt$num.k %/% k.step ) * k.step
+    ## fewer than 150 rows would otherwise round down to k = 0, which is not a usable cluster count
+    opt$num.k <- max( 2, opt$num.k )
+  }
+}
 
-data <- as.matrix( read.delim( opt$data.fname, header=T, row.names=1 , check.names=FALSE ) )
 if ( opt$direction == "cols" ) {
   ## need to transpose b/c both kmeans & pam cluster the rows
   ## this shouldn't have an effect upon a distance matrix
--- a/cluster.tools/hclust.xml	Mon Mar 04 04:11:28 2013 -0500
+++ b/cluster.tools/hclust.xml	Mon Mar 11 16:31:29 2013 -0400
@@ -5,8 +5,18 @@
 -n ${direction} 
 -m ${distance_metric} 
 -l ${linkage} 
+
+#if str($numk) != "-1":
 -k ${numk} 
--o ${rdata_output}
+#end if
+
+#if str($direction) == "rows":
+-o ${rdata_output_rows}
+#end if
+
+#if str($direction) == "cols":
+-o ${rdata_output_cols}
+#end if
 
 </command>
     <inputs>
@@ -41,11 +51,16 @@
 	  <option value="ward">Ward</option>
     	</param>
     	
-    	<param name="numk" type="integer" label="Number of Clusters" value="50" help="Specify the number of clusters to use"/>
+    	<param name="numk" type="integer" label="Number of Clusters" value="-1" help="Specify the number of clusters to use (-1 to use default. See help below)."/>
     	
     </inputs>
     <outputs>
-        <data format="rdata" name="rdata_output" label="Hierarchical Clustering Result (RData)"/>
+      <data format="rdata" name="rdata_output_rows" label="Hierarchical Clustering Results; Gene Clusters (RData)">
+        <filter>(direction)=="rows"</filter>
+      </data>
+      <data format="rdata" name="rdata_output_cols" label="Hierarchical Clustering Results; Sample Clusters (RData)">
+        <filter>(direction)=="cols"</filter>
+      </data>
     </outputs>
 <help>
 .. class:: infomark
@@ -92,7 +107,9 @@
          * McQuity
          * Ward
 
-- **Number of Clusters** Specify the number of clusters to use
+- **Number of Clusters** Specify the number of clusters to use.  If set to -1, default values will be used, with the default set as follows:
+        * if samples/columns are being clustered, the **default** is 5.
+        * if genes/rows are being clustered, the **default** is set to num_rows/30, rounded down to a multiple of 5 (or of 10 when it exceeds 1000), e.g. if there are 600 rows/genes in the matrix, the default will be 20 clusters.
 
 </help>
 </tool>
--- a/cluster.tools/heatmap.from.cluster.result.xml	Mon Mar 04 04:11:28 2013 -0500
+++ b/cluster.tools/heatmap.from.cluster.result.xml	Mon Mar 11 16:31:29 2013 -0400
@@ -99,7 +99,7 @@
 
 - **Clustering Classification** Specify the clustering classification (RData file format - use the 'Convert tab-delimited Cluster Assignments to RData" tool to convert assignments in tab-delimited format).
 
-- **Plot Kaplan-Meiers Survival Plot as well (primary clustering ONLY)?** Specify whether or not to also plot a Kaplan-Meiers Surivial Plot.  **NOTE*, the cluster results must be a **SAMPLE** cluster.
+- **Plot Kaplan-Meiers Survival Plot as well (primary clustering ONLY)?** Specify whether or not to also plot a Kaplan-Meier Survival Plot.  **NOTE**, the cluster results must be a **SAMPLE** cluster.
  
 - **Cluster the second dimension?** Specify whether or not to cluster the 2nd dimension of matrix in the cluster result.  Choice of:
          * No
--- a/cluster.tools/new.ccplus.R	Mon Mar 04 04:11:28 2013 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,773 +0,0 @@
-##!/usr/bin/env Rscript
-## Consensus Clustering Script by Peter Waltman
-## May 31, 2011
-## License under Creative Commons Attribution 3.0 Unported (CC BY 3.0)
-##
-#usage, options and doc goes here
-argspec <- c("consensus.clustering.R takes a clustering from ConsensusClusterPlus and clinical survival data
-and generates a KM-plot, along with the log-rank p-values
-
-        Usage: 
-                consensus.clustering.R -d <data.file> 
-        Optional:
-                -o <output.name>
-                -a <cluster.alg>  ## must be either 'hc' or 'km'
-                -m <distance.metric> ## must be one supported by ConsensusClusterPlus
-                -k <max.k>
-                -r <reps>
-                -f <filter>            ## filter, o/w no filtering
-
-                \n\n")
-args <- commandArgs(TRUE)
-if ( length( args ) == 1 && args =="--help") { 
-  write(argspec, stderr())
-  q();
-}
-
-require(getopt)
-##require(ConsensusClusterPlus)
-##  if any of the faster clustering methods are available on this system, load them
-require( amap )
-require( cluster )
-if ( any( c( 'flashClust', 'fastcluster' ) %in% installed.packages() ) ) {
-  if ( 'flashClust' %in% installed.packages() ) {
-    require( flashClust )
-  } else {
-    if ( 'fastcluster' %in% installed.packages() ) {
-      require( fastcluster )
-    }
-  }
-}
-
-###################
-## code borrowed/updated from ConsensusClusterPlus
-###################
-
-ConsensusClusterPlus <- function( d=NULL,
-                                  maxK = 3,
-                                  reps=10,
-                                  pItem=0.8,
-                                  pFeature=1,
-                                  clusterAlg="hc",
-                                  title="untitled_consensus_cluster",
-                                  innerLinkage="average",
-                                  finalLinkage="average",
-                                  distance=ifelse( inherits(d,"dist"), attr( d, "method" ), "euclidean" ),
-                                  ml=NULL,
-                                  tmyPal=NULL,
-                                  seed=NULL,
-                                  plot=NULL,
-                                  writeTable=FALSE,
-                                  weightsItem=NULL,
-                                  weightsFeature=NULL,
-                                  verbose=F ) {
-  ##description: runs consensus subsamples 
-
-
-  if(is.null(seed)==TRUE){
-    seed=timeSeed = as.numeric(Sys.time())
-  }
-  set.seed(seed)
-
-  if(is.null(ml)==TRUE){
-
-    if ( inherits( distance, "dist" ) ) {
-      stop( "If you want to pass in a pre-calculated distance object, pass it in as the data, rather than the distance parameter\n" )
-    }
-    
-    if ( ! class( d ) %in% c( "dist", "matrix", "ExpressionSet" ) ) {
-      stop("d must be a matrix, distance object or ExpressionSet (eset object)")
-    }
-
-    if ( inherits( d, "dist" ) ) {
-      ## if d is a distance matrix, fix a few things so that they don't cause problems with the analysis
-      ##  Note, assumption is that if d is a distance matrix, the user doesn't want to sample over the row features
-      if ( is.null( attr( d, "method" ) ) ) {
-        attr( d, "method" ) <- distance <- "unknown - user-specified"
-      }
-      if ( is.null( distance ) || ( distance != attr( d, "method" ) ) ) {
-        distance <- attr( d, "method" )
-      }
-      
-      if ( ( ! is.null( pFeature ) ) && ( pFeature < 1 ) ) {
-        if ( verbose ) warning( "Cannot use the pFeatures parameter when specifying a distance matrix as the data object\n" )
-        pFeature <- 1
-      }
-      if ( ! is.null( weightsFeature ) ) {
-        if ( verbose ) warning( "Cannot use the weightsFeature parameter when specifying a distance matrix as the data object\n" )
-        weightsFeature <- NULL
-      }
-      if ( clusterAlg == "km" ) {
-        if ( verbose ) warning( "You are asking CCPLUS to use K-means to cluster a distance matrix (rather than the data itself) - this may produce unintended results. We suggest using PAM if you want to use alternate distance metrics/objects\n" )
-        ##d <- as.matrix( d )  #this is now done w/in ccRun
-      }
-    } else {
-      if ( is.null( distance ) ) {
-        ## we should never get here, but just in case
-        distance <- "pearson"
-      }
-    }
-
-    if ( ( clusterAlg == "km" ) && inherits( distance, "character" ) && ( distance != "euclidean" ) ) {
-      warning( "WARNING: kmeans can only use the euclidean distance metric.  If you would like to use an alternate metric, we suggest using PAM or HC clustering instead. This parameter combinationwill use k-means, but will NOT use the specified distance metric\n" )
-      distance <- 'euclidean'
-    }
-
-
-    if ( inherits( d,"ExpressionSet" ) ) {
-      d <- exprs(d)
-    }
-
-    ml <- ccRun( d=d,
-                 maxK=maxK,
-                 repCount=reps,
-                 diss=inherits(d,"dist"),
-                 pItem=pItem,
-                 pFeature=pFeature,
-                 innerLinkage=innerLinkage,
-                 clusterAlg=clusterAlg,
-                 weightsFeature=weightsFeature,
-                 weightsItem=weightsItem,
-                 distance=distance,
-                 verbose=verbose)
-  }
-  res=list();
-  
-  ##make results directory
-  if((is.null(plot)==FALSE | writeTable) & !file.exists(paste(title,sep=""))){
-    dir.create(paste(title,sep=""))
-  }
-  
-  ##write log file
-  log <- matrix( ncol=2,
-                 byrow=T,
-                 c("title",title,
-                   "maxK",maxK,
-                   "input matrix rows",ifelse ( inherits( d, "matrix" ), nrow(d), "dist-mat" ), 
-                   "input matric columns",ifelse ( inherits( d, "matrix" ), ncol(d), ncol( as.matrix(d) ) ), 
-                   "number of bootstraps",reps,
-                   "item subsampling proportion",pItem,
-                   "feature subsampling proportion",ifelse( is.null(pFeature), 1, pFeature ),
-                   "cluster algorithm",clusterAlg,
-                   "inner linkage type",innerLinkage,
-                   "final linkage type",finalLinkage,
-                   "correlation method",distance,
-                   "plot",if(is.null(plot)) NA else plot,
-                   "seed",if(is.null(seed)) NA else seed))
-  colnames(log) = c("option","value")
-  if(writeTable){
-    write.csv(file=paste(title,"/",title,".log.csv",sep=""), log,row.names=F)
-  }
-  if(is.null(plot)){
-    ##nothing
-  }else if(plot=="png"){
-    png(paste(title,"/","consensus%03d.png",sep=""))
-  }else if (plot=="pdf"){
-    pdf(onefile=TRUE, paste(title,"/","consensus.pdf",sep=""))
-  }else if (plot=="ps"){
-    postscript(onefile=TRUE, paste(title,"/","consensus.ps",sep=""))
-  }	
-  
-  colorList=list()
-  colorM = rbind() #matrix of colors.
-  
-                                        #18 colors for marking different clusters
-  thisPal <- c("#A6CEE3","#1F78B4","#B2DF8A","#33A02C","#FB9A99","#E31A1C","#FDBF6F","#FF7F00","#CAB2D6","#6A3D9A","#FFFF99","#B15928",
-               "#bd18ea", #magenta
-               "#2ef4ca", #aqua
-               "#f4cced", #pink,
-               "#f4cc03", #lightorange
-               "#05188a", #navy,
-               "#e5a25a", #light brown
-               "#06f106", #bright green
-               "#85848f", #med gray
-               "#000000", #black
-               "#076f25", #dark green
-               "#93cd7f",#lime green
-               "#4d0776", #dark purple
-               "#ffffff" #white
-               )
-  
-  ##plot scale
-  colBreaks=NA
-  if(is.null(tmyPal)==TRUE){
-    colBreaks=10
-    tmyPal = myPal(colBreaks)
-  }else{
-    colBreaks=length(tmyPal)
-  }
-  sc = cbind(seq(0,1,by=1/( colBreaks) )); rownames(sc) = sc[,1]
-  sc = cbind(sc,sc)
-  heatmap(sc, Colv=NA, Rowv=NA, symm=FALSE, scale='none', col=tmyPal, na.rm=TRUE,labRow=rownames(sc),labCol=F,main="consensus matrix legend")
-
-  for (tk in 2:maxK){
-    if(verbose){
-      message(paste("consensus ",tk))
-    }
-    fm = ml[[tk]]
-    hc=hclust( as.dist( 1 - fm ), method=finalLinkage);
-    message("clustered")	
-    ct = cutree(hc,tk)
-    names(ct) = colnames(d)
-    c = fm
-    ##colnames(c) = colnames(d)
-    ##rownames(c) = colnames(d)
-
-    colorList = setClusterColors(res[[tk-1]][[3]],ct,thisPal,colorList)
-	
-    pc = c
-    pc=pc[hc$order,] #***pc is matrix for plotting, same as c but is row-ordered and has names and extra row of zeros.
-    pc = rbind(pc,0)
-    
-    heatmap(pc, Colv=as.dendrogram(hc), Rowv=NA, symm=FALSE, scale='none', col=tmyPal, na.rm=TRUE,labRow=F,labCol=F,mar=c(5,5),main=paste("consensus matrix k=",tk,sep="") , ColSideCol=colorList[[1]])
-    legend("topright",legend=unique(ct),fill=unique(colorList[[1]]),horiz=FALSE )
-
-    res[[tk]] = list(consensusMatrix=c,consensusTree=hc,consensusClass=ct,ml=ml[[tk]],clrs=colorList)
-    colorM = rbind(colorM,colorList[[1]]) 
-  }
-  CDF(ml)
-  clusterTrackingPlot(colorM[,res[[length(res)]]$consensusTree$order])
-  if(is.null(plot)==FALSE){
-    dev.off();
-  }
-  res[[1]] = colorM
-  if(writeTable){
-    for(i in 2:length(res)){
-      write.csv(file=paste(title,"/",title,".k=",i,".consensusMatrix.csv",sep=""), res[[i]]$consensusMatrix)
-      write.table(file=paste(title,"/",title,".k=",i,".consensusClass.csv",sep=""), res[[i]]$consensusClass,col.names = F,sep=",")
-    }
-  }
-  return(res)
-}
-
-
-calcICL = function(res,title="untitled_consensus_cluster",plot=NULL,writeTable=FALSE){
-  #calculates and plots cluster consensus and item consensus
-  cc=rbind()
-  cci = rbind()
-  sumRes=list()
-  colorsArr=c()
-  
-  #make results directory
-  if((is.null(plot)==FALSE | writeTable) & !file.exists(paste(title,sep=""))){
-	dir.create(paste(title,sep=""))
-  }
-  if(is.null(plot)){
-    #to screen
-  }else if(plot=="pdf"){
-    pdf(onefile=TRUE, paste(title,"/","icl.pdf",sep=""))
-  }else if(plot=="ps"){
-    postscript(onefile=TRUE, paste(title,"/","icl.ps",sep=""))
-  }else if (plot=="png"){
-    png(paste(title,"/","icl%03d.png",sep=""))
-  }
-
-  par(mfrow=c(3,1),mar=c(4,3,2,0))
-
-  for (k in 2:length(res)){ #each k
-    eiCols = c();
-    o = res[[k]]
-    m = o$consensusMatrix
-    m = triangle(m,mode=2)
-    for (ci in sort(unique(o$consensusClass))){ #each cluster in k
-	items = which(o$consensusClass==ci)
-	nk = length(items)
-	mk = sum( m[items,items], na.rm=T)/((nk*(nk-1))/2)
-	cc=rbind(cc,c(k,ci,mk)) #cluster-consensus
-	
-      for (ei in rev(res[[2]]$consensusTree$order) ){
-		denom = if (ei %in% items) { nk - 1} else { nk }
-        	mei = sum( c(m[ei,items],m[items,ei]), na.rm=T)/denom  # mean item consensus to a cluster.
-		cci = rbind(cci,c(k,ci,ei,mei)) #cluster, cluster index, item index, item-consensus
-      }
-      eiCols = c(eiCols, rep(ci,length(o$consensusClass)) )
-    }
-	  
-	  cck = cci[which(cci[,1]==k),] #only plot the new k data.
-
-	  #group by item, order by cluster i
-	  w=lapply(split(cck,cck[,3]), function(x) { y=matrix(unlist(x),ncol=4); y[order(y[,2]),4] }) 
-	  q = matrix(as.numeric(unlist(w)),ncol=length(w),byrow=F)
-	  q = q[,res[[2]]$consensusTree$order] #order by leave order of k=2
- 	  #q is a matrix of k rows and sample columns, values are item consensus of sample to the cluster.
-
-	  thisColors = unique(cbind(res[[k]]$consensusClass,res[[k]]$clrs[[1]]))
-	  thisColors=thisColors[order(as.numeric(thisColors[,1])),2]
-	  colorsArr=c(colorsArr,thisColors)
-	  sumRes[[k]] = rankedBarPlot(q,thisColors,cc=res[[k]]$consensusClass[res[[2]]$consensusTree$order],paste("k=",k,sep="") )
-  }
-
-  ys=cs=lab=c()
-  lastk=cc[1,1]
-  for(i in 1:length(colorsArr)){
-    if(lastk != cc[i,1]){
-      ys=c(ys,0,0)
-      cs=c(cs,NA,NA)
-      lastk=cc[i,1]
-      lab=c(lab,NA,NA)
-    }
-    ys=c(ys,cc[i,3])
-    cs=c(cs,colorsArr[i])
-    lab=c(lab,cc[i,1])
-  }
-  names(ys) = lab
-  par(mfrow=c(3,1),mar=c(4,3,2,0))
-  barplot(ys,col=cs,border=cs,main="cluster-consensus",ylim=c(0,1),las=1)
-  if(is.null(plot)==FALSE){
-	  dev.off()
-  }
-  colnames(cc) = c("k","cluster","clusterConsensus")
-  colnames(cci) = c("k","cluster","item","itemConsensus")
-  cci[,"item"] = names(res[[2]]$consensusClass)[ cci[,"item"] ]
-  #type cci
-  cci = data.frame( k=as.numeric(cci[,"k"]), cluster=as.numeric(cci[,"cluster"]), item=cci[,"item"], itemConsensus=as.numeric(cci[,"itemConsensus"])) 
-  
-  #write to file.
-  if(writeTable){
-	write.csv(file=paste(title,"/",title,".summary.cluster.consensus.csv",sep=""),row.names=F, cc)
-	write.csv(file=paste(title,"/",title,".summary.item.consensus.csv",sep=""), row.names=F, cc)
-  }
-  return(list(clusterConsensus=cc,itemConsensus=cci))
-}
-
-
-ccRun <- function( d=d,
-                   maxK=NULL,
-                   repCount=NULL,
-                   diss=inherits( d, "dist" ),
-                   pItem=NULL,
-                   pFeature=NULL,
-                   innerLinkage=NULL,
-                   distance=ifelse( inherits(d,"dist"), attr( d, "method" ), "euclidean" ),
-                   clusterAlg=NULL,
-                   weightsItem=NULL,
-                   weightsFeature=NULL,
-                   verbose=NULL) {
-  m = vector(mode='list', repCount)
-  ml = vector(mode="list",maxK)
-  n <- ifelse( diss, ncol( as.matrix(d) ), ncol(d) )
-  mCount = mConsist = matrix(c(0),ncol=n,nrow=n)
-  ml[[1]] = c(0);
-
-  if (is.null( distance ) ) distance <- 'euclidean'  ## necessary if d is a dist object and attr( d, "method" ) == NULLa
-  
-  require( amap )
-  ##  we're going to use the amap Dist function, but they misname their correlation
-  ##  functions, so re-name them correctly
-  amap.distance <- c( "euclidean", "maximum", "manhattan", "canberra", "binary",
-                      "pearson", "abspearson", "correlation", "abscorrelation", "spearman", "kendall" )
-  names( amap.distance ) <- c( "euclidean", "maximum", "manhattan", "canberra", "binary",
-                               "cosine", "abscosine", "pearson", "abspearson", "spearman", "kendall" )
-  main.dist.obj <- NULL
-  ##browser()
-  if ( diss ){
-    main.dist.obj <- d
-
-    ## reset the pFeature & weightsFeature params if they've been set (irrelevant if d is a dist matrix)
-    if ( ( !is.null(pFeature) ) &&
-         ( pFeature < 1 ) ) {
-      if (verbose) warning( "user-supplied data is a distance matrix; ignoring user-specified pFeature parameter\n" )
-      pFeature <- 1 # set it to 1 to avoid problems with sampleCols
-    }
-    if ( ! is.null( weightsFeature ) ) {
-      if (verbose) warning( "user-supplied data is a distance matrix; ignoring user-specified weightsFeature parameter\n" )
-      weightsFeature <- NULL  # set it to NULL to avoid problems with sampleCols
-    }
-  } else { ## d is a data matrix
-    ## we're not sampling over the features
-    if ( ( clusterAlg != "km" ) &&
-         ( is.null( pFeature ) ||
-           ( ( pFeature == 1 ) && is.null( weightsFeature ) ) ) ) {
-      ## only generate a main.dist.object IFF 1) d is a matrix, 2) we're not sampling the features, and 3) the algorithm isn't 'km'
-      if ( inherits( distance, "character" ) ) {
-        if ( ! distance %in% names( amap.distance ) ) stop("unsupported distance.")
-
-        main.dist.obj <- Dist( t(d), method=as.character( amap.distance[ distance ] ) )
-        ## now fix dumb amap naming convention for distance metrics
-        attr( main.dist.obj, "method" ) <- as.character( amap.distance[ distance ] )
-      } else stop("unsupported distance specified.")
-    } else {
-      ## pFeature < 1 or a weightsFeature != NULL
-      ## since d is a data matrix, the user wants to sample over the gene features, so main.dist.obj is left as NULL
-    }
-  }
- 
-
-  for (i in 1:repCount){
-    ##browser()  
-    if(verbose){
-      message(paste("random subsample",i));
-    }
-    ## take expression matrix sample, samples and genes
-    sample_x = sampleCols( d, pItem, pFeature, weightsItem, weightsFeature )
-
-    this_dist = NA
-    if ( ! is.null( main.dist.obj ) ) {
-      boot.cols <- sample_x$subcols
-      this_dist <- as.matrix( main.dist.obj )[ boot.cols, boot.cols ]
-      if ( clusterAlg != "km" ) {
-        ## if this isn't kmeans, then convert to a distance object
-        this_dist <- as.dist( this_dist )
-        attr( this_dist, "method" ) <- attr( main.dist.obj, "method" )
-      }
-    } else {
-      ## if main.dist.obj is NULL, then d is a data matrix, and either:
-      ##   1) clusterAlg is 'km'
-      ##   2) pFeatures < 1 or weightsFeatures have been specified, or
-      ##   3) both
-      ## so we can't use a main distance object and for every iteration, we will have to re-calculate either
-      ##   1) the distance matrix (because we're also sampling the features as well), or
-      ##   2) the submat (if using km) 
-
-      if ( clusterAlg != "km" )  {
-        if ( ! distance %in% names( amap.distance ) ) stop("unsupported distance.")
-        ## good, we have a supported distance type
-        this_dist <- Dist( t( sample_x$submat ), method=as.character( amap.distance[ distance ] ) )
-        ## now fix dumb amap naming convention for distance metrics
-        attr( this_dist, "method" ) <- as.character( amap.distance[ distance ] )
-      } else {
-        ##browser()
-        ##clusterAlg == "km" 
-        ## if we're not sampling the features, then grab the colslice
-        if ( is.null( pFeature ) ||
-            ( ( pFeature == 1 ) && is.null( weightsFeature ) ) ) {
-          this_dist <- d[, sample_x$subcols ]
-        } else {
-          if ( is.na( sample_x$submat ) ) {
-            save( "ccrun.submat.eq.na.dbg.rda" )
-            stop( "Houston, we have a problem.  sample_x$submat is NA in ccRun when it should be specified - saving state\n" )
-          }
-          
-          this_dist <- sample_x$submat
-        } 
-      }
-    }
-                  
-    ## cluster samples for HC.
-    this_cluster=NA
-    if(clusterAlg=="hc"){
-      this_cluster = hclust( this_dist, method=innerLinkage)
-    }
-    ##browser()
-    ##mCount is possible number of times that two sample occur in same random sample, independent of k
-    ##mCount stores number of times a sample pair was sampled together.
-    mCount <- connectivityMatrix( rep( 1,length(sample_x[[3]])),
-                                  mCount,
-                                  sample_x[[3]] ) 
-
-    ##use samples for each k		
-    for (k in 2:maxK){
-      if(verbose){
-        message(paste("  k =",k))
-      }
-      if (i==1){
-        ml[[k]] = mConsist #initialize
-      }
-      this_assignment=NA
-      if(clusterAlg=="hc"){
-        ##prune to k for hc
-        this_assignment = cutree(this_cluster,k)
-        ##browser()
-      }else if(clusterAlg=="km"){
-        ##this_dist should now be a matrix corresponding to the result from sampleCols
-        this_assignment <- kmeans( t( this_dist ),
-                                   k,
-                                   iter.max = 10,
-                                   nstart = 1,
-                                   algorithm = c("Hartigan-Wong") )$cluster
-      }else if ( clusterAlg == "pam" ) {
-        require( cluster )
-        this_assignment <- pam( x=this_dist,
-                                k,
-                                diss=TRUE,
-                                metric=distance, 
-                                cluster.only=TRUE )
-      } else{
-        ##optional cluterArg Hook.
-        this_assignment <- get(clusterAlg)(this_dist, k)
-      }
-      ##add to tally				
-      ml[[k]] <- connectivityMatrix( this_assignment,
-                                     ml[[k]],
-                                     sample_x[[3]] )
-    }
-  }
-	
-
-  ##consensus fraction
-  res = vector(mode="list",maxK)
-  for (k in 2:maxK){
-    ##fill in other half of matrix for tally and count.
-    tmp = triangle(ml[[k]],mode=3)
-    tmpCount = triangle(mCount,mode=3)
-    res[[k]] = tmp / tmpCount
-    res[[k]][which(tmpCount==0)] = 0
-  }
-  message("end fraction")
-  return(res)
-}
-
-
-connectivityMatrix <- function( clusterAssignments, m, sampleKey){
-  ##input: named vector of cluster assignments, matrix to add connectivities
-  ##output: connectivity matrix
-  names( clusterAssignments ) <- sampleKey 
-  cls <- lapply( unique( clusterAssignments ), function(i) as.numeric( names( clusterAssignments[ clusterAssignments %in% i ] ) ) )
-
-  for ( i in 1:length( cls ) ) {
-    nelts <- 1:ncol( m )
-    cl <- as.numeric( nelts %in% cls[[i]] ) ## produces a binary vector
-    updt <- outer( cl, cl )
-    m <- m + updt
-  }
-  return(m)
-}
-
-## returns a list with the sample columns, as well as the sub-matrix & sample features (if necessary)
-##  if no sampling over the features is performed, the submatrix & sample features are returned as NAs
-##  to reduce memory overhead
-sampleCols <- function( d,
-                        pSamp=NULL,
-                        pRow=NULL,
-                        weightsItem=NULL,
-                        weightsFeature=NULL ){
-  space <- ifelse( inherits( d, "dist" ), ncol( as.matrix(d) ), ncol(d) )
-  sampleN <- floor(space*pSamp)
-  sampCols <- sort( sample(space, sampleN, replace = FALSE, prob = weightsItem) )
-
-  this_sample <- sampRows <- NA
-  if ( inherits( d, "matrix" ) ) {
-    if ( (! is.null( pRow ) ) &&
-         ( (pRow < 1 ) || (! is.null( weightsFeature ) ) ) ) {
-      ## only sample the rows and generate a sub-matrix if we're sampling over the row/gene/features
-      space = nrow(d)
-      sampleN = floor(space*pRow)
-      sampRows = sort( sample(space, sampleN, replace = FALSE, prob = weightsFeature) )
-      this_sample <- d[sampRows,sampCols]
-      dimnames(this_sample) <- NULL
-    } else {
-      ## do nothing
-    }
-  }
-  return( list( submat=this_sample,
-                subrows=sampRows,
-                subcols=sampCols ) )
-}
-
-CDF=function(ml,breaks=100){
-  #plot the consensus CDF curve of each of ml[[2]]..ml[[length(ml)]], then plot the relative change in area under that curve between successive k
-  plot(c(0),xlim=c(0,1),ylim=c(0,1),col="white",bg="white",xlab="consensus index",ylab="CDF",main="consensus CDF", las=2)
-  k=length(ml)
-  this_colors = rainbow(k-1)
-  areaK = c()
-  for (i in 2:length(ml)){
-    v=triangle(ml[[i]],mode=1) #lower-triangle values of ml[[i]] as a vector
-
-    #empirical CDF distribution. default number of breaks is 100
-    h = hist(v, plot=FALSE, breaks=seq(0,1,by=1/breaks))
-    h$counts = cumsum(h$counts)/sum(h$counts)
-
-    #calculate area under CDF curve, by histogram method.
-    thisArea=0
-    for (bi in 1:(length(h$breaks)-1)){
-       thisArea = thisArea + h$counts[bi]*(h$breaks[bi+1]-h$breaks[bi]) #increment by height by width
-       bi = bi + 1 #NOTE(review): no lasting effect; the for loop reassigns bi on the next iteration
-    }
-    areaK = c(areaK,thisArea)
-    lines(h$mids,h$counts,col=this_colors[i-1],lwd=2,type='l')
-  }
-  legend(0.8,0.5,legend=paste(rep("",k-1),seq(2,k,by=1),sep=""),fill=this_colors)
-
-  #plot area under CDF change.
-  deltaK=areaK[1] #initial auc at k=2
-  for(i in 2:(length(areaK))){
-    #proportional increase relative to prior K.
-    deltaK = c(deltaK,( areaK[i] - areaK[i-1])/areaK[i-1])
-  }
-  plot(1+(1:length(deltaK)),y=deltaK,xlab="k",ylab="relative change in area under CDF curve",main="Delta area",type="b")
-}
-
-
-myPal = function(n=10){
-  #returns n+1 colors (the sequence keeps both endpoints) fading from white to blue
-  seq = rev(seq(0,255,by=255/(n)))
-  palRGB = cbind(seq,seq,255)
-  rgb(palRGB,maxColorValue=255)
-}
-
-setClusterColors = function(past_ct,ct,colorU,colorList){
-	#description: sets common color of clusters between different K; returns list( color per item, running color index colori, unique colors used )
-	newColors = c()
-	if(length(colorList)==0){
-		#k==2
-		newColors = colorU[ct]
-		colori=2
-	}else{
-		newColors = rep(NULL,length(ct))
-		colori = colorList[[2]] #presumably the colori returned by the call for the previous k
-		mo=table(past_ct,ct) #contingency table: previous clusters (rows) x current clusters (columns)
-		m=mo/apply(mo,1,sum) #row-normalize: fraction of each previous cluster's items in each current cluster
-			for(tci in 1:ncol(m)){ # for each cluster
-				maxC = max(m[,tci])
-				pci = which(m[,tci] == maxC)				
-				if( sum(m[,tci]==maxC)==1 & max(m[pci,])==maxC & sum(m[pci,]==maxC)==1  )  {
-				#if new column maximum is unique, same cell is row maximum and is also unique
-				##Note: the greatest of the prior clusters' members are the greatest in a current cluster's members.
-					newColors[which(ct==tci)] = unique(colorList[[1]][which(past_ct==pci)]) # one value
-				}else{ #add new color.
-					colori=colori+1
-					newColors[which(ct==tci)] = colorU[colori]
-				}
-			}
-	}
-	return(list(newColors,colori,unique(newColors) ))
-}
-
-clusterTrackingPlot = function(m){
-  #description: plots cluster tracking plot: one horizontal band per row of m, one cell per column
-  #input: m - matrix where rows are k, columns are samples, and values are cluster assignments (passed to rect() as the fill colors).
-  plot(NULL,xlim=c(-0.1,1),ylim=c(0,1),axes=FALSE,xlab="samples",ylab="k",main="tracking plot")
-  for(i in 1:nrow(m)){
-    rect(  xleft=seq(0,1-1/ncol(m),by=1/ncol(m)),  ybottom=rep(1-i/nrow(m),ncol(m)) , xright=seq(1/ncol(m),1,by=1/ncol(m)), ytop=rep(1-(i-1)/nrow(m),ncol(m)), col=m[i,],border=NA)   
-  }
-  #hatch lines to indicate samples
-  xl = seq(0,1-1/ncol(m),by=1/ncol(m))
-  segments(  xl, rep(-0.1,ncol(m)) , xl, rep(0,ncol(m)), col="black")    #** alt white and black color?
-  ypos = seq(1,0,by=-1/nrow(m))-1/(2*nrow(m)) #vertical centre of each band, top to bottom (one extra value, dropped below)
-  text(x=-0.1,y=ypos[-length(ypos)],labels=seq(2,nrow(m)+1,by=1)) #label the bands 2..nrow(m)+1
-}
-
-triangle = function(m,mode=1){
-  #mode==1 for CDF: vector of the lower-triangle values of m.
-  #mode==3 for full matrix: upper triangle of m mirrored onto the lower half, diagonal of m kept.
-  #mode==2 for calcICL: that mirrored matrix with the upper triangle and diagonal set to NA; nonredundant half matrix count.
-  #any other mode falls through the if/else chain below and returns NULL.
-  n=dim(m)[1]
-  nm = matrix(0,ncol=n,nrow=n)
-  fm = m
-
-
-  nm[upper.tri(nm)] = m[upper.tri(m)] #only upper half
-  
-  fm = t(nm)+nm
-  diag(fm) = diag(m)
-  
-  nm=fm
-  nm[upper.tri(nm)] = NA
-  diag(nm) = NA
-  vm = m[lower.tri(nm)] #read from the original m, not from the mirrored copy
-  
-  if(mode==1){
-    return(vm) #vector 		
-  }else if(mode==3){
-    return(fm) #return full matrix
-  }else if(mode == 2){
-    return(nm) #returns lower triangle and no diagonal. no double counts.
-  }
-  
-}
-
-
-rankedBarPlot=function(d,myc,cc,title){ #one stacked bar per column of d, segments ordered by value and colored via myc; an asterisk colored by myc[cc] tops each bar
-	colors = rbind() #row j holds, for bar j, the row indices of d in ascending order of value; column i colors the i-th barplot layer
-	byRank = cbind()
-
-	spaceh = 0.1 #space between bars
-	for(i in 1:ncol(d)){
-	  byRank = cbind(byRank,sort(d[,i],na.last=F))
-	  colors = rbind(colors,order(d[,i],na.last=F))
-	}
-	maxH = max(c(1.5,apply(byRank,2,sum)),na.rm=T) #maximum height of graph
-	
-	#barplot largest to smallest so that smallest is in front.
-	barp = barplot( apply(byRank,2,sum) ,  col=myc[colors[,1]] ,space=spaceh,ylim=c(0,maxH),main=paste("item-consensus", title),border=NA,las=1  )
-	for(i in 2:nrow(byRank)){
-	  barplot( apply(matrix(byRank[i:nrow(byRank),],ncol=ncol(byRank))  ,2,sum), space=spaceh,col=myc[colors[,i]],ylim=c(0,maxH), add=T,border=NA,las=1  )
-	}
-	xr=seq(spaceh,ncol(d)+ncol(d)*spaceh,(ncol(d)+ncol(d)*spaceh)/ncol(d)  )
-	#class labels as asterisks
-	text("*",x=xr+0.5,y=maxH,col=myc[cc],cex=1.4) #rect(xr,1.4,xr+1,1.5,col=myc[cc] )
-}
-
-
-
-###################################################################3333
-## RESTART MY SCRIPTS HERE: command-line driver that reads a matrix, runs ConsensusClusterPlus on it and saves the result
-##save.image( '/home/waltman/work.local/tmp/new.ccplus.R.dbg' )
-stop( "phw forced stop\n") ## NOTE(review): unconditional stop; nothing below this line runs when the file is executed top to bottom
-spec <- matrix( c( "data.fname",         "d", 1, "character",
-                   "direction",          "n", 2, "character",
-                   "output.name",        "o", 2, "character",
-                   "cluster.alg",        "a", 2, "character", ## must be one of 'hc', 'km' or 'pam' (see the validation below)
-                   "distance.metric",    "m", 2, "character", ## must be one supported by ConsensusClusterPlus
-                   "max.k",              "k", 2, "integer",
-                   "reps",               "r", 2, "integer",
-                   "innerLinkage",       "i", 1, "character",
-                   "finalLinkage",       "f", 1, "character",
-                   "out.report.dir",     "p", 2, "character",
-                   "out.report.html",    "h", 2, "character"
-                   ),
-                nc=4, ## getopt spec columns: long name, short flag, argument flag, value type
-                byrow=TRUE
-               )
-
-opt <- getopt( spec=spec )
-
-## default params for non-required params
-if ( is.null( opt$direction ) ) { opt$direction <- "cols"  }
-if ( is.null( opt$cluster.alg ) ) { opt$cluster.alg <- "pam" }
-if ( is.null( opt$output.name ) ) { opt$output.name <- "consensus.cluster.result" }
-if ( is.null( opt$distance.metric ) ) { opt$distance.metric <- "cosine" }
-if ( is.null( opt$max.k ) ) { opt$max.k <- 10 }
-if ( is.null( opt$reps ) ) { opt$reps <- 1000 }
-if ( is.null( opt$innerLinkage ) ) { opt$innerLinkage <- "average" }
-if ( is.null( opt$finalLinkage ) ) { opt$finalLinkage <- "average" }
-
-if ( is.null( opt$out.report.dir ) ) { opt$out.report.dir <- "report" }
-if ( is.null( opt$out.report.html ) ) { opt$out.report.html <- file.path( "report", "index.html" ) }
-
-## validate params here (make sure set to valid values)
-if ( !opt$cluster.alg %in% c( "hc", "km", "pam" ) ) {
-  stop( "invalid clustering algorithm specified", cluster.alg ) ## NOTE(review): no cluster.alg variable is assigned in the visible code; opt$cluster.alg looks intended
-}
-
-
-data <- as.matrix( read.delim( opt$data.fname, header=T, row.names=1 , check.names=FALSE ) )
-## transpose the matrix if we want to cluster the rows (genes)
-if ( opt$direction == "rows" ) {
-  data <- t( data )
-}
-
-
-title <- paste( opt$cluster.alg, opt$output.name, sep="." ) ## not used below: the call passes opt$out.report.dir as its title
-##source( '~/bin/galaxy-dist/tools/ucsc.cancer.tools/cluster.tools/new.ccplus.R' )
-results <- ConsensusClusterPlus( data,
-                                 maxK=opt$max.k,
-                                 reps=opt$reps,
-                                 pItem=0.8,
-                                 ##pFeature=NULL,
-                                 pFeature=0.5,
-                                 title=opt$out.report.dir,
-                                 clusterAlg=opt$cluster.alg,
-                                 distance=opt$distance.metric,
-                                 innerLinkage=opt$innerLinkage,
-                                 finalLinkage=opt$finalLinkage,
-                                 plot='pdf',
-                                 writeTable=FALSE,
-                                 seed=100,
-                                 weightsFeature=abs( rnorm( nrow( orig.data ) ) ), ## NOTE(review): orig.data is not assigned anywhere in the visible code
-                                 ##verbose=FALSE )
-                                 verbose=TRUE )
-
-pngs = list.files(path=opt$out.report.dir, patt="png") ## NOTE(review): the call above asks for plot='pdf', so presumably no png files exist here - confirm
-html.out <- paste( "<html>", 
-                   paste( paste( "<div><img src=\'", pngs, sep="" ), "\'/></div>", sep="" ),
-                   "</html>" )
-cat( html.out, file=opt$out.report.html )
-
-
-## re-transpose the matrix back if we've clustered the rows (genes)
-if ( opt$direction == "rows" ) {
-  data <- t( data )
-}
-save( file=opt$output.name, data, results)
--- a/cluster.tools/normalize.matrix.xml	Mon Mar 04 04:11:28 2013 -0500
+++ b/cluster.tools/normalize.matrix.xml	Mon Mar 11 16:31:29 2013 -0400
@@ -64,7 +64,7 @@
 	 * Median Absolute Deviation
 	 * Standard Deviation
 
-- **Variance Adjustment for Rows** Variance Adjustment Method for Columns
+- **Variance Adjustment for Cols** Variance Adjustment Method for Columns
 
          * No Adjustment
 	 * Median Absolute Deviation
--- a/cluster.tools/partition.R	Mon Mar 04 04:11:28 2013 -0500
+++ b/cluster.tools/partition.R	Mon Mar 11 16:31:29 2013 -0400
@@ -36,15 +36,26 @@
                )
 
 opt <- getopt( spec=spec )
+data <- as.matrix( read.delim( opt$data.fname, header=T, row.names=1 , check.names=FALSE ) )
 
 if ( is.null( opt$distance.metric ) ) { opt$distance.metric <- "euclidean" }
 if ( is.null( opt$algorithm ) ) { opt$algorithm <- "km" }
 if ( is.null( opt$dist.obj ) ) { opt$dist.obj <- FALSE }
 if ( is.null( opt$direction ) ) { opt$direction <- "cols"  }
-if ( is.null( opt$num.k ) ) { opt$num.k <- 10 }
 if ( is.null( opt$output.name ) ) { opt$output.name <- "partition.result" }
+if ( is.null( opt$num.k ) || ( opt$num.k == -1 )) {
+  ## no explicit k (-1 is the tool form's "use default" value): derive one from the clustering direction
+  if ( opt$direction == 'cols' ) {
+    opt$num.k <- 5
+  } else if ( opt$direction == 'rows' ) {
+    est <- nrow( data ) / 30  ## we use an estimated average size of gene clusters to be 30
+    ## round down to a multiple of 10 (est > 1000) or 5; below one step just truncate, since
+    ##  rounding would give k=0 for matrices with fewer than 150 rows; never return less than 1
+    step <- if ( est > 1000 ) 10 else 5
+    opt$num.k <- max( 1, if ( est < step ) floor( est ) else ( est %/% step ) * step )
+  }
+}
 
-data <- as.matrix( read.delim( opt$data.fname, header=T, row.names=1 , check.names=FALSE ) )
 
 if ( opt$direction == "cols" ) {
   ## need to transpose b/c both kmeans & pam cluster the rows
--- a/cluster.tools/partition.xml	Mon Mar 04 04:11:28 2013 -0500
+++ b/cluster.tools/partition.xml	Mon Mar 11 16:31:29 2013 -0400
@@ -6,8 +6,19 @@
 -a $alg_cond.algorithm
 #if $alg_cond.algorithm == 'pam' # -m ${alg_cond.distance_metric}
 #end if
+
+#if str($numk) != "-1":
 -k ${numk} 
--o ${output}
+#end if
+
+#if str($direction) == "rows":
+-o ${rdata_output_rows}
+#end if
+
+#if str($direction) == "cols":
+-o ${rdata_output_cols}
+#end if
+
 
 </command>
     <inputs>
@@ -40,11 +51,16 @@
 	    </param>
 	  </when>
 	</conditional>
-    	<param name="numk" type="integer" label="Number of Clusters" value="50" help="Specify the number of clusters to use"/>
+    	<param name="numk" type="integer" label="Number of Clusters" value="-1" help="Specify the number of clusters to use (-1 to use default. See help below)."/>
     	
     </inputs>
     <outputs>
-        <data format="rdata" name="output" label="Partition Clustering Data (RData)"/>
+      <data format="rdata" name="rdata_output_rows" label="Partition Clustering Results; Gene Clusters (RData)">
+        <filter>(direction)=="rows"</filter>
+      </data>
+      <data format="rdata" name="rdata_output_cols" label="Partition Clustering Results; Sample Clusters (RData)">
+        <filter>(direction)=="cols"</filter>
+      </data>
     </outputs>
 <help>
 .. class:: infomark
@@ -87,7 +103,9 @@
 	 * Binary
 
 
-- **Number of Clusters** Specify the number of clusters to use
+- **Number of Clusters** Specify the number of clusters to use.  If set to -1, default values will be used, with the default set as follows:
+        * if samples/columns are being clustered, the **default** is 5.
+        * if genes/rows are being clustered, the **default** is approximately num_rows/30 (rounded down to a convenient multiple), e.g. if there are 600 rows/genes in the matrix, the default will be 20 clusters.
 
 </help>
 </tool>
--- a/cluster.tools/rdata.2.out.R	Mon Mar 04 04:11:28 2013 -0500
+++ b/cluster.tools/rdata.2.out.R	Mon Mar 11 16:31:29 2013 -0400
@@ -12,6 +12,15 @@
   q();
 }
 
+## helper: write matrix `mat` to `fname` as tab-delimited text, header cell "ID"; missing dimnames fall back to 1..n
+write.2.tab <- function( mat,
+                         fname ) {
+  ids <- if ( is.null( rownames( mat ) ) ) seq_len( nrow( mat ) ) else rownames( mat )
+  hdr <- if ( is.null( colnames( mat ) ) ) seq_len( ncol( mat ) ) else colnames( mat )
+  out <- cbind( c( "ID", ids ), rbind( hdr, mat ) )
+  write.table( out, fname, sep="\t", row.names=FALSE, col.names=FALSE, quote=FALSE )
+}
+
 lib.load.quiet <- function( package ) {
    package <- as.character(substitute(package))
    suppressPackageStartupMessages( do.call( "library", list( package=package ) ) )
@@ -51,11 +60,10 @@
 if ( opt$output.format %in% c( "cls-only", "newick" ) ) {
   if ( opt$output.format == "cls-only" ) {
 
-    cl <- cbind( names( cl ), as.numeric( cl ) )
-    colnames( cl ) <- c( "ID", "Class" )
+    cl <- matrix( as.numeric( cl ), nc=1, dimnames=list( names(cl), "Class" ) )
+    opt$output.fname <- gsub( "cls-only$", "tab", opt$output.fname )
 
-    opt$output.fname <- gsub( "cls-only$", "tab", opt$output.fname )
-    write.table( cl, opt$output.fname, sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE )
+    write.2.tab( cl, opt$output.fname )
   } else {
     ##if ( opt$output.format == "newick" ) {
 
--- a/cluster.tools/rnaseq.feature.selection.xml	Mon Mar 04 04:11:28 2013 -0500
+++ b/cluster.tools/rnaseq.feature.selection.xml	Mon Mar 11 16:31:29 2013 -0400
@@ -35,18 +35,17 @@
 **Parameters**
 
 - **Z-transform data?** - Specify whether or not to Z-transform the rows (mean=0, sd=1)
-- **Variance Metric for Genes** - Specify Metric to use for calculating Gene Variance. Choice of:
+- **Variance Metric for Genes** - Specify Metric to use for calculating Gene Variance. Choice of
 
 	 * Median Absolute Deviation (MAD)
 	 * Maximum Absolute Deviation - similar to MAD, but uses the _Maximum_, instead of the Median Absolute Deviatioin
 	 * Standard Deviation
 
- - **Percentage of Samples Passing** Percent of samples with an IPL that passes the threshold. Choice of:
+
+ - **Percentage of Samples Passing** Percent of samples with an IPL that passes the threshold. Choice of
 
          * Integer Value       - indicate the exact number of genes that are to be kept
          * Real Value in [0,1] - indicate the percentage of genes that are to be kept
 
-
-
 </help>
 </tool>
--- a/cluster.tools/select.k.from.consensus.cluster.R	Mon Mar 04 04:11:28 2013 -0500
+++ b/cluster.tools/select.k.from.consensus.cluster.R	Mon Mar 11 16:31:29 2013 -0400
@@ -319,12 +319,19 @@
   write.table( cl, opt$cluster.class.out, sep="\t", row.names=FALSE, quote=FALSE )
 }
 
-treecl.res <- results[[ k.select ]]$consensusTree
 ## cl should already exist, but re-create it just in case
 cl <- cls[[ as.character( k.select ) ]] 
-
+treecl.res <- results[[ k.select ]]$consensusTree
+select.result <- results[[ k.select ]]
 
-select.result <- results[[ k.select ]]
+if ( ! length( cl ) %in% c( ncol( data ), nrow( data ) ) ) {
+  stop( "Number of clustered elements not equal to either number of rows or columns of data matrix\n" )
+}
+## name the class vector and both copies of the consensus tree after the clustered dimension,
+##  recognised by its length (the column names are used when the matrix is square)
+names( cl ) <- treecl.res$labels <- select.result$consensusTree$labels <-
+  if ( length( cl ) == ncol( data ) ) colnames( data ) else rownames( data )
+
 ## over-write the tabular version of the opt$cluster.class.out with an RData file
 save( file=opt$cluster.class.out, treecl.res, cl, select.result, data ) 
 
--- a/cluster.tools/tab.2.cdt.xml	Mon Mar 04 04:11:28 2013 -0500
+++ b/cluster.tools/tab.2.cdt.xml	Mon Mar 11 16:31:29 2013 -0400
@@ -14,7 +14,7 @@
 .. class:: infomark
      
 **Convert tab-delimitted to CDT** - Tool to convert a data matrix into a simplified CDT format that can be read by TreeView
--**NOTE** NO CLUSTERING performed on data matrix.  Tool is a simple data conversion utility.
+- **NOTE** NO CLUSTERING performed on data matrix.  Tool is a simple data conversion utility.
 
 **OUTPUT:**  A new CDT file