diff cluster.tools/partition.R @ 8:a58527c632b7 draft

Uploaded
author peter-waltman
date Mon, 11 Mar 2013 16:31:29 -0400
parents 0decf3fd54bc
children
line wrap: on
line diff
--- a/cluster.tools/partition.R	Mon Mar 04 04:11:28 2013 -0500
+++ b/cluster.tools/partition.R	Mon Mar 11 16:31:29 2013 -0400
@@ -36,15 +36,26 @@
                )
 
 opt <- getopt( spec=spec )
+data <- as.matrix( read.delim( opt$data.fname, header=T, row.names=1 , check.names=FALSE ) )
 
 if ( is.null( opt$distance.metric ) ) { opt$distance.metric <- "euclidean" }
 if ( is.null( opt$algorithm ) ) { opt$algorithm <- "km" }
 if ( is.null( opt$dist.obj ) ) { opt$dist.obj <- FALSE }
 if ( is.null( opt$direction ) ) { opt$direction <- "cols"  }
-if ( is.null( opt$num.k ) ) { opt$num.k <- 10 }
 if ( is.null( opt$output.name ) ) { opt$output.name <- "partition.result" }
+if ( is.null( opt$num.k ) || ( opt$num.k == -1 )) {
+  if ( opt$direction == 'cols' ) {
+    opt$num.k <- 5
+  } else if ( opt$direction == 'rows' ) {
+    opt$num.k <- nrow( data ) / 30  ## estimate k by assuming gene clusters average about 30 rows each
+    if ( opt$num.k > 1000 ) {
+      opt$num.k <- ( opt$num.k %/% 10 ) * 10
+    } else {
+      opt$num.k <- ( opt$num.k %/% 5 ) * 5
+    }
+  }
+}
 
-data <- as.matrix( read.delim( opt$data.fname, header=T, row.names=1 , check.names=FALSE ) )
 
 if ( opt$direction == "cols" ) {
   ## need to transpose b/c both kmeans & pam cluster the rows