diff cluster.tools/order.by.cl.R @ 2:b442996b66ae draft

Uploaded
author peter-waltman
date Wed, 27 Feb 2013 20:17:04 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cluster.tools/order.by.cl.R	Wed Feb 27 20:17:04 2013 -0500
@@ -0,0 +1,108 @@
+#!/usr/bin/env Rscript
+## Usage text; written to stderr when the script is called with --help as its only argument.
+argspec <- c("order.by.cl.R re-orders the columns (or rows) of a data matrix by cluster assignment
+        Usage:    order.by.cl.R -d <data.file> -c <cluster.file>
+        Optional:
+                            -g  drop features whose name contains complex, abstract or family
+                            -w  order the members of each cluster by hclust
+                            -o <output_file>\n\n")
+args <- commandArgs(TRUE)
+if ( length( args ) == 1 && args == "--help" ) {
+  write( argspec, stderr() )
+  q()
+}
+
+## Attach a package without printing its startup messages.
+##   package: unquoted package name; captured with substitute(), never evaluated
+lib.load.quiet <- function( package ) {
+  pkg.name <- as.character( substitute( package ) )
+  suppressPackageStartupMessages( library( pkg.name, character.only=TRUE ) )
+}
+lib.load.quiet( getopt )
+lib.load.quiet( gplots )
+
+## Optionally attach flashClust, else fastcluster (presumably for a faster hclust() -- confirm).
+## system.file() returns "" when the package is not installed.
+if ( nzchar( system.file( package="flashClust" ) ) ) {
+  lib.load.quiet( flashClust )
+} else if ( nzchar( system.file( package="fastcluster" ) ) ) {
+  lib.load.quiet( fastcluster )
+}
+
+## getopt spec, one row per option: long name, short flag, argument mode (0 none, 1 required, 2 optional), type
+spec <- matrix( c( "data.fname",      "d", 1, "character",
+                   "class.select",    "c", 1, "character",
+                   "genes.only",      "g", 0, "logical",
+                   "within.cl.srt",   "w", 0, "logical",
+                   "output.fname",    "o", 2, "character" ),
+                ncol=4, byrow=TRUE )
+opt <- getopt( spec=spec )
+if ( is.null( opt$data.fname ) || is.null( opt$class.select ) ) stop( "both -d <data.file> and -c <cluster.file> are required" )
+if ( is.null( opt$output.fname ) ) {
+  opt$output.fname <- sub( "tab$|csv$", "cdt", opt$data.fname )
+  ## without a tab/csv suffix sub() changes nothing; append instead of overwriting the input file
+  if ( opt$output.fname == opt$data.fname ) opt$output.fname <- paste( opt$data.fname, "cdt", sep="." )
+}
+if ( is.null( opt$genes.only ) ) opt$genes.only <- FALSE
+if ( is.null( opt$within.cl.srt ) ) opt$within.cl.srt <- FALSE
+## Read the tab-delimited data matrix; the first column supplies the row names.
+data <- as.matrix( read.delim( opt$data.fname, row.names=1, check.names=FALSE ) )
+
+## -g: keep only the rows whose name contains none of complex/abstract/family
+if ( opt$genes.only ) {
+  is.gene <- ! grepl( "complex|abstract|family", rownames( data ) )
+  data <- data[ is.gene, , drop=FALSE ]  ## drop=FALSE keeps a matrix when one row is left
+}
+
+## Read the cluster assignments (row names = member ids, first column = cluster id)
+## and sort the members by cluster id.
+cls <- as.matrix( read.delim( opt$class.select, row.names=1 ) )
+cls <- cls[ order( cls[,1] ), , drop=FALSE ]
+
+##  we assume this is a row-wise clustering if any cluster member is a row name of
+##  the data; the data is then transposed so that the members are always columns
+row.cluster <- any( rownames( cls ) %in% rownames( data ) )
+if ( row.cluster ) {
+  data <- t( data )
+}
+
+## when some members are missing from the data, keep only those that are present
+if ( ! all( rownames( cls ) %in% colnames( data ) ) ) {
+  ovp <- rownames( cls )[ rownames( cls ) %in% colnames( data ) ]
+  if ( length( ovp ) == 0 ) {
+    stop( "no samples in cluster are found in data file\n" )
+  }
+  ## drop=FALSE: a one-column cls must stay a matrix, cls[,1] and rownames(cls) are used later
+  cls <- cls[ ovp, , drop=FALSE ]
+}
+
+## -w: within every cluster, re-order the members by the leaf order of a hierarchical
+## clustering of their data columns (dist() and hclust() with their default settings)
+if ( opt$within.cl.srt ) {
+  cls.vect <- cls[,1]
+  cl.ids <- sort( unique( as.numeric( cls.vect ) ) )
+  member.order <- unlist( lapply( cl.ids,
+                                  function(i) {
+                                    elts <- rownames( cls )[ cls.vect %in% i ]
+                                    ## hclust() needs at least 2 objects, so a singleton is returned as is
+                                    if ( length( elts ) < 2 ) return( elts )
+                                    sub.dist <- dist( t( data[, elts, drop=FALSE ] ) )
+                                    return( elts[ hclust( sub.dist )$order ] )
+                                  }
+                                 )
+                        )
+  ## clusters stay in ascending id order; only the order inside each cluster changes
+  cls <- cls[ member.order, , drop=FALSE ]
+}
+
+
+## re-order the columns to follow the cluster order and tag each name with its
+## cluster id ("<name>-clNN"); drop=FALSE keeps a matrix when only one member is left
+data <- data[, rownames(cls), drop=FALSE ]
+colnames( data ) <- sprintf( "%s-cl%02d", rownames(cls), cls[,1] )
+
+##  now re-transpose
+if ( row.cluster ) data <- t( data )
+## tab-delimited output; col.names=NA adds a blank header cell above the row names
+write.table( data, opt$output.fname, sep="\t", col.names=NA, quote=FALSE )
+