Mercurial > repos > peter-waltman > ucsc_cluster_tools2

#!/usr/bin/env Rscript
## Usage text written to stderr when the script is invoked with --help.
## It lists every option declared in the getopt spec further down; -c is
## listed as required because the cluster file is read unconditionally.
argspec <- c("tab.2.cdt.R converts a data matrix to cdt format

        Usage:
                tab.2.cdt.R -d <data.file> -c <class.select.file>
        Optional:
                            -g   (genes.only: drop features named complex/abstract/family)
                            -w   (within.cl.srt: hierarchically order members within each cluster)
                            -o <output_file>
                \n\n")

## Trailing command-line arguments only (R's own arguments are excluded).
args <- commandArgs(TRUE)

## identical() is a strict scalar comparison: TRUE only when the single
## argument supplied is exactly "--help".
if ( identical( args, "--help" ) ) {
  write(argspec, stderr())
  q()
}

## Attach a package while suppressing its startup messages.
##
##   package: the package to attach, written as a bare (unquoted) name or
##            as a string; it is captured unevaluated and turned into a
##            character string before being handed to library().
##
## Returns the value of the library() call.
lib.load.quiet <- function( package ) {
  pkg.name <- as.character( substitute( package ) )
  suppressPackageStartupMessages( library( pkg.name, character.only=TRUE ) )
}
## Required packages.
lib.load.quiet( getopt )
lib.load.quiet( gplots )

## Optional clustering packages: attach flashClust when it is available,
## otherwise fall back to fastcluster, otherwise attach neither.
## (presumably these are attached to supply a faster hclust() -- confirm)
##
## requireNamespace() checks for the package by name.  The previous test,
## `'pkg' %in% installed.packages()`, compared against every cell of the
## installed-packages matrix (not only the package-name column) and rebuilt
## that matrix on each call.
if ( requireNamespace( "flashClust", quietly=TRUE ) ) {
  lib.load.quiet( flashClust )
} else if ( requireNamespace( "fastcluster", quietly=TRUE ) ) {
  lib.load.quiet( fastcluster )
}


## getopt specification, one row per option.  Columns are:
##   long name, short flag, argument mode, argument type
## where the argument mode is 0 (no argument), 1 (required argument) or
## 2 (optional argument).
spec <- matrix( c( "data.fname",      "d", 1, "character",
                   "class.select",    "c", 1, "character",
                   "genes.only",      "g", 0, "logical",
                   "within.cl.srt",   "w", 0, "logical",
                   "output.fname",    "o", 2, "character"
                   ),
                ncol=4,
                byrow=TRUE
               )


opt <- getopt( spec=spec )

## Both input files are read unconditionally later on, so fail early with a
## clear message instead of an obscure read.delim() error.
if ( is.null( opt$data.fname ) ) {
  stop( "no data file given; supply one with -d <data.file>", call.=FALSE )
}
if ( is.null( opt$class.select ) ) {
  stop( "no cluster file given; supply one with -c <class.select.file>", call.=FALSE )
}

## Default output name: swap a trailing "tab"/"csv" for "cdt".  When the
## input name has neither suffix, append ".cdt" so the default output path
## can never be the input path itself.
## A non-character value is treated as "not supplied"; -o is declared with
## an optional argument, so it may arrive without a file name
## (NOTE(review): getopt appears to yield TRUE in that case -- confirm).
if ( ! is.character( opt$output.fname ) ) {
  if ( grepl( "tab$|csv$", opt$data.fname ) ) {
    opt$output.fname <- sub( "tab$|csv$", "cdt", opt$data.fname )
  } else {
    opt$output.fname <- paste( opt$data.fname, "cdt", sep="." )
  }
}

## Flags that were not given on the command line default to FALSE.
if ( is.null( opt$genes.only ) ) opt$genes.only <- FALSE
if ( is.null( opt$within.cl.srt ) ) opt$within.cl.srt <- FALSE

## Read the tab-delimited data file into a matrix; the first column supplies
## the row names and column names are kept exactly as written in the file.
data <- as.matrix( read.delim( opt$data.fname, row.names=1, check.names=FALSE ) )

## With -g, keep only the rows whose names do NOT contain "complex",
## "abstract" or "family".
if ( opt$genes.only ) {
  feats <- rownames( data )
  gene.feats <- feats[ ! grepl( "complex|abstract|family", feats ) ]
  ## drop=FALSE keeps `data` a matrix even when a single row survives
  data <- data[ gene.feats, , drop=FALSE ]
}


## Read the cluster-assignment file: the first column supplies the element
## names (row names); the first remaining column is used as the cluster
## assignment.  Elements are ordered by that assignment.
cls <- as.matrix( read.delim( opt$class.select, row.names=1 ) )
cls <- cls[ order( cls[,1] ), , drop=FALSE ]

row.cluster <- FALSE
##  If any clustered element is named among the ROWS of the data matrix,
##  assume the clustering is over rows.  The matrix is transposed so that
##  the clustered elements are always columns from here on, and transposed
##  back just before it is written out.
if ( any( rownames( cls ) %in% rownames( data ) ) ) {
  row.cluster <- TRUE
  data <- t( data )
}

## Restrict the cluster table to elements that are present in the data.
if ( ! all( rownames( cls ) %in% colnames( data ) ) ) {

  ovp <- rownames( cls )
  ovp <- ovp[ ovp %in% colnames( data ) ]
  if ( length( ovp ) > 0 ) {
    ## drop=FALSE keeps `cls` a matrix; without it a one-column table
    ## collapses to a plain vector and every later cls[,1] fails.
    cls <- cls[ ovp, , drop=FALSE ]
  } else {
    stop( "no samples in cluster are found in data file\n" )
  }
}

## With -w, re-order the members of each cluster by hierarchical clustering
## of their data columns, keeping the clusters themselves in ascending
## numeric order of their label.
if ( opt$within.cl.srt ) {

  cls.orig <- cls
  ## take the names explicitly from the row names rather than relying on
  ## how matrix indexing carries dimnames over
  cls.vect <- setNames( cls[,1], rownames( cls ) )
  cl.ids <- sort( unique( as.numeric( cls.vect ) ) )

  ordered.elts <- unlist( lapply( cl.ids,
                                  function(i) {
                                    elts <- names( cls.vect )[ cls.vect %in% i ]
                                    ## hclust() needs at least two objects;
                                    ## a smaller cluster is already "sorted"
                                    if ( length( elts ) < 2 ) {
                                      return( elts )
                                    }
                                    sub.mat <- data[, elts, drop=FALSE ]
                                    ## dist() works on rows, so transpose to
                                    ## get distances between cluster members
                                    sub.dist <- dist( t( sub.mat ) )
                                    elts[ hclust( sub.dist )$order ]
                                  }
                                 )
                         )
  cls <- cls.orig[ ordered.elts, , drop=FALSE ]
}


## Put the data columns in cluster order and tag each column name with its
## cluster label, giving "<name>-clNN" (label zero-padded to two digits).
## drop=FALSE keeps `data` a matrix when only one column is selected, so the
## colnames<- assignment below still applies.
data <- data[, rownames(cls), drop=FALSE ]
colnames( data ) <- paste( rownames(cls), paste( "cl", sprintf( "%02d", cls[,1] ), sep=""), sep="-" )

##  Undo the earlier transpose so a row-wise clustering is written with the
##  clustered elements as rows again.
if ( row.cluster ) {
  data <- t( data )
}

## Tab-delimited output; col.names=NA writes an empty header cell above the
## row-name column so the header lines up with the data columns.
write.table( data, opt$output.fname, sep="\t", col.names=NA, quote=FALSE )
author	peter-waltman
date	Thu, 28 Feb 2013 01:45:39 -0500
parents
children