ucsc_cluster_tools: cluster.tools/order.by.cl.R comparison

comparison cluster.tools/order.by.cl.R @ 2:b442996b66ae draft

Uploaded

author	peter-waltman
date	Wed, 27 Feb 2013 20:17:04 -0500
parents
children

comparison

equal deleted inserted replaced

-:e25d2bece0a2
+:b442996b66ae
+#!/usr/bin/env Rscript
+## Usage text, written to stderr when the script is invoked with --help.
+## It lists the options declared in the getopt specification of this script.
+argspec <- c("order.by.cl.R re-orders the columns (or rows) of a data matrix by cluster assignment
+Usage:
+order.by.cl.R -d <data.file> -c <cluster.assignment.file>
+Optional:
+-g (genes only: drop features whose names match complex|abstract|family)
+-w (order the elements within each cluster by hierarchical clustering)
+-o <output_file>
+\n\n")
+args <- commandArgs(TRUE)
+## Show the usage text and quit when --help is the one and only argument.
+if ( length( args ) == 1 && args == "--help" ) {
+  write( argspec, stderr() )
+  q()
+}
+## Attach a package while silencing its startup messages.
+##   package: the package to attach, written as a bare name (or a string);
+##            it is captured unevaluated and turned into a character name.
+## Returns whatever library() returns for that package.
+lib.load.quiet <- function( package ) {
+  pkg.name <- as.character( substitute( package ) )
+  suppressPackageStartupMessages( library( pkg.name, character.only=TRUE ) )
+}
+## Required packages.
+lib.load.quiet( getopt )
+lib.load.quiet( gplots )
+## Optional clustering packages: attach flashClust when it can be loaded,
+## otherwise fall back to fastcluster, otherwise attach neither.
+## NOTE(review): presumably these are attached so that their hclust() masks
+## the default one used later in this script -- confirm.
+## requireNamespace() tests for one named package directly, instead of
+## matching the name against every cell of the installed.packages() matrix.
+if ( requireNamespace( "flashClust", quietly=TRUE ) ) {
+  lib.load.quiet( flashClust )
+} else if ( requireNamespace( "fastcluster", quietly=TRUE ) ) {
+  lib.load.quiet( fastcluster )
+}
+## Command-line option specification handed to getopt(), one row per option:
+## long name, short flag, argument mask, value type.
+## (getopt masks: 0 = no argument, 1 = required argument, 2 = optional argument)
+spec <- matrix( c( "data.fname",      "d", 1, "character",
+                   "class.select",    "c", 1, "character",
+                   "genes.only",      "g", 0, "logical",
+                   "within.cl.srt",   "w", 0, "logical",
+                   "output.fname",    "o", 2, "character"
+                   ),
+                ncol=4,
+                byrow=TRUE
+                )
+opt <- getopt( spec=spec )
+## Both input files are read unconditionally later in the script, so fail
+## here with a clear message rather than inside read.delim().
+if ( is.null( opt$data.fname ) || is.null( opt$class.select ) ) {
+  stop( "both a data file (-d) and a cluster assignment file (-c) must be specified\n" )
+}
+## Default output name: the data file name with a trailing tab/csv replaced by cdt.
+if ( is.null( opt$output.fname ) ) {
+  opt$output.fname <- sub( "tab$|csv$", "cdt", opt$data.fname )
+  ## sub() returns its input unchanged when neither extension is present;
+  ## append a suffix in that case so the output never overwrites the input.
+  if ( identical( opt$output.fname, opt$data.fname ) ) {
+    opt$output.fname <- paste( opt$data.fname, "cdt", sep="." )
+  }
+}
+## Flags that were not given on the command line default to FALSE.
+if ( is.null( opt$genes.only ) ) opt$genes.only <- FALSE
+if ( is.null( opt$within.cl.srt ) ) opt$within.cl.srt <- FALSE
+## Read the data file as a matrix: the first column supplies the row names,
+## and check.names=FALSE keeps the column headers exactly as written.
+data <- as.matrix( read.delim( opt$data.fname, row.names=1, check.names=FALSE ) )
+if ( opt$genes.only ) {
+  ## keep only the features whose names do not match complex|abstract|family
+  feats <- rownames( data )
+  gene.feats <- feats[ ! grepl( "complex|abstract|family", feats ) ]
+  ## drop=FALSE keeps a matrix even when only one feature is left
+  data <- data[ gene.feats, , drop=FALSE ]
+}
+## Read the cluster assignments as a matrix (first column of the file supplies
+## the row names) and sort the rows by the values in the first column.
+cls <- as.matrix( read.delim( opt$class.select, row.names=1 ) )
+cls <- cls[ order( cls[,1] ), , drop=FALSE ]
+row.cluster <- FALSE
+##  we assume this is a row-wise clustering if any of the clustered elements
+##  appear among the row names of the data; the data is then transposed so
+##  that the clustered elements are always in the columns from here on
+if ( any( rownames( cls ) %in% rownames( data ) ) ) {
+  row.cluster <- TRUE
+  data <- t( data )
+}
+## Restrict the assignments to the elements that are present in the data.
+if ( ! all( rownames( cls ) %in% colnames( data ) ) ) {
+  ovp <- rownames( cls )
+  ovp <- ovp[ ovp %in% colnames( data ) ]
+  if ( length( ovp ) > 0 ) {
+    ## drop=FALSE: cls must stay a matrix, it is indexed as cls[,1] later on
+    cls <- cls[ ovp, , drop=FALSE ]
+  } else {
+    stop( "no samples in cluster are found in data file\n" )
+  }
+}
+## Optionally re-order the elements inside each cluster: for every cluster id
+## (taken in increasing numeric order) the member columns of the data are
+## hierarchically clustered and listed in the resulting dendrogram order.
+if ( opt$within.cl.srt ) {
+  cls.vect <- cls[,1]
+  cl.ids <- sort( unique( as.numeric( cls.vect ) ) )
+  ordered.elts <- unlist( lapply( cl.ids,
+                                  function(i) {
+                                    ## take the names from the row names of cls, so they
+                                    ## are available even when cls has a single row
+                                    elts <- rownames( cls )[ cls.vect %in% i ]
+                                    ## hclust() needs at least two objects, so a
+                                    ## singleton cluster is returned as it is
+                                    if ( length( elts ) < 2 ) {
+                                      return( elts )
+                                    }
+                                    sub.mat <- data[, elts, drop=FALSE ]
+                                    ## dist() works on rows, hence the transpose
+                                    sub.dist <- dist( t( sub.mat ) )
+                                    elts[ hclust( sub.dist )$order ]
+                                  }
+                                  )
+                          )
+  cls <- cls[ ordered.elts, , drop=FALSE ]
+}
+## re-order the data columns to follow the cluster ordering; drop=FALSE keeps
+## a matrix even when only one element is selected
+data <- data[, rownames(cls), drop=FALSE ]
+## relabel every column as <name>-cl<two-digit cluster id>
+cl.labels <- paste( "cl", sprintf( "%02d", cls[,1] ), sep="" )
+colnames( data ) <- paste( rownames(cls), cl.labels, sep="-" )
+##  now re-transpose, if the data was transposed for a row-wise clustering
+if ( row.cluster ) {
+  data <- t( data )
+}
+## tab-delimited output; col.names=NA writes an empty header cell above the
+## row names
+write.table( data, opt$output.fname, sep="\t", col.names=NA, quote=FALSE )

Mercurial > repos > peter-waltman > ucsc_cluster_tools

comparison cluster.tools/order.by.cl.R @ 2:b442996b66ae draft