comparison cluster.tools/order.by.cl.R @ 2:b442996b66ae draft

Uploaded
author peter-waltman
date Wed, 27 Feb 2013 20:17:04 -0500
parents
children
comparison
equal deleted inserted replaced
1:e25d2bece0a2 2:b442996b66ae
#!/usr/bin/env Rscript
## order.by.cl.R: re-order the members of a tab-delimited data matrix so that
## members of the same cluster are adjacent, tagging each name with its
## cluster id.

## Help text printed to stderr when the script is invoked with `--help`.
argspec <- c("order.by.cl.R re-orders a data matrix by cluster membership

Usage:
    order.by.cl.R -d <data.file> -c <cluster.file>
Optional:
    -g    drop features whose names match complex|abstract|family
    -w    order the members within each cluster by hierarchical clustering
    -o <output_file>
\n\n")
args <- commandArgs(TRUE)
## the length test comes first so the comparison is only made on a scalar
if ( length( args ) == 1 && args == "--help" ) {
  write( argspec, stderr() )
  q()
}

## Attach a package while silencing its startup messages.
##   package: the package name, normally given as a bare symbol,
##            e.g. lib.load.quiet( getopt ); it is captured unevaluated.
## Returns whatever library() returns.
lib.load.quiet <- function( package ) {
  pkg.name <- as.character( substitute( package ) )
  suppressPackageStartupMessages( library( pkg.name, character.only = TRUE ) )
}
lib.load.quiet( getopt )
lib.load.quiet( gplots )
## Optional clustering packages: flashClust is preferred, fastcluster is only
## attached when flashClust is not installed, and neither is required.
## NOTE(review): presumably these are attached so that the hclust() call
## further down picks up a faster implementation -- confirm.
if ( 'flashClust' %in% installed.packages() ) {
  lib.load.quiet( flashClust )
} else if ( 'fastcluster' %in% installed.packages() ) {
  lib.load.quiet( fastcluster )
}


## Command-line specification handed to getopt(): one row per option, with
## columns long name, short flag, argument mask and argument type.
spec <- matrix( c( "data.fname",    "d", 1, "character",
                   "class.select",  "c", 1, "character",
                   "genes.only",    "g", 0, "logical",
                   "within.cl.srt", "w", 0, "logical",
                   "output.fname",  "o", 2, "character"
                  ),
                ncol=4,
                byrow=TRUE
               )


opt <- getopt( spec=spec )

## Both input files are read unconditionally further down, so fail early with
## a readable message rather than a cryptic read.delim() error.
if ( is.null( opt$data.fname ) || is.null( opt$class.select ) ) {
  stop( "both -d <data.file> and -c <cluster.file> must be supplied", call.=FALSE )
}

if ( is.null( opt$output.fname ) ) {
  opt$output.fname <- sub( "tab$|csv$", "cdt", opt$data.fname )
  ## when the input does not end in tab/csv the substitution is a no-op;
  ## never let the default output path overwrite the input file
  if ( identical( opt$output.fname, opt$data.fname ) ) {
    opt$output.fname <- paste( opt$data.fname, "cdt", sep="." )
  }
}
if ( is.null( opt$genes.only ) ) opt$genes.only <- FALSE
if ( is.null( opt$within.cl.srt ) ) opt$within.cl.srt <- FALSE

## Load the data matrix: the first column of the file supplies the row names
## and the header is used verbatim for the column names.
data <- as.matrix( read.delim( opt$data.fname, row.names=1, check.names=FALSE ) )

if ( opt$genes.only ) {
  ## keep only the rows whose names do NOT match complex|abstract|family;
  ## drop=FALSE keeps `data` a matrix even when a single row survives
  is.gene <- ! grepl( "complex|abstract|family", rownames( data ) )
  data <- data[ is.gene, , drop=FALSE ]
}


## Load the cluster assignments: the first column of the file supplies the
## member names (row names), the first remaining column the cluster id.
## Members are then sorted by cluster id.
cls <- as.matrix( read.delim( opt$class.select, row.names=1 ) )
cls <- cls[ order( cls[,1] ), , drop=FALSE ]

row.cluster <- FALSE
## we assume this is a row-wise clustering if any cluster member is one of the
## data rows; in that case work on the transpose so that the clustered
## members are always the columns of `data`
if ( any( rownames( cls ) %in% rownames( data ) ) ) {
  row.cluster <- TRUE
  data <- t( data )
}

## restrict the clustering to the members that are present in the data
in.data <- rownames( cls ) %in% colnames( data )
if ( ! all( in.data ) ) {
  if ( any( in.data ) ) {
    ## drop=FALSE: cls must stay a one-column matrix, since every later step
    ## uses cls[,1] and rownames( cls )
    cls <- cls[ in.data, , drop=FALSE ]
  }
  else {
    stop( "no samples in cluster are found in data file\n" )
  }
}

## Optional: within each cluster (taken in ascending id order) re-order the
## members by the leaf order of a hierarchical clustering of their columns.
if ( opt$within.cl.srt ) {

  ## take the names from the row names explicitly: cls[,1] loses them when
  ## cls has a single row
  cls.vect <- setNames( cls[,1], rownames( cls ) )
  cl.ids <- sort( unique( as.numeric( cls.vect ) ) )

  ordered.elts <- unlist( lapply( cl.ids,
                                  function(i) {
                                    elts <- names( cls.vect )[ cls.vect %in% i ]
                                    ## hclust() needs at least two objects, so
                                    ## a singleton cluster is left as it is
                                    if ( length( elts ) < 2 ) {
                                      return( elts )
                                    }
                                    sub.mat <- data[, elts, drop=FALSE ]
                                    sub.dist <- dist( t( sub.mat ) )
                                    elts[ hclust( sub.dist )$order ]
                                  }
                                 )
                         )
  cls <- cls[ ordered.elts, , drop=FALSE ]
}


## re-order the columns to follow the cluster ordering and tag each name with
## its zero-padded cluster id, giving names of the form <name>-cl<NN>;
## drop=FALSE keeps `data` a matrix when only one member is left
data <- data[, rownames( cls ), drop=FALSE ]
cl.tags <- paste( "cl", sprintf( "%02d", cls[,1] ), sep="" )
colnames( data ) <- paste( rownames( cls ), cl.tags, sep="-" )

## now re-transpose, so the output has the same orientation as the input
if ( row.cluster ) {
  data <- t( data )
}
## col.names=NA makes write.table() emit an empty header cell above the
## row-name column
write.table( data, opt$output.fname, sep="\t", col.names=NA, quote=FALSE )
