#!/usr/bin/env Rscript

## tab.2.cdt.R
##
## Reads a tab-delimited data matrix and a class (cluster) assignment file,
## re-orders the samples by class, appends the class label to each sample name
## and writes the result as a tab-delimited table.

## Usage text, printed on request.  -c is listed as required because the class
## file is read unconditionally further down in this script.
argspec <- paste(
    "tab.2.cdt.R converts a data matrix to cdt format",
    "",
    "Usage:",
    "    tab.2.cdt.R -d <data.file> -c <class.file>",
    "Optional:",
    "    -g                drop features whose names contain complex, abstract or family",
    "    -w                hierarchically cluster the members within each class",
    "    -o <output_file>  defaults to <data.file> with its tab/csv suffix replaced by cdt",
    "\n",
    sep="\n"
)

args <- commandArgs( TRUE )

## Answer a help request before option parsing, wherever it appears on the
## command line; the option spec itself has no help entry.
if ( any( args %in% c( "--help", "-h" ) ) ) {
    write( argspec, stderr() )
    quit( save="no", status=0 )
}

## Attach a package without printing its startup messages.
##
## package: the package to attach, given either as a bare name
##          (lib.load.quiet( getopt )) or as a single string.
## Returns (invisibly) whatever library() returns.
## Stops with library()'s own error when the package is not installed.
lib.load.quiet <- function( package ) {
    ## capture the unevaluated argument so bare names work like in library()
    package <- as.character( substitute( package ) )
    if ( length( package ) != 1L ) {
        stop( "lib.load.quiet() expects a single package name", call.=FALSE )
    }
    ## character.only=TRUE makes library() use the string as-is instead of
    ## re-applying its own non-standard evaluation to the argument
    invisible( suppressPackageStartupMessages(
        library( package, character.only=TRUE )
    ) )
}

## Required packages.
lib.load.quiet( getopt )
lib.load.quiet( gplots )

## Optional clustering packages: attach flashClust when it is available,
## otherwise fastcluster when that is available, otherwise neither.
## requireNamespace() tests one package directly instead of scanning the full
## installed.packages() matrix several times.
if ( requireNamespace( "flashClust", quietly=TRUE ) ) {
    lib.load.quiet( flashClust )
} else if ( requireNamespace( "fastcluster", quietly=TRUE ) ) {
    lib.load.quiet( fastcluster )
}

## Command-line specification for getopt, one row per option:
## long name, short flag, argument mode (0 = none, 1 = required, 2 = optional),
## and the type the value is stored as.
spec <- matrix( c( "data.fname",    "d", 1, "character",
                   "class.select",  "c", 1, "character",
                   "genes.only",    "g", 0, "logical",
                   "within.cl.srt", "w", 0, "logical",
                   "output.fname",  "o", 2, "character"
                 ),
                ncol=4,
                byrow=TRUE
              )

opt <- getopt( spec=spec )

## Both input files are read unconditionally later on, so fail here with a
## readable message rather than with an error from inside read.delim().
if ( is.null( opt$data.fname ) ) {
    stop( "missing required option -d <data.file>", call.=FALSE )
}
if ( is.null( opt$class.select ) ) {
    stop( "missing required option -c <class.file>", call.=FALSE )
}

## Default output name: swap a trailing tab/csv for cdt.
if ( is.null( opt$output.fname ) ) {
    opt$output.fname <- sub( "tab$|csv$", "cdt", opt$data.fname )
    ## sub() returns its input unchanged when there is no such suffix; append
    ## one instead so the output can never overwrite the input file
    if ( identical( opt$output.fname, opt$data.fname ) ) {
        opt$output.fname <- paste( opt$data.fname, "cdt", sep="." )
    }
}

## Flags that were not given on the command line come back as NULL.
if ( is.null( opt$genes.only ) )    opt$genes.only    <- FALSE
if ( is.null( opt$within.cl.srt ) ) opt$within.cl.srt <- FALSE

## Read the data matrix; the first column supplies the row names and the
## column names are kept exactly as written in the file.
data <- as.matrix( read.delim( opt$data.fname, row.names=1, check.names=FALSE ) )

## With -g, discard every feature whose name contains "complex", "abstract"
## or "family".
if ( opt$genes.only ) {
    is.gene <- ! grepl( "complex|abstract|family", rownames( data ) )
    ## drop=FALSE keeps a matrix even when a single row or column remains
    data <- data[ is.gene, , drop=FALSE ]
}

## Read the class assignments (first column = member IDs, used as row names)
## and sort the members by the value in the first remaining column.
cls <- as.matrix( read.delim( opt$class.select, row.names=1 ) )
cls <- cls[ order( cls[,1] ), , drop=FALSE ]

row.cluster <- FALSE
## If any ID in the class file matches a row name of the data, treat the
## classes as describing rows: transpose so that the classified items are in
## the columns from here on.
if ( any( rownames( cls ) %in% rownames( data ) ) ) {
    row.cluster <- TRUE
    data <- t( data )
}

## Restrict the class table to members that are actually present in the data.
if ( ! all( rownames( cls ) %in% colnames( data ) ) ) {

    ovp <- rownames( cls )
    ovp <- ovp[ ovp %in% colnames( data ) ]
    if ( length( ovp ) > 0 ) {
        ## drop=FALSE: a one-column class table must stay a matrix, because
        ## later code still uses cls[,1] and rownames( cls )
        cls <- cls[ ovp, , drop=FALSE ]
    } else {
        stop( "no samples in cluster are found in data file\n" )
    }
}

## With -w, re-order the members inside each class by hierarchical clustering
## of their data columns; the classes themselves stay in ascending order.
if ( opt$within.cl.srt ) {

    cls.orig <- cls
    ## attach the member IDs explicitly rather than relying on cls[,1] to
    ## carry them over
    cls.vect <- setNames( cls[,1], rownames( cls ) )
    cls.ids <- sort( unique( as.numeric( cls.vect ) ) )

    srt.names <- unlist( lapply( cls.ids,
                                 function(i) {
                                     elts <- names( cls.vect )[ cls.vect %in% i ]
                                     ## nothing to order, and hclust() cannot
                                     ## cluster fewer than two objects
                                     if ( length( elts ) < 2 ) {
                                         return( elts )
                                     }
                                     sub.mat <- data[, elts, drop=FALSE ]
                                     ## dist() works on rows, so transpose to
                                     ## compare the members (columns)
                                     sub.dist <- dist( t( sub.mat ) )
                                     elts[ hclust( sub.dist )$order ]
                                 }
                               )
                       )
    cls <- cls.orig[ srt.names, , drop=FALSE ]
}

## re-order the columns to follow the class table and append each member's
## class to its name, e.g. "<name>-cl03"
## (drop=FALSE keeps a matrix when only one member is left)
data <- data[, rownames( cls ), drop=FALSE ]
cl.tags <- paste0( "cl", sprintf( "%02d", cls[,1] ) )
colnames( data ) <- paste( rownames( cls ), cl.tags, sep="-" )

## now re-transpose, undoing the transpose applied for row-wise classes
if ( row.cluster ) {
    data <- t( data )
}

## col.names=NA writes an empty header cell above the row-name column
write.table( data, opt$output.fname, sep="\t", col.names=NA, quote=FALSE )