annotate cluster.tools/order.by.cl.R @ 0:0decf3fd54bc draft

Uploaded
author peter-waltman
date Thu, 28 Feb 2013 01:45:39 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
1 #!/usr/bin/env Rscript
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
## Usage text written to stderr when the script is invoked with --help.
## The options listed here mirror the getopt specification used by this script
## (-d, -c, -g, -w, -o).
argspec <- c("order.by.cl.R reorders a data matrix by cluster assignment

Usage:
    order.by.cl.R -d <data.file> -c <cluster.assignment.file>
Optional:
    -g    drop features whose names match complex|abstract|family
    -w    also order the members within each cluster by hierarchical clustering
    -o <output_file>
\n\n")

## Only a lone --help argument triggers the usage message; anything else is
## handed to the option parser.
args <- commandArgs(TRUE)
if (length(args) == 1 && args == "--help") {
  write(argspec, stderr())
  q()
}
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
14
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
## Attach a package without printing its startup messages.
##
## package: the package to attach, given as a bare (unquoted) name; the name
##          is captured with substitute() and converted to a string.
## Returns whatever library() returns.
lib.load.quiet <- function(package) {
  pkg.name <- as.character(substitute(package))
  suppressPackageStartupMessages(
    library(pkg.name, character.only = TRUE)
  )
}
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
## Attach the required packages quietly.
lib.load.quiet(getopt)
lib.load.quiet(gplots)

## Attach at most one of the optional clustering packages, preferring
## flashClust over fastcluster (presumably so the later hclust() call picks up
## a faster implementation -- confirm against the package documentation).
## The lookup is done once, against package names only (the row names of
## installed.packages()), rather than against every cell of that matrix.
installed.pkgs <- rownames(installed.packages())
if ("flashClust" %in% installed.pkgs) {
  lib.load.quiet(flashClust)
} else if ("fastcluster" %in% installed.pkgs) {
  lib.load.quiet(fastcluster)
}
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
30
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
31
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
## getopt specification, one row per option:
## long name, short flag, argument mode, value type.
spec <- matrix(
  c(
    "data.fname",    "d", 1, "character",
    "class.select",  "c", 1, "character",
    "genes.only",    "g", 0, "logical",
    "within.cl.srt", "w", 0, "logical",
    "output.fname",  "o", 2, "character"
  ),
  ncol = 4,
  byrow = TRUE
)

opt <- getopt(spec = spec)

## Both input files are read unconditionally further down, so fail here with a
## clear message instead of an obscure read.delim() error.
if (is.null(opt$data.fname)) {
  stop("no data file given (-d <data.file>)", call. = FALSE)
}
if (is.null(opt$class.select)) {
  stop("no cluster assignment file given (-c <file>)", call. = FALSE)
}

if (is.null(opt$output.fname)) {
  opt$output.fname <- sub("tab$|csv$", "cdt", opt$data.fname)
  ## When the input name ends in neither "tab" nor "csv" the substitution is a
  ## no-op and the default output would overwrite the input file; append the
  ## extension instead.
  if (identical(opt$output.fname, opt$data.fname)) {
    opt$output.fname <- paste(opt$data.fname, "cdt", sep = ".")
  }
}

## Flags that were not given come back as NULL; normalise them to FALSE.
if (is.null(opt$genes.only)) opt$genes.only <- FALSE
if (is.null(opt$within.cl.srt)) opt$within.cl.srt <- FALSE
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
47
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
## Read the tab-delimited data file into a matrix. The first column supplies
## the row names and column names are kept exactly as written in the file.
data <- as.matrix(read.delim(opt$data.fname, row.names = 1, check.names = FALSE))

## With -g, keep only features whose names do not contain "complex",
## "abstract" or "family".
if (opt$genes.only) {
  feats <- rownames(data)
  gene.feats <- feats[!grepl("complex|abstract|family", feats)]
  ## drop = FALSE keeps `data` a matrix even if a single feature survives.
  data <- data[gene.feats, , drop = FALSE]
}
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
55
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
56
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
## Read the cluster assignments (first column = element names, used as row
## names) and sort the elements by the value in the first remaining column.
cls <- as.matrix(read.delim(opt$class.select, row.names = 1))
cls <- cls[order(cls[, 1]), , drop = FALSE]

## If any clustered element names a row of the data matrix, treat this as a
## clustering of rows: transpose so the clustered elements are columns for the
## rest of the script (the transpose is undone before writing).
row.cluster <- FALSE
if (any(rownames(cls) %in% rownames(data))) {
  row.cluster <- TRUE
  data <- t(data)
}

## Restrict the assignments to elements actually present in the data.
if (!all(rownames(cls) %in% colnames(data))) {
  ovp <- rownames(cls)
  ovp <- ovp[ovp %in% colnames(data)]
  if (length(ovp) == 0) {
    stop("no samples in cluster are found in data file\n")
  }
  ## drop = FALSE keeps `cls` a matrix; without it a one-column assignment
  ## table collapses to a vector and the later cls[, 1] lookups fail.
  cls <- cls[ovp, , drop = FALSE]
}
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
78
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
## With -w, reorder the members of each cluster by the leaf order of a
## hierarchical clustering of their data columns. Clusters themselves stay in
## ascending order of their numeric id.
if (opt$within.cl.srt) {
  cls.orig <- cls
  ## Re-attach the names explicitly: cls[, 1] loses them when cls has one row.
  cls.vect <- setNames(cls[, 1], rownames(cls))
  cl.ids <- sort(unique(as.numeric(cls.vect)))

  ordered.elts <- unlist(lapply(cl.ids, function(i) {
    elts <- names(cls.vect[cls.vect %in% i])
    ## hclust() needs at least two objects; a singleton cluster is already
    ## in its only possible order.
    if (length(elts) < 2) {
      return(elts)
    }
    ## drop = FALSE keeps the sub-matrix two-dimensional for t()/dist().
    sub.mat <- data[, elts, drop = FALSE]
    sub.dist <- dist(t(sub.mat))
    elts[hclust(sub.dist)$order]
  }))

  cls <- cls.orig[ordered.elts, , drop = FALSE]
}
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
97
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
98
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
## Put the data columns in cluster order. drop = FALSE keeps `data` a matrix
## even when only one element was selected, so colnames<- below still works.
data <- data[, rownames(cls), drop = FALSE]

## Tag each column name with its zero-padded cluster id: "<name>-cl<NN>".
colnames(data) <- sprintf("%s-cl%02d", rownames(cls), cls[, 1])

## Undo the earlier transpose so the output has the input's orientation.
if (row.cluster) {
  data <- t(data)
}

## col.names = NA writes an empty header cell above the row-name column.
write.table(data, opt$output.fname, sep = "\t", col.names = NA, quote = FALSE)
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
108