annotate cluster.tools/select.k.from.consensus.cluster.R @ 9:a3c03541fe6f draft default tip

Uploaded
author peter-waltman
date Mon, 11 Mar 2013 17:30:48 -0400
parents a58527c632b7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
1 #!/usr/bin/env Rscript
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
2 # Consensus Clustering Script by Peter Waltman
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
3 # June 1, 2012
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
4 # License under Creative Commons Attribution 3.0 Unported (CC BY 3.0)
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
5 #
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
6 ##usage, options and doc goes here
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
7 argspec <- c("select.k.from.consensus.clust4er.R takes a clustering from ConsensusClusterPlus
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
8 and clinical survival data and determines the right k to use.
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
9
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
10 Usage:
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
11 select.k.from.consensus.cluster.R -r <results_file>
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
12 Optional:
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
13 -o output.png # default is stdout
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
14 -c change.min
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
15 -m metric (must be either:
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
16 rel.change,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
17 angle,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
18 silhouette (must specify data matrix)
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
19 survival (must specify survival data; uses minimal cummulative log-rank p-value)
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
20 -d data ## for calculating silhouette plots (plotted, but not used unless specified)
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
21 -s survival.data.fname (plotted, but not used unless specified)
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
22 -e survival.comp (can be either all, one or both - see the mode param for gen.survival.curves for explanation)
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
23 -z survival analysis script to be called (defaults to galaxy.gen.survival.curves.R)
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
24 \n\n")
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
25 args <- commandArgs(TRUE)
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
26 if ( length( args ) == 1 && args =="--help") {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
27 write( argspec, stderr() )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
28 q();
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
29 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
30
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
31 lib.load.quiet <- function( package ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
32 package <- as.character(substitute(package))
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
33 suppressPackageStartupMessages( do.call( "library", list( package=package ) ) )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
34 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
35 lib.load.quiet(getopt)
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
36 lib.load.quiet( amap )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
37 lib.load.quiet( cluster )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
38
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
39 spec <- matrix( c( "results.file", "r", 1, "character",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
40 "change.min", "c", 2, "double",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
41 "metric", "m", 2, "character",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
42 "survival.data", "s", 2, "character",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
43 "survival.comp", "e", 2, "character",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
44 "survival.script", "z", 2, "character",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
45 "output.format", "f", 2, "character",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
46 "cluster.class.out", "o", 2, "character",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
47 "output.report.dir", "p", 2, "character",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
48 "output.report.html", "h", 2, "character"
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
49 ),
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
50 nc=4,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
51 byrow=T
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
52 )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
53 opt <- getopt( spec=spec )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
54
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
55 ## default params for non-required params
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
56 if ( is.null( opt$output.report.dir ) ) { opt$output.report.dir <- "report" }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
57 if ( is.null( opt$output.report.html ) ) { opt$output.report.html <- "report/index.html" }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
58
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
59 if ( is.null( opt$change.min ) ) { opt$change.min <- 0.075 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
60 if ( is.null( opt$metric ) ) { opt$metric <- "difference" } ## alternate is angle }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
61 if ( is.null( opt$survival.comp ) ) { opt$survival.comp <- "all" } ## alternate is one or both }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
62 if ( is.null( opt$survival.script ) ) { opt$survival.script <- "galaxy.gen.survival.curves.R" } ## alternate is one or both }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
63 if ( is.null( opt$cluster.class.out) ) { opt$cluster.class.out <- "select.cls.rda" }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
64
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
65 if ( !file.exists( opt$output.report.dir ) ){
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
66 dir.create(opt$output.report.dir)
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
67 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
68
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
69 if ( ! opt$metric %in% c( "difference", "angle", "silhouette", "survival" ) ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
70 stop( "invalid metric specified ", opt$metric, "\n" )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
71 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
72
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
73
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
74 opt$change.min <- as.numeric( opt$change.min )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
75 if ( abs( opt$change.min ) > 1 ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
76 stop( "invalid angle specified:", opt$change.min, "Please specify angle in rangel [-1,0]\n" )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
77 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
78 if ( opt$metric=="angle" && opt$change.min > 0 ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
79 opt$change.min <- -opt$change.min
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
80 cat( "Using", opt$change.min, "for minimum angle\n" )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
81 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
82
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
83 if ( opt$metric == "survival" &&
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
84 ( is.null( opt$survival.data ) ||
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
85 (! file.exists( opt$survival.data ) ) )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
86 ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
87 stop( "Must provide valid survival file in order to use survival as metric\n" )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
88 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
89
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
90
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
91 ## From the ConsensusClusterPlust package - modified by phw
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
92 CDF <- function( ml,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
93 breaks=1000,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
94 plot.it=TRUE ){
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
95 if ( class(ml[[1]])=="matrix" && ( names( ml[1] ) =="2" ) ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
96 ml <- c( c(0), ml )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
97 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
98 ##plot CDF distribution
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
99 if ( plot.it ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
100 plot( c(0),
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
101 xlim=c(0,1),
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
102 ylim=c(0,1),
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
103 col="white",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
104 bg="white",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
105 xlab="consensus index",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
106 ylab="CDF",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
107 main="consensus CDF",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
108 las=2 )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
109 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
110
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
111 k=length(ml)
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
112 this_colors <- rainbow(k-1)
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
113 areaK <- c()
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
114 for (i in 2:length( ml ) ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
115 v <- ml[[i]]
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
116 v <- v[ lower.tri(v) ]
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
117
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
118 #empirical CDF distribution. default number of breaks is 100
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
119 h = hist(v, plot=FALSE, breaks=seq(0,1,by=1/breaks))
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
120 h$counts = cumsum(h$counts)/sum(h$counts)
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
121
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
122 #calculate area under CDF curve, by histogram method.
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
123 thisArea=0
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
124 for (bi in 1:(length(h$breaks)-1)){
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
125 thisArea = thisArea + h$counts[bi]*(h$breaks[bi+1]-h$breaks[bi]) #increment by height by width
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
126 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
127 areaK = c(areaK,thisArea)
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
128 if ( plot.it ) lines(h$mids,h$counts,col=this_colors[i-1],lwd=2,type='l')
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
129 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
130 if ( plot.it ) legend(0.8,0.5,legend=paste(rep("",k-1),seq(2,k,by=1),sep=""),fill=this_colors)
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
131
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
132 #Calc area under CDF change.
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
133 deltaK=areaK[1] #initial auc at k=2
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
134 for(i in 2:(length(areaK))){
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
135 #proportional increase relative to prior K.
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
136 deltaK = c(deltaK,( areaK[i] - areaK[i-1])/areaK[i-1])
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
137 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
138 return ( list( areaK=areaK, deltaK=deltaK ) )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
139 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
140
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
141
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
142 load( opt$results.file )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
143
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
144 if ( opt$metric == "silhouette" ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
145 if ( ! exists( 'data' ) && ( class( data ) != "matrix" ) ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
146 stop( "Must provide valid data matrix in order to use silhouette as metric\n" )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
147 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
148 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
149 cons.matrices <- lapply( results[ 2:length(results) ], '[[', 'consensusMatrix' )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
150 cls <- lapply( results[ 2:length(results) ], function( res ) return( res$consensusClass[ res$consensusTree$order ] ) ) ##'[[', 'consensusClass' )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
151 names( cons.matrices ) <- names( cls ) <- 2:length( results )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
152
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
153 png.fname <- file.path( opt$output.report.dir, "consensus.sel.criteria.CDF.png")
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
154 plot.dev <- png( png.fname,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
155 width=11,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
156 height=8.5,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
157 units='in',
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
158 res=72 )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
159 ## this will calculate the CDF, plus plot them
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
160 rel.delta <- CDF( cons.matrices, breaks=1000, plot.it=TRUE )$deltaK
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
161 dev.off()
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
162 names( rel.delta ) <- seq( from=2, by=1, length=length( rel.delta ) )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
163 vector.of.metric.changes <- rel.delta
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
164
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
165 main.txt <- ", per Size K"
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
166 ylab.txt <- ""
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
167
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
168 main.txt <- paste( "Relative Change in Area", main.txt, sep="" )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
169 ylab.txt <- paste( "relative change in area under CDF curve", ylab.txt, sep="" )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
170 png.fname <- file.path( opt$output.report.dir, "consensus.sel.criteria.diff.png")
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
171
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
172 plot.dev <- png( png.fname,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
173 width=11,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
174 height=8.5,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
175 units='in',
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
176 res=72 )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
177 plot( as.numeric( names( vector.of.metric.changes ) ),
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
178 vector.of.metric.changes,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
179 main=main.txt,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
180 ylab=ylab.txt,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
181 xlab="Cluster size (K)",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
182 type='b' )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
183 dev.off()
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
184
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
185 k.select <- vector.of.metric.changes[ vector.of.metric.changes < opt$change.min ]
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
186 if ( length( k.select ) > 1 ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
187 k.select <- k.select[1]
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
188 } else {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
189 if ( length( k.select ) == 0 ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
190 k.select <- vector.of.metric.changes[ length( vector.of.metric.changes ) ]
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
191 } else {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
192 ## do nothing
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
193 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
194 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
195 k.select <- as.numeric( names( k.select ) )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
196 ## find the search range
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
197 k.search.range <- (k.select-2):(k.select+2)
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
198 k.search.range <- k.search.range[ k.search.range %in% as.numeric( names( vector.of.metric.changes ) ) ]
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
199 k.search.range <- vector.of.metric.changes[ as.character( k.search.range ) ]
9
a3c03541fe6f Uploaded
peter-waltman
parents: 8
diff changeset
200
a3c03541fe6f Uploaded
peter-waltman
parents: 8
diff changeset
201 if ( sum( k.search.range < 0.25 ) == 0 ) {
a3c03541fe6f Uploaded
peter-waltman
parents: 8
diff changeset
202 ## this should only happen if this is a garbage clustering
a3c03541fe6f Uploaded
peter-waltman
parents: 8
diff changeset
203 k.search.range <- k.search.range[ which.min( k.search.range ) ]
a3c03541fe6f Uploaded
peter-waltman
parents: 8
diff changeset
204 } else {
a3c03541fe6f Uploaded
peter-waltman
parents: 8
diff changeset
205 k.search.range <- k.search.range[ k.search.range < 0.25 ]
a3c03541fe6f Uploaded
peter-waltman
parents: 8
diff changeset
206 }
a3c03541fe6f Uploaded
peter-waltman
parents: 8
diff changeset
207
a3c03541fe6f Uploaded
peter-waltman
parents: 8
diff changeset
208 if ( sum( k.search.range > 0.025 ) == 0 ) {
a3c03541fe6f Uploaded
peter-waltman
parents: 8
diff changeset
209 k.search.range <- k.search.range[ which.max( k.search.range ) ]
a3c03541fe6f Uploaded
peter-waltman
parents: 8
diff changeset
210 } else {
a3c03541fe6f Uploaded
peter-waltman
parents: 8
diff changeset
211 k.search.range <- k.search.range[ k.search.range > 0.025 ]
a3c03541fe6f Uploaded
peter-waltman
parents: 8
diff changeset
212 }
0
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
213 k.search.range <- names( k.search.range )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
214
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
215 if ( exists("data") ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
216 ## what direction is the clustering in? rows or cols?
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
217 elts <- unique( names( results[[2]]$consensusClass ) )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
218 if ( all( elts %in% colnames( data ) ) ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
219 ## sample clusters
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
220 data.dist <- dist( t( data ) )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
221 cls <- lapply( cls, function( x ) return( x[ colnames( data ) ] ) )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
222 } else if ( all( elts %in% rownames( data ) ) ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
223 data.dist <- dist( data )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
224 cls <- lapply( cls, function( x ) return( x[ rownames( data ) ] ) )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
225 } else {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
226 stop( "incompatible cluster results and data matrix\n" )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
227 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
228
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
229
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
230 sils <- lapply( cls,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
231 silhouette,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
232 dist=data.dist )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
233 sils <- sapply( sils,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
234 function(x) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
235 return( summary( x )$avg.width )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
236 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
237 )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
238
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
239 png.fname <- file.path( opt$output.report.dir, "consensus.sel.silhouette.png")
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
240
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
241 plot.dev <- png( png.fname,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
242 width=11,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
243 height=8.5,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
244 units='in',
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
245 res=72 )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
246 plot( as.numeric( names( sils ) ),
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
247 sils,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
248 main="Average Silhouette Widths, per Cluster Size K",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
249 ylab="average silhouette width (correlation distance)",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
250 xlab="Cluster size (K)",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
251 type='b' )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
252 dev.off()
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
253
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
254 ## if the metric is silhouette, use that (but only over the k's that are on the rel-change "elbow"
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
255 if ( opt$metric == "silhouette" ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
256 names( sils ) <- names( cls )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
257
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
258 sils <- sils[ k.search.range ]
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
259 k.select <- as.numeric( names( sils[ sils == max( sils, na.rm=T ) ] ) )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
260 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
261 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
262
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
263 if ( ! is.null( opt$survival.data ) ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
264 if ( ! file.exists( opt$survival.data ) ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
265 stop( 'specified clinical/survival file can not be found:', opt$survival.data, "\n" )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
266 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
267
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
268 if ( opt$metric == "survival" ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
269 pvals <- numeric()
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
270
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
271 for ( cl in cls ) {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
272
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
273 cons.class.file <- tempfile( "tmp.class.rdata" )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
274 save( file=cons.class.file, cl )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
275
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
276 cmd.string <- opt$survival.script
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
277
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
278 ## get the consensusClass file that's associated with the k.select
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
279 cmd.string <- paste( cmd.string, "-C", cons.class.file )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
280 cmd.string <- paste( cmd.string, "-S", opt$survival.data )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
281 cmd.string <- paste( cmd.string, "-M", opt$survival.comp )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
282 cmd.string <- paste( cmd.string, "-P" )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
283 pvals <- c( pvals, as.numeric( system( cmd.string, intern=T ) ) )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
284 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
285 names( pvals ) <- names( cls )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
286
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
287
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
288 png.fname <- file.path( opt$output.report.dir, "consensus.sel.criteria.survival.png" )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
289
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
290 plot.dev <- png( png.fname,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
291 width=11,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
292 height=8.5,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
293 units='in',
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
294 res=72 )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
295 plot( as.numeric( names( pvals ) ),
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
296 -log( pvals ),
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
297 main="Average Log-rank p-values (-log), per Cluster Size K",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
298 ylab="Average log-rank p-values (-log)",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
299 xlab="Cluster size (K)",
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
300 type='b' )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
301 dev.off()
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
302
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
303
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
304 pvals <- pvals[ k.search.range ]
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
305 k.select <- as.numeric( names( pvals[ pvals == min( pvals, na.rm=T ) ] ) )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
306 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
307
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
308 cmd.string <- opt$survival.script
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
309
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
310 ## get the consensusClass file that's associated with the k.select
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
311 cl <- cls[[ as.character( k.select ) ]]
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
312 cl <- cbind( names( cl ), as.integer(cl) )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
313 colnames( cl ) <- c( "ID", "class" )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
314 write.table( cl, opt$cluster.class.out, sep="\t", row.names=FALSE, quote=FALSE )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
315
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
316 cmd.string <- paste( cmd.string, "-c", opt$cluster.class.out )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
317 cmd.string <- paste( cmd.string, "-s", opt$survival.data )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
318 cmd.string <- paste( cmd.string, "-m", opt$survival.comp )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
319
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
320 survival.out.file <- paste( opt$output.report.dir, "survival.png", sep="/" )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
321 cmd.string <- paste( cmd.string, "-o", survival.out.file )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
322 output <- system( cmd.string, intern=T )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
323 cat( output, sep="\n" )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
324 } else {
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
325 ## get the consensusClass file that's associated with the k.select
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
326 cl <- cls[[ as.character( k.select ) ]]
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
327 cl <- cbind( names( cl ), as.integer(cl) )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
328 colnames( cl ) <- c( "ID", "class" )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
329 write.table( cl, opt$cluster.class.out, sep="\t", row.names=FALSE, quote=FALSE )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
330 }
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
331
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
332 ## cl should already exist, but re-create it just in case
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
333 cl <- cls[[ as.character( k.select ) ]]
8
a58527c632b7 Uploaded
peter-waltman
parents: 0
diff changeset
334 treecl.res <- results[[ k.select ]]$consensusTree
a58527c632b7 Uploaded
peter-waltman
parents: 0
diff changeset
335 select.result <- results[[ k.select ]]
0
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
336
8
a58527c632b7 Uploaded
peter-waltman
parents: 0
diff changeset
337 if ( length(cl) == ncol(data) ) {
a58527c632b7 Uploaded
peter-waltman
parents: 0
diff changeset
338 names( cl ) <- treecl.res$labels <- select.result$consensusTree$labels <- colnames(data)
a58527c632b7 Uploaded
peter-waltman
parents: 0
diff changeset
339 } else if ( length(cl) == nrow(data) ) {
a58527c632b7 Uploaded
peter-waltman
parents: 0
diff changeset
340 names( cl ) <- treecl.res$labels <- select.result$consensusTree$labels <- rownames(data)
a58527c632b7 Uploaded
peter-waltman
parents: 0
diff changeset
341 } else {
a58527c632b7 Uploaded
peter-waltman
parents: 0
diff changeset
342 stop( "Number of clustered elements not equal to either number of rows or columns of data matrix\n" )
a58527c632b7 Uploaded
peter-waltman
parents: 0
diff changeset
343 }
a58527c632b7 Uploaded
peter-waltman
parents: 0
diff changeset
344
0
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
345 ## over-write the tabular version of the opt$cluster.class.out with an RData file
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
346 save( file=opt$cluster.class.out, treecl.res, cl, select.result, data )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
347
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
348 report_str = paste( "k selected by consensus clustering and user-specified metric, ", opt$metric, ", is ", k.select, "\n", sep="" )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
349
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
350 pngs = list.files(path=opt$output.report.dir, patt="png")
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
351 html.out <- paste( "<html>", report_str,
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
352 paste( paste( paste( "<div><img src=\'", pngs, sep="" ), "\'/></div>", sep="" ), collapse=""),
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
353 "</html>" )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
354 cat( html.out, file=opt$output.report.html )
0decf3fd54bc Uploaded
peter-waltman
parents:
diff changeset
355