comparison cluster.tools/gen.survival.curves.R @ 2:b442996b66ae draft

Uploaded
author peter-waltman
date Wed, 27 Feb 2013 20:17:04 -0500
parents
children
comparison
equal deleted inserted replaced
1:e25d2bece0a2 2:b442996b66ae
1 #!/usr/bin/env Rscript
2 ##
3 ## Calculates the log-rank test for a given clustering, in the output format from ConsensusClusterPlus
4 ##
5 ## Input (required):
6 ## - consensus cluster file (consensusClass.csv file)
7 ## - survival data
8 ## Input (optional):
9 ## Output: a KM plot, with the most significant p-value. Output to stdout can be captured by re-direction
10 ##
11 ## Uses: survival library
12 ## Date: August 21, 2012
13 ## Author: Peter Waltman
14 ##
15
16 ##usage, options and doc goes here
17 argspec <- c("gen.survival.curves.R takes a clustering from ConsensusClusterPlus and clinical survival data
18 and generates a KM-plot, along with the log-rank p-values
19
20 Usage:
21 gen.survival.curves.R -c <cluster.file> -s <clinical.file>
22 Options:
23 -o <output file> (postscript)
24 -m <mode> (all, one, both)
25 \"all\" - perform all-vs-all log-rank test
26 \"one\" - perform one-vs-others log-rank test (returns min)
27 \"both\" - perform both \"all\" and \"one\" tests
28 -t <title>
29 -p <pval.only> ( only return the p-value for this given mode - no plotting at all (to screen or postscript))
30 -v <verbose>
31 ")
32
33 lib.load.quiet <- function( package ) {
34 package <- as.character(substitute(package))
35 suppressPackageStartupMessages( do.call( "library", list( package=package ) ) )
36 }
37 lib.load.quiet(getopt)
38 lib.load.quiet( survival )
39
40
41 args <- commandArgs(TRUE)
42 if ( length( args ) == 1 && args =="--help") {
43 write(argspec, stderr())
44 q();
45 }
46
47 spec <- matrix( c( "cluster.fname", "C", 1, "character",
48 "survival.fname", "S", 1, "character",
49 "mode", "M", 2, "character",
50 "title", "T", 2, "character",
51 "image.format", "I", 2, "character",
52 "output.fname", "O", 2, "character",
53 "pval.only", "P", 0, "logical",
54 "verbose", "V", 0, "logical"
55 ),
56 ncol=4,
57 byrow=TRUE
58 )
59 opt <- getopt( spec=spec )
60
61
62 #set some reasonable defaults for the options that are needed,
63 #but were not specified.
64 if ( is.null(opt$mode ) ) {
65 opt$mode <- "all"
66 } else {
67 if ( ! opt$mode %in% c( 'all', 'one', 'both' ) ) {
68 stop( "invalid mode specified,' -m", opt$mode, "'. must be either {all, one, both}\n" )
69 }
70 }
71 if ( is.null( opt$title ) ) {
72 opt$title <- opt$cluster.fname
73 opt$title <- strsplit( opt$title, "\\/" )[[1]]
74 opt$title <- opt$title[ length( opt$title ) ]
75 }
76 if ( is.null( opt$image.format ) ){
77 opt$image.format <- "png"
78 } else {
79 if ( ! opt$image.format %in% c( "pdf", "png", "none" ) ) stop( 'invalid image format specified\n' )
80 }
81 if ( is.null(opt$output.fname ) ) { opt$output.fname <- paste( opt$mode, "survival.curve", opt$image.format, sep="." ) }
82 if ( is.null(opt$cluster.header ) ) { opt$cluster.header = FALSE }
83 if ( is.null(opt$pval.only ) ) { opt$pval.only = FALSE }
84 if ( is.null(opt$verbose ) ) { opt$verbose = FALSE }
85
86 ##print some progress messages to stderr, if requested.
87 if ( opt$verbose ) { write("writing...",stderr()); }
88
89 load( opt$cluster.fname )
90 cluster.data <- cbind( names( cl ), as.numeric( cl ) )
91 colnames( cluster.data ) <- c( "id", "group_num" )
92 rownames( cluster.data ) <- names( cl )
93
94 survival.data <- read.delim( opt$survival.fname, as.is=TRUE, row.names=1 )
95 survival.data <- cbind( rownames( survival.data ), survival.data ) ## add in the ids, so we can merge on them
96 if ( length( colnames( survival.data ) ) == 3 ) {
97 ## we have to left-shift the current colanmes to drop the 1st one
98 ## b/c cbind will add one for the column we just added
99 colnames( survival.data ) <- c( "id", colnames( survival.data )[-1] )
100 }
101 if ( length( colnames( survival.data ) ) == 2 ) {
102 ## added just in case there's a change to cbind as R is prone to doing
103 colnames( survival.data ) <- c( "id", colnames( survival.data ) )
104 }
105 survival.data$id <- as.character( survival.data$id )
106
107
108 ## Now, filter so we only contain the same samples
109 n.clust.data.samps <- nrow( cluster.data )
110 n.surv.data.samps <- nrow( survival.data )
111 if ( n.clust.data.samps > n.surv.data.samps ) {
112 ovp.samples <- rownames( cluster.data )
113 ovp.samples <- ovp.samples[ ovp.samples %in% survival.data$id ]
114 } else {
115 ovp.samples <- survival.data$id
116 ovp.samples <- ovp.samples[ ovp.samples %in% rownames( cluster.data ) ]
117 }
118
119 cluster.data <- cluster.data[ ovp.samples, , drop=FALSE]
120 survival.data <- survival.data[ ovp.samples, ]
121 survival.data <- merge( survival.data, cluster.data )
122
123
124 calc.all.pval <- function( survival.data ) {
125 survdiff( Surv( time, status )~group_num, data=survival.data )
126 surv.res <- survdiff( Surv( time, status )~group_num, data=survival.data )
127 pval <- surv.res$chisq
128 df <- length( surv.res$n ) - 1
129 pval <- pchisq( pval, df=df, lower=F )
130 return( pval )
131 }
132
133 calc.one.v.others.pval <- function( survival.data ) {
134 grps <- sort( unique( as.numeric( survival.data$group_num ) ) )
135
136 retval <- numeric()
137 for ( g in grps ) {
138 one.v.all.survival.data <- survival.data
139 tmp <- as.numeric( one.v.all.survival.data$group_num )
140 tmp[ ! tmp %in% g ] <- -1
141 tmp[ tmp %in% g ] <- 1
142 tmp[ tmp %in% -1 ] <- 2
143 one.v.all.survival.data$group_num <- tmp
144 surv.res <- survdiff( Surv( time, status )~group_num, data=one.v.all.survival.data )
145 pval <- surv.res$chisq
146 df <- length( surv.res$n ) - 1
147 pval <- pchisq( pval, df=df, lower=F )
148 retval <- c( retval, pval )
149 }
150 names( retval ) <- grps
151 return( retval )
152 }
153
154
155 if ( opt$mode == "all" ) {
156
157 pval <- calc.all.pval( survival.data )
158 log.rank <- paste( "Log Rank p-value:", sprintf( "%1.2e",pval ) )
159 opt$title <- paste( opt$title, log.rank, sep="\n" )
160 } else {
161 if ( opt$mode == "one" ) {
162
163 pvals <- calc.one.v.others.pval( survival.data )
164 min.p <- min( pvals, na.rm=T )
165 if ( length( min.p ) == 0 ) {
166 stop( 'no valid p-value returned from the one-v-others test\n' )
167 }
168 cluster.num <- names( pvals )[ pvals == min.p ]
169 pval <- pvals[ cluster.num ]
170 log.rank <- paste( "Log Rank p-value for cluster", cluster.num,"is:", sprintf( "%1.2e",pval ) )
171 opt$title <- paste( opt$title, log.rank, sep="\n" )
172 } else {
173 if ( opt$mode== "both" ) {
174 ## add the all-v-all p-value
175 bak <- pval <- calc.all.pval( survival.data )
176 log.rank <- paste( "Log Rank p-value:", sprintf( "%1.2e",pval ) )
177 opt$title <- paste( opt$title, log.rank, sep="\n" )
178
179 ## now add the one-v-all p-value
180 pvals <- calc.one.v.others.pval( survival.data )
181 min.p <- min( pvals, na.rm=T )
182 if ( length( min.p ) == 0 ) {
183 stop( 'no valid p-value returned from the one-v-others test\n' )
184 }
185 cluster.num <- names( pvals )[ pvals == min.p ]
186 pval <- pvals[ cluster.num ]
187 log.rank <- paste( "Log Rank p-value for cluster", cluster.num,"is:", sprintf( "%1.2e",pval ) )
188 opt$title <- paste( opt$title, log.rank, sep="\n" )
189
190 if ( opt$pval.only ) {
191 pval <- min( c( bak, pval ), na.rm=T )
192 }
193 }
194 else {
195 stop( "invalid mode specified, mode = ", opt$mode, "\n" )
196 }
197 }
198 }
199
200 if ( opt$pval.only ) {
201 cat( paste(pval, "\n", sep="" ), file=stdout() )
202 }
203
204
205 if ( ! opt$pval.only ) {
206 ngrps <- length( unique( survival.data$group_num ) )
207 col.map <- rainbow( ngrps )
208
209
210
211 ##postscript( opt$output.fname, horizontal=T, paper='letter' )
212 if ( opt$image.format == 'png' ) {
213 plot.dev <- png( opt$output.fname,
214 width=11,
215 height=8.5,
216 units='in',
217 res=72 )
218 } else if ( opt$image.format == 'pdf' ) {
219 pdf( opt$output.fname,
220 paper="letter" )
221 } else if ( opt$image.format == 'none' ) {
222 ## do nothing - this allows other scripts to call this and hopefully plot into them
223
224 }
225
226 plot( survfit( Surv( time, status )~group_num, data=survival.data ),
227 main = opt$title,
228 ##lty = 1:ngrps,
229 lty=1,
230 col=col.map,
231 ylab = "Probability",
232 xlab = "Survival Time in Days",
233 )
234
235 ## set the legend.labels if they're still not set yet
236 if( ! exists( "legend.labels" ) ) {
237 grp.counts <- table( as.factor( survival.data[, "group_num" ] ) )
238 legend.labels <- paste( "Cluster", 1:ngrps, paste( "(n=", as.integer(grp.counts), ")", sep="" ) )
239 }
240
241 legend( "topright",
242 lty=1,
243 col=col.map,
244 bty = "n",
245 legend=legend.labels
246 )
247
248 if( opt$image.format != "none" ) dev.off()
249 }