0
|
1 #!/usr/bin/env Rscript
|
|
2 ##
|
|
3 ## Calculates the log-rank test for a given clustering, in the output format from ConsensusClusterPlus
|
|
4 ##
|
|
5 ## Input (required):
|
|
6 ## - consensus cluster file (consensusClass.csv file)
|
|
7 ## - survival data
|
|
8 ## Input (optional):
|
|
9 ## Output: a KM plot, with the most significant p-value. Output to stdout can be captured by re-direction
|
|
10 ##
|
|
11 ## Uses: survival library
|
|
12 ## Date: August 21, 2012
|
|
13 ## Author: Peter Waltman
|
|
14 ##
|
|
15
|
|
16 ##usage, options and doc goes here
|
|
17 argspec <- c("gen.survival.curves.R takes a clustering from ConsensusClusterPlus and clinical survival data
|
|
18 and generates a KM-plot, along with the log-rank p-values
|
|
19
|
|
20 Usage:
|
|
21 gen.survival.curves.R -c <cluster.file> -s <clinical.file>
|
|
22 Options:
|
|
23 -o <output file> (postscript)
|
|
24 -m <mode> (all, one, both)
|
|
25 \"all\" - perform all-vs-all log-rank test
|
|
26 \"one\" - perform one-vs-others log-rank test (returns min)
|
|
27 \"both\" - perform both \"all\" and \"one\" tests
|
|
28 -t <title>
|
|
29 -p <pval.only> ( only return the p-value for this given mode - no plotting at all (to screen or postscript))
|
|
30 -v <verbose>
|
|
31 ")
|
|
32
|
|
33 lib.load.quiet <- function( package ) {
|
|
34 package <- as.character(substitute(package))
|
|
35 suppressPackageStartupMessages( do.call( "library", list( package=package ) ) )
|
|
36 }
|
|
37 lib.load.quiet(getopt)
|
|
38 lib.load.quiet( survival )
|
|
39
|
|
40
|
|
41 args <- commandArgs(TRUE)
|
|
42 if ( length( args ) == 1 && args =="--help") {
|
|
43 write(argspec, stderr())
|
|
44 q();
|
|
45 }
|
|
46
|
|
47 spec <- matrix( c( "cluster.fname", "C", 1, "character",
|
|
48 "survival.fname", "S", 1, "character",
|
|
49 "mode", "M", 2, "character",
|
|
50 "title", "T", 2, "character",
|
|
51 "myplots.rda", "R", 2, "character",
|
|
52 "image.format", "I", 2, "character",
|
|
53 "output.fname", "O", 2, "character",
|
|
54 "pval.only", "P", 0, "logical",
|
|
55 "verbose", "V", 0, "logical"
|
|
56 ),
|
|
57 ncol=4,
|
|
58 byrow=TRUE
|
|
59 )
|
|
60 opt <- getopt( spec=spec )
|
|
61
|
|
62
|
|
63 #set some reasonable defaults for the options that are needed,
|
|
64 #but were not specified.
|
|
65 if ( is.null(opt$mode ) ) {
|
|
66 opt$mode <- "all"
|
|
67 } else {
|
|
68 if ( ! opt$mode %in% c( 'all', 'one', 'both' ) ) {
|
|
69 stop( "invalid mode specified,' -m", opt$mode, "'. must be either {all, one, both}\n" )
|
|
70 }
|
|
71 }
|
|
72 if ( is.null( opt$title ) ) {
|
|
73 opt$title <- opt$cluster.fname
|
|
74 opt$title <- strsplit( opt$title, "\\/" )[[1]]
|
|
75 opt$title <- opt$title[ length( opt$title ) ]
|
|
76 }
|
|
77 if ( is.null( opt$image.format ) ){
|
|
78 opt$image.format <- "png"
|
|
79 } else {
|
|
80 if ( ! opt$image.format %in% c( "pdf", "png", "none" ) ) stop( 'invalid image format specified\n' )
|
|
81 }
|
|
82 if ( is.null(opt$output.fname ) ) { opt$output.fname <- paste( opt$mode, "survival.curve", opt$image.format, sep="." ) }
|
|
83 if ( is.null(opt$cluster.header ) ) { opt$cluster.header = FALSE }
|
|
84 if ( is.null(opt$pval.only ) ) { opt$pval.only = FALSE }
|
|
85 if ( is.null(opt$verbose ) ) { opt$verbose = FALSE }
|
|
86
|
|
87 ##print some progress messages to stderr, if requested.
|
|
88 if ( opt$verbose ) { write("writing...",stderr()); }
|
|
89
|
|
90 load( opt$cluster.fname )
|
|
91 cluster.data <- cbind( names( cl ), as.numeric( cl ) )
|
|
92 colnames( cluster.data ) <- c( "id", "group_num" )
|
|
93 rownames( cluster.data ) <- names( cl )
|
|
94
|
|
95 survival.data <- read.delim( opt$survival.fname, as.is=TRUE, row.names=1 )
|
|
96 survival.data <- cbind( rownames( survival.data ), survival.data ) ## add in the ids, so we can merge on them
|
|
97 if ( length( colnames( survival.data ) ) == 3 ) {
|
|
98 ## we have to left-shift the current colanmes to drop the 1st one
|
|
99 ## b/c cbind will add one for the column we just added
|
|
100 colnames( survival.data ) <- c( "id", colnames( survival.data )[-1] )
|
|
101 }
|
|
102 if ( length( colnames( survival.data ) ) == 2 ) {
|
|
103 ## added just in case there's a change to cbind as R is prone to doing
|
|
104 colnames( survival.data ) <- c( "id", colnames( survival.data ) )
|
|
105 }
|
|
106 survival.data$id <- as.character( survival.data$id )
|
|
107
|
|
108
|
|
109 ## Now, filter so we only contain the same samples
|
|
110 n.clust.data.samps <- nrow( cluster.data )
|
|
111 n.surv.data.samps <- nrow( survival.data )
|
|
112 if ( n.clust.data.samps > n.surv.data.samps ) {
|
|
113 ovp.samples <- rownames( cluster.data )
|
|
114 ovp.samples <- ovp.samples[ ovp.samples %in% survival.data$id ]
|
|
115 } else {
|
|
116 ovp.samples <- survival.data$id
|
|
117 ovp.samples <- ovp.samples[ ovp.samples %in% rownames( cluster.data ) ]
|
|
118 }
|
|
119
|
|
120 cluster.data <- cluster.data[ ovp.samples, , drop=FALSE]
|
|
121 survival.data <- survival.data[ ovp.samples, ]
|
|
122 survival.data <- merge( survival.data, cluster.data )
|
|
123
|
|
124
|
|
125 calc.all.pval <- function( survival.data ) {
|
|
126 survdiff( Surv( time, status )~group_num, data=survival.data )
|
|
127 surv.res <- survdiff( Surv( time, status )~group_num, data=survival.data )
|
|
128 pval <- surv.res$chisq
|
|
129 df <- length( surv.res$n ) - 1
|
|
130 pval <- pchisq( pval, df=df, lower=F )
|
|
131 return( pval )
|
|
132 }
|
|
133
|
|
134 calc.one.v.others.pval <- function( survival.data ) {
|
|
135 grps <- sort( unique( as.numeric( survival.data$group_num ) ) )
|
|
136
|
|
137 retval <- numeric()
|
|
138 for ( g in grps ) {
|
|
139 one.v.all.survival.data <- survival.data
|
|
140 tmp <- as.numeric( one.v.all.survival.data$group_num )
|
|
141 tmp[ ! tmp %in% g ] <- -1
|
|
142 tmp[ tmp %in% g ] <- 1
|
|
143 tmp[ tmp %in% -1 ] <- 2
|
|
144 one.v.all.survival.data$group_num <- tmp
|
|
145 surv.res <- survdiff( Surv( time, status )~group_num, data=one.v.all.survival.data )
|
|
146 pval <- surv.res$chisq
|
|
147 df <- length( surv.res$n ) - 1
|
|
148 pval <- pchisq( pval, df=df, lower=F )
|
|
149 retval <- c( retval, pval )
|
|
150 }
|
|
151 names( retval ) <- grps
|
|
152 return( retval )
|
|
153 }
|
|
154
|
|
155
|
|
156 if ( opt$mode == "all" ) {
|
|
157
|
|
158 pval <- calc.all.pval( survival.data )
|
|
159 log.rank <- paste( "Log Rank p-value:", sprintf( "%1.2e",pval ) )
|
|
160 opt$title <- paste( opt$title, log.rank, sep="\n" )
|
|
161 } else {
|
|
162 if ( opt$mode == "one" ) {
|
|
163
|
|
164 pvals <- calc.one.v.others.pval( survival.data )
|
|
165 min.p <- min( pvals, na.rm=T )
|
|
166 if ( length( min.p ) == 0 ) {
|
|
167 stop( 'no valid p-value returned from the one-v-others test\n' )
|
|
168 }
|
|
169 cluster.num <- names( pvals )[ pvals == min.p ]
|
|
170 pval <- pvals[ cluster.num ]
|
|
171 log.rank <- paste( "Log Rank p-value for cluster", cluster.num,"is:", sprintf( "%1.2e",pval ) )
|
|
172 opt$title <- paste( opt$title, log.rank, sep="\n" )
|
|
173 } else {
|
|
174 if ( opt$mode== "both" ) {
|
|
175 ## add the all-v-all p-value
|
|
176 bak <- pval <- calc.all.pval( survival.data )
|
|
177 log.rank <- paste( "Log Rank p-value:", sprintf( "%1.2e",pval ) )
|
|
178 opt$title <- paste( opt$title, log.rank, sep="\n" )
|
|
179
|
|
180 ## now add the one-v-all p-value
|
|
181 pvals <- calc.one.v.others.pval( survival.data )
|
|
182 min.p <- min( pvals, na.rm=T )
|
|
183 if ( length( min.p ) == 0 ) {
|
|
184 stop( 'no valid p-value returned from the one-v-others test\n' )
|
|
185 }
|
|
186 cluster.num <- names( pvals )[ pvals == min.p ]
|
|
187 pval <- pvals[ cluster.num ]
|
|
188 log.rank <- paste( "Log Rank p-value for cluster", cluster.num,"is:", sprintf( "%1.2e",pval ) )
|
|
189 opt$title <- paste( opt$title, log.rank, sep="\n" )
|
|
190
|
|
191 if ( opt$pval.only ) {
|
|
192 pval <- min( c( bak, pval ), na.rm=T )
|
|
193 }
|
|
194 }
|
|
195 else {
|
|
196 stop( "invalid mode specified, mode = ", opt$mode, "\n" )
|
|
197 }
|
|
198 }
|
|
199 }
|
|
200
|
|
201 if ( opt$pval.only ) {
|
|
202 cat( paste(pval, "\n", sep="" ), file=stdout() )
|
|
203 }
|
|
204
|
|
205
|
|
206 if ( ! opt$pval.only ) {
|
|
207 ngrps <- length( unique( survival.data$group_num ) )
|
|
208 col.map <- rainbow( ngrps )
|
|
209
|
|
210
|
|
211
|
|
212 ##postscript( opt$output.fname, horizontal=T, paper='letter' )
|
|
213 if ( opt$image.format == 'png' ) {
|
|
214 plot.dev <- png( opt$output.fname,
|
|
215 width=11,
|
|
216 height=8.5,
|
|
217 units='in',
|
|
218 res=72 )
|
|
219 } else if ( opt$image.format == 'pdf' ) {
|
|
220 pdf( opt$output.fname,
|
|
221 paper="letter" )
|
|
222 } else if ( opt$image.format == 'none' ) {
|
|
223 ## do nothing - this allows other scripts to call this and hopefully plot into them
|
|
224 ## NOPE, this doesn't work. see what I do with the myplots.rda file
|
|
225 }
|
|
226
|
|
227 plot( survfit( Surv( time, status )~group_num, data=survival.data ),
|
|
228 main = opt$title,
|
|
229 ##lty = 1:ngrps,
|
|
230 lty=1,
|
|
231 col=col.map,
|
|
232 ylab = "Probability",
|
|
233 xlab = "Survival Time in Days",
|
|
234 )
|
|
235
|
|
236
|
|
237 ## set the legend.labels if they're still not set yet
|
|
238 if( ! exists( "legend.labels" ) ) {
|
|
239 grp.counts <- table( as.factor( survival.data[, "group_num" ] ) )
|
|
240 legend.labels <- paste( "Cluster", 1:ngrps, paste( "(n=", as.integer(grp.counts), ")", sep="" ) )
|
|
241 }
|
|
242
|
|
243 legend( "topright",
|
|
244 lty=1,
|
|
245 col=col.map,
|
|
246 bty = "n",
|
|
247 legend=legend.labels
|
|
248 )
|
|
249
|
|
250 if( opt$image.format != "none" ) dev.off()
|
|
251 }
|