Mercurial > repos > peter-waltman > ucsc_cluster_tools2
comparison cluster.tools/gen.survival.curves.R @ 0:0decf3fd54bc draft
Uploaded
author | peter-waltman |
---|---|
date | Thu, 28 Feb 2013 01:45:39 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0decf3fd54bc |
---|---|
1 #!/usr/bin/env Rscript | |
2 ## | |
3 ## Calculates the log-rank test for a given clustering, in the output format from ConsensusClusterPlus | |
4 ## | |
5 ## Input (required): | |
6 ## - consensus cluster file (consensusClass.csv file) | |
7 ## - survival data | |
8 ## Input (optional): | |
9 ## Output: a KM plot, with the most significant p-value. Output to stdout can be captured by re-direction | |
10 ## | |
11 ## Uses: survival library | |
12 ## Date: August 21, 2012 | |
13 ## Author: Peter Waltman | |
14 ## | |
15 | |
16 ##usage, options and doc goes here | |
17 argspec <- c("gen.survival.curves.R takes a clustering from ConsensusClusterPlus and clinical survival data | |
18 and generates a KM-plot, along with the log-rank p-values | |
19 | |
20 Usage: | |
21 gen.survival.curves.R -c <cluster.file> -s <clinical.file> | |
22 Options: | |
23 -o <output file> (postscript) | |
24 -m <mode> (all, one, both) | |
25 \"all\" - perform all-vs-all log-rank test | |
26 \"one\" - perform one-vs-others log-rank test (returns min) | |
27 \"both\" - perform both \"all\" and \"one\" tests | |
28 -t <title> | |
29 -p <pval.only> ( only return the p-value for this given mode - no plotting at all (to screen or postscript)) | |
30 -v <verbose> | |
31 ") | |
32 | |
33 lib.load.quiet <- function( package ) { | |
34 package <- as.character(substitute(package)) | |
35 suppressPackageStartupMessages( do.call( "library", list( package=package ) ) ) | |
36 } | |
37 lib.load.quiet(getopt) | |
38 lib.load.quiet( survival ) | |
39 | |
40 | |
41 args <- commandArgs(TRUE) | |
42 if ( length( args ) == 1 && args =="--help") { | |
43 write(argspec, stderr()) | |
44 q(); | |
45 } | |
46 | |
47 spec <- matrix( c( "cluster.fname", "C", 1, "character", | |
48 "survival.fname", "S", 1, "character", | |
49 "mode", "M", 2, "character", | |
50 "title", "T", 2, "character", | |
51 "myplots.rda", "R", 2, "character", | |
52 "image.format", "I", 2, "character", | |
53 "output.fname", "O", 2, "character", | |
54 "pval.only", "P", 0, "logical", | |
55 "verbose", "V", 0, "logical" | |
56 ), | |
57 ncol=4, | |
58 byrow=TRUE | |
59 ) | |
60 opt <- getopt( spec=spec ) | |
61 | |
62 | |
63 #set some reasonable defaults for the options that are needed, | |
64 #but were not specified. | |
65 if ( is.null(opt$mode ) ) { | |
66 opt$mode <- "all" | |
67 } else { | |
68 if ( ! opt$mode %in% c( 'all', 'one', 'both' ) ) { | |
69 stop( "invalid mode specified,' -m", opt$mode, "'. must be either {all, one, both}\n" ) | |
70 } | |
71 } | |
72 if ( is.null( opt$title ) ) { | |
73 opt$title <- opt$cluster.fname | |
74 opt$title <- strsplit( opt$title, "\\/" )[[1]] | |
75 opt$title <- opt$title[ length( opt$title ) ] | |
76 } | |
77 if ( is.null( opt$image.format ) ){ | |
78 opt$image.format <- "png" | |
79 } else { | |
80 if ( ! opt$image.format %in% c( "pdf", "png", "none" ) ) stop( 'invalid image format specified\n' ) | |
81 } | |
82 if ( is.null(opt$output.fname ) ) { opt$output.fname <- paste( opt$mode, "survival.curve", opt$image.format, sep="." ) } | |
83 if ( is.null(opt$cluster.header ) ) { opt$cluster.header = FALSE } | |
84 if ( is.null(opt$pval.only ) ) { opt$pval.only = FALSE } | |
85 if ( is.null(opt$verbose ) ) { opt$verbose = FALSE } | |
86 | |
87 ##print some progress messages to stderr, if requested. | |
88 if ( opt$verbose ) { write("writing...",stderr()); } | |
89 | |
90 load( opt$cluster.fname ) | |
91 cluster.data <- cbind( names( cl ), as.numeric( cl ) ) | |
92 colnames( cluster.data ) <- c( "id", "group_num" ) | |
93 rownames( cluster.data ) <- names( cl ) | |
94 | |
95 survival.data <- read.delim( opt$survival.fname, as.is=TRUE, row.names=1 ) | |
96 survival.data <- cbind( rownames( survival.data ), survival.data ) ## add in the ids, so we can merge on them | |
97 if ( length( colnames( survival.data ) ) == 3 ) { | |
98 ## we have to left-shift the current colanmes to drop the 1st one | |
99 ## b/c cbind will add one for the column we just added | |
100 colnames( survival.data ) <- c( "id", colnames( survival.data )[-1] ) | |
101 } | |
102 if ( length( colnames( survival.data ) ) == 2 ) { | |
103 ## added just in case there's a change to cbind as R is prone to doing | |
104 colnames( survival.data ) <- c( "id", colnames( survival.data ) ) | |
105 } | |
106 survival.data$id <- as.character( survival.data$id ) | |
107 | |
108 | |
109 ## Now, filter so we only contain the same samples | |
110 n.clust.data.samps <- nrow( cluster.data ) | |
111 n.surv.data.samps <- nrow( survival.data ) | |
112 if ( n.clust.data.samps > n.surv.data.samps ) { | |
113 ovp.samples <- rownames( cluster.data ) | |
114 ovp.samples <- ovp.samples[ ovp.samples %in% survival.data$id ] | |
115 } else { | |
116 ovp.samples <- survival.data$id | |
117 ovp.samples <- ovp.samples[ ovp.samples %in% rownames( cluster.data ) ] | |
118 } | |
119 | |
120 cluster.data <- cluster.data[ ovp.samples, , drop=FALSE] | |
121 survival.data <- survival.data[ ovp.samples, ] | |
122 survival.data <- merge( survival.data, cluster.data ) | |
123 | |
124 | |
125 calc.all.pval <- function( survival.data ) { | |
126 survdiff( Surv( time, status )~group_num, data=survival.data ) | |
127 surv.res <- survdiff( Surv( time, status )~group_num, data=survival.data ) | |
128 pval <- surv.res$chisq | |
129 df <- length( surv.res$n ) - 1 | |
130 pval <- pchisq( pval, df=df, lower=F ) | |
131 return( pval ) | |
132 } | |
133 | |
134 calc.one.v.others.pval <- function( survival.data ) { | |
135 grps <- sort( unique( as.numeric( survival.data$group_num ) ) ) | |
136 | |
137 retval <- numeric() | |
138 for ( g in grps ) { | |
139 one.v.all.survival.data <- survival.data | |
140 tmp <- as.numeric( one.v.all.survival.data$group_num ) | |
141 tmp[ ! tmp %in% g ] <- -1 | |
142 tmp[ tmp %in% g ] <- 1 | |
143 tmp[ tmp %in% -1 ] <- 2 | |
144 one.v.all.survival.data$group_num <- tmp | |
145 surv.res <- survdiff( Surv( time, status )~group_num, data=one.v.all.survival.data ) | |
146 pval <- surv.res$chisq | |
147 df <- length( surv.res$n ) - 1 | |
148 pval <- pchisq( pval, df=df, lower=F ) | |
149 retval <- c( retval, pval ) | |
150 } | |
151 names( retval ) <- grps | |
152 return( retval ) | |
153 } | |
154 | |
155 | |
156 if ( opt$mode == "all" ) { | |
157 | |
158 pval <- calc.all.pval( survival.data ) | |
159 log.rank <- paste( "Log Rank p-value:", sprintf( "%1.2e",pval ) ) | |
160 opt$title <- paste( opt$title, log.rank, sep="\n" ) | |
161 } else { | |
162 if ( opt$mode == "one" ) { | |
163 | |
164 pvals <- calc.one.v.others.pval( survival.data ) | |
165 min.p <- min( pvals, na.rm=T ) | |
166 if ( length( min.p ) == 0 ) { | |
167 stop( 'no valid p-value returned from the one-v-others test\n' ) | |
168 } | |
169 cluster.num <- names( pvals )[ pvals == min.p ] | |
170 pval <- pvals[ cluster.num ] | |
171 log.rank <- paste( "Log Rank p-value for cluster", cluster.num,"is:", sprintf( "%1.2e",pval ) ) | |
172 opt$title <- paste( opt$title, log.rank, sep="\n" ) | |
173 } else { | |
174 if ( opt$mode== "both" ) { | |
175 ## add the all-v-all p-value | |
176 bak <- pval <- calc.all.pval( survival.data ) | |
177 log.rank <- paste( "Log Rank p-value:", sprintf( "%1.2e",pval ) ) | |
178 opt$title <- paste( opt$title, log.rank, sep="\n" ) | |
179 | |
180 ## now add the one-v-all p-value | |
181 pvals <- calc.one.v.others.pval( survival.data ) | |
182 min.p <- min( pvals, na.rm=T ) | |
183 if ( length( min.p ) == 0 ) { | |
184 stop( 'no valid p-value returned from the one-v-others test\n' ) | |
185 } | |
186 cluster.num <- names( pvals )[ pvals == min.p ] | |
187 pval <- pvals[ cluster.num ] | |
188 log.rank <- paste( "Log Rank p-value for cluster", cluster.num,"is:", sprintf( "%1.2e",pval ) ) | |
189 opt$title <- paste( opt$title, log.rank, sep="\n" ) | |
190 | |
191 if ( opt$pval.only ) { | |
192 pval <- min( c( bak, pval ), na.rm=T ) | |
193 } | |
194 } | |
195 else { | |
196 stop( "invalid mode specified, mode = ", opt$mode, "\n" ) | |
197 } | |
198 } | |
199 } | |
200 | |
201 if ( opt$pval.only ) { | |
202 cat( paste(pval, "\n", sep="" ), file=stdout() ) | |
203 } | |
204 | |
205 | |
206 if ( ! opt$pval.only ) { | |
207 ngrps <- length( unique( survival.data$group_num ) ) | |
208 col.map <- rainbow( ngrps ) | |
209 | |
210 | |
211 | |
212 ##postscript( opt$output.fname, horizontal=T, paper='letter' ) | |
213 if ( opt$image.format == 'png' ) { | |
214 plot.dev <- png( opt$output.fname, | |
215 width=11, | |
216 height=8.5, | |
217 units='in', | |
218 res=72 ) | |
219 } else if ( opt$image.format == 'pdf' ) { | |
220 pdf( opt$output.fname, | |
221 paper="letter" ) | |
222 } else if ( opt$image.format == 'none' ) { | |
223 ## do nothing - this allows other scripts to call this and hopefully plot into them | |
224 ## NOPE, this doesn't work. see what I do with the myplots.rda file | |
225 } | |
226 | |
227 plot( survfit( Surv( time, status )~group_num, data=survival.data ), | |
228 main = opt$title, | |
229 ##lty = 1:ngrps, | |
230 lty=1, | |
231 col=col.map, | |
232 ylab = "Probability", | |
233 xlab = "Survival Time in Days", | |
234 ) | |
235 | |
236 | |
237 ## set the legend.labels if they're still not set yet | |
238 if( ! exists( "legend.labels" ) ) { | |
239 grp.counts <- table( as.factor( survival.data[, "group_num" ] ) ) | |
240 legend.labels <- paste( "Cluster", 1:ngrps, paste( "(n=", as.integer(grp.counts), ")", sep="" ) ) | |
241 } | |
242 | |
243 legend( "topright", | |
244 lty=1, | |
245 col=col.map, | |
246 bty = "n", | |
247 legend=legend.labels | |
248 ) | |
249 | |
250 if( opt$image.format != "none" ) dev.off() | |
251 } |