Mercurial > repos > peter-waltman > ucsc_cluster_tools
comparison cluster.tools/gen.survival.curves.R @ 2:b442996b66ae draft
Uploaded
author | peter-waltman |
---|---|
date | Wed, 27 Feb 2013 20:17:04 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:e25d2bece0a2 | 2:b442996b66ae |
---|---|
1 #!/usr/bin/env Rscript | |
2 ## | |
3 ## Calculates the log-rank test for a given clustering, in the output format from ConsensusClusterPlus | |
4 ## | |
5 ## Input (required): | |
6 ## - consensus cluster file (consensusClass.csv file) | |
7 ## - survival data | |
8 ## Input (optional): | |
9 ## Output: a KM plot, with the most significant p-value. Output to stdout can be captured by re-direction | |
10 ## | |
11 ## Uses: survival library | |
12 ## Date: August 21, 2012 | |
13 ## Author: Peter Waltman | |
14 ## | |
15 | |
16 ##usage, options and doc goes here | |
17 argspec <- c("gen.survival.curves.R takes a clustering from ConsensusClusterPlus and clinical survival data | |
18 and generates a KM-plot, along with the log-rank p-values | |
19 | |
20 Usage: | |
21 gen.survival.curves.R -c <cluster.file> -s <clinical.file> | |
22 Options: | |
23 -o <output file> (postscript) | |
24 -m <mode> (all, one, both) | |
25 \"all\" - perform all-vs-all log-rank test | |
26 \"one\" - perform one-vs-others log-rank test (returns min) | |
27 \"both\" - perform both \"all\" and \"one\" tests | |
28 -t <title> | |
29 -p <pval.only> ( only return the p-value for this given mode - no plotting at all (to screen or postscript)) | |
30 -v <verbose> | |
31 ") | |
32 | |
33 lib.load.quiet <- function( package ) { | |
34 package <- as.character(substitute(package)) | |
35 suppressPackageStartupMessages( do.call( "library", list( package=package ) ) ) | |
36 } | |
37 lib.load.quiet(getopt) | |
38 lib.load.quiet( survival ) | |
39 | |
40 | |
41 args <- commandArgs(TRUE) | |
42 if ( length( args ) == 1 && args =="--help") { | |
43 write(argspec, stderr()) | |
44 q(); | |
45 } | |
46 | |
47 spec <- matrix( c( "cluster.fname", "C", 1, "character", | |
48 "survival.fname", "S", 1, "character", | |
49 "mode", "M", 2, "character", | |
50 "title", "T", 2, "character", | |
51 "image.format", "I", 2, "character", | |
52 "output.fname", "O", 2, "character", | |
53 "pval.only", "P", 0, "logical", | |
54 "verbose", "V", 0, "logical" | |
55 ), | |
56 ncol=4, | |
57 byrow=TRUE | |
58 ) | |
59 opt <- getopt( spec=spec ) | |
60 | |
61 | |
62 #set some reasonable defaults for the options that are needed, | |
63 #but were not specified. | |
64 if ( is.null(opt$mode ) ) { | |
65 opt$mode <- "all" | |
66 } else { | |
67 if ( ! opt$mode %in% c( 'all', 'one', 'both' ) ) { | |
68 stop( "invalid mode specified,' -m", opt$mode, "'. must be either {all, one, both}\n" ) | |
69 } | |
70 } | |
71 if ( is.null( opt$title ) ) { | |
72 opt$title <- opt$cluster.fname | |
73 opt$title <- strsplit( opt$title, "\\/" )[[1]] | |
74 opt$title <- opt$title[ length( opt$title ) ] | |
75 } | |
76 if ( is.null( opt$image.format ) ){ | |
77 opt$image.format <- "png" | |
78 } else { | |
79 if ( ! opt$image.format %in% c( "pdf", "png", "none" ) ) stop( 'invalid image format specified\n' ) | |
80 } | |
81 if ( is.null(opt$output.fname ) ) { opt$output.fname <- paste( opt$mode, "survival.curve", opt$image.format, sep="." ) } | |
82 if ( is.null(opt$cluster.header ) ) { opt$cluster.header = FALSE } | |
83 if ( is.null(opt$pval.only ) ) { opt$pval.only = FALSE } | |
84 if ( is.null(opt$verbose ) ) { opt$verbose = FALSE } | |
85 | |
86 ##print some progress messages to stderr, if requested. | |
87 if ( opt$verbose ) { write("writing...",stderr()); } | |
88 | |
89 load( opt$cluster.fname ) | |
90 cluster.data <- cbind( names( cl ), as.numeric( cl ) ) | |
91 colnames( cluster.data ) <- c( "id", "group_num" ) | |
92 rownames( cluster.data ) <- names( cl ) | |
93 | |
94 survival.data <- read.delim( opt$survival.fname, as.is=TRUE, row.names=1 ) | |
95 survival.data <- cbind( rownames( survival.data ), survival.data ) ## add in the ids, so we can merge on them | |
96 if ( length( colnames( survival.data ) ) == 3 ) { | |
97 ## we have to left-shift the current colanmes to drop the 1st one | |
98 ## b/c cbind will add one for the column we just added | |
99 colnames( survival.data ) <- c( "id", colnames( survival.data )[-1] ) | |
100 } | |
101 if ( length( colnames( survival.data ) ) == 2 ) { | |
102 ## added just in case there's a change to cbind as R is prone to doing | |
103 colnames( survival.data ) <- c( "id", colnames( survival.data ) ) | |
104 } | |
105 survival.data$id <- as.character( survival.data$id ) | |
106 | |
107 | |
108 ## Now, filter so we only contain the same samples | |
109 n.clust.data.samps <- nrow( cluster.data ) | |
110 n.surv.data.samps <- nrow( survival.data ) | |
111 if ( n.clust.data.samps > n.surv.data.samps ) { | |
112 ovp.samples <- rownames( cluster.data ) | |
113 ovp.samples <- ovp.samples[ ovp.samples %in% survival.data$id ] | |
114 } else { | |
115 ovp.samples <- survival.data$id | |
116 ovp.samples <- ovp.samples[ ovp.samples %in% rownames( cluster.data ) ] | |
117 } | |
118 | |
119 cluster.data <- cluster.data[ ovp.samples, , drop=FALSE] | |
120 survival.data <- survival.data[ ovp.samples, ] | |
121 survival.data <- merge( survival.data, cluster.data ) | |
122 | |
123 | |
124 calc.all.pval <- function( survival.data ) { | |
125 survdiff( Surv( time, status )~group_num, data=survival.data ) | |
126 surv.res <- survdiff( Surv( time, status )~group_num, data=survival.data ) | |
127 pval <- surv.res$chisq | |
128 df <- length( surv.res$n ) - 1 | |
129 pval <- pchisq( pval, df=df, lower=F ) | |
130 return( pval ) | |
131 } | |
132 | |
133 calc.one.v.others.pval <- function( survival.data ) { | |
134 grps <- sort( unique( as.numeric( survival.data$group_num ) ) ) | |
135 | |
136 retval <- numeric() | |
137 for ( g in grps ) { | |
138 one.v.all.survival.data <- survival.data | |
139 tmp <- as.numeric( one.v.all.survival.data$group_num ) | |
140 tmp[ ! tmp %in% g ] <- -1 | |
141 tmp[ tmp %in% g ] <- 1 | |
142 tmp[ tmp %in% -1 ] <- 2 | |
143 one.v.all.survival.data$group_num <- tmp | |
144 surv.res <- survdiff( Surv( time, status )~group_num, data=one.v.all.survival.data ) | |
145 pval <- surv.res$chisq | |
146 df <- length( surv.res$n ) - 1 | |
147 pval <- pchisq( pval, df=df, lower=F ) | |
148 retval <- c( retval, pval ) | |
149 } | |
150 names( retval ) <- grps | |
151 return( retval ) | |
152 } | |
153 | |
154 | |
155 if ( opt$mode == "all" ) { | |
156 | |
157 pval <- calc.all.pval( survival.data ) | |
158 log.rank <- paste( "Log Rank p-value:", sprintf( "%1.2e",pval ) ) | |
159 opt$title <- paste( opt$title, log.rank, sep="\n" ) | |
160 } else { | |
161 if ( opt$mode == "one" ) { | |
162 | |
163 pvals <- calc.one.v.others.pval( survival.data ) | |
164 min.p <- min( pvals, na.rm=T ) | |
165 if ( length( min.p ) == 0 ) { | |
166 stop( 'no valid p-value returned from the one-v-others test\n' ) | |
167 } | |
168 cluster.num <- names( pvals )[ pvals == min.p ] | |
169 pval <- pvals[ cluster.num ] | |
170 log.rank <- paste( "Log Rank p-value for cluster", cluster.num,"is:", sprintf( "%1.2e",pval ) ) | |
171 opt$title <- paste( opt$title, log.rank, sep="\n" ) | |
172 } else { | |
173 if ( opt$mode== "both" ) { | |
174 ## add the all-v-all p-value | |
175 bak <- pval <- calc.all.pval( survival.data ) | |
176 log.rank <- paste( "Log Rank p-value:", sprintf( "%1.2e",pval ) ) | |
177 opt$title <- paste( opt$title, log.rank, sep="\n" ) | |
178 | |
179 ## now add the one-v-all p-value | |
180 pvals <- calc.one.v.others.pval( survival.data ) | |
181 min.p <- min( pvals, na.rm=T ) | |
182 if ( length( min.p ) == 0 ) { | |
183 stop( 'no valid p-value returned from the one-v-others test\n' ) | |
184 } | |
185 cluster.num <- names( pvals )[ pvals == min.p ] | |
186 pval <- pvals[ cluster.num ] | |
187 log.rank <- paste( "Log Rank p-value for cluster", cluster.num,"is:", sprintf( "%1.2e",pval ) ) | |
188 opt$title <- paste( opt$title, log.rank, sep="\n" ) | |
189 | |
190 if ( opt$pval.only ) { | |
191 pval <- min( c( bak, pval ), na.rm=T ) | |
192 } | |
193 } | |
194 else { | |
195 stop( "invalid mode specified, mode = ", opt$mode, "\n" ) | |
196 } | |
197 } | |
198 } | |
199 | |
200 if ( opt$pval.only ) { | |
201 cat( paste(pval, "\n", sep="" ), file=stdout() ) | |
202 } | |
203 | |
204 | |
205 if ( ! opt$pval.only ) { | |
206 ngrps <- length( unique( survival.data$group_num ) ) | |
207 col.map <- rainbow( ngrps ) | |
208 | |
209 | |
210 | |
211 ##postscript( opt$output.fname, horizontal=T, paper='letter' ) | |
212 if ( opt$image.format == 'png' ) { | |
213 plot.dev <- png( opt$output.fname, | |
214 width=11, | |
215 height=8.5, | |
216 units='in', | |
217 res=72 ) | |
218 } else if ( opt$image.format == 'pdf' ) { | |
219 pdf( opt$output.fname, | |
220 paper="letter" ) | |
221 } else if ( opt$image.format == 'none' ) { | |
222 ## do nothing - this allows other scripts to call this and hopefully plot into them | |
223 | |
224 } | |
225 | |
226 plot( survfit( Surv( time, status )~group_num, data=survival.data ), | |
227 main = opt$title, | |
228 ##lty = 1:ngrps, | |
229 lty=1, | |
230 col=col.map, | |
231 ylab = "Probability", | |
232 xlab = "Survival Time in Days", | |
233 ) | |
234 | |
235 ## set the legend.labels if they're still not set yet | |
236 if( ! exists( "legend.labels" ) ) { | |
237 grp.counts <- table( as.factor( survival.data[, "group_num" ] ) ) | |
238 legend.labels <- paste( "Cluster", 1:ngrps, paste( "(n=", as.integer(grp.counts), ")", sep="" ) ) | |
239 } | |
240 | |
241 legend( "topright", | |
242 lty=1, | |
243 col=col.map, | |
244 bty = "n", | |
245 legend=legend.labels | |
246 ) | |
247 | |
248 if( opt$image.format != "none" ) dev.off() | |
249 } |