Mercurial > repos > peter-waltman > ucsc_cluster_tools2
diff cluster.tools/gen.survival.curves.R @ 0:0decf3fd54bc draft
Uploaded
author | peter-waltman |
---|---|
date | Thu, 28 Feb 2013 01:45:39 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cluster.tools/gen.survival.curves.R Thu Feb 28 01:45:39 2013 -0500 @@ -0,0 +1,251 @@ +#!/usr/bin/env Rscript +## +## Calculates the log-rank test for a given clustering, in the output format from ConsensusClusterPlus +## +## Input (required): +## - consensus cluster file (consensusClass.csv file) +## - survival data +## Input (optional): +## Output: a KM plot, with the most significant p-value. Output to stdout can be captured by re-direction +## +## Uses: survival library +## Date: August 21, 2012 +## Author: Peter Waltman +## + +##usage, options and doc goes here +argspec <- c("gen.survival.curves.R takes a clustering from ConsensusClusterPlus and clinical survival data +and generates a KM-plot, along with the log-rank p-values + + Usage: + gen.survival.curves.R -c <cluster.file> -s <clinical.file> + Options: + -o <output file> (postscript) + -m <mode> (all, one, both) + \"all\" - perform all-vs-all log-rank test + \"one\" - perform one-vs-others log-rank test (returns min) + \"both\" - perform both \"all\" and \"one\" tests + -t <title> + -p <pval.only> ( only return the p-value for this given mode - no plotting at all (to screen or postscript)) + -v <verbose> + ") + +lib.load.quiet <- function( package ) { + package <- as.character(substitute(package)) + suppressPackageStartupMessages( do.call( "library", list( package=package ) ) ) +} +lib.load.quiet(getopt) +lib.load.quiet( survival ) + + +args <- commandArgs(TRUE) +if ( length( args ) == 1 && args =="--help") { + write(argspec, stderr()) + q(); +} + +spec <- matrix( c( "cluster.fname", "C", 1, "character", + "survival.fname", "S", 1, "character", + "mode", "M", 2, "character", + "title", "T", 2, "character", + "myplots.rda", "R", 2, "character", + "image.format", "I", 2, "character", + "output.fname", "O", 2, "character", + "pval.only", "P", 0, "logical", + "verbose", "V", 0, "logical" + ), + ncol=4, + byrow=TRUE + ) +opt <- getopt( spec=spec ) + + +#set some reasonable defaults for the options that are needed, +#but were not specified. +if ( is.null(opt$mode ) ) { + opt$mode <- "all" +} else { + if ( ! opt$mode %in% c( 'all', 'one', 'both' ) ) { + stop( "invalid mode specified,' -m", opt$mode, "'. must be either {all, one, both}\n" ) + } +} +if ( is.null( opt$title ) ) { + opt$title <- opt$cluster.fname + opt$title <- strsplit( opt$title, "\\/" )[[1]] + opt$title <- opt$title[ length( opt$title ) ] +} +if ( is.null( opt$image.format ) ){ + opt$image.format <- "png" +} else { + if ( ! opt$image.format %in% c( "pdf", "png", "none" ) ) stop( 'invalid image format specified\n' ) +} +if ( is.null(opt$output.fname ) ) { opt$output.fname <- paste( opt$mode, "survival.curve", opt$image.format, sep="." ) } +if ( is.null(opt$cluster.header ) ) { opt$cluster.header = FALSE } +if ( is.null(opt$pval.only ) ) { opt$pval.only = FALSE } +if ( is.null(opt$verbose ) ) { opt$verbose = FALSE } + +##print some progress messages to stderr, if requested. +if ( opt$verbose ) { write("writing...",stderr()); } + +load( opt$cluster.fname ) +cluster.data <- cbind( names( cl ), as.numeric( cl ) ) +colnames( cluster.data ) <- c( "id", "group_num" ) +rownames( cluster.data ) <- names( cl ) + +survival.data <- read.delim( opt$survival.fname, as.is=TRUE, row.names=1 ) +survival.data <- cbind( rownames( survival.data ), survival.data ) ## add in the ids, so we can merge on them +if ( length( colnames( survival.data ) ) == 3 ) { + ## we have to left-shift the current colanmes to drop the 1st one + ## b/c cbind will add one for the column we just added + colnames( survival.data ) <- c( "id", colnames( survival.data )[-1] ) +} +if ( length( colnames( survival.data ) ) == 2 ) { + ## added just in case there's a change to cbind as R is prone to doing + colnames( survival.data ) <- c( "id", colnames( survival.data ) ) +} +survival.data$id <- as.character( survival.data$id ) + + +## Now, filter so we only contain the same samples +n.clust.data.samps <- nrow( cluster.data ) +n.surv.data.samps <- nrow( survival.data ) +if ( n.clust.data.samps > n.surv.data.samps ) { + ovp.samples <- rownames( cluster.data ) + ovp.samples <- ovp.samples[ ovp.samples %in% survival.data$id ] +} else { + ovp.samples <- survival.data$id + ovp.samples <- ovp.samples[ ovp.samples %in% rownames( cluster.data ) ] +} + +cluster.data <- cluster.data[ ovp.samples, , drop=FALSE] +survival.data <- survival.data[ ovp.samples, ] +survival.data <- merge( survival.data, cluster.data ) + + +calc.all.pval <- function( survival.data ) { + survdiff( Surv( time, status )~group_num, data=survival.data ) + surv.res <- survdiff( Surv( time, status )~group_num, data=survival.data ) + pval <- surv.res$chisq + df <- length( surv.res$n ) - 1 + pval <- pchisq( pval, df=df, lower=F ) + return( pval ) +} + +calc.one.v.others.pval <- function( survival.data ) { + grps <- sort( unique( as.numeric( survival.data$group_num ) ) ) + + retval <- numeric() + for ( g in grps ) { + one.v.all.survival.data <- survival.data + tmp <- as.numeric( one.v.all.survival.data$group_num ) + tmp[ ! tmp %in% g ] <- -1 + tmp[ tmp %in% g ] <- 1 + tmp[ tmp %in% -1 ] <- 2 + one.v.all.survival.data$group_num <- tmp + surv.res <- survdiff( Surv( time, status )~group_num, data=one.v.all.survival.data ) + pval <- surv.res$chisq + df <- length( surv.res$n ) - 1 + pval <- pchisq( pval, df=df, lower=F ) + retval <- c( retval, pval ) + } + names( retval ) <- grps + return( retval ) +} + + +if ( opt$mode == "all" ) { + + pval <- calc.all.pval( survival.data ) + log.rank <- paste( "Log Rank p-value:", sprintf( "%1.2e",pval ) ) + opt$title <- paste( opt$title, log.rank, sep="\n" ) +} else { + if ( opt$mode == "one" ) { + + pvals <- calc.one.v.others.pval( survival.data ) + min.p <- min( pvals, na.rm=T ) + if ( length( min.p ) == 0 ) { + stop( 'no valid p-value returned from the one-v-others test\n' ) + } + cluster.num <- names( pvals )[ pvals == min.p ] + pval <- pvals[ cluster.num ] + log.rank <- paste( "Log Rank p-value for cluster", cluster.num,"is:", sprintf( "%1.2e",pval ) ) + opt$title <- paste( opt$title, log.rank, sep="\n" ) + } else { + if ( opt$mode== "both" ) { + ## add the all-v-all p-value + bak <- pval <- calc.all.pval( survival.data ) + log.rank <- paste( "Log Rank p-value:", sprintf( "%1.2e",pval ) ) + opt$title <- paste( opt$title, log.rank, sep="\n" ) + + ## now add the one-v-all p-value + pvals <- calc.one.v.others.pval( survival.data ) + min.p <- min( pvals, na.rm=T ) + if ( length( min.p ) == 0 ) { + stop( 'no valid p-value returned from the one-v-others test\n' ) + } + cluster.num <- names( pvals )[ pvals == min.p ] + pval <- pvals[ cluster.num ] + log.rank <- paste( "Log Rank p-value for cluster", cluster.num,"is:", sprintf( "%1.2e",pval ) ) + opt$title <- paste( opt$title, log.rank, sep="\n" ) + + if ( opt$pval.only ) { + pval <- min( c( bak, pval ), na.rm=T ) + } + } + else { + stop( "invalid mode specified, mode = ", opt$mode, "\n" ) + } + } +} + +if ( opt$pval.only ) { + cat( paste(pval, "\n", sep="" ), file=stdout() ) +} + + +if ( ! opt$pval.only ) { + ngrps <- length( unique( survival.data$group_num ) ) + col.map <- rainbow( ngrps ) + + + + ##postscript( opt$output.fname, horizontal=T, paper='letter' ) + if ( opt$image.format == 'png' ) { + plot.dev <- png( opt$output.fname, + width=11, + height=8.5, + units='in', + res=72 ) + } else if ( opt$image.format == 'pdf' ) { + pdf( opt$output.fname, + paper="letter" ) + } else if ( opt$image.format == 'none' ) { + ## do nothing - this allows other scripts to call this and hopefully plot into them + ## NOPE, this doesn't work. see what I do with the myplots.rda file + } + + plot( survfit( Surv( time, status )~group_num, data=survival.data ), + main = opt$title, + ##lty = 1:ngrps, + lty=1, + col=col.map, + ylab = "Probability", + xlab = "Survival Time in Days", + ) + + + ## set the legend.labels if they're still not set yet + if( ! exists( "legend.labels" ) ) { + grp.counts <- table( as.factor( survival.data[, "group_num" ] ) ) + legend.labels <- paste( "Cluster", 1:ngrps, paste( "(n=", as.integer(grp.counts), ")", sep="" ) ) + } + + legend( "topright", + lty=1, + col=col.map, + bty = "n", + legend=legend.labels + ) + + if( opt$image.format != "none" ) dev.off() +}