diff cluster.tools/remove.tcga.normals.R @ 3:563832f48c08 draft

Uploaded
author peter-waltman
date Fri, 01 Mar 2013 19:51:25 -0500
parents 0decf3fd54bc
children
line wrap: on
line diff
--- a/cluster.tools/remove.tcga.normals.R	Fri Mar 01 10:17:24 2013 -0500
+++ b/cluster.tools/remove.tcga.normals.R	Fri Mar 01 19:51:25 2013 -0500
@@ -12,6 +12,14 @@
   q();
 }
 
+## Write `mat` to tab-delimited `fname`: a header row of "ID" + column names, then one row
+## per matrix row led by its row name; index labels stand in for any absent dimnames.
+write.2.tab <- function( mat, fname ) {
+  mat <- as.matrix( mat )   # tolerate a bare vector (treated as a single column)
+  ids <- if ( is.null( rownames( mat ) ) ) seq_len( nrow( mat ) ) else rownames( mat )
+  hdr <- if ( is.null( colnames( mat ) ) ) seq_len( ncol( mat ) ) else colnames( mat )
+  write.table( rbind( c( "ID", hdr ), cbind( ids, mat ) ), fname, sep="\t", row.names=FALSE, col.names=FALSE, quote=FALSE )
+}
 lib.load.quiet <- function( package ) {
    package <- as.character(substitute(package))
    suppressPackageStartupMessages( do.call( "library", list( package=package ) ) )
@@ -19,7 +27,9 @@
 lib.load.quiet(getopt)
 
 spec <- matrix( c( "data.fname",      "d", 1, "character",
-                   "output.fname",    "o", 2, "character"
+                   "output.fname",    "o", 2, "character",
+                   "return.normals",  "r", 2, "character",
+                   "out.norm.fname",  "O", 2, "character"
                    ),
                 nc=4,
                 byrow=TRUE
@@ -27,15 +37,34 @@
 
 opt <- getopt( spec=spec )
 if ( is.null( opt$output.fname ) ) { opt$output.fname <- 'merge_merge.tumors.tab' }
+## return.normals becomes TRUE only for a case-insensitive "yes"; unset or anything else is FALSE
+opt$return.normals <- if ( is.null( opt$return.normals ) ) FALSE else
+                        ( tolower( opt$return.normals ) %in% "yes" )
+if ( is.null( opt$out.norm.fname ) ) {   # no path supplied: use the fixed default name
+  opt$out.norm.fname <- 'merge_merge.normals.tab'
+}
 
 mat <- as.matrix( read.delim( opt$data.fname, row.names=1, check.names=FALSE ) )
-if ( length( strsplit( colnames( mat ), "-" )[[1]] ) == 4 ) {
+## Split off normal samples: columns whose 4th "-"-separated barcode field starts with "1".
+norms <- matrix( NA, nc=0, nr=nrow( mat ), dimnames=list( rownames( mat ), c() ) )
+norm.msg <- "no normals found in supplied matrix\n"   # written when there is no normals table
+if ( length( strsplit( colnames( mat ), "-" )[[1]] ) > 3 ) {
   cnames <-  sapply( strsplit( colnames( mat ), "-" ), function(x) x[4] )
   norms <- grepl( "^1", cnames )
 
   if ( sum( norms ) > 0  ) {
     tumors <- ! norms
-    mat <- mat[, tumors ]
+    norms <- mat[, norms, drop=FALSE ]   # drop=FALSE: a lone column must stay a matrix
+    mat <- mat[, tumors, drop=FALSE ]
   }
+} else {
+  norm.msg <- "TCGA ID barcodes in supplied file only provide patient sample info (no aliquot components are in IDs)\n"
 }
-write.table( mat, opt$output.fname, quote=FALSE, sep="\t", col.names=NA )
+write.2.tab( mat, opt$output.fname )
+## norms stays the logical mask when no barcode matched, hence is.matrix() before ncol()
+has.norms <- is.matrix( norms ) && ncol( norms ) > 0
+if ( opt$return.normals && has.norms ) {
+  write.2.tab( norms, opt$out.norm.fname )
+} else if ( opt$return.normals ) {
+  writeLines( norm.msg, opt$out.norm.fname )
+}