changeset 3:563832f48c08 draft

Uploaded
author peter-waltman
date Fri, 01 Mar 2013 19:51:25 -0500
parents b8f262149ee2
children 2c225e2f0acb
files cluster.tools/cluster.2.centroid.xml cluster.tools/extract.TCGA.survival.data.R cluster.tools/extract.TCGA.survival.data.xml cluster.tools/fix.and.merge.TCGA.sample.IDs.R cluster.tools/fix.and.merge.TCGA.sample.IDs.xml cluster.tools/format.raw.TCGA.RNASeq.data.xml cluster.tools/gen.matrix.heatmap.xml cluster.tools/heatmap.from.cluster.result.R cluster.tools/heatmap.from.cluster.result.xml cluster.tools/remove.tcga.normals.R cluster.tools/remove.tcga.normals.xml cluster.tools/rnaseq.feature.selection.R cluster.tools/rnaseq.feature.selection.xml
diffstat 13 files changed, 67 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/cluster.tools/cluster.2.centroid.xml	Fri Mar 01 10:17:24 2013 -0500
+++ b/cluster.tools/cluster.2.centroid.xml	Fri Mar 01 19:51:25 2013 -0500
@@ -8,8 +8,8 @@
     <inputs>
     	<param name="dataset" type="data" format='rdata' label="Cluster Result" help="Specify the cluster result to analayze (MUST BE IN rdata format; see help below)"/>
 	<param name="gen_new_dgram" type='select' display="radio" label="Re-calculate cluster tree for new matrix ?" help="Specify whether or not to re-calculate a dendrogram for the cluster centroid matrix">
+	  <option value="yes" selected='true' >Yes</option>
 	  <option value="no">No</option>
-	  <option value="yes" selected='true' >Yes</option>
 	</param>
 
     </inputs>
--- a/cluster.tools/extract.TCGA.survival.data.R	Fri Mar 01 10:17:24 2013 -0500
+++ b/cluster.tools/extract.TCGA.survival.data.R	Fri Mar 01 19:51:25 2013 -0500
@@ -93,6 +93,7 @@
   }
   clinical.data <- as.numeric( ! grepl( "(LIVING|Not)", clinical.data ) )
 }
+
 if ( is.null(opt$time.column ) ) {
   time.colname <- "CDE.clinical_time"
   
--- a/cluster.tools/extract.TCGA.survival.data.xml	Fri Mar 01 10:17:24 2013 -0500
+++ b/cluster.tools/extract.TCGA.survival.data.xml	Fri Mar 01 19:51:25 2013 -0500
@@ -5,10 +5,10 @@
 
 </command>
     <inputs>
-    	<param name="dataset" type="data" format='tabular' label="Raw Clinical Data"/>
+    	<param name="dataset" type="data" format='tabular' label="Raw TCGA Clinical Data"/>
     </inputs>
     <outputs>
-        <data format="tabular" name="output" label="Formatted Clinical Data"/>
+        <data format="tabular" name="output" label="TCGA Survival Data"/>
     </outputs>
 <help>
 .. class:: infomark
--- a/cluster.tools/fix.and.merge.TCGA.sample.IDs.R	Fri Mar 01 10:17:24 2013 -0500
+++ b/cluster.tools/fix.and.merge.TCGA.sample.IDs.R	Fri Mar 01 19:51:25 2013 -0500
@@ -48,7 +48,6 @@
   cat( err.msg, file=opt$output.fname )
   stop( err.msg )
 }
-
 remove.periods.from.ids <- function( ids ) {
   return( gsub( "\\.", "-", ids ) )
 }
@@ -115,6 +114,7 @@
   if ( nelts > 3 ) {
     normals <- grepl( "^TCGA-..-....-1", cnames )
     data <- data[ , (! normals ), drop=FALSE ]
+    cnames <- cnames[ ! normals ]
   }
 }
 
--- a/cluster.tools/fix.and.merge.TCGA.sample.IDs.xml	Fri Mar 01 10:17:24 2013 -0500
+++ b/cluster.tools/fix.and.merge.TCGA.sample.IDs.xml	Fri Mar 01 19:51:25 2013 -0500
@@ -1,13 +1,13 @@
 <tool id="fix_and_merge_TCGA_samples_IDs" name="Fix and Merge TCGA sample IDs" force_history_refresh="True">
     <command interpreter="python">fix.and.merge.TCGA.sample.IDs.py
--d $dataset -n ${num_components} ${remove_normals}
+-d $dataset 
+-n ${num_components}
 -o ${output}
 
 </command>
     <inputs>
     	<param name="dataset" type="data" format='tabular' label="Matrix with Full TCGA Aliquot Barcodes"/>
 	<param name="num_components" type="integer" label="Number of barcode components to use (min number is 3)" value="3" />
-	<param name="remove_normals" type="boolean" label="Remove Normals from Matrix? (check to exclude)" truevalue="-r" falsevalue="" checked="True" />
     </inputs>
     <outputs>
         <data format="tabular" name="output" label="Matrix with TCGA Patient Barcodes (filtered and merged)"/>
@@ -27,7 +27,5 @@
 
 - **Number of barcode components to use** Specify the number of barcode components to use in new matrix that is produced **(min number is 3)**
 
-- **Remove Normals from Matrix?** - Remove any normals from the matrix (if necessary)
-
 </help>
 </tool>
--- a/cluster.tools/format.raw.TCGA.RNASeq.data.xml	Fri Mar 01 10:17:24 2013 -0500
+++ b/cluster.tools/format.raw.TCGA.RNASeq.data.xml	Fri Mar 01 19:51:25 2013 -0500
@@ -9,8 +9,8 @@
     <inputs>
     	<param name="dataset" type="data" format='tabular' label="Raw TCGA RNASeq Data"/>
         <param name="log_transform" type='select' display="radio" label="Log-transform data?" help="Specify whether or not to log-transform the data matrix (log(x+1))">
+          <option value="yes" selected='true' >Yes</option>
           <option value="no">No</option>
-          <option value="yes" selected='true' >Yes</option>
         </param>
         <param name="filter_low_variant" type='integer' label="Filter Threshold for Low-variant genes?" value="10" help="Specify threshold for minimum median value for all genes (-1 to use no filter)">
           <option value="yes" selected='true' >Yes</option>
--- a/cluster.tools/gen.matrix.heatmap.xml	Fri Mar 01 10:17:24 2013 -0500
+++ b/cluster.tools/gen.matrix.heatmap.xml	Fri Mar 01 19:51:25 2013 -0500
@@ -26,12 +26,12 @@
     </param>
   </conditional>
   <param name="output_treeview_format" type='select' display="radio" label="Output in TreeView format as well?" help="Specify whether or not to produce files for TreeView">
+    <option value="yes" selected='true' >Yes</option>
     <option value="no">No</option>
-    <option value="yes" selected='true' >Yes</option>
   </param>
   <param name="reverse_rows"  type='select'  display="radio" label="Reverse Row-order (to make consistent w\TreeView Display)?" help="Specify whether or not to reverse Row-order (to make the heatmap consistent with output from TreeView (reversed otherwise)">
+    <option value="yes">Yes</option>
     <option value="no" selected='true'>No</option>
-    <option value="yes">Yes</option>
   </param>
 </inputs>
 <outputs>
--- a/cluster.tools/heatmap.from.cluster.result.R	Fri Mar 01 10:17:24 2013 -0500
+++ b/cluster.tools/heatmap.from.cluster.result.R	Fri Mar 01 19:51:25 2013 -0500
@@ -46,7 +46,7 @@
                    "survival.fname",      "S", 2, "character",
                    "survival.image",      "I", 2, "character",
                    "survival.mode",       "M", 2, "character",
-                   "title",               "T", 2, "character"
+                   "survival.title",      "T", 2, "character"
                    ),
                 nc=4,
                 byrow=TRUE
@@ -332,6 +332,8 @@
   cmd.string <- paste( cmd.string, "-C", opt$dataset )
   cmd.string <- paste( cmd.string, "-S", opt$survival.fname )
   cmd.string <- paste( cmd.string, "-M", opt$survival.mode )
+  if ( ! is.null( opt$survival.title ) )
+    cmd.string <- paste( cmd.string, "-T", opt$survival.title )
 
   ##  only call kms if we're the image is png
   if ( opt$image.format=="png" ) {
--- a/cluster.tools/heatmap.from.cluster.result.xml	Fri Mar 01 10:17:24 2013 -0500
+++ b/cluster.tools/heatmap.from.cluster.result.xml	Fri Mar 01 19:51:25 2013 -0500
@@ -35,8 +35,8 @@
   <param format="rdata" name="dataset1" type="data" label="Clustering Classification" help="Cluster result file from CCPLUS, HAC, or PAM"/>
   <conditional name="plot_kms_cond">
     <param name="plot_kms" type='select' label="Plot Kaplan-Meiers Survival Plot as well (primary clustering ONLY)?" help="NOTE: this only works when the Image Format is PNG.  For a PDF of the KM plot, you can use the 'Generate Kaplan-Meiers Plot for Cluster Result' tool">
+      <option value="yes" >Yes</option>
       <option value="no" selected='true'>No</option>
-      <option value="yes" >Yes</option>
     </param>
     <when value='yes'>
       <param name="survial_data" type="data" format="tabular" label="Clinical Data" help="Specify the clinical data to use for the Kaplan-Meiers Plot (see help)"/>
@@ -50,8 +50,8 @@
   </conditional>
   <conditional name="cluster_second_direction_cond">
     <param name="cluster_second_direction" type="select" label="Cluster the second dimension? (e.g. rows if this is a sample cluster)"  help="Cluster the 2nd dimension of matrix in the cluster result (see help below)">
+      <option value="yes">Yes</option>
       <option value="no" selected="true">No</option>
-      <option value="yes">Yes</option>
       <option value="prev">Previous Cluster Result</option>
     </param>
     <when value="prev">
@@ -65,12 +65,12 @@
     </param>
   </conditional>
   <param name="output_treeview_format" type='select' display="radio" label="Output in TreeView format as well?" help="Specify whether or not to produce files for TreeView" >
+    <option value="yes" selected='true' >Yes</option>
     <option value="no">No</option>
-    <option value="yes" selected='true' >Yes</option>
   </param>
   <param name="reverse_rows"  type='select'  display="radio" label="Reverse Row-order (to make consistent w\TreeView Display)?" help="Specify whether or not to reverse Row-order (to make the heatmap consistent with output from TreeView (reversed otherwise)" >
+    <option value="yes">Yes</option>
     <option value="no" selected='true'>No</option>
-    <option value="yes">Yes</option>
   </param>
 </inputs>
 <outputs>
--- a/cluster.tools/remove.tcga.normals.R	Fri Mar 01 10:17:24 2013 -0500
+++ b/cluster.tools/remove.tcga.normals.R	Fri Mar 01 19:51:25 2013 -0500
@@ -12,6 +12,14 @@
   q();
 }
 
+## some helper fn's
+write.2.tab <- function( mat,
+                         fname ) {
+  mat <- rbind( colnames( mat ), mat )
+  mat <- cbind( c( "ID", rownames( mat )[-1] ),
+                      mat )
+  write.table( mat, fname, sep="\t", row.names=FALSE, col.names=FALSE, quote=FALSE )
+}
 lib.load.quiet <- function( package ) {
    package <- as.character(substitute(package))
    suppressPackageStartupMessages( do.call( "library", list( package=package ) ) )
@@ -19,7 +27,9 @@
 lib.load.quiet(getopt)
 
 spec <- matrix( c( "data.fname",      "d", 1, "character",
-                   "output.fname",    "o", 2, "character"
+                   "output.fname",    "o", 2, "character",
+                   "return.normals",  "r", 2, "character",
+                   "out.norm.fname",  "O", 2, "character"
                    ),
                 nc=4,
                 byrow=TRUE
@@ -27,15 +37,34 @@
 
 opt <- getopt( spec=spec )
 if ( is.null( opt$output.fname ) ) { opt$output.fname <- 'merge_merge.tumors.tab' }
+if ( is.null( opt$return.normals ) ) {
+  opt$return.normals <- FALSE
+} else {
+  opt$return.normals <- ( tolower( opt$return.normals ) %in% "yes" )
+}
+if ( is.null( opt$out.norm.fname ) ) { opt$out.norm.fname <- 'merge_merge.normals.tab' }
 
 mat <- as.matrix( read.delim( opt$data.fname, row.names=1, check.names=FALSE ) )
-if ( length( strsplit( colnames( mat ), "-" )[[1]] ) == 4 ) {
+norms <- matrix( NA, nc=0, nr=nrow( mat ), dimnames=list( rownames( mat ), c() ) )
+if ( length( strsplit( colnames( mat ), "-" )[[1]] ) > 3 ) {
   cnames <-  sapply( strsplit( colnames( mat ), "-" ), function(x) x[4] )
   norms <- grepl( "^1", cnames )
 
   if ( sum( norms ) > 0  ) {
     tumors <- ! norms
+    norms <- mat[, norms, drop=FALSE ]
     mat <- mat[, tumors ]
   }
+} else {
+  if ( opt$return.normals ) {
+    writeLines( "TCGA ID barcodes in supplied file only provide patient sample info (no aliquot components are in IDs)\n", opt$out.norm.fname )
+  }
 }
-write.table( mat, opt$output.fname, quote=FALSE, sep="\t", col.names=NA )
+write.2.tab( mat, opt$output.fname )
+if ( opt$return.normals ) {
+  if ( ncol( norms ) > 0 )  {
+    write.2.tab( norms, opt$out.norm.fname )
+  } else {
+    writeLines( "no normals found in supplied matrix\n", opt$out.norm.fname )
+  }
+}
--- a/cluster.tools/remove.tcga.normals.xml	Fri Mar 01 10:17:24 2013 -0500
+++ b/cluster.tools/remove.tcga.normals.xml	Fri Mar 01 19:51:25 2013 -0500
@@ -1,14 +1,24 @@
 <tool id="remove_tcga_normal_samples" name="Remove TGCA Normal Samples" force_history_refresh="True">
     <command interpreter="python">remove.tcga.normals.py
 -d $dataset
--o ${output}
-
-</command>
+-o ${output_tumor}
+-r ${return_normal_matrix}
+#if str($return_normal_matrix) == 'yes':
+-O ${output_normal}
+#end if
+    </command>
     <inputs>
-    	<param name="dataset" type="data" format='tabular' label="Matrix containing TCGA Normal Samples"/>
+      <param name="dataset" type="data" format='tabular' label="Matrix containing TCGA Normal Samples"/>
+      <param name="return_normal_matrix"  type='select'  display="radio" label="Return Normals in Separate File?" help="Specify whether or not to return a file containing the normals">
+	<option value="yes" selected='true' >Yes</option>
+	<option value="no">No</option>
+      </param>
     </inputs>
     <outputs>
-        <data format="tabular" name="output" label="Tumor Sample Matrix"/>
+      <data format="tabular" name="output_tumor" label="Tumor Sample Matrix"/>
+      <data format="tabular" name="output_normal" label="Normal Sample Matrix">
+	<filter>(return_normal_matrix)=="yes"</filter>
+      </data>
     </outputs>
 <help>
 .. class:: infomark
--- a/cluster.tools/rnaseq.feature.selection.R	Fri Mar 01 10:17:24 2013 -0500
+++ b/cluster.tools/rnaseq.feature.selection.R	Fri Mar 01 19:51:25 2013 -0500
@@ -46,7 +46,7 @@
 
 spec <- matrix( c( "data.fname",         "d", 1, "character",
                    "output.fname",       "o", 2, "character",
-                   "var.method",         "m", 2, "character", ## must be either 'active', 'inactive' or 'modulated'
+                   "var.method",         "m", 2, "character",
                    "z.transform",        "z", 2, "character",
                    "perc.pass",          "p", 2, "numeric"
                    ),
--- a/cluster.tools/rnaseq.feature.selection.xml	Fri Mar 01 10:17:24 2013 -0500
+++ b/cluster.tools/rnaseq.feature.selection.xml	Fri Mar 01 19:51:25 2013 -0500
@@ -1,4 +1,4 @@
-<tool id="rnaseq_feature_selection" name="RNASeq Feature Selection" force_history_refresh="True">
+<tool id="rnaseq_feature_selection" name="RNASeq Feature Selection (High Variance Filter)" force_history_refresh="True">
     <command interpreter="python">rnaseq.feature.selection.py
 -d $dataset 
 -z ${z_transform}
@@ -21,7 +21,7 @@
 	<param name="perc_pass" type="float" label="Total number of features to keep" help="Use value >= 1 to indicate exact number of genes. Use value in 0-1 range to specify percentage" value="1500"/>
     </inputs>
     <outputs>
-        <data format="tabular" name="output" label="Filtered RNASeqs"/>
+        <data format="tabular" name="output" label="High Variance Filtered RNASeq Data"/>
     </outputs>
 <help>
 .. class:: infomark