Mercurial > repos > peter-waltman > ucsc_cluster_tools2
changeset 3:563832f48c08 draft
Uploaded
author | peter-waltman |
---|---|
date | Fri, 01 Mar 2013 19:51:25 -0500 |
parents | b8f262149ee2 |
children | 2c225e2f0acb |
files | cluster.tools/cluster.2.centroid.xml cluster.tools/extract.TCGA.survival.data.R cluster.tools/extract.TCGA.survival.data.xml cluster.tools/fix.and.merge.TCGA.sample.IDs.R cluster.tools/fix.and.merge.TCGA.sample.IDs.xml cluster.tools/format.raw.TCGA.RNASeq.data.xml cluster.tools/gen.matrix.heatmap.xml cluster.tools/heatmap.from.cluster.result.R cluster.tools/heatmap.from.cluster.result.xml cluster.tools/remove.tcga.normals.R cluster.tools/remove.tcga.normals.xml cluster.tools/rnaseq.feature.selection.R cluster.tools/rnaseq.feature.selection.xml |
diffstat | 13 files changed, 67 insertions(+), 27 deletions(-) [+] |
line wrap: on
line diff
--- a/cluster.tools/cluster.2.centroid.xml Fri Mar 01 10:17:24 2013 -0500 +++ b/cluster.tools/cluster.2.centroid.xml Fri Mar 01 19:51:25 2013 -0500 @@ -8,8 +8,8 @@ <inputs> <param name="dataset" type="data" format='rdata' label="Cluster Result" help="Specify the cluster result to analayze (MUST BE IN rdata format; see help below)"/> <param name="gen_new_dgram" type='select' display="radio" label="Re-calculate cluster tree for new matrix ?" help="Specify whether or not to re-calculate a dendrogram for the cluster centroid matrix"> + <option value="yes" selected='true' >Yes</option> <option value="no">No</option> - <option value="yes" selected='true' >Yes</option> </param> </inputs>
--- a/cluster.tools/extract.TCGA.survival.data.R Fri Mar 01 10:17:24 2013 -0500 +++ b/cluster.tools/extract.TCGA.survival.data.R Fri Mar 01 19:51:25 2013 -0500 @@ -93,6 +93,7 @@ } clinical.data <- as.numeric( ! grepl( "(LIVING|Not)", clinical.data ) ) } + if ( is.null(opt$time.column ) ) { time.colname <- "CDE.clinical_time"
--- a/cluster.tools/extract.TCGA.survival.data.xml Fri Mar 01 10:17:24 2013 -0500 +++ b/cluster.tools/extract.TCGA.survival.data.xml Fri Mar 01 19:51:25 2013 -0500 @@ -5,10 +5,10 @@ </command> <inputs> - <param name="dataset" type="data" format='tabular' label="Raw Clinical Data"/> + <param name="dataset" type="data" format='tabular' label="Raw TCGA Clinical Data"/> </inputs> <outputs> - <data format="tabular" name="output" label="Formatted Clinical Data"/> + <data format="tabular" name="output" label="TCGA Survival Data"/> </outputs> <help> .. class:: infomark
--- a/cluster.tools/fix.and.merge.TCGA.sample.IDs.R Fri Mar 01 10:17:24 2013 -0500 +++ b/cluster.tools/fix.and.merge.TCGA.sample.IDs.R Fri Mar 01 19:51:25 2013 -0500 @@ -48,7 +48,6 @@ cat( err.msg, file=opt$output.fname ) stop( err.msg ) } - remove.periods.from.ids <- function( ids ) { return( gsub( "\\.", "-", ids ) ) } @@ -115,6 +114,7 @@ if ( nelts > 3 ) { normals <- grepl( "^TCGA-..-....-1", cnames ) data <- data[ , (! normals ), drop=FALSE ] + cnames <- cnames[ ! normals ] } }
--- a/cluster.tools/fix.and.merge.TCGA.sample.IDs.xml Fri Mar 01 10:17:24 2013 -0500 +++ b/cluster.tools/fix.and.merge.TCGA.sample.IDs.xml Fri Mar 01 19:51:25 2013 -0500 @@ -1,13 +1,13 @@ <tool id="fix_and_merge_TCGA_samples_IDs" name="Fix and Merge TCGA sample IDs" force_history_refresh="True"> <command interpreter="python">fix.and.merge.TCGA.sample.IDs.py --d $dataset -n ${num_components} ${remove_normals} +-d $dataset +-n ${num_components} -o ${output} </command> <inputs> <param name="dataset" type="data" format='tabular' label="Matrix with Full TCGA Aliquot Barcodes"/> <param name="num_components" type="integer" label="Number of barcode components to use (min number is 3)" value="3" /> - <param name="remove_normals" type="boolean" label="Remove Normals from Matrix? (check to exclude)" truevalue="-r" falsevalue="" checked="True" /> </inputs> <outputs> <data format="tabular" name="output" label="Matrix with TCGA Patient Barcodes (filtered and merged)"/> @@ -27,7 +27,5 @@ - **Number of barcode components to use** Specify the number of barcode components to use in new matrix that is produced **(min number is 3)** -- **Remove Normals from Matrix?** - Remove any normals from the matrix (if necessary) - </help> </tool>
--- a/cluster.tools/format.raw.TCGA.RNASeq.data.xml Fri Mar 01 10:17:24 2013 -0500 +++ b/cluster.tools/format.raw.TCGA.RNASeq.data.xml Fri Mar 01 19:51:25 2013 -0500 @@ -9,8 +9,8 @@ <inputs> <param name="dataset" type="data" format='tabular' label="Raw TCGA RNASeq Data"/> <param name="log_transform" type='select' display="radio" label="Log-transform data?" help="Specify whether or not to log-transform the data matrix (log(x+1))"> + <option value="yes" selected='true' >Yes</option> <option value="no">No</option> - <option value="yes" selected='true' >Yes</option> </param> <param name="filter_low_variant" type='integer' label="Filter Threshold for Low-variant genes?" value="10" help="Specify threshold for minimum median value for all genes (-1 to use no filter)"> <option value="yes" selected='true' >Yes</option>
--- a/cluster.tools/gen.matrix.heatmap.xml Fri Mar 01 10:17:24 2013 -0500 +++ b/cluster.tools/gen.matrix.heatmap.xml Fri Mar 01 19:51:25 2013 -0500 @@ -26,12 +26,12 @@ </param> </conditional> <param name="output_treeview_format" type='select' display="radio" label="Output in TreeView format as well?" help="Specify whether or not to produce files for TreeView"> + <option value="yes" selected='true' >Yes</option> <option value="no">No</option> - <option value="yes" selected='true' >Yes</option> </param> <param name="reverse_rows" type='select' display="radio" label="Reverse Row-order (to make consistent w\TreeView Display)?" help="Specify whether or not to reverse Row-order (to make the heatmap consistent with output from TreeView (reversed otherwise)"> + <option value="yes">Yes</option> <option value="no" selected='true'>No</option> - <option value="yes">Yes</option> </param> </inputs> <outputs>
--- a/cluster.tools/heatmap.from.cluster.result.R Fri Mar 01 10:17:24 2013 -0500 +++ b/cluster.tools/heatmap.from.cluster.result.R Fri Mar 01 19:51:25 2013 -0500 @@ -46,7 +46,7 @@ "survival.fname", "S", 2, "character", "survival.image", "I", 2, "character", "survival.mode", "M", 2, "character", - "title", "T", 2, "character" + "survival.title", "T", 2, "character" ), nc=4, byrow=TRUE @@ -332,6 +332,8 @@ cmd.string <- paste( cmd.string, "-C", opt$dataset ) cmd.string <- paste( cmd.string, "-S", opt$survival.fname ) cmd.string <- paste( cmd.string, "-M", opt$survival.mode ) + if ( ! is.null( opt$suvival.title ) ) + cmd.string <- paste( cmd.string, "-T", opt$suvival.title ) ## only call kms if we're the image is png if ( opt$image.format=="png" ) {
--- a/cluster.tools/heatmap.from.cluster.result.xml Fri Mar 01 10:17:24 2013 -0500 +++ b/cluster.tools/heatmap.from.cluster.result.xml Fri Mar 01 19:51:25 2013 -0500 @@ -35,8 +35,8 @@ <param format="rdata" name="dataset1" type="data" label="Clustering Classification" help="Cluster result file from CCPLUS, HAC, or PAM"/> <conditional name="plot_kms_cond"> <param name="plot_kms" type='select' label="Plot Kaplan-Meiers Survival Plot as well (primary clustering ONLY)?" help="NOTE: this only works when the Image Format is PNG. For a PDF of the KM plot, you can use the 'Generate Kaplan-Meiers Plot for Cluster Result' tool"> + <option value="yes" >Yes</option> <option value="no" selected='true'>No</option> - <option value="yes" >Yes</option> </param> <when value='yes'> <param name="survial_data" type="data" format="tabular" label="Clinical Data" help="Specify the clinical data to use for the Kaplan-Meiers Plot (see help)"/> @@ -50,8 +50,8 @@ </conditional> <conditional name="cluster_second_direction_cond"> <param name="cluster_second_direction" type="select" label="Cluster the second dimension? (e.g. rows if this is a sample cluster)" help="Cluster the 2nd dimension of matrix in the cluster result (see help below)"> + <option value="yes">Yes</option> <option value="no" selected="true">No</option> - <option value="yes">Yes</option> <option value="prev">Previous Cluster Result</option> </param> <when value="prev"> @@ -65,12 +65,12 @@ </param> </conditional> <param name="output_treeview_format" type='select' display="radio" label="Output in TreeView format as well?" help="Specify whether or not to produce files for TreeView" > + <option value="yes" selected='true' >Yes</option> <option value="no">No</option> - <option value="yes" selected='true' >Yes</option> </param> <param name="reverse_rows" type='select' display="radio" label="Reverse Row-order (to make consistent w\TreeView Display)?" help="Specify whether or not to reverse Row-order (to make the heatmap consistent with output from TreeView (reversed otherwise)" > + <option value="yes">Yes</option> <option value="no" selected='true'>No</option> - <option value="yes">Yes</option> </param> </inputs> <outputs>
--- a/cluster.tools/remove.tcga.normals.R Fri Mar 01 10:17:24 2013 -0500 +++ b/cluster.tools/remove.tcga.normals.R Fri Mar 01 19:51:25 2013 -0500 @@ -12,6 +12,14 @@ q(); } +## some helper fn's +write.2.tab <- function( mat, + fname ) { + mat <- rbind( colnames( mat ), mat ) + mat <- cbind( c( "ID", rownames( mat )[-1] ), + mat ) + write.table( mat, fname, sep="\t", row.names=FALSE, col.names=FALSE, quote=FALSE ) +} lib.load.quiet <- function( package ) { package <- as.character(substitute(package)) suppressPackageStartupMessages( do.call( "library", list( package=package ) ) ) @@ -19,7 +27,9 @@ lib.load.quiet(getopt) spec <- matrix( c( "data.fname", "d", 1, "character", - "output.fname", "o", 2, "character" + "output.fname", "o", 2, "character", + "return.normals", "r", 2, "character", + "out.norm.fname", "O", 2, "character" ), nc=4, byrow=TRUE @@ -27,15 +37,34 @@ opt <- getopt( spec=spec ) if ( is.null( opt$output.fname ) ) { opt$output.fname <- 'merge_merge.tumors.tab' } +if ( is.null( opt$return.normals ) ) { + opt$return.normals <- FALSE +} else { + opt$return.normals <- ( tolower( opt$return.normals ) %in% "yes" ) +} +if ( is.null( opt$out.norm.fname ) ) { opt$out.norm.fname <- 'merge_merge.normals.tab' } mat <- as.matrix( read.delim( opt$data.fname, row.names=1, check.names=FALSE ) ) -if ( length( strsplit( colnames( mat ), "-" )[[1]] ) == 4 ) { +norms <- matrix( NA, nc=0, nr=nrow( mat ), dimnames=list( rownames( mat ), c() ) ) +if ( length( strsplit( colnames( mat ), "-" )[[1]] ) > 3 ) { cnames <- sapply( strsplit( colnames( mat ), "-" ), function(x) x[4] ) norms <- grepl( "^1", cnames ) if ( sum( norms ) > 0 ) { tumors <- ! norms + norms <- mat[, norms ] mat <- mat[, tumors ] } +} else { + if ( opt$return.normals ) { + writeLines( "TCGA ID barcodes in supplied file only provide patient sample info (no aliquot components are in IDs)\n", opt$out.norm.fname ) + } } -write.table( mat, opt$output.fname, quote=FALSE, sep="\t", col.names=NA ) +write.2.tab( mat, opt$output.fname ) +if ( opt$return.normals ) { + if ( ncol( norms ) > 0 ) { + write.2.tab( norms, opt$out.norm.fname ) + } else { + writeLines( "no normals found in supplied matrix\n", opt$out.norm.fname ) + } +}
--- a/cluster.tools/remove.tcga.normals.xml Fri Mar 01 10:17:24 2013 -0500 +++ b/cluster.tools/remove.tcga.normals.xml Fri Mar 01 19:51:25 2013 -0500 @@ -1,14 +1,24 @@ <tool id="remove_tcga_normal_samples" name="Remove TGCA Normal Samples" force_history_refresh="True"> <command interpreter="python">remove.tcga.normals.py -d $dataset --o ${output} - -</command> +-o ${output_tumor} +-r ${return_normal_matrix} +#if str($return_normal_matrix) == 'yes': +-O ${output_normal} +#end if + </command> <inputs> - <param name="dataset" type="data" format='tabular' label="Matrix containing TCGA Normal Samples"/> + <param name="dataset" type="data" format='tabular' label="Matrix containing TCGA Normal Samples"/> + <param name="return_normal_matrix" type='select' display="radio" label="Return Normals in Separate File?" help="Specify whether or not to return a file containing the normals"> + <option value="yes" selected='true' >Yes</option> + <option value="no">No</option> + </param> </inputs> <outputs> - <data format="tabular" name="output" label="Tumor Sample Matrix"/> + <data format="tabular" name="output_tumor" label="Tumor Sample Matrix"/> + <data format="tabular" name="output_normal" label="Normal Sample Matrix"> + <filter>(return_normal_matrix)=="yes"</filter> + </data> </outputs> <help> .. class:: infomark
--- a/cluster.tools/rnaseq.feature.selection.R Fri Mar 01 10:17:24 2013 -0500 +++ b/cluster.tools/rnaseq.feature.selection.R Fri Mar 01 19:51:25 2013 -0500 @@ -46,7 +46,7 @@ spec <- matrix( c( "data.fname", "d", 1, "character", "output.fname", "o", 2, "character", - "var.method", "m", 2, "character", ## must be either 'active', 'inactive' or 'modulated' + "var.method", "m", 2, "character", "z.transform", "z", 2, "character", "perc.pass", "p", 2, "numeric" ),
--- a/cluster.tools/rnaseq.feature.selection.xml Fri Mar 01 10:17:24 2013 -0500 +++ b/cluster.tools/rnaseq.feature.selection.xml Fri Mar 01 19:51:25 2013 -0500 @@ -1,4 +1,4 @@ -<tool id="rnaseq_feature_selection" name="RNASeq Feature Selection" force_history_refresh="True"> +<tool id="rnaseq_feature_selection" name="RNASeq Feature Selection (High Variance Filter)" force_history_refresh="True"> <command interpreter="python">rnaseq.feature.selection.py -d $dataset -z ${z_transform} @@ -21,7 +21,7 @@ <param name="perc_pass" type="float" label="Total number of features to keep" help="Use value >= 1 to indicate exact number of genes. Use value in 0-1 range to specify percentage" value="1500"/> </inputs> <outputs> - <data format="tabular" name="output" label="Filtered RNASeqs"/> + <data format="tabular" name="output" label="High Variance Filtered RNASeq Data"/> </outputs> <help> .. class:: infomark