view cluster.tools/rnaseq.feature.selection.xml @ 3:563832f48c08 draft

Uploaded
author peter-waltman
date Fri, 01 Mar 2013 19:51:25 -0500
parents dddfeedb85af
children a58527c632b7
line wrap: on
line source

<tool id="rnaseq_feature_selection" name="RNASeq Feature Selection (High Variance Filter)" force_history_refresh="True">
    <!--
      Command line for rnaseq.feature.selection.py, run with the Python
      interpreter. Each flag is filled from the parameter of the same name
      declared under <inputs> / <outputs>:
        -d  input dataset             -z  Z-transform choice (yes / no)
        -m  variance metric           -p  number or fraction of features to keep
        -o  output dataset
      The two dataset paths are single-quoted so that a path containing spaces
      or shell metacharacters reaches the script as a single argument.
    -->
    <command interpreter="python">rnaseq.feature.selection.py
        -d '$dataset'
        -z ${z_transform}
        -m ${var_method}
        -p ${perc_pass}
        -o '${output}'
    </command>
    <inputs>
        <!-- RNASeq matrix to be filtered; passed to the script as -d. -->
        <param name="dataset"
               type="data"
               format="tabular"
               label="RNASeq Data Set"/>

        <!-- Yes/No radio choice, defaulting to "yes"; passed to the script as -z. -->
        <param name="z_transform"
               type="select"
               display="radio"
               label="Z-transform data?"
               help="Specify whether or not to Z-transform the rows (mean=0, sd=1)">
            <option value="yes" selected="true">Yes</option>
            <option value="no">No</option>
        </param>

        <!-- Metric used to rank genes by variance; passed to the script as -m. -->
        <param name="var_method"
               type="select"
               label="Variance Metric for Genes"
               help="Specify Metric to use for calculating Gene Variance">
            <option value="mad">Median Absolute Deviation (MAD)</option>
            <option value="maxad">Maximum Absolute Deviation (Max AD)</option>
            <option value="sd">Standard Deviation (SD)</option>
        </param>

        <!--
          Declared as a float so that one field can carry either an exact count
          or a 0-1 fraction, as described in its help text; passed to the
          script as -p. Defaults to 1500.
        -->
        <param name="perc_pass"
               type="float"
               value="1500"
               label="Total number of features to keep"
               help="Use value >= 1 to indicate exact number of genes. Use value in 0-1 range to specify percentage"/>
    </inputs>
    <outputs>
        <!-- Filtered matrix in tabular format; referenced by the command line as ${output} (-o). -->
        <data name="output"
              format="tabular"
              label="High Variance Filtered RNASeq Data"/>
    </outputs>
<help>
.. class:: infomark
     
**RNASeq Feature Selection** - Tool to filter an RNASeq matrix to select the most variant features

**OUTPUT:**  A new matrix containing only the most variant features

----

**Parameters**

- **Z-transform data?** - Specify whether or not to Z-transform the rows (mean=0, sd=1)
- **Variance Metric for Genes** - Specify Metric to use for calculating Gene Variance. Choice of:

	 * Median Absolute Deviation (MAD)
	 * Maximum Absolute Deviation (Max AD) - similar to MAD, but uses the *Maximum*, instead of the Median, Absolute Deviation
	 * Standard Deviation

- **Total number of features to keep** - Number of the most variant features (genes) to keep. Choice of:

         * Integer Value       - indicate the exact number of genes that are to be kept
         * Real Value in [0,1] - indicate the percentage of genes that are to be kept



</help>
</tool>