changeset 8:277a79e23357 draft

Uploaded
author ynewton
date Wed, 03 Oct 2012 10:10:47 -0400
parents 8758afe96b3e
children 600872152be6
files normalize.xml
diffstat 1 files changed, 56 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/normalize.xml	Wed Oct 03 10:10:47 2012 -0400
@@ -0,0 +1,56 @@
+<tool id="matrix_normalize" name="Matrix Normalize" version="1.0.0">
+  <description>Matrix Normalize</description>
+  <command interpreter="Rscript">normalize.r $genomicMatrix $normType $normBy 
+#if str($controlColumnLabelsList) != "None":
+    $controlColumnLabelsList 
+#end if  
+  > $outfile
+  </command> <!-- Runs normalize.r with the matrix, normalization type and row/column choice; the controls dataset is appended only when its value is not "None"; stdout is redirected to the output dataset. -->
+  <inputs> <!-- Tool form parameters: the input matrix, the normalization axis and method, and an optional controls dataset. -->
+      <param name="genomicMatrix" type="data" label="Genomic Matrix"/>
+      <param name="normBy" type="select" label="normalize by (row or column)">
+        <option value="row">ROW</option>
+        <option value="column">COLUMN</option>
+      </param>
+      <param name="normType" type="select" label="type of normalization">
+        <option value="median_shift">Median Shift</option>
+        <option value="mean_shift">Mean Shift</option>
+        <option value="t_statistic">Student t-statistic (z-scores)</option>
+        <option value="exponential_fit">Exponential Distribution Normalization</option>
+        <option value="normal_fit">Normal Distribution Normalization</option>
+        <option value="weibull_0.5_fit">Weibull Distribution Normalization (scale=1,shape=0.5)</option>
+        <option value="weibull_1_fit">Weibull Distribution Normalization (scale=1,shape=1)</option>
+        <option value="weibull_1.5_fit">Weibull Distribution Normalization (scale=1,shape=1.5)</option>
+        <option value="weibull_5_fit">Weibull Distribution Normalization (scale=1,shape=5)</option>
+      </param>
+      <param name="controlColumnLabelsList" optional="true" type="data" label="Controls"/>
+  </inputs>
+  <outputs> <!-- Single tabular dataset; the command redirects normalize.r stdout into it. -->
+      <data format="tabular" name="outfile" />
+  </outputs>
+  <help>
+**What it does**
+
+This tool takes data in a matrix format and normalizes it using the chosen normalization options. The matrix data is assumed to be column and row annotated, meaning that the first line of the matrix file is assumed to be the column headers and the first column of each row is assumed to be the row header.
+
+Data can be normalized either by row or by column. Note that the exponential, normal, and Weibull normalizations are always performed by column, regardless of the user selection.
+
+The following normalizations are provided:
+
+1. Median shift: if no normals list is provided, computes the median of the whole row and subtracts it from each entry of the row. If normals are provided, computes the median of the normal samples and subtracts it from each value of the non-normal samples; in that case only the non-normal samples are returned. If "Column" is selected in normalize by, then normals are ignored.
+
+2. Mean shift: if no normals list is provided, computes the mean of the whole row and subtracts it from each entry of the row. If normals are provided, computes the mean of the normal samples and subtracts it from each value of the non-normal samples; in that case only the non-normal samples are returned. If "Column" is selected in normalize by, then normals are ignored.
+
+3. T-statistic (z-score): sometimes called standardization. A z-score is computed for each value of the row/column. If normals are specified, z-scores are computed separately within each class (normals and non-normals).
+
+4. Exponential normalization: performed by columns/samples. All genes/probes in the column/sample are ranked. Then the inverse CDF (quantile function) of the exponential distribution is applied to the ranks, transforming each rank into a real number from that distribution.
+
+5. Normal normalization: same as exponential normalization, but the inverse CDF (quantile function) of the Normal distribution is applied.
+
+6. Weibull normalizations: same as exponential normalization, but the inverse CDF (quantile function) of the Weibull distribution is applied, using the scale and shape parameters of the selected option.
+
+
+The Controls (normals) parameter is optional. It is a dataset containing a list of column headers from the input matrix that should be considered normals.
+
+  </help>   
+</tool>
\ No newline at end of file