Mercurial > repos > fubar > differential_count_models

diff rgedgeRpaired_nocamera.xml @ 143:1435811cbf01 draft
Uploaded
author: iuc
date: Thu, 26 Feb 2015 22:41:57 -0500
parents: e7894f37320a
--- a/rgedgeRpaired_nocamera.xml	Wed Feb 18 11:37:14 2015 -0500
+++ b/rgedgeRpaired_nocamera.xml	Thu Feb 26 22:41:57 2015 -0500
@@ -1,144 +1,124 @@
+<?xml version="1.0"?>
 <tool id="rgdifferentialcount" name="Differential_Count" version="0.28">
   <description>models using BioConductor packages</description>
   <requirements>
-      <requirement type="package" version="3.1.2">R</requirement>
-      <requirement type="package" version="1.3.18">graphicsmagick</requirement>
-      <requirement type="package" version="9.10">ghostscript</requirement> 
-      <requirement type="package" version="2.14">biocbasics</requirement>
+    <requirement type="package" version="3.1.2">R</requirement>
+    <requirement type="package" version="1.3.18">graphicsmagick</requirement>
+    <requirement type="package" version="9.10">ghostscript</requirement>
+    <requirement type="package" version="2.14">biocbasics</requirement>
   </requirements>
-  
   <command interpreter="python">
      rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "Differential_Counts" 
     --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
   </command>
   <inputs>
-    <param name="input1"  type="data" format="tabular" label="Select an input matrix - rows are contigs, columns are counts for each sample"
-       help="Use the HTSeq based count matrix preparation tool to create these matrices from BAM/SAM files and a GTF file of genomic features"/>
-    <param name="title" type="text" value="Differential Counts" size="80" label="Title for job outputs" 
-           help="Supply a meaningful name here to remind you what the outputs contain">
+    <param name="input1" type="data" format="tabular" label="Select an input matrix - rows are contigs, columns are counts for each sample" help="Use the HTSeq based count matrix preparation tool to create these matrices from BAM/SAM files and a GTF file of genomic features"/>
+    <param name="title" type="text" value="Differential Counts" size="80" label="Title for job outputs" help="Supply a meaningful name here to remind you what the outputs contain">
       <sanitizer invalid_char="">
-        <valid initial="string.letters,string.digits"><add value="_" /> </valid>
+        <valid initial="string.letters,string.digits">
+          <add value="_"/>
+        </valid>
       </sanitizer>
     </param>
     <param name="treatment_name" type="text" value="Treatment" size="50" label="Treatment Name"/>
-    <param name="Treat_cols" label="Select columns containing treatment." type="data_column" data_ref="input1" numerical="True" 
-         multiple="true" use_header_names="true" size="120" display="checkboxes"  force_select="True">
-        <validator type="no_options" message="Please select at least one column."/>
+    <param name="Treat_cols" label="Select columns containing treatment." type="data_column" data_ref="input1" numerical="True" multiple="true" use_header_names="true" size="120" display="checkboxes" force_select="True">
+      <validator type="no_options" message="Please select at least one column."/>
     </param>
     <param name="control_name" type="text" value="Control" size="50" label="Control Name"/>
-    <param name="Control_cols" label="Select columns containing control." type="data_column" data_ref="input1" numerical="True" 
-         multiple="true" use_header_names="true" size="120" display="checkboxes"  force_select="True">
+    <param name="Control_cols" label="Select columns containing control." type="data_column" data_ref="input1" numerical="True" multiple="true" use_header_names="true" size="120" display="checkboxes" force_select="True">
     </param>
-    <param name="subjectids" type="text" optional="true" size="120" value = ""
-       label="IF SUBJECTS NOT ALL INDEPENDENT! Enter comma separated strings to indicate sample labels for (eg) pairing - must be one for every column in input"
-       help="Leave blank if no pairing, but eg if data from sample id A99 is in columns 2,4 and id C21 is in 3,5 then enter 'A99,C21,A99,C21'">
+    <param name="subjectids" type="text" optional="true" size="120" value="" label="IF SUBJECTS NOT ALL INDEPENDENT! Enter comma separated strings to indicate sample labels for (eg) pairing - must be one for every column in input" help="Leave blank if no pairing, but eg if data from sample id A99 is in columns 2,4 and id C21 is in 3,5 then enter 'A99,C21,A99,C21'">
       <sanitizer>
-        <valid initial="string.letters,string.digits"><add value="," /> </valid>
+        <valid initial="string.letters,string.digits">
+          <add value=","/>
+        </valid>
       </sanitizer>
     </param>
-    <param name="fQ" type="float" value="0.3" size="5" label="Non-differential contig count quantile threshold - zero to analyze all non-zero read count contigs"
-     help="May be a good or a bad idea depending on the biology and the question. EG 0.3 = sparsest 30% of contigs with at least one read are removed before analysis"/>
-    <param name="useNDF" type="boolean" truevalue="T" falsevalue="F" checked="false" size="1" 
-              label="Non differential filter - remove contigs below a threshold (1 per million) for half or more samples"
-     help="May be a good or a bad idea depending on the biology and the question. This was the old default. Quantile based is available as an alternative"/>
-
+    <param name="fQ" type="float" value="0.3" size="5" label="Non-differential contig count quantile threshold - zero to analyze all non-zero read count contigs" help="May be a good or a bad idea depending on the biology and the question. EG 0.3 = sparsest 30% of contigs with at least one read are removed before analysis"/>
+    <param name="useNDF" type="boolean" truevalue="T" falsevalue="F" checked="false" size="1" label="Non differential filter - remove contigs below a threshold (1 per million) for half or more samples" help="May be a good or a bad idea depending on the biology and the question. This was the old default. Quantile based is available as an alternative"/>
     <conditional name="edgeR">
-        <param name="doedgeR" type="select" 
-           label="Run this model using edgeR"
-           help="edgeR uses a negative binomial model and seems to be powerful, even with few replicates">
-          <option value="F">Do not run edgeR</option>
-          <option value="T" selected="true">Run edgeR</option>
-         </param>
-         <when value="T">
-          <param name="edgeR_priordf" type="integer" value="10" size="3" 
-           label="prior.df for tagwise dispersion - larger value = more squeezing of tag dispersions to common dispersion. Replaces prior.n  and prior.df = prior.n * residual.df"
-           help="10 = edgeR default. Use a larger value to 'smooth' small samples. See edgeR docs and note below"/>
-          <param name="edgeR_robust_method" type="select" value="20" size="3" 
-           label="Use robust dispersion method"
-           help="Use ordinary, anscombe or deviance robust deviance estimates">
-             <option value="ordinary" selected="true">Use ordinary deviance estimates</option>
-             <option value="deviance">Use robust deviance estimates</option>
-             <option value="anscombe">use Anscombe robust deviance estimates</option>
-          </param>
-         </when>
-         <when value="F"></when>
+      <param name="doedgeR" type="select" label="Run this model using edgeR" help="edgeR uses a negative binomial model and seems to be powerful, even with few replicates">
+        <option value="F">Do not run edgeR</option>
+        <option value="T" selected="true">Run edgeR</option>
+      </param>
+      <when value="T">
+        <param name="edgeR_priordf" type="integer" value="10" size="3" label="prior.df for tagwise dispersion - larger value = more squeezing of tag dispersions to common dispersion. Replaces prior.n  and prior.df = prior.n * residual.df" help="10 = edgeR default. Use a larger value to 'smooth' small samples. See edgeR docs and note below"/>
+        <param name="edgeR_robust_method" type="select" label="Use robust dispersion method" help="Use ordinary, anscombe or deviance robust deviance estimates"> <!-- default is the option marked selected="true" -->
+          <option value="ordinary" selected="true">Use ordinary deviance estimates</option>
+          <option value="deviance">Use robust deviance estimates</option>
+          <option value="anscombe">use Anscombe robust deviance estimates</option>
+        </param>
+      </when>
+      <when value="F"/>
     </conditional>
     <conditional name="DESeq2">
-    <param name="doDESeq2" type="select" 
-       label="Run the same model with DESeq2 and compare findings"
-       help="DESeq2 is an update to the DESeq package. It uses different assumptions and methods to edgeR">
-      <option value="F" selected="true">Do not run DESeq2</option>
-      <option value="T">Run DESeq2</option>
-     </param>
-     <when value="T">
-         <param name="DESeq_fitType" type="select">
-            <option value="parametric" selected="true">Parametric (default) fit for dispersions</option>
-            <option value="local">Local fit - this will automagically be used if parametric fit fails</option>
-            <option value="mean">Mean dispersion fit- use this if you really understand what you're doing - read the fine manual linked below in the documentation</option>
-         </param> 
-     </when>
-     <when value="F"> </when>
+      <param name="doDESeq2" type="select" label="Run the same model with DESeq2 and compare findings" help="DESeq2 is an update to the DESeq package. It uses different assumptions and methods to edgeR">
+        <option value="F" selected="true">Do not run DESeq2</option>
+        <option value="T">Run DESeq2</option>
+      </param>
+      <when value="T">
+        <param name="DESeq_fitType" type="select">
+          <option value="parametric" selected="true">Parametric (default) fit for dispersions</option>
+          <option value="local">Local fit - this will automagically be used if parametric fit fails</option>
+          <option value="mean">Mean dispersion fit- use this if you really understand what you're doing - read the fine manual linked below in the documentation</option>
+        </param>
+      </when>
+      <when value="F"> </when>
     </conditional>
-    <param name="doVoom" type="select" 
-       label="Run the same model with Voom/limma and compare findings"
-       help="Voom uses counts per million and a precise transformation of variance so count data can be analysed using limma">
+    <param name="doVoom" type="select" label="Run the same model with Voom/limma and compare findings" help="Voom uses counts per million and a precise transformation of variance so count data can be analysed using limma">
       <option value="F" selected="true">Do not run VOOM</option>
       <option value="T">Run VOOM</option>
-     </param>
-    <param name="fdrthresh" type="float" value="0.05" size="5" label="P value threshold for FDR filtering for amily wise error rate control"
-     help="Conventional default value of 0.05 recommended"/>
-    <param name="fdrtype" type="select" label="FDR (Type II error) control method" 
-         help="Use fdr or bh typically to control for the number of tests in a reliable way">
-            <option value="fdr" selected="true">fdr</option>
-            <option value="BH">Benjamini Hochberg</option>
-            <option value="BY">Benjamini Yukateli</option>
-            <option value="bonferroni">Bonferroni</option>
-            <option value="hochberg">Hochberg</option>
-            <option value="holm">Holm</option>
-            <option value="hommel">Hommel</option>
-            <option value="none">no control for multiple tests</option>
+    </param>
+    <param name="fdrthresh" type="float" value="0.05" size="5" label="P value threshold for FDR filtering for family wise error rate control" help="Conventional default value of 0.05 recommended"/>
+    <param name="fdrtype" type="select" label="FDR (Type I error) control method" help="Use fdr or bh typically to control for the number of tests in a reliable way">
+      <option value="fdr" selected="true">fdr</option>
+      <option value="BH">Benjamini Hochberg</option>
+      <option value="BY">Benjamini Yekutieli</option>
+      <option value="bonferroni">Bonferroni</option>
+      <option value="hochberg">Hochberg</option>
+      <option value="holm">Holm</option>
+      <option value="hommel">Hommel</option>
+      <option value="none">no control for multiple tests</option>
     </param>
   </inputs>
   <outputs>
     <data format="tabular" name="out_edgeR" label="${title}_topTable_edgeR.xls">
-         <filter>edgeR['doedgeR'] == "T"</filter>
+      <filter>edgeR['doedgeR'] == "T"</filter>
     </data>
     <data format="tabular" name="out_DESeq2" label="${title}_topTable_DESeq2.xls">
-         <filter>DESeq2['doDESeq2'] == "T"</filter>
+      <filter>DESeq2['doDESeq2'] == "T"</filter>
     </data>
     <data format="tabular" name="out_VOOM" label="${title}_topTable_VOOM.xls">
-         <filter>doVoom == "T"</filter>
+      <filter>doVoom == "T"</filter>
     </data>
     <data format="html" name="html_file" label="${title}.html"/>
   </outputs>
- <stdio>
-     <exit_code range="4"   level="fatal"   description="Number of subject ids must match total number of samples in the input matrix" />
- </stdio>
- <tests>
-<test>
-<param name='input1' value='test_bams2mx.xls' ftype='tabular' />
- <param name='treatment_name' value='liver' />
- <param name='title' value='edgeRtest' />
- <param name='useNDF' value='' />
- <param name='doedgeR' value='T' />
- <param name='doVoom' value='T' />
- <param name='doDESeq2' value='T' />
- <param name='fdrtype' value='fdr' />
- <param name='edgeR_priordf' value="8" />
- <param name='edgeR_robust' value="ordinary" />
- <param name='fdrthresh' value="0.05" />
- <param name='control_name' value='heart' />
- <param name='subjectids' value='' />
- <param name='Control_cols' value='3,4,5,9' />
- <param name='Treat_cols' value='2,6,7,8' />
- <output name='out_edgeR' file='edgeRtest1out.xls' compare='diff' lines_diff='20' />
- <output name='html_file' file='edgeRtest1out.html'  compare='diff' lines_diff='20' />
-</test>
-</tests>
-
-<configfiles>
-<configfile name="runme">
-<![CDATA[
+  <stdio>
+    <exit_code range="4" level="fatal" description="Number of subject ids must match total number of samples in the input matrix"/>
+  </stdio>
+  <tests>
+    <test>
+      <param name="input1" value="test_bams2mx.xls" ftype="tabular"/>
+      <param name="treatment_name" value="liver"/>
+      <param name="title" value="edgeRtest"/>
+      <param name="useNDF" value=""/>
+      <param name="doedgeR" value="T"/>
+      <param name="doVoom" value="T"/>
+      <param name="doDESeq2" value="T"/>
+      <param name="fdrtype" value="fdr"/>
+      <param name="edgeR_priordf" value="8"/>
+      <param name="edgeR_robust_method" value="ordinary"/> <!-- name must match the declared input param edgeR_robust_method -->
+      <param name="fdrthresh" value="0.05"/>
+      <param name="control_name" value="heart"/>
+      <param name="subjectids" value=""/>
+      <param name="Control_cols" value="3,4,5,9"/>
+      <param name="Treat_cols" value="2,6,7,8"/>
+      <output name="out_edgeR" file="edgeRtest1out.xls" compare="diff" lines_diff="20"/>
+      <output name="html_file" file="edgeRtest1out.html" compare="diff" lines_diff="20"/>
+    </test>
+  </tests>
+  <configfiles>
+    <configfile name="runme"><![CDATA[
 # 
 # edgeR.Rscript
 # updated feb 2014 adding outlier-robust deviance estimate options by ross for R 3.0.2/bioc 2.13
@@ -885,8 +865,8 @@
 sink()
 ]]>
 </configfile>
-</configfiles>
-<help>
+  </configfiles>
+  <help>
 
 **What it does**
 
@@ -1063,10 +1043,7 @@
 .. _limma_VOOM: http://www.bioconductor.org/packages/release/bioc/html/limma.html
 .. _Galaxy: http://getgalaxy.org
 </help>
-<citations>
+  <citations>
     <citation type="doi">doi: 10.1093/bioinformatics/btp616</citation>
-</citations>
-
+  </citations>
 </tool>
-
-
author	iuc
date	Thu, 26 Feb 2015 22:41:57 -0500
parents	e7894f37320a
children