<!--
view DESeq_diff_ann_wrapper.xml @ 0:8c5de60b3c04 draft

Uploaded
author vladimir-daric
date Fri, 25 Apr 2014 05:05:39 -0400
parents
children
line wrap: on
line source
-->

<tool id="deseq-diff-ann" name="DESeq" version="1.14.0">
  <!--
    External software this wrapper declares it needs, with the versions it was
    written against.
    NOTE(review): DESeq 1.14.0 appears to belong to the Bioconductor 2.13
    release (paired with R 3.0.x); confirm that the 2.14 pin below is intended.
  -->
  <requirements>
    <requirement type="binary" version="3.0.2">R</requirement>
    <requirement type="package" version="2.14">Bioconductor</requirement>
    <requirement type="package" version="1.14.0">DESeq</requirement>
    <requirement type="package" version="2.12.1">gplots</requirement>
    <requirement type="package" version="3.5-3">plotrix</requirement>
    <requirement type="package" version="1.0-5">RColorBrewer</requirement>
  </requirements>
  <!-- Short summary of the tool, followed by its job-splitting declaration. -->
  <description>Uses DESeq R library for differential expression analysis</description>
  <parallelism method="basic"/>
  <!--
    Cheetah template for the command line handed to DESeq_diff_ann_wrapper.py.
    Every interpolated value (user text and dataset paths alike) is
    single-quoted so that a space or a shell metacharacter in a condition name,
    label or path cannot split or alter an argument.
  -->
  <command interpreter="python">
    DESeq_diff_ann_wrapper.py
    --alpha='$alpha'
    --out='$outhtml'
    --up='$up'
    --down='$down'
    --foldchange='$foldchange'
    --logbase='$logbase'
    --geneNumber='$geneNumber'
    ## Dispersion settings are forwarded only when the advanced mode is selected.
    #if $params.settingsType == "yes":
      --sharingMode='$params.mode'
      --method='$params.method'
      --fitType='$params.type'
    #end if
    --cond1='$cond1'
    --cond2='$cond2'
    --subfolder='$outhtml.files_path'
    ## Both modes list their samples identically; only this flag differs.
    #if str( $replicat_opt['options_type_selector'] ) == 'with':
      --replicates
    #end if
    ## One positional argument per sample: count file, condition number, label.
    #for $i in $replicat_opt.series
      '${i.htseq_out},${i.group},${i.label}'
    #end for
  </command>
  <inputs>
    <!--
      Significance cutoff, kept as sanitized text: digits and "." are valid and
      a decimal comma is mapped to ".". The former format attribute was dropped
      because it only applies to dataset (type="data") parameters.
    -->
    <param name="alpha" type="text" value="0.05"
           label="Significance threshold (p-value)"
           help="p-value threshold for false discovery rate of significantly differentially expressed genes. See DESeq manual.">
      <sanitizer>
        <valid initial="string.digits">
          <add value="."/>
        </valid>
        <mapping initial="none">
          <add source="," target="."/>
        </mapping>
      </sanitizer>
    </param>
    <!--
      Fold-change cutoff, kept as sanitized text: digits and "." are valid and
      a decimal comma is mapped to ".". The former format attribute was dropped
      because it only applies to dataset (type="data") parameters.
    -->
    <param name="foldchange" type="text" value="2"
           label="Fold change threshold"
           help="Fold change from condition B to A used both in graphs and selection of up or down-regulated significant genes. See DESeq manual.">
      <sanitizer>
        <valid initial="string.digits">
          <add value="."/>
        </valid>
        <mapping initial="none">
          <add source="," target="."/>
        </mapping>
      </sanitizer>
    </param>
    <!--
      Optional DESeq tuning. The three selects inside the "yes" branch are the
      values the command template forwards as sharingMode, method and fitType.
    -->
    <conditional name="params">
      <param name="settingsType" type="select" label="DESeq advanced mode" help="Use default settings or set custom values for any DESeq parameter.">
        <option value="no">Use Default</option>
        <option value="yes">Full parameter list</option>
      </param>
      <!-- Default mode: no extra parameters. -->
      <when value="no"/>
      <!-- Full/advanced params. -->
      <when value="yes">
        <param name="mode" type="select" label="DESeq mode" help="How variance estimate is computed with respect to the fitted line. See DESeq manual for more explain.">
          <option value="fit-only">fit-only</option>
          <option value="maximum" selected="True">maximum</option>
          <option value="gene-est-only">gene-est-only</option>
        </param>
        <param name="method" type="select" label="DESeq method" help="How samples are pooled to estimate dispersion. If no replicates use 'blind'.">
          <option value="blind">blind</option>
          <option value="pooled" selected="True">pooled</option>
          <option value="per-condition">per-condition</option>
        </param>
        <param name="type" type="select" label="DESeq fit type" help="refers to the model. 'Local' is the published model, 'parametric' is glm-based (may not converge).">
          <option value="local">local</option>
          <option value="parametric" selected="True">parametric</option>
        </param>
      </when>
    </conditional>
    
    <!-- Logarithm base forwarded to the wrapper as the logbase option; 10 is preselected. -->
    <param name="logbase"
           type="select"
           label="Select of logBase representation">
      <option value="2">2</option>
      <option value="10" selected="True">10</option>
    </param>
    
    <!--
      Size of the heatmap gene list. Declared as an integer (at least 1) so the
      form rejects non-numeric input instead of passing free text to the wrapper.
    -->
    <param name="geneNumber" type="integer" value="30" min="1" label="Number of genes included in the heatmap of the most highly expressed genes"/>
      
    <!--
      Display names of the two conditions. They are placed on the command line,
      so they use the same sanitizer as the replicate labels: letters, digits
      and "_" are valid, and a space is mapped to "_".
    -->
    <param name="cond1" type="text" value="cond1" label="Name for condition one" help="This name will be used in graphs.">
      <sanitizer>
        <valid initial="string.letters,string.digits">
          <add value="_"/>
        </valid>
        <mapping initial="none">
          <add source=" " target="_"/>
        </mapping>
      </sanitizer>
    </param>
    <param name="cond2" type="text" value="cond2" label="Name for condition two" help="This name will be used in graphs.">
      <sanitizer>
        <valid initial="string.letters,string.digits">
          <add value="_"/>
        </valid>
        <mapping initial="none">
          <add source=" " target="_"/>
        </mapping>
      </sanitizer>
    </param>
    
    <!--
      Sample list. Both branches expose the same "series" repeat (count file,
      condition number, label); the command template joins each entry into one
      comma-separated argument, so the label is sanitized identically in both
      branches: letters, digits and "_" are valid, and a space is mapped to "_".
    -->
    <conditional name="replicat_opt">
      <param name="options_type_selector" type="select" label="Analysis with replicates" help="NB : without replicate, the 'blind' method and 'fit-only' mode are applied, in accordance with the DESeq manual.">
        <option value="with" selected="True">with replicates</option>
        <option value="without">without replicates</option>
      </param>
      <when value="with">
        <repeat name="series" title="replicate read count">
          <param name="htseq_out" format="txt" type="data" label="HTSeq-count results"/>
          <param name="group" type="select" label="Replicate condition">
            <option value="1" selected="True">Condition one</option>
            <option value="2">Condition two</option>
          </param>
          <param name="label" type="text" value="replicate1" label="Replicate label name" help="You can specify a label for your replicates.">
            <sanitizer>
              <valid initial="string.letters,string.digits">
                <add value="_"/>
              </valid>
              <mapping initial="none">
                <add source=" " target="_"/>
              </mapping>
            </sanitizer>
          </param>
        </repeat>
      </when>
      <when value="without">
        <repeat name="series" title="Read count">
          <param name="htseq_out" format="txt" type="data" label="HTSeq-count results"/>
          <param name="group" type="select" label="Replicate condition">
            <option value="1" selected="True">Condition one</option>
            <option value="2">Condition two</option>
          </param>
          <param name="label" type="text" value="label1" label="Replicate label name" help="You can specify a label for your replicates.">
            <sanitizer>
              <valid initial="string.letters,string.digits">
                <add value="_"/>
              </valid>
              <mapping initial="none">
                <add source=" " target="_"/>
              </mapping>
            </sanitizer>
          </param>
        </repeat>
      </when>
    </conditional>
  </inputs>
  <!--
    Two tabular datasets (up, down) and one HTML dataset (outhtml). The command
    template passes all three to the wrapper and also uses the files_path of
    outhtml as the subfolder argument.
  -->
  <outputs>
    <data name="up" format="tabular" label="DE_up_genes"/>
    <data name="down" format="tabular" label="DE_down_genes"/>
    <data name="outhtml" format="html" label="deseq_result on ${on_string}"/>
  </outputs>
  <tests>
    <!--
      Exercises the advanced-parameter branch. Each parameter is set once,
      using the conditional|param naming form; mode and method carry the
      defaults declared in the inputs section.
      NOTE(review): no input datasets and no expected outputs are declared, so
      this test cannot pass as written. Add count files for the "series" repeat
      and output assertions once test data is available.
    -->
    <test>
      <param name="params|settingsType" value="yes"/>
      <param name="params|mode" value="maximum"/>
      <param name="params|method" value="pooled"/>
      <param name="params|type" value="local"/>
      <param name="cond1" value="A"/>
      <param name="cond2" value="B"/>
    </test>
  </tests>
  <help>
Summary
-------  

This is a Galaxy wrapper for the DESeq package version 1.14.0 enabling full control of DESeq options (as well as additional graphical outputs/tables).
A major task in the analysis of RNA-seq count data is the detection of differentially expressed genes. Based on read count data for each gene and each sample, the `DESeq`_ package provides methods to test for differential expression based on the negative binomial distribution and a shrinkage estimator for the distribution variance.

.. _DESeq: http://bioconductor.org/packages/release/bioc/html/DESeq.html

Input
------
Two or more count data files. Each file contains the number of reads assigned to each gene for a given sample. One may use for instance the output files of HTSeq-count.

example: ::

  geneA	12
  geneB	0
  geneC	150
  geneD	3


Output 
------- 

This Galaxy wrapper provides an html output and various result tables describing differentially expressed genes.

.. class:: warningmark

Warning: in the absence of biological replicates, use the p-value (not the "adjusted" p-value) as the measure of significance for differential expression.


Dependencies
------------

.. class:: warningmark

This tool depends on R (>=3.0.0), DESeq (>=1.14.0), gplots (>=2.12.1), plotrix (>=3.5-3), RColorBrewer (>=1.0-5). 


.. class:: infomark

Please contact `eBio`_ for more information

.. _eBio: http://ebio.u-psud.fr/

Citation 
---------

Simon Anders and Wolfgang Huber (2010): `Differential expression analysis for sequence count data`_. Genome Biology 11:R106

.. _Differential expression analysis for sequence count data: http://www.ncbi.nlm.nih.gov/pubmed/20979621




  </help>
</tool>