view deseq2.xml @ 41:0a0a3388e3f2 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit cbeb1c4c436be04323bd9a809a6393d00b168d07"
author iuc
date Mon, 29 Nov 2021 18:16:10 +0000
parents ed9e8345a292
children 6ef2cba4e35a
line wrap: on
line source

<tool id="deseq2" name="DESeq2" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@">
    <description>Determines differentially expressed features from count tables</description>
    <macros>
        <import>deseq2_macros.xml</import>
    </macros>
    <expand macro='requirements'/>
    <expand macro='edam_ontology' />
    <expand macro='xrefs'/>
    <!-- Error detection by text pattern. Each <regex> below is checked against both
         stdout and stderr (source="both") and is declared with level="fatal"; the
         description is the message attached to a match. -->
    <stdio>
        <!-- Literal text "Execution halted". -->
        <regex match="Execution halted"
           source="both"
           level="fatal"
           description="Execution halted." />
        <!-- Literal text "Error in". -->
        <regex match="Error in"
           source="both"
           level="fatal"
           description="An undefined error occurred, please check your input carefully and contact your administrator." />
        <!-- Literal text "Fatal error"; shares its description with the "Error in" rule. -->
        <regex match="Fatal error"
           source="both"
           level="fatal"
           description="An undefined error occurred, please check your input carefully and contact your administrator." />
    </stdio>
    <!-- Prints a one-line version string: the line of R's version banner that contains
         "version" (lines mentioning GNU are dropped), then the text ", DESeq2 version",
         then the DESeq2 package version read from sessionInfo(). stderr of the second
         R call is discarded and lines containing "WARNING: " (any case) are removed. -->
    <version_command><![CDATA[
echo $(R --version | grep version | grep -v GNU)", DESeq2 version" $(R --vanilla --slave -e "library(DESeq2); cat(sessionInfo()\$otherPkgs\$DESeq2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
    ]]></version_command>
    <command><![CDATA[
## Python standard-library modules used by the template logic below.
## shlex.quote requires Python 3.3 or newer.
#import json
#import os
#import shlex

## tximport mode: link the transcript-to-gene mapping to a fixed local file name.
## The same name is passed to the R script with -x at the end of the command.
#if $tximport.tximport_selector == 'tximport':
    #if $tximport.mapping_format.mapping_format_selector == 'gtf':
        ln -s '$tximport.mapping_format.gtf_file' mapping.gff &&
    #else:
        ln -s '$tximport.mapping_format.tabular_file' mapping.txt &&
    #end if
#end if

## This is needed for Pulsar to transfer the file
cat '$__tool_directory__/get_deseq_dataset.R' > /dev/null &&

Rscript '${__tool_directory__}/deseq2.R'
    --cores \${GALAXY_SLOTS:-1}
    -o '$deseq_out'
    ## Optional outputs: each flag is only passed when the matching value is ticked
    ## in output_options.output_selector.
    #if 'pdf' in $output_options.output_selector:
        -p '$plots'
    #end if
    -A $output_options.alpha_ma
    #if 'normCounts' in $output_options.output_selector:
        -n '$counts_out'
    #end if
    #if 'sizefactors' in $output_options.output_selector:
        -F '$sizefactors_out'
    #end if
    #if 'normRLog' in $output_options.output_selector:
        -r '$rlog_out'
    #end if
    #if 'normVST' in $output_options.output_selector:
        -v '$vst_out'
    #end if
    ## Collect the experimental design in two Python structures:
    ##   temp_factor_names: one [factor name, list of {level name: count file paths}]
    ##                      entry per factor
    ##   filename_to_element_identifiers: basename of each count file mapped to its
    ##                      element identifier
    #set $filename_to_element_identifiers = {}
    #set $temp_factor_names = list()
    #for $factor in $select_data.rep_factorName:
        #set $temp_factor = list()
        #for $level in $factor.rep_factorLevel:
            #set $count_files = list()
            #if $select_data.how == 'group_tags':
                ## Files come from the shared collection, picked by group tag.
                #for $group in $level.groups.value:
                    #for $file in $select_data.countsFile.get_datasets_for_group($group):
                        $count_files.append(str($file))
                        $filename_to_element_identifiers.__setitem__(os.path.basename(str($file)),  $file.element_identifier)
                    #end for
                #end for
            #else:
                ## Files were chosen directly for this level.
                #for $file in $level.countsFile:
                    $count_files.append(str($file))
                    $filename_to_element_identifiers.__setitem__(os.path.basename(str($file)),  $file.element_identifier)
                #end for
            #end if
            $temp_factor.append( {str($level.factorLevel): $count_files} )
        #end for
        ## Levels are handed over in reverse form order.
        ## NOTE(review): presumably so the last level entered acts as the reference; confirm in deseq2.R.
        $temp_factor.reverse()
        $temp_factor_names.append([str($factor.factorName), $temp_factor])
    #end for

    $header

    ## Element identifiers are user-controlled and may contain a single quote, which
    ## would terminate a hand-written '...' shell string. shlex.quote escapes them and
    ## yields the same single-quoted argument when no quote is present.
    -f #echo shlex.quote(json.dumps(temp_factor_names))#
    -l #echo shlex.quote(json.dumps(filename_to_element_identifiers))#
    ## esf defaults to the empty value, in which case -e is omitted.
    #if $advanced_options.esf:
        -e $advanced_options.esf
    #end if
    -t $advanced_options.fit_type
    #if $batch_factors:
        --batch_factors '$batch_factors'
    #end if
    #if $advanced_options.outlier_replace_off:
        -a
    #end if
    #if $advanced_options.outlier_filter_off:
        -b
    #end if
    #if $advanced_options.auto_mean_filter_off:
        -c
    #end if
    #if 'many_contrasts' in $output_options.output_selector:
        -m
    #end if
    #if $tximport.tximport_selector == 'tximport':
        -i
        -y $tximport.txtype
        #if $tximport.mapping_format.mapping_format_selector == 'gtf':
            -x mapping.gff
        #else:
            -x mapping.txt
        #end if
    #end if
]]></command>
    <inputs>
        <!-- Chooses how count files are assigned to factor levels. The command template
             reads the choice as $select_data.how and either resolves group tags against
             one shared collection or uses the datasets picked inside each level. -->
        <conditional name="select_data">
            <!-- The first option (datasets_per_level) carries no selected attribute and
                 is listed first. -->
            <param name="how" type="select" label="How to select the count files for each factor level">
                <option value="datasets_per_level">Select datasets per level</option>
                <option value="group_tags">Select group tags corresponding to levels</option>
            </param>
            <when value="group_tags">
                <!-- One collection for all levels; each level then names the group tags
                     whose datasets belong to it. -->
                <param name="countsFile" type="data_collection" format="tabular" label="Count file(s) collection" multiple="true"/>
                <expand macro="factor_repeat">
                    <param name="groups" type="group_tag" data_ref="countsFile" multiple="true" label="Select groups that correspond to this factor level"/>
                </expand>
            </when>
            <when value="datasets_per_level">
                <!-- Each level gets its own multi-select of count datasets. -->
                <expand macro="factor_repeat">
                    <param name="countsFile" type="data" format="tabular" multiple="true" label="Counts file(s)"/>
                </expand>
            </when>
        </conditional>

        <!-- Optional table of extra batch factors; when set, the command passes its path
             through the batch_factors option of the R script. -->
        <param name="batch_factors" type="data" format="tabular" optional="true" label="(Optional) provide a tabular file with additional batch factors to include in the model." help="You can produce this file using RUVSeq or svaseq."/>
        <!-- Rendered verbatim into the command line: "-H" when checked, nothing otherwise. -->
        <param name="header" type="boolean" truevalue="-H" falsevalue="" checked="true" label="Files have header?" help="If this option is set to Yes, the tool will assume that the count files have column headers in the first row. Default: Yes" />

        <!-- Input type switch. In "tximport" mode the command adds -i, passes the
             quantification program as -y and the gene mapping as -x; "count" mode adds
             no extra parameters. -->
        <conditional name="tximport">
            <param name="tximport_selector" type="select" label="Choice of Input data">
                <option value="count" selected="True">Count data (e.g. from HTSeq-count, featureCounts or StringTie)</option>
                <option value="tximport">TPM values (e.g. from kallisto, sailfish or salmon)</option>
            </param>
            <when value="tximport">
                <!-- Value is forwarded unchanged as the -y argument. -->
                <param name="txtype" type="select" label="Program used to generate TPMs">
                    <option value="kallisto">kallisto</option>
                    <option value="sailfish">Sailfish</option>
                    <option value="salmon">Salmon</option>
                </param>
                <!-- The chosen mapping file is symlinked by the command to mapping.gff
                     (gtf branch) or mapping.txt (tabular branch). -->
                <conditional name="mapping_format">
                    <param name="mapping_format_selector" type="select" label="Gene mapping format">
                        <option value="gtf" selected="True">GTF/GFF3</option>
                        <option value="tabular">Transcript-ID to Gene-ID mapping file</option>
                    </param>
                    <when value="gtf">
                        <param name="gtf_file" type="data" format="gtf,gff3" label="GTF/GFF3 annotation file"/>
                    </when>
                    <when value="tabular">
                        <param name="tabular_file" type="data" format="tabular" label="Tabular file with Transcript-ID to Gene-ID mapping"/>
                    </when>
                </conditional>
            </when>
            <when value="count" />
        </conditional>
        <!-- Tuning switches forwarded to the R script: esf as -e (only when non-empty),
             fit_type as -t, and the three booleans as the bare flags -a, -b and -c. -->
        <section name="advanced_options" title="Advanced options">
            <!-- The empty default value makes the command omit -e entirely. -->
            <param name="esf" type="select" label="Method for estimateSizeFactors" 
                help="Method for estimation: either 'ratio', 'poscounts', or 'iterate'. 'ratio' uses the standard median ratio method introduced in DESeq. 
                    The size factor is the median ratio of the sample over a 'pseudosample': for each gene, the geometric mean of all samples. 
                    'poscounts' and 'iterate' offer alternative estimators, which can be used even when all genes contain a sample with a zero (a problem 
                    for the default method, as the geometric mean becomes zero, and the ratio undefined). The 'poscounts' estimator deals with a gene with 
                    some zeros, by calculating a modified geometric mean by taking the n-th root of the product of the non-zero counts. This evolved out of 
                    use cases with Paul McMurdie's phyloseq package for metagenomic samples. The 'iterate' estimator iterates between estimating the dispersion 
                    with a design of ~1, and finding a size factor vector by numerically optimizing the likelihood of the ~1 model.">
                <option value="" selected="true">No Selection (use default)</option>
                <option value="ratio">ratio</option>
                <option value="poscounts">poscounts</option>
                <option value="iterate">iterate</option>
            </param>
            <!-- The numeric option value, not the displayed name, is what reaches -t. -->
            <param name="fit_type" type="select" label="Fit type">
                <option value="1" selected="true">parametric</option>
                <option value="2">local</option>
                <option value="3">mean</option>
            </param>
            <!-- Adds -a to the command when checked. -->
            <param name="outlier_replace_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
                label="Turn off outliers replacement (only affects with >6 replicates)"
                help="When there are more than 6 replicates for a given sample, the DESeq2 will automatically replace
                    counts with large Cook’s distance with the trimmed mean over all samples, scaled up by the size factor
                    or normalization factor for that sample" />
            <!-- Adds -b to the command when checked. -->
            <param name="outlier_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
                label="Turn off outliers filtering (only affects with >2 replicates)"
                help="When there are more than 2 replicates for a given sample, the DESeq2 will automatically
                    filter genes which contain a Cook’s distance above a cutoff" />
            <!-- Adds -c to the command when checked. -->
            <param name="auto_mean_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
                label="Turn off independent filtering"
                help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" />
        </section>
        <!-- Selects which optional outputs are produced. The option values are tested by
             name in the command template and in the output filters, so they must stay in
             sync with both. The select is optional, so it may be left completely empty. -->
        <section name="output_options" title="Output options">
            <param name="output_selector" type="select" multiple="True" optional="true" display="checkboxes" label="Output selector">
                <option value="pdf" selected="True">Generate plots for visualizing the analysis results</option>
                <option value="sizefactors" >Output sample size factors</option>
                <option value="normCounts">Output normalised counts</option>
                <option value="normVST">Output VST normalized table</option>
                <option value="normRLog">Output rLog normalized table</option>
                <option value="many_contrasts">Output all levels vs all levels of primary factor (use when you have >2 levels for primary factor)</option>
            </param>
            <!-- Always passed to the R script as -A, whether or not plots are requested. -->
            <param name="alpha_ma" type="float" min="0" max="0.5" value="0.1" label="Alpha value for MA-plot" help="Default value is 0.1. This option is only meaningful when plots are generated" />
        </section>
    </inputs>
    <!-- Output datasets. Apart from the main result, each output exists only when its
         value is ticked in output_options.output_selector. That select is optional, so
         every filter first checks that a selection exists before testing membership. -->
    <outputs>
        <!-- Single result table; replaced by the split_output collection when
             many_contrasts is requested. The filter is guarded like its siblings so the
             expression stays valid when nothing at all is selected. -->
        <data name="deseq_out" format="tabular" label="DESeq2 result file on ${on_string}">
            <filter>not output_options['output_selector'] or 'many_contrasts' not in output_options['output_selector']</filter>
            <actions>
                <action name="column_names" type="metadata" default="GeneID,Base mean,log2(FC),StdErr,Wald-Stats,P-value,P-adj" />
            </actions>
        </data>
        <!-- One tabular element per file in the working directory whose name matches
             "<something>_vs_<something>"; the whole file name becomes the element name. -->
        <collection name="split_output" type="list" label="DESeq2 result files on ${on_string}">
            <filter>output_options['output_selector'] and 'many_contrasts' in output_options['output_selector']</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+_vs_.+)" format="tabular" directory="." visible="false"/>
        </collection>
        <data name="plots" format="pdf" label="DESeq2 plots on ${on_string}">
            <filter>output_options['output_selector'] and 'pdf' in output_options['output_selector']</filter>
        </data>
        <data name="sizefactors_out" format="tabular" label="Size Factors on ${on_string}">
            <filter>output_options['output_selector'] and 'sizefactors' in output_options['output_selector']</filter>
        </data>
        <data name="counts_out" format="tabular" label="Normalized counts file on ${on_string}">
            <filter>output_options['output_selector'] and 'normCounts' in output_options['output_selector']</filter>
        </data>
        <data name="rlog_out" format="tabular" label="rLog-Normalized counts file on ${on_string}">
            <filter>output_options['output_selector'] and 'normRLog' in output_options['output_selector']</filter>
        </data>
        <data name="vst_out" format="tabular" label="VST-Normalized counts file on ${on_string}">
            <filter>output_options['output_selector'] and 'normVST' in output_options['output_selector']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Count files with a header row, datasets picked per level. Requests normCounts,
             normRLog and normVST, so four outputs are expected (those three plus
             deseq_out). The header assertions expect the Untreated samples before the
             Treated ones, i.e. the reverse of the order entered below. -->
        <test expect_num_outputs="4">
            <repeat name="rep_factorName">
                <param name="factorName" value="Treatment"/>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Treated"/>
                    <param name="countsFile" value="GSM461179_treat_single.counts,GSM461180_treat_paired.counts,GSM461181_treat_paired.counts"/>
                </repeat>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Untreated"/>
                    <param name="countsFile" value="GSM461176_untreat_single.counts,GSM461177_untreat_paired.counts,GSM461178_untreat_paired.counts,GSM461182_untreat_single.counts"/>
                </repeat>
            </repeat>
            <section name="output_options">
                <param name="output_selector" value="normCounts,normRLog,normVST"/>
            </section>
            <output name="counts_out">
                <assert_contents>
                    <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts" />
                    <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" />
                </assert_contents>
            </output>
            <output name="rlog_out">
                <assert_contents>
                    <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts" />
                    <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" />
                </assert_contents>
            </output>
            <output name="vst_out">
                <assert_contents>
                    <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts" />
                    <has_text_matching expression="FBgn0000003\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*" />
                </assert_contents>
            </output>
            <output name="deseq_out" >
                <assert_contents>
                    <has_text_matching expression="FBgn0003360\t1933\.9504.*\t-2\.8399.*\t0\.1309.*\t-21\.68.*\t.*e-104\t.*e-101" />
                </assert_contents>
            </output>
        </test>
        <!-- Same count files as the header test, plus a batch factor table. Two outputs
             are expected (deseq_out and counts_out); only deseq_out is checked, with a
             deliberately loose pattern for the FBgn0003360 row. -->
        <test expect_num_outputs="2">
            <repeat name="rep_factorName">
                <param name="factorName" value="Treatment"/>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Treated"/>
                    <param name="countsFile" value="GSM461179_treat_single.counts,GSM461180_treat_paired.counts,GSM461181_treat_paired.counts"/>
                </repeat>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Untreated"/>
                    <param name="countsFile" value="GSM461176_untreat_single.counts,GSM461177_untreat_paired.counts,GSM461178_untreat_paired.counts,GSM461182_untreat_single.counts"/>
                </repeat>
            </repeat>
            <param name="batch_factors" value="batch_factors.tab"/>
            <section name="output_options">
                <param name="output_selector" value="normCounts"/>
            </section>
            <output name="deseq_out">
                <assert_contents>
                    <has_text_matching expression="FBgn0003360\t1933.*\t-2.9.*\t0.1.*\t-26.*\t1.*-152\t4.*-149" />
                </assert_contents>
            </output>
        </test>
        <!-- Headerless variant of the first test: the ".noheader" copies of the count
             files are used and header is set to False, so "-H" is not passed. The
             deseq_out assertion is the same as in the header test. -->
        <test expect_num_outputs="4">
            <repeat name="rep_factorName">
                <param name="factorName" value="Treatment"/>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Treated"/>
                    <param name="countsFile" value="GSM461179_treat_single.counts.noheader,GSM461180_treat_paired.counts.noheader,GSM461181_treat_paired.counts.noheader"/>
                </repeat>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Untreated"/>
                    <param name="countsFile" value="GSM461176_untreat_single.counts.noheader,GSM461177_untreat_paired.counts.noheader,GSM461178_untreat_paired.counts.noheader,GSM461182_untreat_single.counts.noheader"/>
                </repeat>
            </repeat>
            <param name="header" value="False"/>
            <section name="output_options">
                <param name="output_selector" value="normCounts,normRLog,normVST"/>
            </section>        
            <output name="counts_out">
                <assert_contents>
                    <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader" />
                    <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" />
                </assert_contents>
            </output>
            <output name="rlog_out">
                <assert_contents>
                    <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader" />
                    <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" />
                </assert_contents>
            </output>
            <output name="vst_out">
                <assert_contents>
                    <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader" />
                    <has_text_matching expression="FBgn0000003\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*" />
                </assert_contents>
            </output>
            <output name="deseq_out" >
                <assert_contents>
                    <has_text_matching expression="FBgn0003360\t1933\.9504.*\t-2\.8399.*\t0\.1309.*\t-21\.68.*\t.*e-104\t.*e-101" />
                </assert_contents>
            </output>
        </test>
        <!-- tximport mode with Sailfish quantification files and a tabular
             transcript-to-gene table. The output selector is emptied, so deseq_out is
             the only expected output. -->
        <test expect_num_outputs="1">
            <repeat name="rep_factorName">
                <param name="factorName" value="Treatment"/>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Treated"/>
                    <param name="countsFile" value="sailfish/sailfish_quant.sf1.tab,sailfish/sailfish_quant.sf2.tab,sailfish/sailfish_quant.sf3.tab"/>
                </repeat>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Untreated"/>
                    <param name="countsFile" value="sailfish/sailfish_quant.sf4.tab,sailfish/sailfish_quant.sf5.tab,sailfish/sailfish_quant.sf6.tab"/>
                </repeat>
            </repeat>
            <section name="output_options">
                <param name="output_selector" value=""/>
            </section>            
            <param name="tximport_selector" value="tximport"/>
            <param name="txtype" value="sailfish"/>
            <param name="mapping_format_selector" value="tabular"/>
            <param name="tabular_file" value="tx2gene.tab"/>
            <output name="deseq_out" >
                <assert_contents>
                    <has_text_matching expression="UGT3A2\t1.8841.*\t-0.1329.*\t0.6936.*\t-0.1917.*\t0.8479.*\t0.9999.*" />
                </assert_contents>
            </output>
        </test>
        <!-- Same Sailfish inputs as the tx2gene test, but the gene mapping comes from an
             NCBI GFF3 annotation (gtf branch of mapping_format). The expected UGT3A2 row
             is identical to the one in the tx2gene test. -->
        <test expect_num_outputs="1">
            <repeat name="rep_factorName">
                <param name="factorName" value="Treatment"/>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Treated"/>
                    <param name="countsFile" value="sailfish/sailfish_quant.sf1.tab,sailfish/sailfish_quant.sf2.tab,sailfish/sailfish_quant.sf3.tab"/>
                </repeat>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Untreated"/>
                    <param name="countsFile" value="sailfish/sailfish_quant.sf4.tab,sailfish/sailfish_quant.sf5.tab,sailfish/sailfish_quant.sf6.tab"/>
                </repeat>
            </repeat>
            <section name="output_options">
                <param name="output_selector" value=""/>
            </section>            
            <param name="tximport_selector" value="tximport"/>
            <param name="txtype" value="sailfish"/>
            <param name="mapping_format_selector" value="gtf"/>
            <param name="gtf_file" value="GRCh38_latest_genomic.gff"/>
            <output name="deseq_out" >
                <assert_contents>
                    <has_text_matching expression="UGT3A2\t1.8841.*\t-0.1329.*\t0.6936.*\t-0.1917.*\t0.8479.*\t0.9999.*" />
                </assert_contents>
            </output>
        </test>
        <!-- tximport mode with the sailfish_ensembl quantification files and an Ensembl
             GTF uploaded explicitly as ftype "gtf". The asserted row is keyed by the
             Ensembl gene ID ENSG00000168671. -->
        <test expect_num_outputs="1">
            <repeat name="rep_factorName">
                <param name="factorName" value="Treatment"/>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Treated"/>
                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf1.tab,sailfish_ensembl/sailfish_quant.sf2.tab,sailfish_ensembl/sailfish_quant.sf3.tab"/>
                </repeat>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Untreated"/>
                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/>
                </repeat>
            </repeat>
            <section name="output_options">
                <param name="output_selector" value=""/>
            </section>            
            <param name="tximport_selector" value="tximport"/>
            <param name="txtype" value="sailfish"/>
            <param name="mapping_format_selector" value="gtf"/>
            <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/>
            <output name="deseq_out" >
                <assert_contents>
                    <has_text_matching expression="ENSG00000168671\t1.8841.*\t-0.1180.*\t0.7429.*\t-0.1589.*\t0.8737.*\t0.9999.*" />
                </assert_contents>
            </output>
        </test>
        <!-- group_tags mode: six Sailfish files are supplied as one list collection whose
             elements carry "group:primary:treated" or "group:primary:untreated" tags, and
             each factor level selects its files by tag. Only deseq_out is expected. -->
        <test expect_num_outputs="1">
            <param name="select_data|how" value="group_tags"/>
            <param name="select_data|countsFile">
                <collection type="list">
                    <element name="1" value="sailfish/sailfish_quant.sf1.tab" tags="group:primary:treated"/>
                    <element name="2" value="sailfish/sailfish_quant.sf2.tab" tags="group:primary:treated"/>
                    <element name="3" value="sailfish/sailfish_quant.sf3.tab" tags="group:primary:treated"/>
                    <element name="4" value="sailfish/sailfish_quant.sf4.tab" tags="group:primary:untreated"/>
                    <element name="5" value="sailfish/sailfish_quant.sf5.tab" tags="group:primary:untreated"/>
                    <element name="6" value="sailfish/sailfish_quant.sf6.tab" tags="group:primary:untreated"/>
                </collection>
            </param>
            <repeat name="rep_factorName">
                <param name="factorName" value="Treatment"/>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Treated"/>
                    <param name="groups" value="primary:treated"/>
                </repeat>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Untreated"/>
                    <param name="groups" value="primary:untreated"/>
                </repeat>
            </repeat>
            <section name="output_options">
                <param name="output_selector" value=""/>
            </section>            
            <param name="tximport_selector" value="tximport"/>
            <param name="txtype" value="sailfish"/>
            <param name="mapping_format_selector" value="tabular"/>
            <param name="tabular_file" value="tx2gene.tab"/>
            <output name="deseq_out" >
                <assert_contents>
                    <has_text_matching expression="HOXC11\t0.557.*\t0.324.*\t0.437.*\t0.741.*\t0.458.*\t0.999.*"/>
                </assert_contents>
            </output>
        </test>
        <!-- Same group-tag setup as the previous test, but with many_contrasts selected.
             The single expected output is the split_output collection, which must hold
             exactly one element named Treatment_Treated_vs_Untreated; its HOXC11 row
             matches the one asserted on deseq_out in the group-tag test. -->
        <test expect_num_outputs="1">
            <param name="select_data|how" value="group_tags"/>
            <param name="select_data|countsFile">
                <collection type="list">
                    <element name="1" value="sailfish/sailfish_quant.sf1.tab" tags="group:primary:treated"/>
                    <element name="2" value="sailfish/sailfish_quant.sf2.tab" tags="group:primary:treated"/>
                    <element name="3" value="sailfish/sailfish_quant.sf3.tab" tags="group:primary:treated"/>
                    <element name="4" value="sailfish/sailfish_quant.sf4.tab" tags="group:primary:untreated"/>
                    <element name="5" value="sailfish/sailfish_quant.sf5.tab" tags="group:primary:untreated"/>
                    <element name="6" value="sailfish/sailfish_quant.sf6.tab" tags="group:primary:untreated"/>
                </collection>
            </param>
            <repeat name="rep_factorName">
                <param name="factorName" value="Treatment"/>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Treated"/>
                    <param name="groups" value="primary:treated"/>
                </repeat>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Untreated"/>
                    <param name="groups" value="primary:untreated"/>
                </repeat>
            </repeat>
            <section name="output_options">
                <param name="output_selector" value="many_contrasts"/>
            </section>            
            <param name="tximport_selector" value="tximport"/>
            <param name="txtype" value="sailfish"/>
            <param name="mapping_format_selector" value="tabular"/>
            <param name="tabular_file" value="tx2gene.tab"/>
            <output_collection name="split_output" type="list" count="1">
                <element name="Treatment_Treated_vs_Untreated">
                    <assert_contents>
                        <has_text_matching expression="HOXC11\t0.557.*\t0.324.*\t0.437.*\t0.741.*\t0.458.*\t0.999.*"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- Runs the Ensembl GTF scenario with alpha_ma set to 0.05. No plots are
             requested here, and the asserted deseq_out row is the same as in the Ensembl
             GTF test, so this checks that the tool accepts the option rather than any
             effect of it.
             NOTE(review): consider selecting "pdf" if the MA-plot itself should be covered. -->
        <test expect_num_outputs="1">
            <repeat name="rep_factorName">
                <param name="factorName" value="Treatment"/>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Treated"/>
                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf1.tab,sailfish_ensembl/sailfish_quant.sf2.tab,sailfish_ensembl/sailfish_quant.sf3.tab"/>
                </repeat>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Untreated"/>
                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/>
                </repeat>
            </repeat>
            <section name="output_options">
                <param name="output_selector" value=""/>
                <param name="alpha_ma" value="0.05"/>
            </section>            
            <param name="tximport_selector" value="tximport"/>
            <param name="txtype" value="sailfish"/>
            <param name="mapping_format_selector" value="gtf"/>
            <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/>
            <output name="deseq_out" >
                <assert_contents>
                    <has_text_matching expression="ENSG00000168671\t1.8841.*\t-0.1180.*\t0.7429.*\t-0.1589.*\t0.8737.*\t0.9999.*" />
                </assert_contents>
            </output>
        </test>
        <!-- Same inputs as the alpha_ma test, with the size factor table requested as
             well (two outputs: deseq_out and sizefactors_out). Only sizefactors_out is
             checked: sf4 must have a factor starting with 0.8 and sf3 one starting
             with 1.0. -->
        <test expect_num_outputs="2">
            <repeat name="rep_factorName">
                <param name="factorName" value="Treatment"/>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Treated"/>
                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf1.tab,sailfish_ensembl/sailfish_quant.sf2.tab,sailfish_ensembl/sailfish_quant.sf3.tab"/>
                </repeat>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Untreated"/>
                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/>
                </repeat>
            </repeat>
            <section name="output_options">
                <param name="output_selector" value="sizefactors"/>
                <param name="alpha_ma" value="0.05"/>
            </section>
            <param name="tximport_selector" value="tximport"/>
            <param name="txtype" value="sailfish"/>
            <param name="mapping_format_selector" value="gtf"/>
            <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/>
            <output name="sizefactors_out">
                <assert_contents>
                    <has_text_matching expression="sailfish_quant\.sf4\.tab\t0\.8\d+" />
                    <has_text_matching expression="sailfish_quant\.sf3\.tab\t1\.0\d+" />
                </assert_contents>
            </output>
        </test>
        <!-- Same as above alpha_ma size factor test, but with a non-default estimator:
             the "esf" parameter in advanced_options is set to "poscounts".
             NOTE(review): the two assertions below are identical to those of the preceding
             size factor test and only pin the leading digits of each value, so this test
             may not detect the esf setting being ignored; confirm whether tighter
             expected values are available. -->
        <test expect_num_outputs="2">
            <!-- Single factor "Treatment" with two levels, three Sailfish quantification files per level -->
            <repeat name="rep_factorName">
                <param name="factorName" value="Treatment"/>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Treated"/>
                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf1.tab,sailfish_ensembl/sailfish_quant.sf2.tab,sailfish_ensembl/sailfish_quant.sf3.tab"/>
                </repeat>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Untreated"/>
                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/>
                </repeat>
            </repeat>
            <!-- The only input that differs from the preceding size factor test -->
            <section name="advanced_options">
                <param name="esf" value="poscounts" />
            </section>
            <section name="output_options">
                <param name="output_selector" value="sizefactors"/>
                <param name="alpha_ma" value="0.05"/>
            </section>
            <!-- Transcript-level input handled through the tximport branch, using a GTF file as the mapping -->
            <param name="tximport_selector" value="tximport"/>
            <param name="txtype" value="sailfish"/>
            <param name="mapping_format_selector" value="gtf"/>
            <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/>
            <output name="sizefactors_out" >
                <assert_contents>
                    <!-- Each regex pins a sample file name, a tab, and only the leading digits of its value (0.8... / 1.0...) -->
                    <has_text_matching expression="sailfish_quant\.sf4\.tab\t0\.8\d+" />
                    <has_text_matching expression="sailfish_quant\.sf3\.tab\t1\.0\d+" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

Estimates variance-mean dependence in count data from high-throughput sequencing assays and tests for differential expression based on a model using the negative binomial distribution.

-----

**Inputs**

**Count Files**

DESeq2_ takes count tables generated from **featureCounts**, **HTSeq-count** or **StringTie** as input. Count tables must be generated for each sample individually. One header row is assumed, but files with no header (e.g. from HTSeq) can be input with the *Files have header?* option set to No. DESeq2 is capable of handling multiple factors that affect your experiment. The first factor you input is considered the primary factor that affects gene expression. Optionally, you can input one or more secondary factors that might influence your experiment. The final output, however, will be the changes in gene expression due to the primary factor in the presence of the secondary factors. Each factor has two levels/states. You need to select the appropriate count tables from your history for each factor level.

The following table gives some examples of factors and their levels:

========= ============== ===============
Factor    Factor level 1 Factor level 2
--------- -------------- ---------------
Treatment Treated        Untreated
--------- -------------- ---------------
Condition Knockdown      Wildtype
--------- -------------- ---------------
TimePoint Day4           Day1
--------- -------------- ---------------
SeqType   SingleEnd      PairedEnd
--------- -------------- ---------------
Gender    Female         Male
========= ============== ===============

*Note*: Output log2 fold changes are based on primary factor level 1 vs. factor level 2, so the order of the factor levels is important. For example, for the factor 'Treatment' given in the above table, DESeq2 computes fold changes of 'Treated' samples against 'Untreated', i.e. the values correspond to up- or down-regulation of genes in Treated samples.

DESeq2_ can also take transcript-level counts from quantification tools such as **kallisto**, **Salmon** and **Sailfish**; this Galaxy wrapper incorporates the Bioconductor tximport_ package to process the transcript counts for DESeq2.

**Salmon or Sailfish Files**

Salmon or Sailfish ``quant.sf`` files can be imported by setting type to *Salmon* or *Sailfish* respectively above. Note: for previous versions of Salmon or Sailfish, in which the ``quant.sf`` files start with comment lines, you will need to remove the comment lines before using the files here. An example of the format is shown below.

Example:

============ ========== =============== =========== ===========
Name         Length     EffectiveLength TPM         NumReads
------------ ---------- --------------- ----------- -----------
NR_001526    164        20.4518         0           0
NR_001526_1  164        20.4518         0           0
NR_001526_2  164        20.4518         0           0
NM_130786    1764       1956.04         2.47415     109.165
NR_015380    2129       2139.53         1.77331     85.5821
NM_001198818 9360       7796.58         2.38616e-07 4.19648e-05
NM_001198819 9527       7964.62         0           0
NM_001198820 9410       7855.78         0           0
NM_014576    9267       7714.88         0.0481114   8.37255
============ ========== =============== =========== ===========

**kallisto Files**

kallisto ``abundance.tsv`` files can be imported by setting type to *kallisto* above. An example of the format is shown below.

Example:

============ ========== =============== =========== ===========
target_id    length     eff_length      est_counts  tpm
------------ ---------- --------------- ----------- -----------
NR_001526    164        20.4518         0           0
NR_001526_1  164        20.4518         0           0
NR_001526_2  164        20.4518         0           0
NM_130786    1764       1956.04         109.165     2.47415
NR_015380    2129       2139.53         85.5821     1.77331
NM_001198818 9360       7796.58         4.19648e-05 2.38616e-07
NM_001198819 9527       7964.62         0           0
NM_001198820 9410       7855.78         0           0
NM_014576    9267       7714.88         8.37255     0.0481114
============ ========== =============== =========== ===========

-----

**Output**

DESeq2_ generates a tabular file containing the columns described below and, optionally, visualized results as a PDF.

====== ==========================================================
Column Description
------ ----------------------------------------------------------
     1 Gene Identifiers
     2 mean normalised counts, averaged over all samples from both conditions
     3 the logarithm (to base 2) of the fold change (see the note in the Inputs section)
     4 standard error estimate for the log2 fold change estimate
     5 Wald statistic
     6 p value for the statistical significance of this change
     7 p value adjusted for multiple testing with the Benjamini-Hochberg procedure
       which controls false discovery rate (FDR)
====== ==========================================================

By selecting ``Output sample size factors`` in the "Output options"
selection box, the size factors used to normalize the samples can also
be output as a tabular file.

.. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html
.. _tximport: https://bioconductor.org/packages/devel/bioc/vignettes/tximport/inst/doc/tximport.html
    ]]></help>
    <expand macro="citations" />
</tool>