view trinityrnaseq.xml @ 9:09c1e388c20c default tip

Change samtools tool_dependency to iuc package_samtools_0_1_19
author Jim Johnson <jj@umn.edu>
date Thu, 06 Feb 2014 10:45:40 -0600
parents d5580071d910
children
line wrap: on
line source

<tool id="trinityrnaseq" name="Trinity" version="0.0.2">
    <!-- Written by Jeremy Goecks, now maintained here by bhaas -->
    <description>De novo assembly of RNA-Seq data Using Trinity</description>
    <!-- Packages this tool declares as dependencies, each pinned to one version. -->
    <requirements>
        <requirement version="2013_08_14" type="package">trinityrnaseq</requirement>
        <requirement version="0.1.19" type="package">samtools</requirement>
        <requirement version="1.0.0" type="package">bowtie</requirement>
    </requirements>
    <!--
        Cheetah template that builds the Trinity.pl command line from the tool form.
        Standard output is copied to the log dataset with tee, grep checks it for
        Trinity's success message, and TrinityStats.pl runs only when that check passes.
    -->
    <command>
        Trinity.pl --JM $JM --CPU $CPU

        ## Inputs.
        #if str($inputs.paired_or_single) == "paired":
            --left $inputs.left_input --right $inputs.right_input
            ## The inputs are declared as format "fasta,fastq", so FASTA datasets carry
            ## the extension 'fasta'; 'fa' is still accepted for backward compatibility.
            #if str($inputs.left_input.ext) in ('fa', 'fasta'):
                --seqType fa
            #else:
                --seqType fq
            #end if
            #if str($inputs.library_type) != "None":
                --SS_lib_type $inputs.library_type
            #end if
            --group_pairs_distance $inputs.group_pairs_distance
            ## Forward the form value; previously it was collected but never passed on.
            --path_reinforcement_distance $inputs.path_reinforcement_distance
        #else:
            --single $inputs.input
            #if str($inputs.input.ext) in ('fa', 'fasta'):
                --seqType fa
            #else:
                --seqType fq
            #end if
            #if str($inputs.library_type) != "None":
                --SS_lib_type $inputs.library_type
            #end if
            ## Forward the form value; previously it was collected but never passed on.
            --path_reinforcement_distance $inputs.path_reinforcement_distance
        #end if

        ## Additional parameters.
        #if str($additional_params.use_additional) == "yes":
            --min_kmer_cov $additional_params.min_kmer_cov --max_reads_per_graph $additional_params.max_reads_per_graph --bflyHeapSpaceMax $additional_params.bflyHeapSpaceMax
            ## Forward the form value; previously it was collected but never passed on.
            --min_contig_length $additional_params.min_contig_length
            ## 'None' is the placeholder default of the text box; an empty box is skipped as well
            ## so that Butterfly is never handed a blank option string.
            #if str($additional_params.bfly_opts).strip() not in ('', 'None'):
                --bfly_opts " $additional_params.bfly_opts "
            #end if
        #end if

        ## direct output to the log file
        | tee $trinity_log
        ## Check output for success message (direct grep output to dev null, -q option quits early leaving tee error messages)
        | grep 'All commands completed successfully.' > /dev/null
        ## Print the stats to output to provide info on galaxy history item
        &amp;&amp; \$TRINITY_HOME/util/TrinityStats.pl trinity_out_dir/Trinity.fasta
    </command>
    <inputs>
        <!-- Resource settings; the command template reads them as $JM and $CPU. -->
        <param name="JM"
               type="select"
               label="JM"
               help="Amount of memory to allocate to Jellyfish for Kmer catalog construction">
            <option value="1G">1G</option>
            <option value="10G">10G</option>
            <option value="50G">50G</option>
            <option value="100G">100G</option>
            <option value="200G">200G</option>
            <option value="500G">500G</option>
        </param>

        <param name="CPU"
               type="integer"
               value="2"
               min="1"
               label="CPU"
               help="Number of CPUs to use by Trinity" />

        <!-- Read inputs: the selector below switches between the paired-end and single-end forms. -->
        <conditional name="inputs">
            <param name="paired_or_single" type="select" label="Paired or Single-end data?">
                <option value="paired">Paired</option>
                <option value="single">Single</option>
            </param>

            <!-- Paired-end form: two read files plus pair-specific settings. -->
            <when value="paired">
                <param name="left_input" type="data" format="fasta,fastq" label="Left/Forward strand reads" help="" />
                <param name="right_input" type="data" format="fasta,fastq" label="Right/Reverse strand reads" help="" />
                <param name="library_type" type="select" label="Strand-specific Library Type">
                    <option value="None">None</option>
                    <option value="FR">FR</option>
                    <option value="RF">RF</option>
                </param>
                <param name="group_pairs_distance"
                       type="integer"
                       value="500"
                       min="1"
                       label="Group pairs distance"
                       help="Maximum length expected between fragment pairs" />
                <param name="path_reinforcement_distance"
                       type="integer"
                       value="75"
                       min="1"
                       label="Path reinforcement distance"
                       help="Minimum read overlap required for path extension in the graph" />
            </when>

            <!-- Single-end form: one read file; strand options are F/R here rather than FR/RF. -->
            <when value="single">
                <param name="input" type="data" format="fasta,fastq" label="Single-end reads" help="" />
                <param name="library_type" type="select" label="Strand-specific Library Type">
                    <option value="None">None</option>
                    <option value="F">F</option>
                    <option value="R">R</option>
                </param>
                <param name="path_reinforcement_distance"
                       type="integer"
                       value="40"
                       min="1"
                       label="Path reinforcement distance"
                       help="Minimum read overlap required for path extension in the graph" />
            </when>
        </conditional>

        <!-- Optional tuning parameters, shown only when the selector is set to "yes". -->
        <conditional name="additional_params">
            <param name="use_additional" type="select" label="Use Additional Params?">
                <option value="no">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no">
            </when>
            <when value="yes">
                <param name="min_kmer_cov"
                       type="integer"
                       value="1"
                       min="1"
                       label="inchworm_min_kmer_cov"
                       help="Minimum kmer coverage required by Inchworm for initial contig construction" />
                <param name="max_reads_per_graph"
                       type="integer"
                       value="20000000"
                       min="10000"
                       label="chrysalis_max_reads_per_graph"
                       help="Maximum number of reads to be anchored within each transcript graph by Chrysalis" />
                <!-- The literal text "None" is the default here; the command template compares against it. -->
                <param name="bfly_opts"
                       type="text"
                       value="None"
                       label="bfly_opts"
                       help="Options to pass on to Butterfly" />
                <param name="bflyHeapSpaceMax"
                       type="select"
                       label="bflyHeapSpaceMax"
                       help="Java heap space maximum value for Butterfly">
                    <option value="1G">1G</option>
                    <option value="2G">2G</option>
                    <option value="4G" selected="true">4G</option>
                    <option value="10G">10G</option>
                    <option value="20G">20G</option>
                </param>
                <param name="min_contig_length"
                       type="integer"
                       value="200"
                       min="1"
                       label="Minimum Contig Length"
                       help="" />
            </when>
        </conditional>
    </inputs>
    <!--
        Failure detection: exit codes in the range "1:" are fatal, and so is the
        text "command not found" appearing in either output stream (source="both").
    -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Failed" />
        <regex match="command not found" source="both" level="fatal" description="Trinity.pl not found" />
    </stdio>
    <outputs>
        <!-- Log dataset; the command template writes to it through $trinity_log. -->
        <data name="trinity_log"
              format="txt"
              label="${tool.name} on ${on_string}: log" />
        <!-- Assembled transcripts, taken from the path named in from_work_dir. -->
        <data name="assembled_transcripts"
              format="fasta"
              label="${tool.name} on ${on_string}: Assembled Transcripts"
              from_work_dir="trinity_out_dir/Trinity.fasta" />
    </outputs>
    <tests>
        <!-- Single-end run on reads.left.fq without additional parameters. -->
        <test>
            <param name="JM" value="1G" />
            <param name="CPU" value="1" />
            <param name="paired_or_single" value="single" />
            <param name="input" value="reads.left.fq" ftype="fastq" />
            <param name="library_type" value="None" />
            <param name="path_reinforcement_distance" value="40" />
            <param name="use_additional" value="no" />
            <output name="trinity_log">
                <assert_contents>
                    <!-- the log must contain this Butterfly message -->
                    <has_text text="Butterfly assemblies are written to" />
                </assert_contents>
            </output>
            <output name="assembled_transcripts">
                <assert_contents>
                    <!-- sequence merged from multiple reads -->
                    <has_text text="CCATGAGGGGGGGGGGCAATGG" />
                </assert_contents>
            </output>
        </test>

        <!-- Paired-end run on reads.left.fq and reads.right.fq without additional parameters. -->
        <test>
            <param name="JM" value="1G" />
            <param name="CPU" value="1" />
            <param name="paired_or_single" value="paired" />
            <param name="left_input" value="reads.left.fq" ftype="fastq" />
            <param name="right_input" value="reads.right.fq" ftype="fastq" />
            <param name="library_type" value="None" />
            <param name="group_pairs_distance" value="500" />
            <param name="path_reinforcement_distance" value="75" />
            <param name="use_additional" value="no" />
            <output name="trinity_log">
                <assert_contents>
                    <!-- the log must contain this Butterfly message -->
                    <has_text text="Butterfly assemblies are written to" />
                </assert_contents>
            </output>
            <output name="assembled_transcripts">
                <assert_contents>
                    <!-- sequence merged from multiple reads -->
                    <has_text text="AAGCTGGCCTCAAATTCCTGATCC" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
        Trinity is a de novo transcript assembler that uses RNA-seq data as input. This tool runs all Trinity_ commands--Inchworm, Chrysalis, and Butterfly--in a single pass.
        
        .. _Trinity: http://trinityrnaseq.sourceforge.net
    </help>
</tool>