view trinityrnaseq_norm.xml @ 6:8d546ef8cfea

Add RSEM_abundance_estimation
author Jim Johnson <jj@umn.edu>
date Fri, 22 Nov 2013 14:48:39 -0600
parents 5eb99d21ef0d
children
line wrap: on
line source

<tool id="trinityrnaseq_norm" name="Trinity read normalization" version="0.0.2">
    <!-- Written by Jeremy Goecks, modified by Josh Bowden for the normalization procedure, now maintained here by bhaas -->
    <description>Pre-process RNA-seq data to reduce coverage of highly covered areas</description>
    <!-- Tool dependency: the trinityrnaseq package, pinned to version 2013_08_14. -->
    <requirements>
        <requirement type="package" version="2013_08_14">trinityrnaseq</requirement>
    </requirements>
    <command>
        ## Cheetah template that builds the shell command line for the job.
        ##
        ## Step 1: symlink the selected input datasets into the working directory under
        ## fixed names; the results are collected in step 3 via "<name>.normalized*".
        #if str($inputs.paired_or_single) == "paired":
          ln -s '$inputs.left_input' left_reads &amp;&amp;
          ln -s '$inputs.right_input' right_reads &amp;&amp;
        #else:
          ln -s '$inputs.input' single_reads &amp;&amp;
        #end if
        ## Step 2: run the normalization script (TRINITY_HOME is resolved by the shell at
        ## run time, hence the escaped dollar sign).
        \${TRINITY_HOME}/util/normalize_by_kmer_coverage.pl --JM $JM --max_cov $max_cov
        ## Inputs.
        #if str($inputs.paired_or_single) == "paired":
            --left left_reads --right right_reads
            ## Galaxy's FASTA datatype extension is "fasta"; "fa" is still accepted so that
            ## any instance defining such an extension keeps working. Anything else is
            ## treated as FASTQ.
            #if str($inputs.left_input.ext) in ('fa', 'fasta'):
                --seqType fa
            #else:
                --seqType fq
            #end if
            $inputs.pe_reads_unordered
            #if str($inputs.library_type) != "None":
                --SS_lib_type $inputs.library_type
            #end if
            $inputs.pairs_together
            $inputs.parallel_stats
        #else:
            --single single_reads
            #if str($inputs.input.ext) in ('fa', 'fasta'):
                --seqType fa
            #else:
                --seqType fq
            #end if
            #if str($inputs.library_type) != "None":
                --SS_lib_type $inputs.library_type
            #end if
        #end if
        ## Optional tuning parameters are passed only when the user supplied a value.
        #if $kmer_size:
            --KMER_SIZE $kmer_size
        #end if
        #if $max_pct_stdev:
            --max_pct_stdev $max_pct_stdev
        #end if
        ## Copy the script's stdout to the log dataset while still passing it through.
        ## NOTE(review): the exit status of this pipeline is that of tee, so a non-zero
        ## exit from the script itself is not visible to the exit_code check; failures are
        ## then detected only through the "Error" regex on stdout. Confirm the job shell
        ## before switching to pipefail or a plain redirect.
        | tee '$trinity_coverage_normalization_log' &amp;&amp;
        ## Step 3: copy the normalized reads to the output datasets.
        #if str($inputs.paired_or_single) == "paired":
          cp left_reads.normalized* '$output_left' &amp;&amp;
          cp right_reads.normalized* '$output_right'
        #else:
          cp single_reads.normalized* '$output_single'
        #end if
    </command>
    <inputs>
        <!-- Passed to the script as the JM option. -->
        <param name="JM" type="select" label="JM" help="Amount of memory to allocate to Jellyfish for Kmer catalog construction">
            <option value="1G">1G</option>
            <option value="10G">10G</option>
            <option value="20G">20G</option>
            <option value="50G">50G</option>
            <option value="100G">100G</option>
        </param>

        <!-- Passed to the script as the max_cov option. -->
        <param name="max_cov" type="select" label="max_cov" help="Read coverage in terms of maximum coverage to keep">
            <option value="30">30</option>
            <option value="40">40</option>
            <option value="50">50</option>
            <option value="60">60</option>
            <option value="70">70</option>
            <option value="100">100</option>
        </param>

        <!-- The selected mode decides which read inputs and which library types are offered. -->
        <conditional name="inputs">
            <param name="paired_or_single" type="select" label="Paired or Single-end data?">
                <option value="paired">Paired</option>
                <option value="single">Single</option>
            </param>
            <when value="paired">
                <param format="fasta,fastq" name="left_input" type="data" label="Left/Forward strand reads" help=""/>
                <param format="fasta,fastq" name="right_input" type="data" label="Right/Reverse strand reads" help=""/>
                <!-- "None" means no SS_lib_type option is added to the command line. -->
                <param name="library_type" type="select" label="Strand-specific Library Type">
                    <option value="None">None</option>
                    <option value="FR">FR</option>
                    <option value="RF">RF</option>
                </param>
                <!-- Each checkbox expands to its command-line flag when checked, or to nothing. -->
                <param name="pe_reads_unordered" type="boolean" truevalue="--PE_reads_unordered" falsevalue="" checked="false" label="set if the input paired-end reads are not identically ordered"/>
                <param name="pairs_together" type="boolean" truevalue="--pairs_together" falsevalue="" checked="false" label="process paired reads by averaging stats between pairs and retaining linking info"/>
                <param name="parallel_stats" type="boolean" truevalue="--PARALLEL_STATS" falsevalue="" checked="false" label="generate read stats in parallel for paired reads" help="(Figure 2X Inchworm memory requirement)"/>
            </when>
            <when value="single">
                <param format="fasta,fastq" name="input" type="data" label="Single-end reads" help=""/>
                <!-- "None" means no SS_lib_type option is added to the command line. -->
                <param name="library_type" type="select" label="Strand-specific Library Type">
                    <option value="None">None</option>
                    <option value="F">F</option>
                    <option value="R">R</option>
                </param>
            </when>
        </conditional>

        <!-- Optional tuning values; when left empty the option is omitted from the command line. -->
        <param name="kmer_size" type="integer" value="" optional="true" label="KMER SIZE" help="default: 25">
            <validator type="in_range" message="kmer size between 3 and 200" min="3" max="200"/>
        </param>
        <param name="max_pct_stdev" type="integer" value="" optional="true" label="maximum pct of mean for stdev of kmer coverage across read" help="default: 100">
            <validator type="in_range" message="max_pct_stdev between 10 and 100" min="10" max="100"/>
        </param>

    </inputs>
    <stdio>
        <!-- A non-zero exit status marks the job as failed. -->
        <exit_code level="fatal" range="1:" description="Failed"/>
        <!-- A match of the pattern "Error" on stdout also marks the job as failed. -->
        <regex source="stdout" match="Error" level="fatal" description="Failed"/>
    </stdio>
    <outputs>
        <!-- The log is always declared. Each read output carries a filter on the selected
             paired_or_single mode and takes its format from the matching input. -->
        <data name="trinity_coverage_normalization_log" format="txt" label="${tool.name} on ${on_string}: log"/>
        <data name="output_left" format_source="left_input" label="${tool.name} on ${on_string}: Normalized left reads">
            <filter>inputs['paired_or_single'] == "paired"</filter>
        </data>
        <data name="output_right" format_source="right_input" label="${tool.name} on ${on_string}: Normalized right reads">
            <filter>inputs['paired_or_single'] == "paired"</filter>
        </data>
        <data name="output_single" format_source="input" label="${tool.name} on ${on_string}: Normalized reads">
            <filter>inputs['paired_or_single'] == "single"</filter>
        </data>
    </outputs>
    <tests>
        <!-- Single-end mode: reads.left.fq is used as the only input. -->
        <test>
            <param name="JM" value="1G"/>
            <param name="max_cov" value="30"/>
            <param name="paired_or_single" value="single"/>
            <param name="input" value="reads.left.fq" ftype="fastq"/>
            <param name="library_type" value="None"/>
            <!-- The log must contain the completion message. -->
            <output name="trinity_coverage_normalization_log">
                <assert_contents>
                    <has_text text="Normalization complete."/>
                </assert_contents>
            </output>
            <!-- The normalized reads must contain this sequence. -->
            <output name="output_single">
                <assert_contents>
                    <has_text text="ACTGCATCCTGGAAAGAATCAATGGTGGCCGGAAAGTGTTTTTCAAATACAAGAGTGACAATGTGCCCTGTTGTTT"/>
                </assert_contents>
            </output>
        </test>
        <!-- Paired-end mode with all optional flags switched off. -->
        <test>
            <param name="JM" value="1G"/>
            <param name="max_cov" value="30"/>
            <param name="paired_or_single" value="paired"/>
            <param name="left_input" value="reads.left.fq" ftype="fastq"/>
            <param name="right_input" value="reads.right.fq" ftype="fastq"/>
            <param name="library_type" value="None"/>
            <param name="pe_reads_unordered" value="False"/>
            <param name="pairs_together" value="False"/>
            <param name="parallel_stats" value="False"/>
            <!-- The log must contain the completion message. -->
            <output name="trinity_coverage_normalization_log">
                <assert_contents>
                    <has_text text="Normalization complete."/>
                </assert_contents>
            </output>
            <!-- Each normalized read file must contain its expected sequence. -->
            <output name="output_left">
                <assert_contents>
                    <has_text text="CTGGGCTGCAGCTAAGTTCTCTGCATCCTCCTTCTTGCTTGTGGCTGGGAAGAAGACAATGTTGTCGATGGTCTGG"/>
                </assert_contents>
            </output>
            <output name="output_right">
                <assert_contents>
                    <has_text text="CTCAAATGGTTAATTCTCAGGCTGCAAATATTCGTTCAGGATGGAAGAACATTTTCTCAGTATTCCATCTAGCTGC"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
        Runs the Trinity_ script util/normalize_by_kmer_coverage.pl, which reduces data size with minimal impact on the transcripts recovered when the reads are then assembled with Trinity.pl.
        
        .. _Trinity: http://trinityrnaseq.sourceforge.net
    </help>
</tool>