view cuffquant_wrapper.xml @ 0:851fe29d1f20 draft

Uploaded
author geert-vandeweyer
date Mon, 04 Aug 2014 10:27:05 -0400
parents
children e14baafb20bd
line wrap: on
line source

<tool id="cuffquant" name="Cuffquant" version="0.0.1">
    <!-- Wrapper supports Cuffquant version 2.2.1 (distributed as part of the cufflinks 2.2.1 package). -->
    <description>Precompute gene expression levels</description>
    <!-- Dependency resolved by Galaxy: the cufflinks package, pinned to 2.2.1. -->
    <requirements>
        <requirement type="package" version="2.2.1">cufflinks</requirement>
    </requirements>
    <!-- Version string: first line of what cuffquant prints (stderr merged into stdout) when run without arguments. -->
    <version_command>cuffquant 2>&amp;1 | head -n 1</version_command>
    <command>
        ## Cheetah template that assembles the cuffquant command line from the
        ## tool form. Lines starting with a double hash are template comments and
        ## never reach the shell.
        cuffquant
            --no-update-check
            ## Use the number of cores the Galaxy job runner allocated to this job.
            ## The escaped dollar sign leaves the expansion to the shell at run
            ## time; 4 threads are used when GALAXY_SLOTS is not set.
            --num-threads="\${GALAXY_SLOTS:-4}"

            ## Set advanced SE data parameters?
            #if $additional.sAdditional == "Yes":
                -m $additional.frag_mean_len
                -s $additional.frag_len_std_dev
            #end if

            ## Multi-read correct?
            #if $multiread_correct :
                -u
            #end if

            ## Bias correction? The -b flag is followed by the reference FASTA.
            #if $bias_correction.do_bias_correction == "Yes":
                -b
                #if $bias_correction.seq_source.index_source == "history":
                    ## Custom genome from history.
                    $bias_correction.seq_source.ref_file
                #else:
                    ## Built-in genome, looked up in the fasta_indexes data table
                    ## by the dbkey of the annotation dataset.
                    ${__get_data_table_entry__('fasta_indexes', 'value', $gtf_input.dbkey, 'path')}
                #end if
            #end if

            ## Length correction mode: the select value is either empty (default)
            ## or the literal command line flag.
            $length_correction

            ## Set advanced parameters for cuffquant
            #if $advanced_settings.sAdvanced == "Yes":
                #if str($advanced_settings.library_type) != 'auto':
                    --library-type=$advanced_settings.library_type
                #end if
                #if $advanced_settings.mask_file:
                    --mask-file=$advanced_settings.mask_file
                #end if
                --max-mle-iterations=$advanced_settings.max_mle_iterations
                --max-bundle-frags=$advanced_settings.max_bundle_frags
            #end if

            ## Inputs: the annotation first, then one comma-separated list of
            ## replicate files per condition.
            $gtf_input
            #for $condition in $conditions:
                #set samples = ','.join( [ str( $sample.sample ) for $sample in $condition.samples ] )
                $samples
            #end for
    </command>
    <inputs>
        <!-- Transcript annotation that expression is quantified against. -->
        <param name="gtf_input" type="data" format="gtf,gff3" label="Transcripts" help="A transcript annotation (GFF3 or GTF) file produced by cufflinks, cuffcompare, or other source."/>

        <!-- Alignment files, grouped as conditions that each hold one or more replicates. -->
        <repeat name="conditions" title="Condition" min="1">
            <param name="name" type="text" title="Condition name" label="Name"/>
            <repeat name="samples" title="Replicate" min="1">
                <param name="sample" type="data" format="sam,bam" label="Add replicate"/>
            </repeat>
        </repeat>

        <!-- Adds -u to the command line when checked. -->
        <param name="multiread_correct" type="boolean" label="Use multi-read correct" help="Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome."/>

        <!-- Bias correction (-b) with a reference sequence that is either locally cached or taken from the history. -->
        <conditional name="bias_correction">
            <param name="do_bias_correction" type="select" label="Perform Bias Correction" help="Bias detection and correction can significantly improve accuracy of transcript abundance estimates.">
                <option value="No">No</option>
                <option value="Yes">Yes</option>
            </param>
            <when value="Yes">
                <conditional name="seq_source">
                    <param name="index_source" type="select" label="Reference sequence data">
                        <option value="cached">Locally cached</option>
                        <option value="history">History</option>
                    </param>
                    <when value="cached">
                        <param name="index" type="select" label="Using reference genome">
                            <options from_data_table="fasta_indexes">
                                <filter type="data_meta" ref="gtf_input" key="dbkey" column="1"/>
                                <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset"/>
                            </options>
                        </param>
                    </when>
                    <when value="history">
                        <param name="ref_file" type="data" format="fasta" label="Using reference file"/>
                    </when>
                </conditional>
            </when>
            <when value="No"/>
        </conditional>

        <!-- Each option value is the literal flag inserted into the command line (empty for the default mode). -->
        <param name="length_correction" type="select" label="apply length correction" help="mode of length normalization to transcript fpkm.">
            <option value="" selected="true">cufflinks effective length correction</option>
            <option value="--no-effective-length-correction">standard length correction</option>
            <option value="--no-length-correction">no length correction at all (use raw counts)</option>
        </param>

        <!-- Fragment length settings (-m / -s) for single-end data. -->
        <conditional name="additional">
            <param name="sAdditional" type="select" label="Set Additional Parameters for single end reads? (not recommended for paired-end reads)">
                <option value="No" selected="True">No</option>
                <option value="Yes">Yes</option>
            </param>
            <when value="No"/>
            <when value="Yes">
                <param name="frag_mean_len" type="integer" value="200" label="Average Fragment Length"/>
                <param name="frag_len_std_dev" type="integer" value="80" label="Fragment Length Standard Deviation"/>
            </when>
        </conditional>

        <!-- Library type, mask file and iteration / fragment limits. -->
        <conditional name="advanced_settings">
            <param name="sAdvanced" type="select" label="Set Advanced Cuffquant parameters? ">
                <option value="No" selected="True">No</option>
                <option value="Yes">Yes</option>
            </param>
            <when value="No"/>
            <when value="Yes">
                <param name="library_type" type="select" label="Library prep used for input reads" help="">
                    <option value="auto" selected="True">Auto Detect</option>
                    <option value="ff-firststrand">ff-firststrand</option>
                    <option value="ff-secondstrand">ff-secondstrand</option>
                    <option value="ff-unstranded">ff-unstranded</option>
                    <option value="fr-firststrand">fr-firststrand</option>
                    <option value="fr-secondstrand">fr-secondstrand</option>
                    <option value="fr-unstranded">fr-unstranded</option>
                    <option value="transfrags">transfrags</option>
                </param>
                <param name="mask_file" type="data" format="gtf,gff3" optional="True" label="Mask File" help="Ignore all alignment within transcripts in this file"/>
                <param name="max_mle_iterations" type="integer" value="5000" label="Max MLE iterations" help="Maximum iterations allowed for Maximal Likelyhood Estimation calculations"/>
                <param name="max_bundle_frags" type="integer" value="500000" label="Maximum number of fragments per locus" help="Sets the maximum number of fragments a locus may have before being skipped. Skipped loci are listed in skipped.gtf. Default: 500,000"/>
            </when>
        </conditional>
    </inputs>

    <stdio>
        <!-- Rules are applied to both stdout and stderr. -->
        <!-- Output containing the text "Error" is treated as a fatal tool error. -->
        <regex source="both" match="Error" level="fatal" description="Error"/>
        <!-- Any output at all is recorded at log level as tool progress. -->
        <regex source="both" match=".*" level="log" description="tool progress"/>
    </stdio>

    <outputs>
        <!-- Single dataset, collected from abundances.cxb in the job working directory. -->
        <data name="out_file" format="cxb" from_work_dir="abundances.cxb" label="${tool.name} on ${on_string}: Abundances.cxb"/>
    </outputs>

    <tests>
        <test>
                <!--
                    Placeholder test: every parameter and output below is commented out, so this
                    test element currently supplies no inputs and checks no outputs.
                    Command line from the original note (a cuffdiff invocation, not cuffquant):
                    cuffdiff cuffcompare_out5.gtf cuffdiff_in1.sam cuffdiff_in2.sam 
                -->
                <!-- 
                    NOTE: as of version 0.0.6 of the wrapper, tests cannot be run because multiple inputs to a repeat
                    element are not supported.
                    NOTE(review): the parameter names below (aligned_reads1, aligned_reads2, fdr,
                    min_alignment_count, do_normalization, do_groups) and all listed outputs do not exist in
                    this tool, which takes conditions/samples repeats and produces a single output named
                    out_file. They look like they were carried over from a Cuffdiff wrapper (TODO confirm)
                    and must be rewritten before this test can be enabled.
                <param name="gtf_input" value="cuffcompare_out5.gtf" ftype="gtf" />
                <param name="do_groups" value="No" />
                <param name="aligned_reads1" value="cuffdiff_in1.sam" ftype="sam" />
                <param name="aligned_reads2" value="cuffdiff_in2.sam" ftype="sam" />
                <param name="fdr" value="0.05" />
                <param name="min_alignment_count" value="0" />
                <param name="do_bias_correction" value="No" />
                <param name="do_normalization" value="No" />
                <param name="multiread_correct" value="No"/>
                <param name="sAdditional" value="No"/>
                <output name="splicing_diff" file="cuffdiff_out9.txt"/>
                <output name="promoters_diff" file="cuffdiff_out10.txt"/>
                <output name="cds_diff" file="cuffdiff_out11.txt"/>
                <output name="cds_exp_fpkm_tracking" file="cuffdiff_out4.txt"/>
                <output name="cds_fpkm_tracking" file="cuffdiff_out8.txt"/>
                <output name="tss_groups_exp" file="cuffdiff_out3.txt" lines_diff="200"/>
                <output name="tss_groups_fpkm_tracking" file="cuffdiff_out7.txt"/>
                <output name="genes_exp" file="cuffdiff_out2.txt" lines_diff="200"/>
                <output name="genes_fpkm_tracking" file="cuffdiff_out6.txt" lines_diff="200"/>
                <output name="isoforms_exp" file="cuffdiff_out1.txt" lines_diff="200"/>
                <output name="isoforms_fpkm_tracking" file="cuffdiff_out5.txt" lines_diff="200"/>
                -->
        </test>
    </tests>

    <help>
**Cuffquant Overview**

Cuffquant is part of Cufflinks_. Cuffquant provides pre-calculation of gene expression levels. The resulting file can be provided to cuffdiff or cuffnorm for further processing.  Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621

.. _Cufflinks: http://cufflinks.cbcb.umd.edu/
        
------

**Know what you are doing**

.. class:: warningmark

There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.

.. __: http://cufflinks.cbcb.umd.edu/manual.html#cuffdiff

------

**Input format**

Cuffquant takes a Cufflinks or Cuffcompare annotation file (GTF or GFF3) as input, along with one or more SAM or BAM files containing the fragment alignments. Alignment files are grouped into conditions, each holding one or more replicates.

------

**Outputs**

Cuffquant produces one output file:

1. Transcript expression values in binary format.
    
-------

**Settings**

All of the options have a default value. You can change any of them. Most of the options in Cuffquant have been implemented here.

------

**Cuffquant parameter list**

This is a list of implemented Cuffquant options::

  -m INT                         Average fragment length (SE reads); default 200
  -s INT                         Fragment length standard deviation (SE reads); default 80
  --max-mle-iterations INT       Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000
  -u                             Multi read correction tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome.
  -b ref.fasta                   Bias correction. Bias detection and correction can significantly improve accuracy of transcript abundance estimates.
  --no-effective-length-correction  Use standard length correction
  --no-length-correction         Disable all length correction.
  --library-type                 ff-firststrand,ff-secondstrand,ff-unstranded,fr-firststrand,fr-secondstrand,fr-unstranded,transfrags
  --mask-file (gff3/gtf)         Ignore all alignment within transcripts in this file
  --max-bundle-frags             Sets the maximum number of fragments a locus may have before being skipped. Skipped loci are listed in skipped.gtf.
    </help>
</tool>