diff cuffquant_wrapper.xml @ 0:cef77c81d60c draft

Uploaded
author devteam
date Wed, 26 Nov 2014 14:02:38 -0500
parents
children 8fb10700729b
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cuffquant_wrapper.xml	Wed Nov 26 14:02:38 2014 -0500
@@ -0,0 +1,218 @@
+<tool id="cuffquant" name="Cuffquant" version="@VERSION@.0">
+    <!-- Wrapper supports Cuffdiff versions 2.2.1 -->
+    <description>Precompute gene expression levels</description>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <macros>
+      <import>cuff_macros.xml</import>
+    </macros>
+    <version_command>cuffquant 2>&amp;1 | head -n 1</version_command>
+    <command>
+        cuffquant
+            --no-update-check
+            --num-threads=\${GALAXY_SLOTS:-4}
+            ## Set advanced SE data parameters?
+            #if $additional.sAdditional == "Yes":
+                -m $additional.frag_mean_len
+                -s $additional.frag_len_std_dev
+            #end if
+
+            ## Multi-read correct?
+            #if $multiread_correct :
+            -u
+            #end if
+
+            ## Bias correction?
+            #if $bias_correction.do_bias_correction == "Yes":
+               -b
+                #if $bias_correction.seq_source.index_source == "history":
+                    ## Custom genome from history.
+                    $bias_correction.seq_source.ref_file
+                #else:
+                    ## Built-in genome.
+                     "${ bias_correction.seq_source.index.fields.path }"
+                #end if
+            #end if
+
+            $length_correction
+
+            ## Set advanced parameters for cufflinks
+            #if $advanced_settings.sAdvanced == "Yes":
+                #if str($advanced_settings.library_type) != 'auto':
+                    --library-type=$advanced_settings.library_type
+                #end if
+                #if $advanced_settings.mask_file:
+                    --mask-file=$advanced_settings.mask_file
+                #end if
+                --max-mle-iterations=$advanced_settings.max_mle_iterations
+                --max-bundle-frags=$advanced_settings.max_bundle_frags
+            #end if
+            ## Inputs.
+            $gtf_input
+            #set samplestring = ','.join( [ str( $sample.sample ) for $sample in $samples ] )
+            $samplestring
+    </command>
+    <inputs>
+        <param format="gtf,gff3" name="gtf_input" type="data" label="Transcripts"
+            help="A transcript annotation (GFF3 or GTF) file produced by cufflinks, cuffcompare, or other source."/>
+
+        <repeat name="samples" title="Replicate" min="1">
+            <param name="sample" label="Add replicate" type="data" format="sam,bam"/>
+        </repeat>
+
+        <param name="multiread_correct" type="boolean" label="Use multi-read correct"
+            help="Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome." />
+
+        <conditional name="bias_correction">
+            <param name="do_bias_correction" type="select" label="Perform Bias Correction"
+                help="Bias detection and correction can significantly improve accuracy of transcript abundance estimates.">
+                <option value="No">No</option>
+                <option value="Yes">Yes</option>
+            </param>
+            <when value="Yes">
+                <conditional name="seq_source">
+                  <param name="index_source" type="select" label="Reference sequence data">
+                    <option value="cached">Locally cached</option>
+                    <option value="history">History</option>
+                  </param>
+                  <when value="cached">
+                    <param name="index" type="select" label="Using reference genome">
+                      <options from_data_table="fasta_indexes">
+                        <filter type="data_meta" ref="gtf_input" key="dbkey" column="1" />
+                        <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" />
+                      </options>
+                    </param>
+                  </when>
+                  <when value="history">
+                      <param name="ref_file" type="data" format="fasta" label="Using reference file" />
+                  </when>
+                </conditional>
+            </when>
+            <when value="No"></when>
+        </conditional>
+
+        <param name="length_correction" type="select" label="apply length correction" help="mode of length normalization to transcript fpkm.">
+            <option value="" selected="true">cufflinks effective length correction</option>
+            <option value="--no-effective-length-correction">standard length correction</option>
+            <option value="--no-length-correction">no length correction at all (use raw counts)</option>
+        </param>
+
+        <conditional name="additional">
+            <param name="sAdditional" type="select" label="Set Additional Parameters for single end reads? (not recommended for paired-end reads)">
+                <option value="No" selected="True">No</option>
+                <option value="Yes">Yes</option>
+            </param>
+            <when value="No"></when>
+            <when value="Yes">
+                <param name="frag_mean_len" type="integer" value="200" label="Average Fragment Length"/>
+                <param name="frag_len_std_dev" type="integer" value="80" label="Fragment Length Standard Deviation"/>
+            </when>
+        </conditional>
+
+        <conditional name="advanced_settings">
+            <param name="sAdvanced" type="select" label="Set Advanced Cuffquant parameters? ">
+                    <option value="No" selected="True">No</option>
+                    <option value="Yes">Yes</option>
+                </param>
+        <when value="No"></when>
+        <when value="Yes">
+            <param type="select" name="library_type" label="Library prep used for input reads" help="">
+                <option value="auto" selected="True">Auto Detect</option>
+                <option value="ff-firststrand">ff-firststrand</option>
+                <option value="ff-secondstrand">ff-secondstrand</option>
+                <option value="ff-unstranded">ff-unstranded</option>
+                <option value="fr-firststrand">fr-firststrand</option>
+                <option value="fr-secondstrand">fr-secondstrand</option>
+                <option value="fr-unstranded" >fr-unstranded</option>
+                <option value="transfrags">transfrags</option>
+            </param>
+            <param name="mask_file" type="data" format="gtf,gff3" label="Mask File" help="Ignore all alignment within transcripts in this file" optional="True" />
+            <param name="max_mle_iterations" value="5000" type="integer" label="Max MLE iterations" help="Maximum iterations allowed for Maximal Likelyhood Estimation calculations" />
+            <param name="max_bundle_frags" type="integer" value="500000" label="Maximum number of fragments per locus" 
+                help="Sets the maximum number of fragments a locus may have before being skipped. Default: 500,000" />
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- Standard datasets. -->
+        <data format="cxb" name="out_file" label="${tool.name} on ${on_string}: Abundances.cxb" from_work_dir="abundances.cxb" />
+    </outputs>
+    <tests>
+        <test>
+            <!--
+                cuffquant cuffcompare_out5.gtf cuffdiff_in1.sam,cuffdiff_in2.sam 
+            -->
+            <param name="gtf_input" value="cuffquant_in.gtf" ftype="gtf" />
+            <repeat name="samples">
+                <param name="sample" value="cuffquant_in1.sam" ftype="sam" />
+            </repeat>
+            <repeat name="samples">
+                <param name="sample" value="cuffquant_in2.sam" ftype="sam" />
+            </repeat>
+            <param name="length_correction" value="" />
+            <param name="do_bias_correction" value="No" />
+            <param name="multiread_correct" value="No"/>
+            <param name="sAdditional" value="No"/>
+            <param name="sAdvanced" value="No" />
+            <output name="out_file" file="cuffquant_out1.cxb" compare="sim_size" />
+        </test>
+    </tests>
+
+    <help>
+**Cuffquant Overview**
+
+Cuffquant is part of Cufflinks_. Cuffquant provides pre-calculation of gene expression levels. The resulting file can be provided to cuffdiff or cuffnorm for further processing.  Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621
+
+.. _Cufflinks: http://cufflinks.cbcb.umd.edu/
+        
+------
+
+**Know what you are doing**
+
+.. class:: warningmark
+
+There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
+
+.. __: http://cufflinks.cbcb.umd.edu/manual.html#cuffdiff
+
+------
+
+**Input format**
+
+Cuffquant takes Cufflinks or Cuffcompare GTF files as input along with two or more SAM files containing the fragment alignments for two or more samples.
+
+------
+
+**Outputs**
+
+Cuffquant produces one output file:
+
+1. Transcript expression values in binary format.
+    
+-------
+
+**Settings**
+
+All of the options have a default value. You can change any of them. Most of the options in Cuffdiff have been implemented here.
+
+------
+
+**Cuffdiff parameter list**
+
+This is a list of implemented Cuffdiff options::
+
+  -m INT                         Average fragment length (SE reads); default 200
+  -s INT                         Fragment legnth standard deviation (SE reads); default 80
+  --max-mle-iterations INT       Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000
+  -u                             Multi read correction tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome.
+  -b ref.fasta             bias correction. Bias detection and correction can significantly improve accuracy of transcript abundance estimates.
+  --no-effective-length-correction  Use standard length correction
+  --no-length-correction         Disable all length correction.
+  --library-type                 ff-firststrand,ff-secondstrand,ff-unstranded,fr-firstrand,fr-secondstrand,fr-unstranded,transfrags
+  --mask-file (gff3/gtf)         Ignore all alignment within transcripts in this file
+  --max-bundle-frags             Sets the maximum number of fragments a locus may have before being skipped. Skipped loci are listed in skipped.gtf.
+    </help>
+    <citations>
+        <citation type="doi">10.1038/nbt.1621</citation>
+    </citations>
+</tool>