annotate cufflinks_wrapper.xml @ 12:e6ac542a288e draft default tip

Updated Test Data
author geert-vandeweyer
date Sat, 02 Aug 2014 10:12:58 -0400
parents c668946e25d1
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
1 <tool id="cufflinks" name="Cufflinks" version="0.0.7">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
2 <!-- Wrapper supports Cufflinks versions v1.3.0 and newer -->
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
3 <description>transcript assembly and FPKM (RPKM) estimates for RNA-Seq data</description>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
4 <requirements>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
5 <requirement type="package" version="2.2.1">cufflinks</requirement>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
6 </requirements>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
7 <version_command>cufflinks 2>&amp;1 | head -n 1</version_command>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
8 <command interpreter="python">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
9 cufflinks_wrapper.py
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
10 --input=$input
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
11 --assembled-isoforms-output=$assembled_isoforms
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
12 --num-threads="\${GALAXY_SLOTS:-4}"
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
13 -I $max_intron_len
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
14 -F $min_isoform_fraction
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
15 -j $pre_mrna_fraction
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
16 $length_correction
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
17
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
18 ## Include reference annotation?
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
19 #if $reference_annotation.use_ref == "Use reference annotation":
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
20 -G $reference_annotation.reference_annotation_file
8
065e8d11a08c Uploaded
geert-vandeweyer
parents: 7
diff changeset
21 $reference_annotation.compatible_hits_norm
0
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
22 #end if
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
23 #if $reference_annotation.use_ref == "Use reference annotation guide":
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
24 -g $reference_annotation.reference_annotation_guide_file
9
e5139e144b56 3_xxx format in paramater naming is illegal.
geert-vandeweyer
parents: 8
diff changeset
25 --3-overhang-tolerance=$reference_annotation.three_overhang_tolerance
7
e87fb5178f4b correction to conditional variables
geert-vandeweyer
parents: 4
diff changeset
26 --intron-overhang-tolerance=$reference_annotation.intron_overhang_tolerance
e87fb5178f4b correction to conditional variables
geert-vandeweyer
parents: 4
diff changeset
27 $reference_annotation.no_faux_reads
0
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
28 #end if
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
29
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
30 ## Bias correction?
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
31 #if $bias_correction.do_bias_correction == "Yes":
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
32 -b
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
33 #if $bias_correction.seq_source.index_source == "history":
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
34 --ref_file=$bias_correction.seq_source.ref_file
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
35 #else:
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
36 --index=${bias_correction.seq_source.index.fields.path}
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
37 #end if
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
38 #end if
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
39
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
40 ## Multi-read correct?
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
41 #if str($multiread_correct) == "Yes":
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
42 -u
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
43 #end if
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
44
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
45 ## Include global model if available.
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
46 #if $global_model:
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
47 --global_model=$global_model
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
48 #end if
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
49
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
50 ## advanced settings
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
51 #if $advanced_settings.use_advanced_settings == "Yes":
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
52 --library-type=$advanced_settings.library_type
4
03fdfb596475 Uploaded
geert-vandeweyer
parents: 3
diff changeset
53 #if $advanced_settings.mask_file:
0
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
54 --mask-file=$advanced_settings.mask_file
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
55 #end if
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
56 --inner-mean-dist=$advanced_settings.inner_mean_dist
11
c668946e25d1 Corrected innder-dist-std-dev typo
geert-vandeweyer
parents: 10
diff changeset
57 --inner-dist-std-dev=$advanced_settings.inner_dist_std_dev
0
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
58 --max-mle-iterations=$advanced_settings.max_mle_iterations
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
59 --junc-alpha=$advanced_settings.junc_alpha
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
60 --small-anchor-fraction=$advanced_settings.small_anchor_fraction
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
61 --overhang-tolerance=$advanced_settings.overhang_tolerance
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
62 --max-bundle-length=$advanced_settings.max_bundle_length
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
63 --max-bundle-frags=$advanced_settings.max_bundle_frags
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
64 --min-intron-length=$advanced_settings.min_intron_length
9
e5139e144b56 3_xxx format in paramater naming is illegal.
geert-vandeweyer
parents: 8
diff changeset
65 --trim-3-avgcov-thresh=$advanced_settings.trim_three_avgcov_thresh
e5139e144b56 3_xxx format in paramater naming is illegal.
geert-vandeweyer
parents: 8
diff changeset
66 --trim-3-dropoff-frac=$advanced_settings.trim_three_dropoff_frac
0
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
67 #end if
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
68
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
69 </command>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
70 <inputs>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
71 <param format="sam,bam" name="input" type="data" label="SAM or BAM file of aligned RNA-Seq reads" help=""/>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
72 <param name="max_intron_len" type="integer" value="300000" min="1" max="600000" label="Max Intron Length" help="ignore alignments with gaps longer than this"/>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
73 <param name="min_isoform_fraction" type="float" value="0.10" min="0" max="1" label="Min Isoform Fraction" help="suppress transcripts below this abundance level"/>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
74 <param name="pre_mrna_fraction" type="float" value="0.15" min="0" max="1" label="Pre MRNA Fraction" help="suppress intra-intronic transcripts below this level"/>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
75 <conditional name="reference_annotation">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
76 <param name="use_ref" type="select" label="Use Reference Annotation">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
77 <option value="No" selected="true">No</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
78 <option value="Use reference annotation">Use reference annotation</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
79 <option value="Use reference annotation guide">Use reference annotation as guide</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
80 </param>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
81 <when value="No"></when>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
82 <when value="Use reference annotation">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
83 <param format="gff3,gtf" name="reference_annotation_file" type="data" label="Reference Annotation" help="Gene annotation dataset in GTF or GFF3 format."/>
3
1312bf8f5e17 Uploaded
geert-vandeweyer
parents: 2
diff changeset
84 <param name="compatible_hits_norm" type="select" label="Count hits compatible with reference RNAs only" help="With this option, Cufflinks counts only those fragments compatible with some reference transcript towards the number of mapped hits used in the FPKM denominator. This option can only be used in combination with --GTF.">
0
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
85 <option value="" selected="True">No</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
86 <option value="--compatible-hits-norm">Yes</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
87 </param>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
88 </when>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
89 <when value="Use reference annotation guide">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
90 <param format="gff3,gtf" name="reference_annotation_guide_file" type="data" label="Reference Annotation" help="Gene annotation dataset in GTF or GFF3 format."/>
9
e5139e144b56 3_xxx format in paramater naming is illegal.
geert-vandeweyer
parents: 8
diff changeset
91 <param name="three_overhang_tolerance" type="integer" value="600" label="3prime overhang tolerance" help="The number of bp allowed to overhang the 3prime end of a reference transcript when determining if an assembled transcript should be merged with it (ie, the assembled transcript is not novel). The default is 600 bp." />
0
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
92 <param name="intron_overhang_tolerance" type="integer" value="50" label="Intronic overhang tolerance" help="The number of bp allowed to enter the intron of a reference transcript when determining if an assembled transcript should be merged with it (ie, the assembled transcript is not novel). The default is 50 bp." />
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
93 <param name="no_faux_reads" type="select" label="Disable tiling of reference transcripts" help="This option disables tiling of the reference transcripts with faux reads. Use this if you only want to use sequencing reads in assembly but do not want to output assembled transcripts that lay within reference transcripts. All reference transcripts in the input annotation will also be included in the output.">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
94 <option value="" selected="True">No</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
95 <option value="--no-faux-reads">Yes</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
96 </param>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
97 </when>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
98 </conditional>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
99 <conditional name="bias_correction">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
100 <param name="do_bias_correction" type="select" label="Perform Bias Correction" help="Bias detection and correction can significantly improve accuracy of transcript abundance estimates.">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
101 <option value="No" selected="true">No</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
102 <option value="Yes">Yes</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
103 </param>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
104 <when value="Yes">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
105 <conditional name="seq_source">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
106 <param name="index_source" type="select" label="Reference sequence data">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
107 <option value="cached" selected="true">Locally cached</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
108 <option value="history">History</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
109 </param>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
110 <when value="cached">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
111 <param name="index" type="select" label="Using reference genome">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
112 <options from_data_table="fasta_indexes">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
113 <filter type="data_meta" ref="input" key="dbkey" column="1" />
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
114 <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" />
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
115 </options>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
116 </param>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
117 </when>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
118 <when value="history">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
119 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
120 </when>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
121 </conditional>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
122 </when>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
123 <when value="No"></when>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
124 </conditional>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
125
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
126 <param name="multiread_correct" type="select" label="Use multi-read correct" help="Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome.">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
127 <option value="No" selected="true">No</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
128 <option value="Yes">Yes</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
129 </param>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
130
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
131 <param name="length_correction" type="select" label="Apply length correction" help="Mode of length normalization to transcript FPKM.">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
132 <option value="" selected="true">Cufflinks Effective Length Correction</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
133 <option value="--no-effective-length-correction">Standard Length Correction</option>
10
6fc77371a25f Corrected no-lenght-correction typo
geert-vandeweyer
parents: 9
diff changeset
134 <option value="--no-length-correction">No Length Correction at all (use raw counts)</option>
0
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
135 </param>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
136
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
137 <param name="global_model" type="hidden_data" label="Global model (for use in Trackster)" optional="True"/>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
138
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
139 <!-- advanced settings -->
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
140 <conditional name="advanced_settings">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
141 <param name="use_advanced_settings" type="select" label="Set advanced Cufflinks options" help="">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
142 <option value="No" selected="true">No</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
143 <option value="Yes" >Yes</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
144 </param>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
145 <when value="No"></when>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
146 <when value="Yes">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
147
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
148 <param type="select" name="library_type" label="Library prep used for input reads" help="">
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
149 <option value="auto" selected="True">Auto Detect</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
150 <option value="ff-firststrand">ff-firststrand</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
151 <option value="ff-secondstrand">ff-secondstrand</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
152 <option value="ff-unstranded">ff-unstranded</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
153 <option value="fr-firststrand">fr-firststrand</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
154 <option value="fr-secondstrand">fr-secondstrand</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
155 <option value="fr-unstranded" >fr-unstranded</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
156 <option value="transfrags">transfrags</option>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
157 </param>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
158
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
159 <param name="mask_file" type="data" format="gff3,gtf" label="Mask File" help="Ignore all alignment within transcripts in this file " optional="True" />
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
160
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
161 <param name="inner_mean_dist" type="integer" value="45" label="Inner mean distance" help="This is the expected (mean) inner distance between mate pairs. For, example, for paired end runs with fragments selected at 300bp,where each end is 50bp, you should set it as 200. The default is 45bp." />
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
162
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
163 <param name="inner_dist_std_dev" type="integer" value="20" label="Inner distance standard deviation" help="The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp." />
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
164
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
165 <param name="max_mle_iterations" type="integer" value="5000" label="Max MLE iterations" help="Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000" />
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
166
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
167 <param name="junc_alpha" type="float" value="0.001" min="0" max="1" label="Alpha value for the binomial test used during false positive spliced alignment filtration" help="Default: 0.001" />
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
168
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
169 <param name="small_anchor_fraction" type="float" value="0.09" min="0" max="1" label="percent read overhang taken as suspiciously small" help="Spliced reads with less than this percent of their length on each side of the junction are considered suspicious and are candidates for filtering prior to assembly. Default: 0.09." />
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
170
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
171 <param name="overhang_tolerance" type="integer" value="8" label="Intronic overhang tolerance" help="The number of bp allowed to enter the intron of a transcript when determining if a read or another transcript is mappable to/compatible with it. The default is 8 bp based on the default bowtie/TopHat parameters." />
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
172
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
173 <param name="max_bundle_length" type="integer" value="3500000" label="Maximum genomic length of a given bundle" help="Default: 3,500,000bp" />
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
174
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
175 <param name="max_bundle_frags" type="integer" value="1000000" label="Maximum number of fragments per locus" help="Sets the maximum number of fragments a locus may have before being skipped. Skipped loci are listed in skipped.gtf. Default: 1,000,000" />
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
176
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
177 <param name="min_intron_length" type="integer" value="50" label="Minimal allowed intron size" help="Default: 50bp" />
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
178
9
e5139e144b56 3_xxx format in paramater naming is illegal.
geert-vandeweyer
parents: 8
diff changeset
179 <param name="trim_three_avgcov_thresh" type="integer" value="10" label="Minimum average coverage required to attempt 3prime trimming." help="Default: 10" />
0
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
180
9
e5139e144b56 3_xxx format in paramater naming is illegal.
geert-vandeweyer
parents: 8
diff changeset
181 <param name="trim_three_dropoff_frac" type="float" value="0.1" min="0" max="1" label="The fraction of average coverage below which to trim the 3prime end of an assembled transcript." help="Default: 0.1"/>
0
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
182
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
183 </when>
2
e6da12b2440e Fix to advanced settings
geert-vandeweyer
parents: 1
diff changeset
184 </conditional>
0
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
185 </inputs>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
186 <!-- output files -->
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
187 <outputs>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
188 <data format="tabular" name="genes_expression" label="${tool.name} on ${on_string}: gene expression" from_work_dir="genes.fpkm_tracking"/>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
189 <data format="tabular" name="transcripts_expression" label="${tool.name} on ${on_string}: transcript expression" from_work_dir="isoforms.fpkm_tracking"/>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
190 <data format="gtf" name="assembled_isoforms" label="${tool.name} on ${on_string}: assembled transcripts"/>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
191 <data format="txt" name="total_map_mass" label="${tool.name} on ${on_string}: total map mass" hidden="true" from_work_dir="global_model.txt"/>
12
e6ac542a288e Updated Test Data
geert-vandeweyer
parents: 11
diff changeset
192 <data format="gtf" name="skipped" label="${tool.name} on ${on_string}: Skipped Transcripts" from_work_dir="skipped.gtf"/>
0
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
193 </outputs>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
194
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
195 <trackster_conf>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
196 <action type="set_param" name="global_model" output_name="total_map_mass"/>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
197 </trackster_conf>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
198
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
199 <tests>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
200 <!--
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
201 Simple test that uses test data included with cufflinks.
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
202 -->
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
203 <test>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
204 <param name="input" value="cufflinks_in.bam"/>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
205 <param name="max_intron_len" value="300000"/>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
206 <param name="min_isoform_fraction" value="0.05"/>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
207 <param name="pre_mrna_fraction" value="0.05"/>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
208 <param name="use_ref" value="No"/>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
209 <param name="do_bias_correction" value="No"/>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
210 <param name="multiread_correct" value="No"/>
12
e6ac542a288e Updated Test Data
geert-vandeweyer
parents: 11
diff changeset
211 <param name="length_correction" value="Cufflinks Effective Length Correction"/>
0
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
212 <param name="use_advanced_settings" value="No" />
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
213 <output name="genes_expression" format="tabular" lines_diff="2" file="cufflinks_out3.fpkm_tracking"/>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
214 <output name="transcripts_expression" format="tabular" lines_diff="2" file="cufflinks_out2.fpkm_tracking"/>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
215 <output name="assembled_isoforms" file="cufflinks_out1.gtf"/>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
216 <output name="total_map_mass" file="cufflinks_out4.txt"/>
12
e6ac542a288e Updated Test Data
geert-vandeweyer
parents: 11
diff changeset
217 <output name="skipped" file="cufflinks_out4.gtf"/>
0
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
218 </test>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
219 </tests>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
220
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
221 <help>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
222 **Cufflinks Overview**
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
223
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
224 Cufflinks_ assembles transcripts, estimates their abundances, and tests for differential expression and regulation in RNA-Seq samples. It accepts aligned RNA-Seq reads and assembles the alignments into a parsimonious set of transcripts. Cufflinks then estimates the relative abundances of these transcripts based on how many reads support each one. Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
225
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
226 .. _Cufflinks: http://cufflinks.cbcb.umd.edu/
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
227
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
228 ------
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
229
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
230 **Know what you are doing**
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
231
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
232 .. class:: warningmark
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
233
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
234 There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
235
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
236 .. __: http://cufflinks.cbcb.umd.edu/manual.html
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
237
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
238 ------
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
239
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
240 **Input formats**
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
241
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
242 Cufflinks takes a file of SAM alignments (or its binary equivalent, BAM) as input. The RNA-Seq read mapper TopHat produces output in this format, and is recommended for use with Cufflinks. However, Cufflinks will accept SAM alignments generated by any read mapper. Here's an example of an alignment Cufflinks will accept::
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
243
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
244 s6.25mer.txt-913508 16 chr1 4482736 255 14M431N11M * 0 0 \
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
245 CAAGATGCTAGGCAAGTCTTGGAAG IIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 XS:A:-
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
246
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
247 Note the use of the custom tag XS. This attribute, which must have a value of "+" or "-", indicates which strand the RNA that produced this read came from. While this tag can be applied to any alignment, including unspliced ones, it must be present for all spliced alignment records (those with a 'N' operation in the CIGAR string).
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
248 The SAM file supplied to Cufflinks must be sorted by reference position. If you aligned your reads with TopHat, your alignments will be properly sorted already. If you used another tool, you may want to make sure they are properly sorted as follows::
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
249
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
250 sort -k 3,3 -k 4,4n hits.sam > hits.sam.sorted
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
251
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
252 NOTE: Cufflinks currently only supports SAM alignments with the CIGAR match ('M') and reference skip ('N') operations. Support for the other operations, such as insertions, deletions, and clipping, will be added in the future.
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
253
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
254 ------
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
255
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
256 **Outputs**
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
257
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
258 Cufflinks produces three output files:
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
259
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
260 Transcripts and Genes:
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
261
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
262 This GTF file contains Cufflinks' assembled isoforms. The first 7 columns are standard GTF, and the last column contains attributes, some of which are also standardized (e.g. gene_id, transcript_id). There is one GTF record per row, and each record represents either a transcript or an exon within a transcript. The columns are defined as follows::
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
263
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
264 Column number Column name Example Description
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
265 -----------------------------------------------------
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
266 1 seqname chrX Chromosome or contig name
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
267 2 source Cufflinks The name of the program that generated this file (always 'Cufflinks')
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
268 3 feature exon The type of record (always either "transcript" or "exon").
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
269 4 start 77696957 The leftmost coordinate of this record (where 1 is the leftmost possible coordinate)
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
270 5 end 77712009 The rightmost coordinate of this record, inclusive.
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
271 6 score 1000 The most abundant isoform for each gene is assigned a score of 1000. Minor isoforms are scored by the ratio (minor FPKM/major FPKM)
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
272 7 strand + Cufflinks' guess for which strand the isoform came from. Always one of '+', '-', or '.'
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
273 8 frame . Cufflinks does not predict where the start and stop codons (if any) are located within each transcript, so this field is not used.
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
274 9 attributes See below
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
275
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
276 Each GTF record is decorated with the following attributes::
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
277
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
278 Attribute Example Description
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
279 -----------------------------------------
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
280 gene_id CUFF.1 Cufflinks gene id
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
281 transcript_id CUFF.1.1 Cufflinks transcript id
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
282 FPKM 101.267 Isoform-level relative abundance in Fragments Per Kilobase of exon model per Million mapped fragments
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
283 frac 0.7647 Reserved. Please ignore, as this attribute may be deprecated in the future
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
284 conf_lo 0.07 Lower bound of the 95% confidence interval of the abundance of this isoform, as a fraction of the isoform abundance. That is, lower bound = FPKM * (1.0 - conf_lo)
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
285 conf_hi 0.1102 Upper bound of the 95% confidence interval of the abundance of this isoform, as a fraction of the isoform abundance. That is, upper bound = FPKM * (1.0 + conf_hi)
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
286 cov 100.765 Estimate for the absolute depth of read coverage across the whole transcript
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
287
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
288
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
289 Transcripts only:
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
290 This file is simply a tab delimited file containing one row per transcript and with columns containing the attributes above. There are a few additional attributes not in the table above, but these are reserved for debugging, and may change or disappear in the future.
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
291
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
292 Genes only:
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
293 This file contains gene-level coordinates and expression values.
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
294
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
295 -------
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
296
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
297 **Cufflinks settings**
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
298
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
299 All of the options have a default value. You can change any of them. Most of the options in Cufflinks have been implemented here.
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
300
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
301 ------
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
302
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
303 **Cufflinks parameter list**
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
304
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
305 This is a list of implemented Cufflinks options::
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
306
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
307 -m INT This is the expected (mean) inner distance between mate pairs. For example, for paired end runs with fragments selected at 300bp, where each end is 50bp, you should set -m to be 200. The default is 45bp.
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
308 -s INT The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
309 -I INT The maximum intron length. Cufflinks will not report transcripts with introns longer than this, and will ignore SAM alignments with REF_SKIP CIGAR operations longer than this. The default is 300,000.
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
310 -F After calculating isoform abundance for a gene, Cufflinks filters out transcripts that it believes are very low abundance, because isoforms expressed at extremely low levels often cannot reliably be assembled, and may even be artifacts of incompletely spliced precursors of processed transcripts. This parameter is also used to filter out introns that have far fewer spliced alignments supporting them. The default is 0.05, or 5% of the most abundant isoform (the major isoform) of the gene.
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
311 -j Some RNA-Seq protocols produce a significant amount of reads that originate from incompletely spliced transcripts, and these reads can confound the assembly of fully spliced mRNAs. Cufflinks uses this parameter to filter out alignments that lie within the intronic intervals implied by the spliced alignments. The minimum depth of coverage in the intronic region covered by the alignment is divided by the number of spliced reads, and if the result is lower than this parameter value, the intronic alignments are ignored. The default is 5%.
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
312 -G Tells Cufflinks to use the supplied reference annotation to estimate isoform expression. It will not assemble novel transcripts, and the program will ignore alignments not structurally compatible with any reference transcript.
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
313 -N With this option, Cufflinks excludes the contribution of the top 25 percent most highly expressed genes from the number of mapped fragments used in the FPKM denominator. This can improve robustness of differential expression calls for less abundant genes and transcripts.
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
314 </help>
ae8ad9d17e34 Uploaded
geert-vandeweyer
parents:
diff changeset
315 </tool>