comparison cuffdiff_wrapper.xml @ 1:6a9bc26ab8d9

Added cuffdata and cuffdatadb datatypes, cuffdiff multiselect outputs
author Jim Johnson <jj@umn.edu>
date Mon, 08 Oct 2012 15:26:47 -0500
parents
children de024d31e777
comparison
equal deleted inserted replaced
0:8ba5f0460b1e 1:6a9bc26ab8d9
1 <tool id="cuffdiff" name="Cuffdiff" version="0.0.5">
2 <!-- Wrapper supports Cuffdiff versions v1.3.0-v2.0 -->
3 <description>find significant changes in transcript expression, splicing, and promoter use</description>
4 <requirements>
5 <requirement type="package">cufflinks</requirement>
6 </requirements>
7 <command interpreter="python">
8 #set sel_outputs = $output_sel.__str__.split(',')
9 cuffdiff_wrapper.py
10 --FDR=$fdr
11 --num-threads="4"
12 --min-alignment-count=$min_alignment_count
13 ## Outputs: a destination is passed only for datasets ticked in output_sel (cuffdata is also passed when nothing is ticked).
14 #if 'cuffdata' in $sel_outputs or not $output_sel:
15 --cuffdatadir=$cuffdata.extra_files_path
16 #end if
17 #if 'cummeRbund_db' in $sel_outputs:
18 --cummeRbund_db=$cummeRbund_db
19 #end if
20 ## Individual cuffdiff result files; each guard must use the exact option value declared on the output_sel param.
21 #if 'isoforms_fpkm_tracking' in $sel_outputs:
22 --isoforms_fpkm_tracking_output=$isoforms_fpkm_tracking
23 #end if
24 #if 'genes_fpkm_tracking' in $sel_outputs:
25 --genes_fpkm_tracking_output=$genes_fpkm_tracking
26 #end if
27 #if 'cds_fpkm_tracking' in $sel_outputs:
28 --cds_fpkm_tracking_output=$cds_fpkm_tracking
29 #end if
30 #if 'tss_groups_fpkm_tracking' in $sel_outputs:
31 --tss_groups_fpkm_tracking_output=$tss_groups_fpkm_tracking
32 #end if
33 #if 'isoforms_exp_diff' in $sel_outputs:
34 --isoforms_exp_output=$isoforms_exp_diff
35 #end if
36 #if 'genes_exp_diff' in $sel_outputs:
37 --genes_exp_output=$genes_exp_diff
38 #end if
39 #if 'tss_groups_exp_diff' in $sel_outputs:
40 --tss_groups_exp_output=$tss_groups_exp_diff
41 #end if
42 #if 'cds_exp_diff' in $sel_outputs:
43 --cds_exp_fpkm_tracking_output=$cds_exp_diff
44 #end if
45 #if 'splicing_diff' in $sel_outputs:
46 --splicing_diff_output=$splicing_diff
47 #end if
48 #if 'cds_diff' in $sel_outputs:
49 --cds_diff_output=$cds_diff
50 #end if
51 #if 'promoters_diff' in $sel_outputs:
52 --promoters_diff_output=$promoters_diff
53 #end if
54 #if 'cds_read_group_tracking' in $sel_outputs:
55 --cds_read_group_tracking=$cds_read_group_tracking
56 #end if
57 #if 'tss_groups_read_group_tracking' in $sel_outputs:
58 --tss_groups_read_group_tracking=$tss_groups_read_group_tracking
59 #end if
60 #if 'genes_read_group_tracking' in $sel_outputs:
61 --genes_read_group_tracking=$genes_read_group_tracking
62 #end if
63 #if 'isoforms_read_group_tracking' in $sel_outputs:
64 --isoforms_read_group_tracking=$isoforms_read_group_tracking
65 #end if
66 ## NOTE(review): run_info, read_groups_info and the four count_tracking selections get no option here; confirm cuffdiff_wrapper.py fills those datasets another way.
67 ## Set advanced data parameters?
68 #if $additional.sAdditional == "Yes":
69 -m $additional.frag_mean_len
70 -s $additional.frag_len_std_dev
71 #end if
72
73 ## Normalization?
74 #if str($do_normalization) == "Yes":
75 -N
76 #end if
77
78 ## Multi-read correct?
79 #if str($multiread_correct) == "Yes":
80 -u
81 #end if
82
83 ## Bias correction?
84 #if $bias_correction.do_bias_correction == "Yes":
85 -b
86 #if $bias_correction.seq_source.index_source == "history":
87 --ref_file=$bias_correction.seq_source.ref_file
88 #else:
89 --ref_file="None"
90 #end if
91 --dbkey=${gtf_input.metadata.dbkey}
92 --index_dir=${GALAXY_DATA_INDEX_DIR}
93 #end if
94
95 ## Inputs.
96 --inputA=$gtf_input
97 #if $group_analysis.do_groups == "No":
98 --input1=$aligned_reads1
99 --input2=$aligned_reads2
100 #else:
101 ## Replicates: all group labels first, then each group's files followed by a "," separator.
102 --labels
103 #for $group in $group_analysis.groups
104 ${group.group}
105 #end for
106 --files
107 #for $group in $group_analysis.groups
108 #for $file in $group.files:
109 ${file.file}
110 #end for
111 ,
112 #end for
113 #end if
114
115 </command>
116 <inputs>
117 <param format="gtf" name="gtf_input" type="data" label="Transcripts" help="A transcript GTF file produced by cufflinks, cuffcompare, or other source."/>
118 <conditional name="group_analysis">
119 <param name="do_groups" type="select" label="Perform replicate analysis" help="Perform cuffdiff with replicates in each group.">
120 <option value="No">No</option>
121 <option value="Yes">Yes</option>
122 </param>
123 <when value="Yes">
124 <repeat name="groups" title="Group">
125 <param name="group" title="Group name" type="text" label="Group name (no spaces or commas)"/>
126 <repeat name="files" title="Replicate">
127 <param name="file" label="Add file" type="data" format="sam,bam"/>
128 </repeat>
129 </repeat>
130 </when>
131 <when value="No">
132 <param format="sam,bam" name="aligned_reads1" type="data" label="SAM or BAM file of aligned RNA-Seq reads" help=""/>
133 <param format="sam,bam" name="aligned_reads2" type="data" label="SAM or BAM file of aligned RNA-Seq reads" help=""/>
134 </when>
135 </conditional>
136 <!-- Significance-testing thresholds. -->
137 <param name="fdr" type="float" value="0.05" label="False Discovery Rate" help="The allowed false discovery rate."/>
138
139 <param name="min_alignment_count" type="integer" value="10" label="Min Alignment Count" help="The minimum number of alignments in a locus for needed to conduct significance testing on changes in that locus observed between samples."/>
140 <!-- Yes/No switches; the command template turns "Yes" into the -N, -u and -b flags. -->
141 <param name="do_normalization" type="select" label="Perform quartile normalization" help="Removes top 25% of genes from FPKM denominator to improve accuracy of differential expression calls for low abundance transcripts.">
142 <option value="No">No</option>
143 <option value="Yes">Yes</option>
144 </param>
145
146 <param name="multiread_correct" type="select" label="Use multi-read correct" help="Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome.">
147 <option value="No" selected="true">No</option>
148 <option value="Yes">Yes</option>
149 </param>
150
151 <conditional name="bias_correction">
152 <param name="do_bias_correction" type="select" label="Perform Bias Correction" help="Bias detection and correction can significantly improve accuracy of transcript abundance estimates.">
153 <option value="No">No</option>
154 <option value="Yes">Yes</option>
155 </param>
156 <when value="Yes">
157 <conditional name="seq_source">
158 <param name="index_source" type="select" label="Reference sequence data">
159 <option value="cached">Locally cached</option>
160 <option value="history">History</option>
161 </param>
162 <when value="cached"></when>
163 <when value="history">
164 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
165 </when>
166 </conditional>
167 </when>
168 <when value="No"></when>
169 </conditional>
170 <!-- Fragment length settings, passed as -m / -s only when "Yes" is chosen. -->
171 <conditional name="additional">
172 <param name="sAdditional" type="select" label="Set Additional Parameters? (not recommended)">
173 <option value="No">No</option>
174 <option value="Yes">Yes</option>
175 </param>
176 <when value="No"></when>
177 <when value="Yes">
178 <param name="frag_mean_len" type="integer" value="200" label="Average Fragment Length"/>
179 <param name="frag_len_std_dev" type="integer" value="80" label="Fragment Length Standard Deviation"/>
180 </when>
181 </conditional>
182 <!-- Option values below are matched verbatim by the command template and by the output filters; keep all three in sync. -->
183 <param name="output_sel" type="select" multiple="true" display="checkboxes" force_select="true" label="Select outputs for history datasets">
184 <option value="cuffdata">cuffdata - html page with links to cuffdiff outputs</option>
185 <option value="cummeRbund_db">cummeRbund database</option>
186 <option value="run_info">run.info</option>
187 <option value="read_groups_info">read_groups.info</option>
188 <option value="splicing_diff">splicing.diff</option>
189 <option value="promoters_diff">promoters.diff</option>
190 <option value="genes_exp_diff">genes_exp.diff</option>
191 <option value="genes_fpkm_tracking">genes.fpkm_tracking</option>
192 <option value="genes_count_tracking">genes.count_tracking</option>
193 <option value="genes_read_group_tracking">genes.read_group_tracking</option>
194 <option value="isoforms_exp_diff">isoforms_exp.diff</option>
195 <option value="isoforms_fpkm_tracking">isoforms.fpkm_tracking</option>
196 <option value="isoforms_count_tracking">isoforms.count_tracking</option>
197 <option value="isoforms_read_group_tracking">isoforms.read_group_tracking</option>
198 <option value="cds_diff">cds.diff</option>
199 <option value="cds_exp_diff">cds_exp.diff</option>
200 <option value="cds_fpkm_tracking">cds.fpkm_tracking</option>
201 <option value="cds_count_tracking">cds.count_tracking</option>
202 <option value="cds_read_group_tracking">cds.read_group_tracking</option>
203 <option value="tss_groups_exp_diff">tss_groups_exp.diff</option>
204 <option value="tss_groups_fpkm_tracking">tss_groups.fpkm_tracking</option>
205 <option value="tss_groups_count_tracking">tss_groups.count_tracking</option>
206 <option value="tss_groups_read_group_tracking">tss_groups.read_group_tracking</option>
207 </param>
208
209 </inputs>
210
211 <outputs> <!-- Each dataset is created only when its name is ticked in output_sel; cuffdata is also created when nothing is ticked. Filters use the full option value so they match whether output_sel is a list or a string. -->
212 <data format="txt" name="run_info" label="${tool.name} on ${on_string}: run.info">
213 <filter>output_sel and 'run_info' in output_sel</filter>
214 </data>
215 <data format="tabular" name="read_groups_info" label="${tool.name} on ${on_string}: read_groups.info">
216 <filter>output_sel and 'read_groups_info' in output_sel</filter>
217 </data>
218 <data format="tabular" name="splicing_diff" label="${tool.name} on ${on_string}: splicing differential expression testing">
219 <filter>output_sel and 'splicing_diff' in output_sel</filter>
220 </data>
221 <data format="tabular" name="promoters_diff" label="${tool.name} on ${on_string}: promoters differential expression testing">
222 <filter>output_sel and 'promoters_diff' in output_sel</filter>
223 </data>
224 <data format="tabular" name="cds_diff" label="${tool.name} on ${on_string}: CDS overloading differential expression testing">
225 <filter>output_sel and 'cds_diff' in output_sel</filter>
226 </data>
227 <data format="tabular" name="cds_exp_diff" label="${tool.name} on ${on_string}: CDS differential expression testing">
228 <filter>output_sel and 'cds_exp_diff' in output_sel</filter>
229 </data>
230 <data format="tabular" name="cds_fpkm_tracking" label="${tool.name} on ${on_string}: CDS FPKM tracking">
231 <filter>output_sel and 'cds_fpkm_tracking' in output_sel</filter>
232 </data>
233 <data format="tabular" name="cds_count_tracking" label="${tool.name} on ${on_string}: CDS counts">
234 <filter>output_sel and 'cds_count_tracking' in output_sel</filter>
235 </data>
236 <data format="tabular" name="cds_read_group_tracking" label="${tool.name} on ${on_string}: CDS Read Group tracking">
237 <filter>output_sel and 'cds_read_group_tracking' in output_sel</filter>
238 </data>
239 <data format="tabular" name="tss_groups_exp_diff" label="${tool.name} on ${on_string}: TSS groups differential expression testing">
240 <filter>output_sel and 'tss_groups_exp_diff' in output_sel</filter>
241 </data>
242 <data format="tabular" name="tss_groups_fpkm_tracking" label="${tool.name} on ${on_string}: TSS groups FPKM tracking">
243 <filter>output_sel and 'tss_groups_fpkm_tracking' in output_sel</filter>
244 </data>
245 <data format="tabular" name="tss_groups_count_tracking" label="${tool.name} on ${on_string}: TSS groups counts">
246 <filter>output_sel and 'tss_groups_count_tracking' in output_sel</filter>
247 </data>
248 <data format="tabular" name="tss_groups_read_group_tracking" label="${tool.name} on ${on_string}: TSS groups Read Group tracking">
249 <filter>output_sel and 'tss_groups_read_group_tracking' in output_sel</filter>
250 </data>
251 <data format="tabular" name="isoforms_exp_diff" label="${tool.name} on ${on_string}: transcript differential expression testing">
252 <filter>output_sel and 'isoforms_exp_diff' in output_sel</filter>
253 </data>
254 <data format="tabular" name="isoforms_fpkm_tracking" label="${tool.name} on ${on_string}: transcript FPKM tracking">
255 <filter>output_sel and 'isoforms_fpkm_tracking' in output_sel</filter>
256 </data>
257 <data format="tabular" name="isoforms_count_tracking" label="${tool.name} on ${on_string}: transcript counts">
258 <filter>output_sel and 'isoforms_count_tracking' in output_sel</filter>
259 </data>
260 <data format="tabular" name="isoforms_read_group_tracking" label="${tool.name} on ${on_string}: transcript Read Group tracking">
261 <filter>output_sel and 'isoforms_read_group_tracking' in output_sel</filter>
262 </data>
263 <data format="tabular" name="genes_exp_diff" label="${tool.name} on ${on_string}: gene differential expression testing">
264 <filter>output_sel and 'genes_exp_diff' in output_sel</filter>
265 </data>
266 <data format="tabular" name="genes_fpkm_tracking" label="${tool.name} on ${on_string}: gene FPKM tracking">
267 <filter>output_sel and 'genes_fpkm_tracking' in output_sel</filter>
268 </data>
269 <data format="tabular" name="genes_count_tracking" label="${tool.name} on ${on_string}: gene counts">
270 <filter>output_sel and 'genes_count_tracking' in output_sel</filter>
271 </data>
272 <data format="tabular" name="genes_read_group_tracking" label="${tool.name} on ${on_string}: gene Read Group tracking">
273 <filter>output_sel and 'genes_read_group_tracking' in output_sel</filter>
274 </data>
275 <data format="cuffdata" name="cuffdata" label="${tool.name} on ${on_string}: cuffdata" >
276 <filter>not output_sel or output_sel and 'cuffdata' in output_sel</filter>
277 </data>
278 <data format="cuffdatadb" name="cummeRbund_db" label="${tool.name} on ${on_string}: cummeRbund Database" >
279 <filter>output_sel and 'cummeRbund_db' in output_sel</filter>
280 </data>
281 </outputs>
282 <stdio> <!-- Exit-code rule: the range "1:" is reported at level "fatal" with the description below. -->
283 <exit_code range="1:" level="fatal" description="Cufflinks Err" />
284 </stdio>
285
286
287 <tests>
288 <test>
289 <!--
290 cuffdiff cuffcompare_out5.gtf cuffdiff_in1.sam cuffdiff_in2.sam
291 -->
292 <param name="gtf_input" value="cuffcompare_out5.gtf" ftype="gtf" />
293 <param name="do_groups" value="No" />
294 <param name="aligned_reads1" value="cuffdiff_in1.sam" ftype="sam" />
295 <param name="aligned_reads2" value="cuffdiff_in2.sam" ftype="sam" />
296 <!-- Defaults. -->
297 <param name="fdr" value="0.05" />
298 <param name="min_alignment_count" value="0" />
299 <param name="do_bias_correction" value="No" />
300 <param name="do_normalization" value="No" />
301 <param name="multiread_correct" value="No"/>
302 <param name="sAdditional" value="No"/>
303 <!--
304 Line diffs are needed because cuffdiff does not produce deterministic output.
305 TODO: can we find datasets that lead to deterministic behavior? NOTE(review): this test never sets output_sel, so the outputs compared below may be filtered out; confirm and select them explicitly.
306 -->
307 <output name="splicing_diff" file="cuffdiff_out9.txt"/>
308 <output name="promoters_diff" file="cuffdiff_out10.txt"/>
309 <output name="cds_diff" file="cuffdiff_out11.txt"/>
310 <output name="cds_exp_diff" file="cuffdiff_out4.txt"/>
311 <output name="cds_fpkm_tracking" file="cuffdiff_out8.txt"/>
312 <output name="tss_groups_exp_diff" file="cuffdiff_out3.txt" lines_diff="200"/>
313 <output name="tss_groups_fpkm_tracking" file="cuffdiff_out7.txt"/>
314 <output name="genes_exp_diff" file="cuffdiff_out2.txt" lines_diff="200"/>
315 <output name="genes_fpkm_tracking" file="cuffdiff_out6.txt" lines_diff="200"/>
316 <output name="isoforms_exp_diff" file="cuffdiff_out1.txt" lines_diff="200"/>
317 <output name="isoforms_fpkm_tracking" file="cuffdiff_out5.txt" lines_diff="200"/>
318 </test>
319 </tests>
320
321 <help>
322 **Cuffdiff Overview**
323
324 Cuffdiff is part of Cufflinks_. Cuffdiff finds significant changes in transcript expression, splicing, and promoter use. Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621
325
326 .. _Cufflinks: http://cufflinks.cbcb.umd.edu/
327
328 ------
329
330 **Know what you are doing**
331
332 .. class:: warningmark
333
334 There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
335
336 .. __: http://cufflinks.cbcb.umd.edu/manual.html#cuffdiff
337
338 ------
339
340 **Input format**
341
342 Cuffdiff takes a Cufflinks or Cuffcompare GTF file as input, along with two or more SAM or BAM files containing the fragment alignments for two or more samples.
343
344 ------
345
346 **Outputs**
347
348 Cuffdiff produces many output files:
349
350 1. Transcript FPKM expression tracking.
351 2. Gene FPKM expression tracking; tracks the summed FPKM of transcripts sharing each gene_id
352 3. Primary transcript FPKM tracking; tracks the summed FPKM of transcripts sharing each tss_id
353 4. Coding sequence FPKM tracking; tracks the summed FPKM of transcripts sharing each p_id, independent of tss_id
354 5. Transcript differential FPKM.
355 6. Gene differential FPKM. Tests differences in the summed FPKM of transcripts sharing each gene_id
356 7. Primary transcript differential FPKM. Tests differences in the summed FPKM of transcripts sharing each tss_id
357 8. Coding sequence differential FPKM. Tests differences in the summed FPKM of transcripts sharing each p_id independent of tss_id
358 9. Differential splicing tests: this tab delimited file lists, for each primary transcript, the amount of overloading detected among its isoforms, i.e. how much differential splicing exists between isoforms processed from a single primary transcript. Only primary transcripts from which two or more isoforms are spliced are listed in this file.
359 10. Differential promoter tests: this tab delimited file lists, for each gene, the amount of overloading detected among its primary transcripts, i.e. how much differential promoter use exists between samples. Only genes producing two or more distinct primary transcripts (i.e. multi-promoter genes) are listed here.
360 11. Differential CDS tests: this tab delimited file lists, for each gene, the amount of overloading detected among its coding sequences, i.e. how much differential CDS output exists between samples. Only genes producing two or more distinct CDS (i.e. multi-protein genes) are listed here.
361
362 -------
363
364 **Settings**
365
366 All of the options have a default value. You can change any of them. Most of the options in Cuffdiff have been implemented here.
367
368 ------
369
370 **Cuffdiff parameter list**
371
372 This is a list of implemented Cuffdiff options::
373
374 -m INT Average fragment length; default 200
375 -s INT Fragment length standard deviation; default 80
376 -c INT The minimum number of alignments in a locus needed to conduct significance testing on changes in that locus observed between samples. If no testing is performed, changes in the locus are deemed not significant, and the locus' observed changes don't contribute to correction for multiple testing. The default is 1,000 fragment alignments (up to 2,000 paired reads). Note that the "Min Alignment Count" field of this tool form is pre-filled with 10.
377 --FDR FLOAT The allowed false discovery rate. The default is 0.05.
378 --num-importance-samples INT Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000
379 --max-mle-iterations INT Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000
380 -N With this option, Cufflinks excludes the contribution of the top 25 percent most highly expressed genes from the number of mapped fragments used in the FPKM denominator. This can improve robustness of differential expression calls for less abundant genes and transcripts.
381
382 </help>
383 </tool>