comparison featurecounts.xml @ 2:7c3041c778d0 draft default tip

Uploaded
author dpryan79
date Wed, 09 Nov 2016 16:38:01 -0500
parents 3edb501ec957
children
comparison
equal deleted inserted replaced
1:60b43da9a265 2:7c3041c778d0
1 <tool id="featurecounts" name="featureCounts" version="1.4.6.p5" profile="16.04">
2 <description>Measure gene expression in RNA-Seq experiments from SAM or BAM files.</description>
3 <requirements> <!-- external packages this wrapper depends on -->
4 <requirement type="package" version="1.4.6p5">subread</requirement>
5 </requirements>
6 <!-- Version probe: merges stderr into stdout (2>&1) and keeps only non-empty lines (grep .) -->
7 <version_command>featureCounts -v 2&gt;&amp;1 | grep .</version_command>
8 <command><![CDATA[
9 ## Count reads with featureCounts into the scratch file "output", then reshape that table into the selected tabular layout
10 featureCounts
11 #if $gtf_source.ref_source=="history":
12 -a "$gtf_source.reference_gene_sets"
13 #else:
14 -a "$gtf_source.reference_gene_sets_builtin.fields.path"
15 #end if
16
17 -o "output"
18 -T \${GALAXY_SLOTS:-2}
19
20 -t "$extended_parameters.gff_feature_type"
21 -g "$extended_parameters.gff_feature_attribute"
22 $extended_parameters.summarization_level
23 $extended_parameters.contribute_to_multiple_features
24 -s $extended_parameters.strand_specificity
25 $extended_parameters.multimapping_enabled.multimapping_counts
26 ## --fraction is only emitted when multi-mapping counting (-M) is enabled
27 #if str($extended_parameters.multimapping_enabled.multimapping_counts) == " -M"
28 $extended_parameters.multimapping_enabled.fraction
29 #end if
30
31 -Q $extended_parameters.mapping_quality
32 $extended_parameters.largest_overlap
33 --minOverlap $extended_parameters.min_overlap
34 $extended_parameters.read_reduction
35 $extended_parameters.primary
36 $extended_parameters.ignore_dup
37 $extended_parameters.count_split_alignments_only
38 #if str($extended_parameters.read_extension_5p) != "0"
39 --readExtension5 $extended_parameters.read_extension_5p
40 #end if
41
42 #if str($extended_parameters.read_extension_3p) != "0"
43 --readExtension3 $extended_parameters.read_extension_3p
44 #end if
45
46 $pe_parameters.fragment_counting_enabled.fragment_counting
47 #if str($pe_parameters.fragment_counting_enabled.fragment_counting) == " -p"
48 $pe_parameters.fragment_counting_enabled.check_distance_enabled.check_distance
49 #if str($pe_parameters.fragment_counting_enabled.check_distance_enabled.check_distance) == " -P"
50 -d $pe_parameters.fragment_counting_enabled.check_distance_enabled.minimum_fragment_length
51 -D $pe_parameters.fragment_counting_enabled.check_distance_enabled.maximum_fragment_length
52 #end if
53 #end if
54
55 $pe_parameters.only_both_ends
56 -S $pe_parameters.orientation
57 $pe_parameters.exclude_chimerics
58
59 "${alignment}"
60
61 ## Drop the leading "#" comment line and the column-header line; headers are re-attached as dataset metadata
62 && grep -v "^#" "output" | tail -n+2 > body.txt
63
64 ## Set the right columns for the tabular formats
65 #if $format.value == "tabdel_medium"
66 && cut -f 1,7 body.txt > expression_matrix.txt
67
68 ## cut always emits columns in file order: -f 1,7,6 would return columns 1,6,7
69 ## Thus the gene length column (wanted last) has to be cut separately and pasted on
70 && cut -f 6 body.txt > gene_lengths.txt
71 && paste expression_matrix.txt gene_lengths.txt > expression_matrix.txt.bak
72 && mv -f expression_matrix.txt.bak "${output_medium}"
73 #elif $format.value == "tabdel_short"
74 && cut -f 1,7 body.txt > "${output_short}"
75 #else
76 && cp body.txt "${output_full}"
77 #end if
78
79
80 #if str($include_feature_length_file) == "true"
81 && cut -f 1,6 body.txt > "${output_feature_lengths}"
82 #end if
83
84 && tail -n+2 "output.summary" > "${output_summary}"
85
86 ]]></command>
87 <inputs>
88 <param name="alignment"
89 type="data"
90 multiple="false"
91 format="bam,sam"
92 label="Alignment file"
93 help="The input alignment file in which the gene expression has to be counted. The file can be in SAM or BAM format." /> <!-- single dataset only (multiple="false"), so the help describes one file -->
94
95 <conditional name="gtf_source"> <!-- chooses where the annotation passed to featureCounts -a comes from -->
96 <param name="ref_source" type="select" label="Gene annotation file">
97 <option value="cached">locally cached</option>
98 <option value="history">in your history</option>
99 </param>
100 <when value="cached"> <!-- annotation picked from the gene_sets data table; the command reads its path field -->
101 <param name="reference_gene_sets_builtin" type="select" label="Using locally cached annotation" help="If the annotation file you require is not listed here, please contact the Galaxy administrator">
102 <options from_data_table="gene_sets">
103 <filter type="sort_by" column="1" />
104 <validator type="no_options" message="No annotations are available." />
105 </options>
106 </param>
107 </when>
108 <when value="history"> <!-- annotation supplied as a GFF/GTF/GFF3 dataset from the history -->
109 <param name="reference_gene_sets"
110 format="gff,gtf,gff3"
111 type="data"
112 label="Gene annotation file"
113 help="The program assumes that the provided annotation file is in GTF format. Make sure that the gene annotation file corresponds to the same reference genome as used for the alignment" />
114 </when>
115 </conditional>
116
117 <param name="format"
118 type="select"
119 label="Output format"
120 help="The output format will be tabular, select the preferred columns here">
121 <option value="tabdel_short" selected="true">Gene-ID "\t" read-count (DESeq2 IUC wrapper compatible)</option>
122 <option value="tabdel_medium">Gene-ID "\t" read-count "\t" gene-length</option>
123 <option value="tabdel_full">featureCounts 1.4.0+ default (includes regions provided by the GTF file)</option>
124 </param> <!-- the chosen value selects the cut/cp branch of the command and which count-table output passes its filter -->
125
126 <param name="include_feature_length_file"
127 type="boolean"
128 truevalue="true"
129 falsevalue="false"
130 checked="false"
131 label="Create gene-length file"
132 help="Creates a tabular file that contains the effective (nucleotides used for counting reads) length of the feature; might be useful for estimating FPKM/RPKM" /> <!-- when true, the command writes columns 1 and 6 of the count table to output_feature_lengths -->
133
134
135 <section name="pe_parameters" title="Options for paired-end reads"> <!-- flags that only matter for paired-end alignments -->
136 <conditional name="fragment_counting_enabled">
137 <!-- a select (not a boolean), so it takes no "checked" attribute; the default is the option marked selected. Values are spliced verbatim into the command -->
138 <param name="fragment_counting"
139 type="select"
140 argument="-p"
142 label="Count fragments instead of reads"
143 help="If specified, fragments (or templates) will be counted instead of reads.">
144 <option value="" selected="true">Disabled; all reads/mates will be counted individually</option>
145 <option value=" -p">Enabled; fragments (or templates) will be counted instead of reads</option>
146 </param>
147
148 <when value=" -p">
149 <conditional name="check_distance_enabled">
150 <param name="check_distance"
151 type="boolean"
152 truevalue=" -P"
153 falsevalue=""
154 argument="-P"
155 label="Check paired-end distance"
156 help="If specified, paired-end distance will be checked when assigning fragments to meta-features or features. This option is only applicable when -p (Count fragments instead of reads) is specified. The distance thresholds should be specified using -d and -D (minimum and maximum fragment/template length) options." />
157 <when value=" -P">
158 <param name="minimum_fragment_length"
159 type="integer"
160 value="50"
161 argument="-d"
162 label="Minimum fragment/template length." />
163 <param name="maximum_fragment_length"
164 type="integer"
165 value="600"
166 argument="-D"
167 label="Maximum fragment/template length." />
168 </when>
169 <when value="" />
170 </conditional>
171 </when>
172 <when value="" />
173 </conditional>
174
175 <param name="only_both_ends"
176 type="boolean"
177 truevalue=" -B"
178 falsevalue=""
179 argument="-B"
180 label="Only allow fragments with both reads aligned"
181 help="If specified, only fragments that have both ends successfully aligned will be considered for summarization. This option is only applicable for paired-end reads." />
182
183 <param name="orientation"
184 type="select"
185 label="Orientation of the two reads from the same pair"
186 argument="-S"
187 help="Default is 'fr'">
188 <option value="fr" selected="true">Forward, Reverse (fr)</option>
189 <option value="ff">Forward, Forward (ff)</option>
190 <option value="rf">Reverse, Forward (rf)</option>
191 </param>
192
193 <param name="exclude_chimerics"
194 type="boolean"
195 truevalue=" -C"
196 falsevalue=""
197 argument="-C"
198 checked="true"
199 label="Exclude chimeric fragments"
200 help="If specified, the chimeric fragments (those fragments that have their two ends aligned to different chromosomes) will NOT be included for summarization. This option is only applicable for paired-end read data." />
201 </section>
202
203 <section name="extended_parameters" title="Advanced options"> <!-- optional tuning flags; boolean truevalues and select values are the literal command-line fragments -->
204 <param name="gff_feature_type"
205 type="text"
206 value="exon"
207 argument="-t"
208 label="GFF feature type filter"
209 help="Specify the feature type. Only rows which have the matched feature type in the provided GTF annotation file will be included for read counting. `exon' by default." />
210
211 <param name="gff_feature_attribute"
212 type="text"
213 value="gene_id"
214 argument="-g"
215 label="GFF gene identifier"
216 help="Specify the attribute type used to group features (eg. exons) into meta-features (eg. genes), when GTF annotation is provided. `gene_id' by default. This attribute type is usually the gene identifier. This argument is useful for the meta-feature level summarization." />
217
218 <param name="summarization_level"
219 type="boolean"
220 truevalue=" -f"
221 falsevalue=""
222 argument="-f"
223 label="On feature level"
224 help="If specified, read summarization will be performed at the feature level. By default (-f is not specified), the read summarization is performed at the meta-feature level." />
225
226 <param name="contribute_to_multiple_features"
227 type="boolean"
228 truevalue=" -O"
229 falsevalue=""
230 argument="-O"
231 label="Allow read to contribute to multiple features"
232 help="If specified, reads (or fragments if -p is specified) will be allowed to be assigned to more than one matched meta-feature (or matched feature if -f is specified)" />
233
234 <param name="strand_specificity"
235 type="select"
236 label="Strand specificity of the protocol"
237 argument="-s"
238 help="Indicate if strand-specific read counting should be performed.">
239 <option value="0" selected="true">Unstranded</option>
240 <option value="1">Stranded (forwards)</option>
241 <option value="2">Stranded (reverse)</option>
242 </param>
243
244 <conditional name="multimapping_enabled">
245 <param name="multimapping_counts"
246 type="select"
247 argument="-M"
248 label="Count multi-mapping reads/fragments"
249 help="If specified, multi-mapping reads/fragments will be counted (ie. a multi-mapping read will be counted up to N times if it has N reported mapping locations). The program uses the `NH' tag to find multi-mapping reads.">
250 <option value="" selected="true">Disabled; multi-mapping reads are excluded (default)</option>
251 <option value=" -M">Enabled; multi-mapping reads are included</option>
252 </param>
253 <when value=" -M">
254 <param name="fraction"
255 type="boolean"
256 truevalue="--fraction"
257 falsevalue=""
258 argument="--fraction"
259 label="Assign fractions to multimapping reads"
260 help="If specified, a fractional count 1/n will be generated for each multi-mapping read, where n is the number of alignments (indicated by 'NH' tag) reported for the read. This option must be used together with the '-M' option." />
261 </when>
262 <when value="" />
263 </conditional>
264
265 <param name="mapping_quality"
266 type="integer"
267 value="12"
268 argument="-Q"
269 label="Minimum mapping quality per read"
270 help="The minimum mapping quality score a read must satisfy in order to be counted. For paired-end reads, at least one end should satisfy this criteria. 12 by default." />
271
272 <param name="largest_overlap"
273 type="boolean"
274 truevalue=" --largestOverlap"
275 falsevalue=""
276 argument="--largestOverlap"
277 label="Largest overlap"
278 help="If specified, reads (or fragments) will be assigned to the target that has the largest number of overlapping bases" />
279
280 <param name="min_overlap"
281 type="integer"
282 value="1"
283 argument="--minOverlap"
284 label="Minimum overlap"
285 help="Specify the minimum required number of overlapping bases between a read (or a fragment) and a feature. 1 by default. If a negative value is provided, the read will be extended from both ends." />
286
287 <param name="read_extension_5p"
288 type="integer"
289 value="0"
290 argument="--readExtension5"
291 label="Read 5' extension"
292 help="Reads are extended upstream by ... bases from their 5' end" />
293 <!-- the 3' extension grows the read downstream, mirroring the upstream 5' extension above -->
294 <param name="read_extension_3p"
295 type="integer"
296 value="0"
297 argument="--readExtension3"
298 label="Read 3' extension"
299 help="Reads are extended downstream by ... bases from their 3' end" />
300
301 <param name="read_reduction"
302 type="select"
303 label="Reduce read to single position"
304 argument="--read2pos"
305 help="The read is reduced to its 5' most base or 3' most base. Read summarization is then performed based on the single base which the read is reduced to.">
306 <option value="" selected="true">Leave the read as it is</option>
307 <option value="--read2pos 5">Reduce it to the 5' end</option>
308 <option value="--read2pos 3">Reduce it to the 3' end</option>
309 </param>
310
311 <param name="primary"
312 type="boolean"
313 truevalue=" --primary"
314 falsevalue=""
315 argument="--primary"
316 label="Only count primary alignments"
317 help="If specified, only primary alignments will be counted. Primary and secondary alignments are identified using bit 0x100 in the Flag field of SAM/BAM files. All primary alignments in a dataset will be counted no matter whether they are from multi-mapping reads or not ('-M' is ignored)." />
318
319 <param name="ignore_dup"
320 type="boolean"
321 truevalue=" --ignoreDup"
322 falsevalue=""
323 argument="--ignoreDup"
324 label="Ignore reads marked as duplicate"
325 help="If specified, reads that were marked as duplicates will be ignored. Bit 0x400 in FLAG field of SAM/BAM file is used for identifying duplicate reads. In paired end data, the entire read pair will be ignored if at least one end is found to be a duplicate read." />
326 <!-- restricts counting to split (CIGAR N) alignments; its label must stay distinct from ignore_dup's -->
327 <param name="count_split_alignments_only"
328 type="boolean"
329 truevalue=" --countSplitAlignmentsOnly"
330 falsevalue=""
331 argument="--countSplitAlignmentsOnly"
332 label="Count only split alignments"
333 help="If specified, only split alignments (CIGAR strings containing letter `N') will be counted. All the other alignments will be ignored. An example of split alignments is the exon-spanning reads in RNA-seq data." />
334 </section>
335 </inputs>
336 <outputs> <!-- the three count tables carry mutually exclusive filters on the format parameter -->
337 <data format="tabular"
338 name="output_medium"
339 label="${tool.name} on ${on_string}">
340 <filter>format == "tabdel_medium"</filter>
341 <actions>
342 <action name="column_names" type="metadata" default="Geneid,${alignment.name},Length" />
343 </actions>
344 </data>
345 <!-- two-column table (Geneid plus a column named after the alignment); kept when format is tabdel_short -->
346 <data format="tabular"
347 name="output_short"
348 label="${tool.name} on ${on_string}">
349 <filter>format == "tabdel_short"</filter>
350 <actions>
351 <action name="column_names" type="metadata" default="Geneid,${alignment.name}" />
352 </actions>
353 </data>
354 <!-- seven-column table; kept when format is tabdel_full -->
355 <data format="tabular"
356 name="output_full"
357 label="${tool.name} on ${on_string}: count table">
358 <filter>format == "tabdel_full"</filter>
359 <actions>
360 <action name="column_names" type="metadata" default="Geneid,Chr,Start,End,Strand,Length,${alignment.name}" />
361 </actions>
362 </data>
363 <!-- summary dataset: declared hidden and not filtered by format -->
364 <data format="tabular"
365 name="output_summary"
366 hidden="true"
367 label="${tool.name} on ${on_string}: summary">
368 <actions>
369 <action name="column_names" type="metadata" default="Status,${alignment.name}" />
370 </actions>
371 </data>
372 <!-- feature-length table; kept only when include_feature_length_file is true -->
373 <data format="tabular"
374 name="output_feature_lengths"
375 label="${tool.name} on ${on_string}: feature lengths">
376 <filter>include_feature_length_file</filter>
377 <actions>
378 <action name="column_names" type="metadata" default="Feature,Length" />
379 </actions>
380 </data>
381 </outputs>
382 <tests> <!-- each output name must match a data element declared in outputs (output_short, output_medium, output_full, ...) -->
383 <test>
384 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
385 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
386 <param name="format" value="tabdel_short" />
387 <param name="include_feature_length_file" value="true"/>
388 <param name="ref_source" value="history" />
389 <output name="output_short" file="output_1_short.tab"/>
390 <output name="output_summary" file="output_1_summary.tab"/>
391 </test>
392 <test>
393 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
394 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
395 <param name="format" value="tabdel_medium" />
396 <param name="include_feature_length_file" value="true"/>
397 <param name="ref_source" value="history" />
398 <output name="output_medium" file="output_1_medium.tab"/>
399 <output name="output_summary" file="output_1_summary.tab"/>
400 </test>
401 <test>
402 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
403 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
404 <param name="format" value="tabdel_full" />
405 <param name="include_feature_length_file" value="true"/>
406 <param name="ref_source" value="history" />
407 <output name="output_full" file="output_1_full.tab"/>
408 <output name="output_summary" file="output_1_summary.tab"/>
409 <output name="output_feature_lengths" file="output_feature_lengths.tab"/>
410 </test>
411 <!-- NOTE(review): the three tests below repeat the inputs of the three above but expect output_2_* files; confirm which parameters were meant to differ -->
412 <test>
413 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
414 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
415 <param name="format" value="tabdel_short" />
416 <param name="include_feature_length_file" value="true"/>
417 <param name="ref_source" value="history" />
418 <output name="output_short" file="output_2_short.tab"/>
419 <output name="output_summary" file="output_2_summary.tab"/>
420 </test>
421 <test>
422 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
423 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
424 <param name="format" value="tabdel_medium" />
425 <param name="include_feature_length_file" value="true"/>
426 <param name="ref_source" value="history" />
427 <output name="output_medium" file="output_2_medium.tab"/>
428 <output name="output_summary" file="output_2_summary.tab"/>
429 </test>
430 <test>
431 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
432 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
433 <param name="format" value="tabdel_full" />
434 <param name="include_feature_length_file" value="true"/>
435 <param name="ref_source" value="history" />
436 <output name="output_full" file="output_2_full.tab"/>
437 <output name="output_summary" file="output_2_summary.tab"/>
438 <output name="output_feature_lengths" file="output_feature_lengths.tab"/>
439 </test>
440 </tests>
441
442 <help><![CDATA[
443 featureCounts
444 #############
445
446 Overview
447 --------
448 FeatureCounts is a light-weight read counting program written entirely in the C programming language. It can be used to count both gDNA-seq and RNA-seq reads for genomic features in SAM/BAM files.
449
450 Input formats
451 -------------
452 Alignments should be provided in either:
453
454 - SAM format, http://samtools.sourceforge.net/samtools.shtml#5
455 - BAM format
456
457 Gene regions should be provided in the GFF/GTF format:
458
459 - http://genome.ucsc.edu/FAQ/FAQformat.html#format3
460 - http://www.ensembl.org/info/website/upload/gff.html
461
462 Output format
463 -------------
464 FeatureCounts produces a table containing counted reads, per gene, per row. Optionally the last column can be set to be the effective gene-length. These tables are compatible with the DESeq2 Galaxy wrapper by IUC. Column names are added as metadata object.
465 ]]></help>
466 <citations> <!-- publication to cite for this tool, referenced by DOI -->
467 <citation type="doi">10.1093/bioinformatics/btt656</citation>
468 </citations>
469 </tool>