Mercurial > repos > dpryan79 > featurecounts_test
comparison featurecounts.xml @ 2:7c3041c778d0 draft default tip
Uploaded
author | dpryan79 |
---|---|
date | Wed, 09 Nov 2016 16:38:01 -0500 |
parents | 3edb501ec957 |
children |
comparison
equal
deleted
inserted
replaced
1:60b43da9a265 | 2:7c3041c778d0 |
---|---|
1 <tool id="featurecounts" name="featureCounts" version="1.4.6.p5" profile="16.04"> | |
2 <description>Measure gene expression in RNA-Seq experiments from SAM or BAM files.</description> | |
3 <requirements> | |
4 <requirement type="package" version="1.4.6p5">subread</requirement> | |
5 </requirements> | |
6 | |
7 <version_command>featureCounts -v 2>&1 | grep .</version_command> | |
8 <command><![CDATA[ | |
9 ## Run featureCounts on the alignment file, then reshape its table into the selected tabular layout | |
10 featureCounts | |
11 #if $gtf_source.ref_source=="history": | |
12 -a "$gtf_source.reference_gene_sets" | |
13 #else: | |
14 -a "$gtf_source.reference_gene_sets_builtin.fields.path" | |
15 #end if | |
16 | |
17 -o "output" | |
18 -T \${GALAXY_SLOTS:-2} | |
19 | |
20 -t "$extended_parameters.gff_feature_type" | |
21 -g "$extended_parameters.gff_feature_attribute" | |
22 $extended_parameters.summarization_level | |
23 $extended_parameters.contribute_to_multiple_features | |
24 -s $extended_parameters.strand_specificity | |
25 $extended_parameters.multimapping_enabled.multimapping_counts | |
26 | |
27 #if str($extended_parameters.multimapping_enabled.multimapping_counts) == " -M" | |
28 $extended_parameters.multimapping_enabled.fraction | |
29 #end if | |
30 | |
31 -Q $extended_parameters.mapping_quality | |
32 $extended_parameters.largest_overlap | |
33 --minOverlap $extended_parameters.min_overlap | |
34 $extended_parameters.read_reduction | |
35 $extended_parameters.primary | |
36 $extended_parameters.ignore_dup | |
37 $extended_parameters.count_split_alignments_only | |
38 #if str($extended_parameters.read_extension_5p) != "0" | |
39 --readExtension5 $extended_parameters.read_extension_5p | |
40 #end if | |
41 | |
42 #if str($extended_parameters.read_extension_3p) != "0" | |
43 --readExtension3 $extended_parameters.read_extension_3p | |
44 #end if | |
45 | |
46 $pe_parameters.fragment_counting_enabled.fragment_counting | |
47 #if str($pe_parameters.fragment_counting_enabled.fragment_counting) == " -p" | |
48 $pe_parameters.fragment_counting_enabled.check_distance_enabled.check_distance | |
49 #if str($pe_parameters.fragment_counting_enabled.check_distance_enabled.check_distance) == " -P" | |
50 -d $pe_parameters.fragment_counting_enabled.check_distance_enabled.minimum_fragment_length | |
51 -D $pe_parameters.fragment_counting_enabled.check_distance_enabled.maximum_fragment_length | |
52 #end if | |
53 #end if | |
54 | |
55 $pe_parameters.only_both_ends | |
56 -S $pe_parameters.orientation | |
57 $pe_parameters.exclude_chimerics | |
58 | |
59 "${alignment}" | |
60 | |
61 ## Removal of comment and column-header line | |
62 && grep -v "^#" "output" | tail -n+2 > body.txt | |
63 | |
64 ## Set the right columns for the tabular formats | |
65 #if $format.value == "tabdel_medium" | |
66 && cut -f 1,7 body.txt > expression_matrix.txt | |
67 | |
68 ## cut always emits columns in file order: -f 1,7,6 would return columns 1, 6 and 7 | |
69 ## Thus the gene length column (column 6) has to be cut separately and appended with paste | |
70 && cut -f 6 body.txt > gene_lengths.txt | |
71 && paste expression_matrix.txt gene_lengths.txt > expression_matrix.txt.bak | |
72 && mv -f expression_matrix.txt.bak "${output_medium}" | |
73 #elif $format.value == "tabdel_short" | |
74 && cut -f 1,7 body.txt > "${output_short}" | |
75 #else | |
76 && cp body.txt "${output_full}" | |
77 #end if | |
78 | |
79 | |
80 #if str($include_feature_length_file) == "true" | |
81 && cut -f 1,6 body.txt > "${output_feature_lengths}" | |
82 #end if | |
83 | |
84 && tail -n+2 "output.summary" > "${output_summary}" | |
85 | |
86 ]]></command> | |
87 <inputs> | |
88 <param name="alignment" | |
89 type="data" | |
90 multiple="false" | |
91 format="bam,sam" | |
92 label="Alignment file" | |
93 help="The input alignment file where the gene expression has to be counted. The file can be in SAM or BAM format." /> | |
94 | |
95 <conditional name="gtf_source"> | |
96 <param name="ref_source" type="select" label="Gene annotation file"> | |
97 <option value="cached">locally cached</option> | |
98 <option value="history">in your history</option> | |
99 </param> | |
100 <when value="cached"> | |
101 <param name="reference_gene_sets_builtin" type="select" label="Using locally cached annotation" help="If the annotation file you require is not listed here, please contact the Galaxy administrator"> | |
102 <options from_data_table="gene_sets"> | |
103 <filter type="sort_by" column="1" /> | |
104 <validator type="no_options" message="No annotations are available." /> | |
105 </options> | |
106 </param> | |
107 </when> | |
108 <when value="history"> | |
109 <param name="reference_gene_sets" | |
110 format="gff,gtf,gff3" | |
111 type="data" | |
112 label="Gene annotation file" | |
113 help="The program assumes that the provided annotation file is in GTF format. Make sure that the gene annotation file corresponds to the same reference genome as used for the alignment" /> | |
114 </when> | |
115 </conditional> | |
116 | |
117 <param name="format" | |
118 type="select" | |
119 label="Output format" | |
120 help="The output format will be tabular, select the preferred columns here"> | |
121 <option value="tabdel_short" selected="true">Gene-ID "\t" read-count (DESeq2 IUC wrapper compatible)</option> | |
122 <option value="tabdel_medium">Gene-ID "\t" read-count "\t" gene-length</option> | |
123 <option value="tabdel_full">featureCounts 1.4.0+ default (includes regions provided by the GTF file)</option> | |
124 </param> | |
125 | |
126 <param name="include_feature_length_file" | |
127 type="boolean" | |
128 truevalue="true" | |
129 falsevalue="false" | |
130 checked="false" | |
131 label="Create gene-length file" | |
132 help="Creates a tabular file that contains the effective (nucleotides used for counting reads) length of the feature; might be useful for estimating FPKM/RPKM" /> | |
133 | |
134 | |
135 <section name="pe_parameters" title="Options for paired-end reads"> | |
136 <conditional name="fragment_counting_enabled"> | |
137 | |
138 <param name="fragment_counting" | |
139 type="select" | |
140 argument="-p" | |
142 label="Count fragments instead of reads" | |
143 help="If specified, fragments (or templates) will be counted instead of reads."> | |
144 <option value="" selected="true">Disabled; all reads/mates will be counted individually</option> | |
145 <option value=" -p">Enabled; fragments (or templates) will be counted instead of reads</option> | |
146 </param> | |
147 | |
148 <when value=" -p"> | |
149 <conditional name="check_distance_enabled"> | |
150 <param name="check_distance" | |
151 type="boolean" | |
152 truevalue=" -P" | |
153 falsevalue="" | |
154 argument="-P" | |
155 label="Check paired-end distance" | |
156 help="If specified, paired-end distance will be checked when assigning fragments to meta-features or features. This option is only applicable when -p (Count fragments instead of reads) is specified. The distance thresholds should be specified using -d and -D (minimum and maximum fragment/template length) options." /> | |
157 <when value=" -P"> | |
158 <param name="minimum_fragment_length" | |
159 type="integer" | |
160 value="50" | |
161 argument="-d" | |
162 label="Minimum fragment/template length." /> | |
163 <param name="maximum_fragment_length" | |
164 type="integer" | |
165 value="600" | |
166 argument="-D" | |
167 label="Maximum fragment/template length." /> | |
168 </when> | |
169 <when value="" /> | |
170 </conditional> | |
171 </when> | |
172 <when value="" /> | |
173 </conditional> | |
174 | |
175 <param name="only_both_ends" | |
176 type="boolean" | |
177 truevalue=" -B" | |
178 falsevalue="" | |
179 argument="-B" | |
180 label="Only allow fragments with both reads aligned" | |
181 help="If specified, only fragments that have both ends successfully aligned will be considered for summarization. This option is only applicable for paired-end reads." /> | |
182 | |
183 <param name="orientation" | |
184 type="select" | |
185 label="Orientation of the two reads from the same pair" | |
186 argument="-S" | |
187 help="Default is 'fr'"> | |
188 <option value="fr" selected="true">Forward, Reverse (fr)</option> | |
189 <option value="ff">Forward, Forward (ff)</option> | |
190 <option value="rf">Reverse, Forward (rf)</option> | |
191 </param> | |
192 | |
193 <param name="exclude_chimerics" | |
194 type="boolean" | |
195 truevalue=" -C" | |
196 falsevalue="" | |
197 argument="-C" | |
198 checked="true" | |
199 label="Exclude chimeric fragments" | |
200 help="If specified, the chimeric fragments (those fragments that have their two ends aligned to different chromosomes) will NOT be included for summarization. This option is only applicable for paired-end read data." /> | |
201 </section> | |
202 | |
203 <section name="extended_parameters" title="Advanced options"> | |
204 <param name="gff_feature_type" | |
205 type="text" | |
206 value="exon" | |
207 argument="-t" | |
208 label="GFF feature type filter" | |
209 help="Specify the feature type. Only rows which have the matched feature type in the provided GTF annotation file will be included for read counting. `exon' by default." /> | |
210 | |
211 <param name="gff_feature_attribute" | |
212 type="text" | |
213 value="gene_id" | |
214 argument="-g" | |
215 label="GFF gene identifier" | |
216 help="Specify the attribute type used to group features (eg. exons) into meta-features (eg. genes), when GTF annotation is provided. `gene_id' by default. This attribute type is usually the gene identifier. This argument is useful for the meta-feature level summarization." /> | |
217 | |
218 <param name="summarization_level" | |
219 type="boolean" | |
220 truevalue=" -f" | |
221 falsevalue="" | |
222 argument="-f" | |
223 label="On feature level" | |
224 help="If specified, read summarization will be performed at the feature level. By default (-f is not specified), the read summarization is performed at the meta-feature level." /> | |
225 | |
226 <param name="contribute_to_multiple_features" | |
227 type="boolean" | |
228 truevalue=" -O" | |
229 falsevalue="" | |
230 argument="-O" | |
231 label="Allow read to contribute to multiple features" | |
232 help="If specified, reads (or fragments if -p is specified) will be allowed to be assigned to more than one matched meta-feature (or matched feature if -f is specified)" /> | |
233 | |
234 <param name="strand_specificity" | |
235 type="select" | |
236 label="Strand specificity of the protocol" | |
237 argument="-s" | |
238 help="Indicate if strand-specific read counting should be performed."> | |
239 <option value="0" selected="true">Unstranded</option> | |
240 <option value="1">Stranded (forwards)</option> | |
241 <option value="2">Stranded (reverse)</option> | |
242 </param> | |
243 | |
244 <conditional name="multimapping_enabled"> | |
245 <param name="multimapping_counts" | |
246 type="select" | |
247 argument="-M" | |
248 label="Count multi-mapping reads/fragments" | |
249 help="If specified, multi-mapping reads/fragments will be counted (ie. a multi-mapping read will be counted up to N times if it has N reported mapping locations). The program uses the `NH' tag to find multi-mapping reads."> | |
250 <option value="" selected="true">Disabled; multi-mapping reads are excluded (default)</option> | |
251 <option value=" -M">Enabled; multi-mapping reads are included</option> | |
252 </param> | |
253 <when value=" -M"> | |
254 <param name="fraction" | |
255 type="boolean" | |
256 truevalue="--fraction" | |
257 falsevalue="" | |
258 argument="--fraction" | |
259 label="Assign fractions to multimapping reads" | |
260 help="If specified, a fractional count 1/n will be generated for each multi-mapping read, where n is the number of alignments (indicated by 'NH' tag) reported for the read. This option must be used together with the '-M' option." /> | |
261 </when> | |
262 <when value="" /> | |
263 </conditional> | |
264 | |
265 <param name="mapping_quality" | |
266 type="integer" | |
267 value="12" | |
268 argument="-Q" | |
269 label="Minimum mapping quality per read" | |
270 help="The minimum mapping quality score a read must satisfy in order to be counted. For paired-end reads, at least one end should satisfy this criteria. 12 by default." /> | |
271 | |
272 <param name="largest_overlap" | |
273 type="boolean" | |
274 truevalue=" --largestOverlap" | |
275 falsevalue="" | |
276 argument="--largestOverlap" | |
277 label="Largest overlap" | |
278 help="If specified, reads (or fragments) will be assigned to the target that has the largest number of overlapping bases" /> | |
279 | |
280 <param name="min_overlap" | |
281 type="integer" | |
282 value="1" | |
283 argument="--minOverlap" | |
284 label="Minimum overlap" | |
285 help="Specify the minimum required number of overlapping bases between a read (or a fragment) and a feature. 1 by default. If a negative value is provided, the read will be extended from both ends." /> | |
286 | |
287 <param name="read_extension_5p" | |
288 type="integer" | |
289 value="0" | |
290 argument="--readExtension5" | |
291 label="Read 5' extension" | |
292 help="Reads are extended upstream by ... bases from their 5' end" /> | |
293 | |
294 <param name="read_extension_3p" | |
295 type="integer" | |
296 value="0" | |
297 argument="--readExtension3" | |
298 label="Read 3' extension" | |
299 help="Reads are extended downstream by ... bases from their 3' end" /> | |
300 | |
301 <param name="read_reduction" | |
302 type="select" | |
303 label="Reduce read to single position" | |
304 argument="--read2pos" | |
305 help="The read is reduced to its 5' most base or 3' most base. Read summarization is then performed based on the single base which the read is reduced to."> | |
306 <option value="" selected="true">Leave the read as it is</option> | |
307 <option value="--read2pos 5">Reduce it to the 5' end</option> | |
308 <option value="--read2pos 3">Reduce it to the 3' end</option> | |
309 </param> | |
310 | |
311 <param name="primary" | |
312 type="boolean" | |
313 truevalue=" --primary" | |
314 falsevalue="" | |
315 argument="--primary" | |
316 label="Only count primary alignments" | |
317 help="If specified, only primary alignments will be counted. Primary and secondary alignments are identified using bit 0x100 in the Flag field of SAM/BAM files. All primary alignments in a dataset will be counted no matter they are from multi-mapping reads or not ('-M' is ignored)." /> | |
318 | |
319 <param name="ignore_dup" | |
320 type="boolean" | |
321 truevalue=" --ignoreDup" | |
322 falsevalue="" | |
323 argument="--ignoreDup" | |
324 label="Ignore reads marked as duplicate" | |
325 help="If specified, reads that were marked as duplicates will be ignored. Bit 0x400 in FLAG field of SAM/BAM file is used for identifying duplicate reads. In paired end data, the entire read pair will be ignored if at least one end is found to be a duplicate read." /> | |
326 | |
327 <param name="count_split_alignments_only" | |
328 type="boolean" | |
329 truevalue=" --countSplitAlignmentsOnly" | |
330 falsevalue="" | |
331 argument="--countSplitAlignmentsOnly" | |
332 label="Only count split alignments" | |
333 help="If specified, only split alignments (CIGAR strings containing letter `N') will be counted. All the other alignments will be ignored. An example of split alignments is the exon-spanning reads in RNA-seq data." /> | |
334 </section> | |
335 </inputs> | |
336 <outputs> | |
337 <data format="tabular" | |
338 name="output_medium" | |
339 label="${tool.name} on ${on_string}"> | |
340 <filter>format == "tabdel_medium"</filter> | |
341 <actions> | |
342 <action name="column_names" type="metadata" default="Geneid,${alignment.name},Length" /> | |
343 </actions> | |
344 </data> | |
345 | |
346 <data format="tabular" | |
347 name="output_short" | |
348 label="${tool.name} on ${on_string}"> | |
349 <filter>format == "tabdel_short"</filter> | |
350 <actions> | |
351 <action name="column_names" type="metadata" default="Geneid,${alignment.name}" /> | |
352 </actions> | |
353 </data> | |
354 | |
355 <data format="tabular" | |
356 name="output_full" | |
357 label="${tool.name} on ${on_string}: count table"> | |
358 <filter>format == "tabdel_full"</filter> | |
359 <actions> | |
360 <action name="column_names" type="metadata" default="Geneid,Chr,Start,End,Strand,Length,${alignment.name}" /> | |
361 </actions> | |
362 </data> | |
363 | |
364 <data format="tabular" | |
365 name="output_summary" | |
366 hidden="true" | |
367 label="${tool.name} on ${on_string}: summary"> | |
368 <actions> | |
369 <action name="column_names" type="metadata" default="Status,${alignment.name}" /> | |
370 </actions> | |
371 </data> | |
372 | |
373 <data format="tabular" | |
374 name="output_feature_lengths" | |
375 label="${tool.name} on ${on_string}: feature lengths"> | |
376 <filter>include_feature_length_file</filter> | |
377 <actions> | |
378 <action name="column_names" type="metadata" default="Feature,Length" /> | |
379 </actions> | |
380 </data> | |
381 </outputs> | |
382 <tests> | |
383 <test> | |
384 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> | |
385 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> | |
386 <param name="format" value="tabdel_short" /> | |
387 <param name="include_feature_length_file" value="true"/> | |
388 <param name="ref_source" value="history" /> | |
389 <output name="output_short" file="output_1_short.tab"/> | |
390 <output name="output_summary" file="output_1_summary.tab"/> | |
391 </test> | |
392 <test> | |
393 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> | |
394 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> | |
395 <param name="format" value="tabdel_medium" /> | |
396 <param name="include_feature_length_file" value="true"/> | |
397 <param name="ref_source" value="history" /> | |
398 <output name="output_medium" file="output_1_medium.tab"/> | |
399 <output name="output_summary" file="output_1_summary.tab"/> | |
400 </test> | |
401 <test> | |
402 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> | |
403 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> | |
404 <param name="format" value="tabdel_full" /> | |
405 <param name="include_feature_length_file" value="true"/> | |
406 <param name="ref_source" value="history" /> | |
407 <output name="output_full" file="output_1_full.tab"/> | |
408 <output name="output_summary" file="output_1_summary.tab"/> | |
409 <output name="output_feature_lengths" file="output_feature_lengths.tab"/> | |
410 </test> | |
411 | |
412 <test> | |
413 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> | |
414 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> | |
415 <param name="format" value="tabdel_short" /> | |
416 <param name="include_feature_length_file" value="true"/> | |
417 <param name="ref_source" value="history" /> | |
418 <output name="output_short" file="output_2_short.tab"/> | |
419 <output name="output_summary" file="output_2_summary.tab"/> | |
420 </test> | |
421 <test> | |
422 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> | |
423 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> | |
424 <param name="format" value="tabdel_medium" /> | |
425 <param name="include_feature_length_file" value="true"/> | |
426 <param name="ref_source" value="history" /> | |
427 <output name="output_medium" file="output_2_medium.tab"/> | |
428 <output name="output_summary" file="output_2_summary.tab"/> | |
429 </test> | |
430 <test> | |
431 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> | |
432 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> | |
433 <param name="format" value="tabdel_full" /> | |
434 <param name="include_feature_length_file" value="true"/> | |
435 <param name="ref_source" value="history" /> | |
436 <output name="output_full" file="output_2_full.tab"/> | |
437 <output name="output_summary" file="output_2_summary.tab"/> | |
438 <output name="output_feature_lengths" file="output_feature_lengths.tab"/> | |
439 </test> | |
440 </tests> | |
441 | |
442 <help><![CDATA[ | |
443 featureCounts | |
444 ############# | |
445 | |
446 Overview | |
447 -------- | |
448 FeatureCounts is a light-weight read counting program written entirely in the C programming language. It can be used to count both gDNA-seq and RNA-seq reads for genomic features in SAM/BAM files. | |
449 | |
450 Input formats | |
451 ------------- | |
452 Alignments should be provided in either: | |
453 | |
454 - SAM format, http://samtools.sourceforge.net/samtools.shtml#5 | |
455 - BAM format | |
456 | |
457 Gene regions should be provided in the GFF/GTF format: | |
458 | |
459 - http://genome.ucsc.edu/FAQ/FAQformat.html#format3 | |
460 - http://www.ensembl.org/info/website/upload/gff.html | |
461 | |
462 Output format | |
463 ------------- | |
464 FeatureCounts produces a table containing counted reads, per gene, per row. Optionally the last column can be set to be the effective gene-length. These tables are compatible with the DESeq2 Galaxy wrapper by IUC. Column names are added as metadata object. | |
465 ]]></help> | |
466 <citations> | |
467 <citation type="doi">10.1093/bioinformatics/btt656</citation> | |
468 </citations> | |
469 </tool> |