comparison agat.xml @ 0:cffa21bb7a92 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/agat commit 0851e9e6d46223a8233c56f3b0bcf14e19d63916
author bgruening
date Tue, 23 May 2023 13:42:43 +0000
parents
children cc46560b4992
comparison
equal deleted inserted replaced
-1:000000000000 0:cffa21bb7a92
1 <tool id="agat" name="AGAT" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
2 <description>GTF/GFF analysis toolkit</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="biotools"/>
7 <expand macro="requirements" />
8 <version_command>agat_sq_stat_basic.pl --version</version_command>
<!-- Builds the shell command for the AGAT script picked in the "tool" conditional. Every branch except "extract" writes to a scratch file that is then copied into the Galaxy output dataset with cat; "extract" writes straight to its output dataset. -->
9 <command detect_errors="exit_code"><![CDATA[
## The @...@ tokens are expanded from the imported macros (not visible in this file); presumably they define
## $input_annotation, $input1, $input2 and $ref_genome - confirm against macros.xml.
## File paths are single-quoted so that each one reaches the script as a single shell argument.
10 #if $tool.selector == 'fix'
11 @input_annotation_single@
12 agat_convert_sp_gxf2gxf.pl --gff '$input_annotation' --output 'output.gff' &&
13 cat 'output.gff' > '${annotation_gff}'
14 #else if $tool.selector == 'convert_GFF2GTF'
15 @input_annotation_single@
16 agat_convert_sp_gff2gtf.pl --gff '$input_annotation' --gtf_version $tool.gtf_version --output 'output.gtf' &&
17 cat 'output.gtf' > '${annotation_gtf}'
18 #else if $tool.selector == 'convert_GTF2GFF'
19 @input_annotation_single@
20 agat_convert_sp_gxf2gxf.pl --gff '$input_annotation' --output 'output.gff' &&
21 cat 'output.gff' > '${annotation_gff}'
22 #else if $tool.selector == 'compare'
23 @input_annotation_double@
24 agat_sp_compare_two_annotations.pl --gff1 '$input1' --gff2 '$input2' --output 'temp_output' &&
25 cat 'temp_output' > '${stats_output}'
26 #else if $tool.selector == 'extract'
27 @input_annotation_single@
28 @input_reference@
29 agat_sp_extract_sequences.pl
30 --gff '$input_annotation'
31 -f '$ref_genome'
## Boolean parameters render as their flag when checked and as an empty string otherwise.
32 $tool.mrna
33 $tool.cdna
34 $tool.clean_final_stop
35 $tool.clean_internal_stop
36 #if $tool.downstream
37 --downstream $tool.downstream
38 #end if
39 $tool.full
40 $tool.keep_attributes
41 $tool.keep_parent_attributes
42 $tool.merge
43 $tool.plus_strand_only
44 $tool.protein
45 $tool.remove_orf_offset
46 $tool.revcomp
47 $tool.split
48 #if $tool.type
49 --type $tool.type
50 #end if
51 #if $tool.upstream
52 --upstream $tool.upstream
53 #end if
54 --output '${sequence_output}'
55 #else if $tool.selector == 'functional_analysis'
56 @input_annotation_single@
57 @input_reference@
## NOTE(review): the selector labels this branch agat_sp_functional_statistics.pl, but agat_sp_statistics.pl is
## what runs here, and ./statistics is not referenced again in this template - confirm which script is intended.
58 mkdir -p './statistics' &&
59 agat_sp_statistics.pl
60 --gff '$input_annotation'
61 --gs '$ref_genome'
62 --output 'temp_output' &&
63 cat 'temp_output' > '$stats_output'
64 #else if $tool.selector == 'merge_annotations'
65 @input_annotation_double@
66 agat_sp_merge_annotations.pl --gff '$input1' --gff '$input2' --output 'temp_output' &&
67 cat 'temp_output' > '${annotation_gff}'
68 #else if $tool.selector == 'annotation_statistics'
69 @input_annotation_single@
70 @input_reference@
71 agat_sp_statistics.pl --gff '$input_annotation' --gs '$ref_genome' -d --output 'temp_output' &&
72 cat 'temp_output' > '$stats_output'
73 #else if $tool.selector == 'filter_feature_fasta'
74 @input_annotation_single@
75 @input_reference@
76 agat_sq_filter_feature_from_fasta.pl --gff '$input_annotation' --fasta '$ref_genome' --output 'temp_output' &&
77 cat 'temp_output' > '${features_filtered}'
78 #else if $tool.selector == 'complement'
79 @input_annotation_double@
80 agat_sp_complement_annotations.pl --ref '$input1' --add '$input2' --size_min $tool.size_min --output 'temp_output' &&
81 cat 'temp_output' > '${annotation_gff}'
82 #end if
83 ]]>
84 </command>
<!-- A single conditional drives the whole form: the selector picks the AGAT script and each "when" branch declares that script's parameters. ANNOTATION_INPUT and REFERENCE_FASTA are expanded from the imported macros. -->
85 <inputs>
86 <conditional name="tool">
87 <param name="selector" type="select" label="AGAT tool selector" help="As AGAT is a toolkit, it contains a lot of tools. If any of them is missing, please contact the server admin.">
88 <option value="annotation_statistics">Annotation statistics (agat_sp_statistics.pl)</option>
89 <option value="compare">Compare annotation files (agat_sp_compare_two_annotations.pl)</option>
90 <option value="complement">Complement annotation file (agat_sp_complement_annotations.pl)</option>
91 <option value="extract">Extract sequences (agat_sp_extract_sequences.pl)</option>
92 <option value="convert_GFF2GTF">GFF to GTF format conversion (agat_convert_sp_gff2gtf.pl)</option>
93 <option value="convert_GTF2GFF">GTF to GFF3 format conversion (agat_convert_sp_gxf2gxf.pl)</option>
94 <option value="filter_feature_fasta">Filter annotation by sequence name (agat_sq_filter_feature_from_fasta.pl)</option>
95 <option value="fix">Fix and/or standardize GFF3 annotation file (agat_convert_sp_gxf2gxf.pl)</option>
<!-- NOTE(review): the command section runs agat_sp_statistics.pl for functional_analysis, not the script named in this label; confirm which one is intended. -->
96 <option value="functional_analysis">Functional analysis (agat_sp_functional_statistics.pl)</option>
97 <option value="merge_annotations">Merge annotations (agat_sp_merge_annotations.pl)</option>
98 </param>
99 <when value="annotation_statistics">
100 <expand macro="ANNOTATION_INPUT"/>
101 <expand macro="REFERENCE_FASTA"/>
102 </when>
103 <when value="compare">
104 <param argument="--gff1" name="input_annotation1" type="data" format="gff,gtf,gff3,gff3.gz" label="Annotation file 1" help="Input GTF/GFF file" />
105 <param argument="--gff2" name="input_annotation2" type="data" format="gff,gtf,gff3,gff3.gz" label="Annotation file 2" help="Input GTF/GFF file" />
106 </when>
107 <when value="extract">
108 <expand macro="ANNOTATION_INPUT"/>
109 <expand macro="REFERENCE_FASTA"/>
110 <param name="type" type="select" label="Type of feature to extract" optional="true" help="Define the feature you want to extract the sequence from.">
111 <option value="gene">Gene</option>
112 <option value="transcript">Transcript</option>
113 <option value="exon">Exon</option>
114 <option value="cds">CDS</option>
115 <option value="trna">tRNA</option>
116 <option value="three_prime_utr">3' UTR</option>
117 <option value="five_prime_utr">5' UTR</option>
118 </param>
<!-- Each boolean below renders as its command-line flag when checked and as an empty string otherwise. -->
119 <param argument="--mrna" type="boolean" truevalue="--mrna" falsevalue="" checked="false" label="Extract mRNA sequences" help="This extracts the mRNA
120 sequence (i.e. transcribed sequence (devoid of introns, but containing untranslated exons))." />
121 <param argument="--cdna" type="boolean" truevalue="--cdna" falsevalue="" checked="false" label="Extract the cDNA sequence"
122 help="This extracts the cDNA sequence (i.e. reverse complement of the mRNA: transcribed sequence (devoid of introns, but
123 containing untranslated exons), then reverse complemented)." />
124 <param argument="--clean_final_stop" type="boolean" truevalue="--clean_final_stop" falsevalue="" checked="false" label="Clean final stop codons"
125 help="This option allows removing the translation of the final stop codons that is represented by the '*' character. This character can be
126 disturbing for many programs (e.g. interproscan)" />
127 <param argument="--clean_internal_stop" type="boolean" truevalue="--clean_internal_stop" falsevalue="" checked="false" label="Clean internal
128 stop codons" help="The Clean Internal Stop option allows replacing the translation of the stop codons present among the sequence that is
129 represented by the '*' character by 'X'. This character can be disturbing for many programs (e.g. interproscan)" />
130 <param argument="--upstream" type="integer" min="0" value="" optional="true" label="Upstream nucleotides" help="It will take that number of additional nucleotides at the 5' extremity." />
131 <param argument="--downstream" type="integer" min="0" value="" optional="true" label="Downstream nucleotides" help="It will take that number of downstream nucleotides." />
132 <param argument="--full" type="boolean" truevalue="--full" falsevalue="" checked="false" label="Full" help="This option allows dealing
133 with features that may span over several locations like CDS or exon, in order to extract the full sequence from the start extremity
134 of the first chunk to the end extremity of the last chunk. The use of that option with '--type exon' will extract the pre-mRNA
135 sequence (i.e. with introns). Use of that option on CDS will give the pre-mRNA without the untranslated regions (UTRs)." />
136 <param argument="--keep_attributes" type="boolean" truevalue="--keep_attributes" falsevalue="" checked="false" label="Keep attributes"
137 help="The value of the attribute tags will be extracted from the feature type specified by the option --type and stored in the FASTA header." />
138 <param argument="--keep_parent_attributes" type="boolean" truevalue="--keep_parent_attributes" falsevalue="" checked="false" label="Keep parental attributes"
139 help="Keep parental attributes" />
140 <param argument="--merge" type="boolean" truevalue="--merge" falsevalue="" checked="false" label="Merge" help="By default, only features that span
141 several locations (e.g. CDS and UTR can span over several exons) are merged together. In order to merge other types of features (e.g. exon) you
142 must activate this parameter." />
143 <param argument="--plus_strand_only" type="boolean" truevalue="--plus_strand_only" falsevalue="" checked="false" label="Plus strand only" help="By default
144 the extracted feature sequences from a minus strand are reverse complemented. By activating this option you will always get the sequence from the plus strand (not reverse complemented)." />
145 <param argument="--protein" type="boolean" truevalue="--protein" falsevalue="" checked="false" label="Protein" help="It will extract the sequence in amino acids." />
146 <param argument="--remove_orf_offset" type="boolean" truevalue="--remove_orf_offset" falsevalue="" checked="false" label="Remove ORF offset" help="CDS can start with a phase different
147 from 0 when a gene model is fragmented. When asking for protein translation this (start) offset is trimmed out automatically. But when you extract CDS DNA sequences, this (start)
148 offset is not removed by default. To remove it activate this option. If the --upstream or --downstream options are used too, the (start) offset is trimmed first, then the requested piece
149 of sequence is added." />
150 <param argument="--revcomp" type="boolean" truevalue="--revcomp" falsevalue="" checked="false" label="Reverse complement the extracted sequence" help="By default the extracted feature
151 sequences from a minus strand are reverse complemented. Consequently, for minus strand features that option will extract the sequences from plus strand from left to right." />
152 <param argument="--split" type="boolean" truevalue="--split" falsevalue="" checked="false" label="Split" help="By default, all features that span several locations (e.g. CDS and UTR can
153 span over several exons) are merged together to shape the biological feature (e.g. several CDS chunks are merged to create the CDS in its whole). If you wish to extract all the chunks
154 independently activate this option." />
155 </when>
156 <when value="convert_GFF2GTF">
157 <expand macro="ANNOTATION_INPUT" format="gff,gff3,gff3.gz"/>
158 <param argument="--gtf_version" type="select" label="GTF version">
159 <option value="3">GTF v3 - 9 feature types accepted: gene, transcript, exon, CDS, Selenocysteine, start_codon, stop_codon, three_prime_utr and five_prime_utr</option>
160 <option value="2.5">GTF v2.5 - 8 feature types accepted: gene, transcript, exon, CDS, UTR, start_codon, stop_codon and Selenocysteine</option>
161 <option value="2.2">GTF v2.2 - 9 feature types accepted: CDS, start_codon, stop_codon, 5UTR, 3UTR, inter, inter_CNS, intron_CNS and exon</option>
162 <option value="2.1">GTF v2.1 - 6 feature types accepted: CDS, start_codon, stop_codon, exon, 5UTR and 3UTR</option>
163 <option value="2">GTF v2 - 4 feature types accepted: CDS, start_codon, stop_codon and exon</option>
164 <option value="1">GTF v1 - 5 feature types accepted: CDS, start_codon, stop_codon, exon and intron</option>
165 <option value="relax">Relax: all feature types are accepted.</option>
166 </param>
167 </when>
168 <when value="convert_GTF2GFF">
169 <expand macro="ANNOTATION_INPUT" format="gtf"/>
170 </when>
171 <when value="filter_feature_fasta">
172 <expand macro="ANNOTATION_INPUT" />
173 <expand macro="REFERENCE_FASTA"/>
174 </when>
175 <when value="fix">
176 <expand macro="ANNOTATION_INPUT" format="gff,gff3,gff3.gz"/>
177 </when>
178 <when value="functional_analysis">
179 <expand macro="ANNOTATION_INPUT" format="gff,gtf,gff3,gff3.gz"/>
180 <expand macro="REFERENCE_FASTA"/>
181 </when>
182 <when value="merge_annotations">
<!-- The command section passes both files through the same gff option, so that is the flag advertised here. -->
183 <param argument="--gff" name="input_annotation1" type="data" format="gff,gtf,gff3,gff3.gz" label="Annotation file 1" help="Input GTF/GFF file" />
184 <param argument="--gff" name="input_annotation2" type="data" format="gff,gtf,gff3,gff3.gz" label="Annotation file 2" help="Input GTF/GFF file" />
185 </when>
186 <when value="complement">
187 <param argument="--ref" name="input_annotation1" type="data" format="gff,gtf,gff3,gff3.gz" label="Reference annotation" help="Reference GTF/GFF file" />
188 <param argument="--add" name="input_annotation2" type="data" format="gff,gtf,gff3,gff3.gz" label="Annotation to complement" help="Annotation file you would like to use to complement the reference annotation." />
189 <param argument="--size_min" type="integer" min="0" value="0" label="Minimum CDS size" help="Option to keep the non-overlapping gene only if the CDS size (in nucleotides) is over the minimum
190 size defined. Default = 0, which means all of them are kept." />
191 </when>
192 </conditional>
193 </inputs>
<!-- Output datasets. Each filter is a Python expression over the "tool" conditional that keeps an output only for the selectors that produce it. -->
194 <outputs>
<!-- GFF annotation: written by the fix, GTF-to-GFF, merge and complement branches (the selectors not claimed by any other dataset below). -->
195 <data name="annotation_gff" format="gff" label="${tool.name} on ${on_string}: annotation file (GFF)">
196 <filter>tool['selector'] in ['fix','convert_GTF2GFF','merge_annotations','complement']</filter>
197 </data>
<!-- GTF annotation: GFF-to-GTF conversion only. -->
198 <data name="annotation_gtf" format="gtf" label="${tool.name} on ${on_string}: annotation file (GTF)">
199 <filter>tool['selector'] == 'convert_GFF2GTF'</filter>
200 </data>
<!-- Result of filtering the annotation against the reference FASTA. -->
201 <data name="features_filtered" format="tabular" label="${tool.name} on ${on_string}: filtered results">
202 <filter>tool['selector'] == 'filter_feature_fasta'</filter>
203 </data>
<!-- Sequences written directly by the extract branch. -->
204 <data name="sequence_output" format="fasta" label="${tool.name} on ${on_string}: FASTA file">
205 <filter>tool['selector'] == 'extract'</filter>
206 </data>
<!-- Text report shared by the two statistics branches and the comparison branch. -->
207 <data name="stats_output" format="txt" label="${tool.name} on ${on_string}: stats file">
208 <filter>tool['selector'] in ['annotation_statistics','compare','functional_analysis']</filter>
209 </data>
<!-- PDF plots discovered under temp_output_distribution_plots; only collected for annotation_statistics. -->
210 <collection name="distribution_plots_wiso" type="list" label="${tool.name} on ${on_string}: distribution plots (with isoforms)">
211 <discover_datasets pattern="__designation_and_ext__" directory="temp_output_distribution_plots/with_isoforms" format="pdf"/>
212 <filter>tool['selector'] == 'annotation_statistics'</filter>
213 </collection>
214 <collection name="distribution_plots_woiso" type="list" label="${tool.name} on ${on_string}: distribution plots (without isoforms)">
215 <discover_datasets pattern="__designation_and_ext__" directory="temp_output_distribution_plots/without_isoforms" format="pdf"/>
216 <filter>tool['selector'] == 'annotation_statistics'</filter>
217 </collection>
218 </outputs>
219 <tests>
220 <!-- Test 01: annotation statistics. Runs on annotation.gtf with genome.fasta.gz taken from the history; checks the stats text file and one PDF (by size, delta 100) in each of the two 4-element plot collections. -->
221 <test expect_num_outputs="3">
222 <conditional name="tool">
223 <param name="selector" value="annotation_statistics"/>
224 <param name="gff" value="annotation.gtf" ftype="gtf"/>
225 <conditional name="reference_genome">
226 <param name="source" value="history"/>
227 <param name="history_item" value="genome.fasta.gz"/>
228 </conditional>
229 </conditional>
230 <output name="stats_output" file="test01_stats.txt" ftype="txt"/>
231 <output_collection name="distribution_plots_woiso" type="list" count="4">
232 <element name="transcriptClass_cds" file="test01_plot2.pdf" ftype="pdf" compare="sim_size" delta="100"/>
233 </output_collection>
234 <output_collection name="distribution_plots_wiso" type="list" count="4">
235 <element name="transcriptClass_cds" file="test01_plot1.pdf" ftype="pdf" compare="sim_size" delta="100"/>
236 </output_collection>
237 </test>
238 <!-- Test 02: extract sequences. Extracts gene features from annotation_small.gtf with 10 upstream and 20 downstream nucleotides; the FASTA output is compared with test02.fasta. -->
239 <test expect_num_outputs="1">
240 <conditional name="tool">
241 <param name="selector" value="extract"/>
242 <param name="gff" value="annotation_small.gtf"/>
243 <conditional name="reference_genome">
244 <param name="source" value="history"/>
245 <param name="history_item" value="genome.fasta.gz"/>
246 </conditional>
247 <param name="type" value="gene"/>
248 <param name="upstream" value="10"/>
249 <param name="downstream" value="20"/>
250 </conditional>
251 <output name="sequence_output" file="test02.fasta" ftype="fasta"/>
252 </test>
253 <!-- Test 03: compare annotations. Compares annotation.gtf with annotation_small.gtf; the stats output may differ from test03.txt by at most 2 lines. -->
254 <test expect_num_outputs="1">
255 <conditional name="tool">
256 <param name="selector" value="compare"/>
257 <param name="input_annotation1" value="annotation.gtf"/>
258 <param name="input_annotation2" value="annotation_small.gtf"/>
259 </conditional>
260 <output name="stats_output" file="test03.txt" ftype="txt" lines_diff="2"/>
261 </test>
262 <!-- Test 04: complement annotation. Complements annotation_small.gtf with annotation_unique.gtf using a minimum CDS size of 10; the result is compared with test04.gff. -->
263 <test expect_num_outputs="1">
264 <conditional name="tool">
265 <param name="selector" value="complement"/>
266 <param name="input_annotation1" value="annotation_small.gtf" ftype="gtf"/>
267 <param name="input_annotation2" value="annotation_unique.gtf" ftype="gtf"/>
268 <param name="size_min" value="10"/>
269 </conditional>
270 <output name="annotation_gff" file="test04.gff" ftype="gff"/>
271 </test>
272 <!-- Test 05: Convert GFF2GTF. Converts test04.gff to GTF version 2; the result is compared with test05.gtf. -->
273 <test expect_num_outputs="1">
274 <conditional name="tool">
275 <param name="selector" value="convert_GFF2GTF"/>
276 <param name="gff" value="test04.gff" ftype="gff"/>
277 <param name="gtf_version" value="2"/>
278 </conditional>
279 <output name="annotation_gtf" file="test05.gtf" ftype="gtf"/>
280 </test>
281 <!-- Test 06: Convert GTF2GFF. Converts annotation_small.gtf; the result is compared with test06.gff. -->
282 <test expect_num_outputs="1">
283 <conditional name="tool">
284 <param name="selector" value="convert_GTF2GFF"/>
285 <param name="gff" value="annotation_small.gtf" ftype="gtf"/>
286 </conditional>
287 <output name="annotation_gff" file="test06.gff" ftype="gff"/>
288 </test>
289 <!-- Test 07: Filter feature FASTA. Filters annotation_small.gtf against genome.fasta.gz taken from the history; the result is compared with test07.tabular. -->
290 <test expect_num_outputs="1">
291 <conditional name="tool">
292 <param name="selector" value="filter_feature_fasta"/>
293 <param name="gff" value="annotation_small.gtf" ftype="gtf"/>
294 <conditional name="reference_genome">
295 <param name="source" value="history"/>
296 <param name="history_item" value="genome.fasta.gz"/>
297 </conditional>
298 </conditional>
299 <output name="features_filtered" file="test07.tabular" ftype="tabular"/>
300 </test>
301 <!-- Test 08: Fix annotation file. Fixes annotation_broken.gff; the result is compared with annotation_fixed.gff and stdout must report that 2 missing exons were created. -->
302 <test expect_num_outputs="1">
303 <conditional name="tool">
304 <param name="selector" value="fix"/>
305 <param name="gff" value="annotation_broken.gff" ftype="gff"/>
306 </conditional>
307 <output name="annotation_gff" file="annotation_fixed.gff" ftype="gff"/>
308 <assert_stdout>
309 <has_text text="2 exons created that were missing" />
310 </assert_stdout>
311 </test>
312 <!-- Test 09: Functional analysis. Runs on annotation_small.gtf with genome.fasta.gz taken from the history; the stats output is compared with test09.txt. -->
313 <test expect_num_outputs="1">
314 <conditional name="tool">
315 <param name="selector" value="functional_analysis"/>
316 <param name="gff" value="annotation_small.gtf"/>
317 <conditional name="reference_genome">
318 <param name="source" value="history"/>
319 <param name="history_item" value="genome.fasta.gz"/>
320 </conditional>
321 </conditional>
322 <output name="stats_output" file="test09.txt" ftype="txt"/>
323 </test>
324 <!-- Test 10: Merge annotations. Merges annotation_small.gtf with annotation_unique.gtf; the result is compared with test10.gff. -->
325 <test expect_num_outputs="1">
326 <conditional name="tool">
327 <param name="selector" value="merge_annotations"/>
328 <param name="input_annotation1" value="annotation_small.gtf"/>
329 <param name="input_annotation2" value="annotation_unique.gtf"/>
330 </conditional>
331 <output name="annotation_gff" file="test10.gff" ftype="gff"/>
332 </test>
333 <!-- Test 11: Test compressed files. Same checks as Test 08 but with the gzipped input annotation_broken.gff.gz. NOTE(review): the input is declared with ftype gff although the file name is gzipped; confirm this is intended. -->
334 <test expect_num_outputs="1">
335 <conditional name="tool">
336 <param name="selector" value="fix"/>
337 <param name="gff" value="annotation_broken.gff.gz" ftype="gff"/>
338 </conditional>
339 <output name="annotation_gff" file="annotation_fixed.gff" ftype="gff"/>
340 <assert_stdout>
341 <has_text text="2 exons created that were missing" />
342 </assert_stdout>
343 </test>
344 <!-- Test 12: test indexed references. Extracts gene features from phix174.gff using the phix174 entry of the indexed reference source; only checks that stdout contains "Job done". -->
345 <test expect_num_outputs="1">
346 <conditional name="tool">
347 <param name="selector" value="extract"/>
348 <param name="gff" value="phix174.gff"/>
349 <conditional name="reference_genome">
350 <param name="source" value="indexed"/>
351 <param name="index" value="phix174"/>
352 </conditional>
353 <param name="type" value="gene"/>
354 </conditional>
355 <assert_stdout>
356 <has_text text="Job done" />
357 </assert_stdout>
358 </test>
359 </tests>
360 <help><![CDATA[
361
362 .. class:: infomark
363
364 **Purpose**
365
366 AGAT is a GFF/GTF toolkit that allows you to perform almost everything you might want to achieve with annotation files.
367
368 AGAT has the power to check, fix, pad missing information (features/attributes) of any kind of GTF and GFF to create complete, sorted and standardised GFF3 format.
369 Over the years it has been enriched by many tools to perform just about any task that is possible related to GTF/GFF format files (sanitizing, conversions,
370 merging, modifying, filtering, FASTA sequence extraction, adding information, etc). Compared to other methods, AGAT is robust to even the most despicable GTF/GFF files.
371
372 ]]></help>
373 <expand macro="citations"/>
374 </tool>