comparison gffread.xml @ 7:9c298cab341d draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gffread commit f40643d8b80299ebb84faebe92579321ac459746"
author iuc
date Sat, 25 Sep 2021 15:38:01 +0000
parents bba49324f2fa
children
comparison
equal deleted inserted replaced
6:bba49324f2fa 7:9c298cab341d
1 <tool id="gffread" name="gffread" version="@VERSION@.0"> 1 <tool id="gffread" name="gffread" version="@GALAXY_TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
2 <description>Filters and/or converts GFF3/GTF2 records</description> 2 <description>Filters and/or converts GFF3/GTF2 records</description>
3 <xrefs> 3 <xrefs>
4 <xref type="bio.tools">gffread</xref> 4 <xref type="bio.tools">gffread</xref>
5 </xrefs> 5 </xrefs>
6 <macros> 6 <macros>
7 <token name="@VERSION@">0.11.6</token> 7 <!-- the version of this tool must not be lowered since in the past 2.x was used
8 let's use small increments and hope that gffread catches up one day -->
9 <token name="@GALAXY_TOOL_VERSION@">2.2.1.3</token>
10 <token name="@TOOL_VERSION@">0.12.7</token>
11 <token name="@VERSION_SUFFIX@">0</token>
8 <xml name="fasta_output_select"> 12 <xml name="fasta_output_select">
9 <param name="fa_outputs" type="select" display="checkboxes" multiple="true" label="Select fasta outputs"> 13 <param name="fa_outputs" type="select" display="checkboxes" multiple="true" label="Select fasta outputs">
10 <option value="-w exons.fa">fasta file with spliced exons for each GFF transcript (-w exons.fa)</option> 14 <option value="-w exons.fa">fasta file with spliced exons for each GFF transcript (-w)</option>
11 <option value="-x cds.fa">fasta file with spliced CDS for each GFF transcript (-x cds.fa)</option> 15 <option value="-x cds.fa">fasta file with spliced CDS for each GFF transcript (-x)</option>
12 <option value="-y pep.fa">protein fasta file with the translation of CDS for each record (-y pep.fa)</option> 16 <option value="-y pep.fa">protein fasta file with the translation of CDS for each record (-y)</option>
13 <option value="-W">for each fasta: record the exon coordinates projected onto the spliced sequence (-W)</option> 17 <option value="-W">for each fasta: record the exon coordinates projected onto the spliced sequence (-W)</option>
18 <option value="-S">for protein fasta: use '*' instead of '.' as stop codon translation (-S)</option>
14 </param> 19 </param>
15 </xml> 20 </xml>
16 <xml name="ref_filtering_select"> 21 <xml name="ref_filtering_select">
17 <param name="ref_filtering" type="select" display="checkboxes" multiple="true" label="reference based filters"> 22 <param name="ref_filtering" type="select" display="checkboxes" multiple="true" label="reference based filters">
18 <option value="-N">discard multi-exon mRNAs that have any intron with a non-canonical splice site consensus, i.e. not GT-AG, GC-AG or AT-AC (-N)</option> 23 <option value="-N">discard multi-exon mRNAs that have any intron with a non-canonical splice site consensus, i.e. not GT-AG, GC-AG or AT-AC (-N)</option>
23 <option value="-B">single-exon transcripts are also checked on the opposite strand (-B with -V)</option> 28 <option value="-B">single-exon transcripts are also checked on the opposite strand (-B with -V)</option>
24 --> 29 -->
25 </param> 30 </param>
26 </xml> 31 </xml>
27 <xml name="trackname"> 32 <xml name="trackname">
28 <param name="tname" type="text" value="" optional="true" label="Trackname to use in the second column of each GFF output line" help="(-t track_name}"> 33 <param argument="-t" name="tname" type="text" value="" optional="true" label="Trackname to use in the second column of each GFF output line" help="">
29 <validator type="regex">\w+</validator> 34 <validator type="regex">\w+</validator>
30 </param> 35 </param>
31 </xml> 36 </xml>
32 <xml name="merge_opts"> 37 <xml name="merge_opts">
33 <option value="-K">also collapse shorter, fully contained transcripts with fewer introns than the container (-K)</option> 38 <option value="-K">also collapse shorter, fully contained transcripts with fewer introns than the container (-K)</option>
34 <option value="-Q">remove the containment restriction: multi-exon transcripts will be collapsed if just their introns match, while single-exon transcripts can partially overlap 80% (-Q)</option> 39 <option value="-Q">remove the containment restriction: multi-exon transcripts will be collapsed if just their introns match, while single-exon transcripts can partially overlap 80% (-Q)</option>
35 <option value="-d dupinfo">output collapsing info (-d dupinfo)</option> 40 <option value="-d dupinfo">output collapsing info (-d)</option>
36 </xml> 41 </xml>
37 <xml name="cluster_opts"> 42 <xml name="cluster_opts">
38 <option value="--force-exons"> make sure that the lowest level GFF features are printed as 'exon' features (--force-exons)</option> 43 <option value="--force-exons"> make sure that the lowest level GFF features are printed as 'exon' features (--force-exons)</option>
39 <option value="-Z">merge close exons into a single exon (for intron size &lt; 4) (-Z)</option> 44 <option value="-Z">merge close exons into a single exon (for intron size &lt; 4) (-Z)</option>
40 </xml> 45 </xml>
49 <expand macro="cluster_opts" /> 54 <expand macro="cluster_opts" />
50 </param> 55 </param>
51 </xml> 56 </xml>
52 </macros> 57 </macros>
53 <requirements> 58 <requirements>
54 <requirement type="package" version="@VERSION@">gffread</requirement> 59 <requirement type="package" version="@TOOL_VERSION@">gffread</requirement>
55 </requirements> 60 </requirements>
61 <version_command>gffread --version</version_command>
56 <command detect_errors="aggressive"> 62 <command detect_errors="aggressive">
57 <![CDATA[ 63 <![CDATA[
58 #if $reference_genome.source == 'history': 64 #if $reference_genome.source == 'history':
59 ln -s '$reference_genome.genome_fasta' genomeref.fa && 65 ln -s '$reference_genome.genome_fasta' genomeref.fa &&
60 #end if 66 #end if
67
61 gffread '$input' 68 gffread '$input'
69 #if $input.ext.startswith("bed")
70 --in-bed
71 #end if
62 #if $reference_genome.source == 'cached': 72 #if $reference_genome.source == 'cached':
63 -g '${reference_genome.fasta_indexes.fields.path}' 73 -g '${reference_genome.fasta_indexes.fields.path}'
64 #if $reference_genome.ref_filtering and str($reference_genome.ref_filtering) != '': 74 #if $reference_genome.ref_filtering and str($reference_genome.ref_filtering) != '':
65 #echo ' '.join(str($reference_genome.ref_filtering).split(',')) 75 #echo ' '.join(str($reference_genome.ref_filtering).split(','))
66 #end if 76 #end if
105 #if $reference_genome.fa_outputs and str($reference_genome.fa_outputs) != '': 115 #if $reference_genome.fa_outputs and str($reference_genome.fa_outputs) != '':
106 #echo ' ' + ' '.join(str($reference_genome.fa_outputs).split(',')) 116 #echo ' ' + ' '.join(str($reference_genome.fa_outputs).split(','))
107 #end if 117 #end if
108 #end if 118 #end if
109 #if $gffs.gff_fmt != 'none': 119 #if $gffs.gff_fmt != 'none':
110 #if $gffs.tname: 120 #if $gffs.gff_fmt != 'bed' and $gffs.tname:
111 -t '$gffs.tname' 121 -t '$gffs.tname'
112 #end if 122 #end if
113 #if $gffs.gff_fmt == 'gff': 123 #if $gffs.gff_fmt == 'gff':
124 ## TODO: the extension check below compares against 'gft', which looks like a typo for 'gtf' - confirm and fix
114 #if $input.datatype.file_ext == 'gft': 125 #if $input.datatype.file_ext == 'gft':
115 $gffs.ensembl 126 $gffs.ensembl
116 #end if 127 #end if
117 $gffs.output_cmd
118 #elif $gffs.gff_fmt == 'gtf':
119 $gffs.output_cmd
120 #end if 128 #end if
121 #end if 129 #if $gffs.gff_fmt == 'gtf'
130 -T
131 #elif $gffs.gff_fmt == 'bed'
132 --bed
133 #end if
134 -o output.$gffs.gff_fmt
135 #end if
136
137 ## Missing options
138 ##
139 ## --ids
140 ## --nids
141 ## -l
142 ## --jmatch
143 ## --nc
144 ## --ignore-locus
145 ## -A -s (see above)
146 ## --sort-alpha : chromosomes (reference sequences) are sorted alphabetically
147 ## --sort-by : sort the reference sequences by the order in which their
148 ## names are given in the <refseq.lst> file
149 ## Misc
150 ## --keep-exon-attrs : for -F option, do not attempt to reduce redundant
151 ## --attrs
152 ## --keep-genes : in transcript-only mode (default), also preserve gene records
153 ## --keep-comments: for GFF3 input/output, try to preserve comments
154 ## -B (see above)
155 ## -P
156 ## --add-hasCDS : add a "hasCDS" attribute with value "true" for transcripts
157 ## that have CDS features
158 ## --adj-stop stop codon adjustment: enables -P and performs automatic
159 ## adjustment of the CDS stop coordinate if premature or downstream
160
161 ## --in-tlf: input GFF-like one-line-per-transcript format without exon/CDS
162 ## features (see --tlf option below); automatic if the input
163 ## filename ends with .tlf)
164 ## --stream: fast processing of input GFF/BED transcripts as they are received
165 ## (no sorting, exons must be grouped by transcript in the input data)
166
167 ## Clustering
168
169 ## -Y
170
171 ## Output
172
173 ## --gene2exon
174 ## --t-adopt
175 ## -j
176 ## --w-add
177 ## --w-nocds
122 ]]> 178 ]]>
123 </command> 179 </command>
124 <inputs> 180 <inputs>
125 <param name="input" type="data" format="gff3,gtf" label="Input GFF3 or GTF feature file"/> 181 <param name="input" type="data" format="bed,gff3,gtf" label="Input BED, GFF3 or GTF feature file"/>
126 <!-- filtering --> 182 <!-- filtering -->
127 <param name="filtering" type="select" display="checkboxes" multiple="true" label="filters"> 183 <param name="filtering" type="select" display="checkboxes" multiple="true" label="filters">
128 <option value="-U">discard single-exon transcripts (-U)</option> 184 <option value="-U">discard single-exon transcripts (-U)</option>
129 <option value="-C">coding only: discard mRNAs that have no CDS feature (-C)</option> 185 <option value="-C">coding only: discard mRNAs that have no CDS feature (-C)</option>
130 <option value="-G">only parse additional exon attributes from the first exon and move them to the mRNA level (useful for GTF input) (-G)</option> 186 <option value="-G">only parse additional exon attributes from the first exon and move them to the mRNA level (useful for GTF input) (-G)</option>
136 <option value="none">No</option> 192 <option value="none">No</option>
137 <option value="filter">Yes</option> 193 <option value="filter">Yes</option>
138 </param> 194 </param>
139 <when value="none"/> 195 <when value="none"/>
140 <when value="filter"> 196 <when value="filter">
141 <param name="range" type="text" value="" label="Only show transcripts overlapping coordinate range"> 197 <param argument="-r" name="range" type="text" value="" label="Only show transcripts overlapping coordinate range">
142 <help><![CDATA[ 198 <help><![CDATA[
143 (-r [['strand']'chr':]'start'..'end') <br> 199 [['strand']'chr':]'start'..'end' <br>
144 examples: <br> 200 examples: <br>
145 1000..500000 <br> 201 1000..500000 <br>
146 chr1:1000..500000 <br> 202 chr1:1000..500000 <br>
147 +chr1:1000..500000 <br> 203 +chr1:1000..500000 <br>
148 -chr1:1000..500000 204 -chr1:1000..500000
149 ]]> 205 ]]>
150 </help> 206 </help>
151 <validator type="regex">(([+-])?(\w+:))?\d+\.\.\d+</validator> 207 <validator type="regex">(([+-])?(\w+:))?\d+\.\.\d+</validator>
152 </param> 208 </param>
153 <param name="discard_partial" type="boolean" truevalue="-R" falsevalue="" checked="false" 209 <param argument="-R" name="discard_partial" type="boolean" truevalue="-R" falsevalue="" checked="false"
154 label="Discard all transcripts that are not fully contained within the given range" help="(-R)"/> 210 label="Discard all transcripts that are not fully contained within the given range" help=""/>
155 </when> 211 </when>
156 </conditional> 212 </conditional>
157 <param name="maxintron" type="integer" value="" optional="true" min="0" label="Filter out transcipts with large introns" 213 <param argument="-i" name="maxintron" type="integer" value="" optional="true" min="0" label="Filter out transcipts with large introns"
158 help="If set, discard transcripts having an intron larger (-i max_intron)"/> 214 help="If set, discard transcripts having an intron larger"/>
159 <param name="chr_replace" type="data" format="tabular" optional="true" label="Replace reference sequence names" > 215 <param argument="-m" name="chr_replace" type="data" format="tabular" optional="true" label="Replace reference sequence names" >
160 <help><![CDATA[(-m chr_replace) <br> 216 <help><![CDATA[
161 chr_replace is a reference sequence replacement table consisting of 2 columns: "original_ref_ID" "new_ref_ID"<br> 217 chr_replace is a reference sequence replacement table consisting of 2 columns: "original_ref_ID" "new_ref_ID"<br>
162 It is useful for switching between Ensembl and UCSC naming conventions <br> 218 It is useful for switching between Ensembl and UCSC naming conventions <br>
163 NOTE: GFF records on reference sequences that are not found among the "original_ref_ID" entries in this file will be filtered out 219 NOTE: GFF records on reference sequences that are not found among the "original_ref_ID" entries in this file will be filtered out
164 ]]> 220 ]]>
165 </help> 221 </help>
175 </param> 231 </param>
176 --> 232 -->
177 233
178 <!-- merging --> 234 <!-- merging -->
179 <conditional name="merging"> 235 <conditional name="merging">
180 <param name="merge_sel" type="select" label="Transcript merging" help="(-M/--merge or --cluster-only)"> 236 <param name="merge_sel" type="select" label="Transcript merging" help="">
181 <option value="none">none</option> 237 <option value="none">none</option>
182 <option value="merge">merge: cluster the input transcripts into loci, collapsing matching transcripts</option> 238 <option value="merge">merge: cluster the input transcripts into loci, collapsing matching transcripts (--merge)</option>
183 <option value="cluster">cluster-only: merge but without collapsing matching transcripts</option> 239 <option value="cluster">cluster-only: merge but without collapsing matching transcripts (--cluster-only)</option>
184 </param> 240 </param>
185 <when value="none"/> 241 <when value="none"/>
186 <when value="merge"> 242 <when value="merge">
187 <param name="merge_cmd" type="hidden" value="--merge"/> 243 <param name="merge_cmd" type="hidden" value="--merge"/>
188 <expand macro="merge_opt_sel" /> 244 <expand macro="merge_opt_sel" />
193 </when> 249 </when>
194 </conditional> 250 </conditional>
195 <!-- reference sequence file --> 251 <!-- reference sequence file -->
196 <!-- Error: -g option is required for options -w, -x, -y, -V, -N, -M --> 252 <!-- Error: -g option is required for options -w, -x, -y, -V, -N, -M -->
197 <conditional name="reference_genome"> 253 <conditional name="reference_genome">
198 <param name="source" type="select" label="Reference Genome" help="(-g genome.fasta) NOTE: Required for fasta outputs"> 254 <param name="source" type="select" label="Reference Genome" help="NOTE: Required for fasta outputs">
199 <option value="none">none</option> 255 <option value="none">none</option>
200 <option value="cached"></option> 256 <option value="cached"></option>
201 <option value="history">From your history</option> 257 <option value="history">From your history</option>
202 </param> 258 </param>
203 <when value="none"> 259 <when value="none">
204 </when> 260 </when>
205 <when value="cached"> 261 <when value="cached">
206 <param name="fasta_indexes" type="select" label="Source FASTA Sequence"> 262 <param argument="-g" name="fasta_indexes" type="select" label="Source FASTA Sequence">
207 <options from_data_table="all_fasta"/> 263 <options from_data_table="all_fasta"/>
208 </param> 264 </param>
209 <expand macro="ref_filtering_select" /> 265 <expand macro="ref_filtering_select" />
210 <expand macro="fasta_output_select" /> 266 <expand macro="fasta_output_select" />
211 </when> 267 </when>
212 <when value="history"> 268 <when value="history">
213 <param name="genome_fasta" type="data" format="fasta" label="Genome Reference Fasta"/> 269 <param argument="-g" name="genome_fasta" type="data" format="fasta" label="Genome Reference Fasta"/>
214 <expand macro="ref_filtering_select" /> 270 <expand macro="ref_filtering_select" />
215 <expand macro="fasta_output_select" /> 271 <expand macro="fasta_output_select" />
216 </when> 272 </when>
217 </conditional> 273 </conditional>
218 274
220 <conditional name="gffs"> 276 <conditional name="gffs">
221 <param name="gff_fmt" type="select" label="Feature File Output" help="(-o output.gff3|output.gtf)"> 277 <param name="gff_fmt" type="select" label="Feature File Output" help="(-o output.gff3|output.gtf)">
222 <option value="none">none</option> 278 <option value="none">none</option>
223 <option value="gff">GFF</option> 279 <option value="gff">GFF</option>
224 <option value="gtf">GTF</option> 280 <option value="gtf">GTF</option>
281 <option value="bed">BED</option>
225 </param> 282 </param>
226 <when value="none"> 283 <when value="none">
227 </when> 284 </when>
228 <when value="gff"> 285 <when value="gff">
229 <param name="output_cmd" type="hidden" value="-o output.gff3"/> 286 <param argument="-L" name="ensembl" type="boolean" truevalue="-L" falsevalue="" checked="false" label="Ensembl GTF to GFF3 conversion" help=""/>
230 <param name="ensembl" type="boolean" truevalue="-L" falsevalue="" checked="false" label="Ensembl GTF to GFF3 conversion" help="(-L)"/>
231 <expand macro="trackname" /> 287 <expand macro="trackname" />
232 </when> 288 </when>
233 <when value="gtf"> 289 <when value="gtf">
234 <param name="output_cmd" type="hidden" value="-T -o output.gtf"/>
235 <expand macro="trackname" /> 290 <expand macro="trackname" />
236 </when> 291 </when>
292 <when value="bed">
293 </when>
237 </conditional> 294 </conditional>
238 295
239 <param name="full_gff_attribute_preservation" type="boolean" truevalue="-F" falsevalue="" checked="false" 296 <param argument="-F" name="full_gff_attribute_preservation" type="boolean" truevalue="-F" falsevalue="" checked="false"
240 label="full GFF attribute preservation (all attributes are shown)" help="(-F)"/> 297 label="full GFF attribute preservation (all attributes are shown)" help=""/>
241 <param name="decode_url" type="boolean" truevalue="-D" falsevalue="" checked="false" 298 <param argument="-D" name="decode_url" type="boolean" truevalue="-D" falsevalue="" checked="false"
242 label="decode url encoded characters within attributes" help="(-D)"/> 299 label="decode url encoded characters within attributes" help=""/>
243 <param name="expose" type="boolean" truevalue="-E" falsevalue="" checked="false" 300 <param argument="-E" name="expose" type="boolean" truevalue="-E" falsevalue="" checked="false"
244 label="warn about duplicate transcript IDs and other potential problems with the given GFF/GTF records" help="(-E)"/> 301 label="warn about duplicate transcript IDs and other potential problems with the given GFF/GTF records" help=""/>
245 302
246 </inputs> 303 </inputs>
247 <outputs> 304 <outputs>
248 <data name="output_gff" format="gff3" metadata_source="input" label="${tool.name} on ${on_string}: gff3" from_work_dir="output.gff3"> 305 <data name="output_gff" format="gff3" metadata_source="input" label="${tool.name} on ${on_string}: gff3" from_work_dir="output.gff">
249 <filter>gffs['gff_fmt'] == 'gff'</filter> 306 <filter>gffs['gff_fmt'] == 'gff'</filter>
250 </data> 307 </data>
251 <data name="output_gtf" format="gtf" metadata_source="input" label="${tool.name} on ${on_string}: gtf" from_work_dir="output.gtf"> 308 <data name="output_gtf" format="gtf" metadata_source="input" label="${tool.name} on ${on_string}: gtf" from_work_dir="output.gtf">
252 <filter>gffs['gff_fmt'] == 'gtf'</filter> 309 <filter>gffs['gff_fmt'] == 'gtf'</filter>
310 </data>
311 <data name="output_bed" format="bed" metadata_source="input" label="${tool.name} on ${on_string}: bed" from_work_dir="output.bed">
312 <filter>gffs['gff_fmt'] == 'bed'</filter>
253 </data> 313 </data>
254 <data name="output_exons" format="fasta" label="${tool.name} on ${on_string}: exons.fa" from_work_dir="exons.fa"> 314 <data name="output_exons" format="fasta" label="${tool.name} on ${on_string}: exons.fa" from_work_dir="exons.fa">
255 <filter>'fa_outputs' in reference_genome and str(reference_genome['fa_outputs']).find('exons.fa') > 0 </filter> 315 <filter>'fa_outputs' in reference_genome and str(reference_genome['fa_outputs']).find('exons.fa') > 0 </filter>
256 </data> 316 </data>
257 <data name="output_cds" format="fasta" label="${tool.name} on ${on_string}: cds.fa" from_work_dir="cds.fa"> 317 <data name="output_cds" format="fasta" label="${tool.name} on ${on_string}: cds.fa" from_work_dir="cds.fa">
263 <data name="output_dupinfo" format="txt" label="${tool.name} on ${on_string}: dupinfo" from_work_dir="dupinfo"> 323 <data name="output_dupinfo" format="txt" label="${tool.name} on ${on_string}: dupinfo" from_work_dir="dupinfo">
264 <filter>'merge_options' in merging and merging['merge_options'].find('dupinfo') > 0</filter> 324 <filter>'merge_options' in merging and merging['merge_options'].find('dupinfo') > 0</filter>
265 </data> 325 </data>
266 </outputs> 326 </outputs>
267 <tests> 327 <tests>
268 <test> 328 <test expect_num_outputs="1">
269 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/> 329 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
270 <param name="gff_fmt" value="gff"/> 330 <param name="gff_fmt" value="gff"/>
271 <output name="output_gff" file="Homo_sapiens.GRCh37_19.71.gff3" ftype="gff3" lines_diff="2" /> 331 <output name="output_gff" file="Homo_sapiens.GRCh37_19.71.gff3" ftype="gff3" lines_diff="4" />
272 </test> 332 </test>
273 <test> 333 <test expect_num_outputs="1">
334 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
335 <param name="gff_fmt" value="gff"/>
336 <output name="output_gff" file="Homo_sapiens.GRCh37_19.71.gff3" ftype="gff3" lines_diff="4" />
337 </test>
338 <test expect_num_outputs="1">
274 <param name="input" ftype="gtf" value="ecoli-k12.gff3"/> 339 <param name="input" ftype="gtf" value="ecoli-k12.gff3"/>
275 <param name="gff_fmt" value="gff"/> 340 <param name="gff_fmt" value="gff"/>
276 <param name="full_gff_attribute_preservation" value="-F"/> 341 <param name="full_gff_attribute_preservation" value="-F"/>
277 <output name="output_gff" file="ecoli-k12.processed.gff3" ftype="gff3" lines_diff="2" /> 342 <output name="output_gff" file="ecoli-k12.processed.gff3" ftype="gff3" lines_diff="4" />
278 </test> 343 </test>
279 <test> 344 <!-- bed output -->
280 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/> 345 <test expect_num_outputs="1">
281 <param name="filtering" value="--no-pseudo"/> 346 <param name="input" ftype="gff3" value="Homo_sapiens.GRCh37_19.71.gff3"/>
282 <param name="gff_fmt" value="gtf"/> 347 <param name="gff_fmt" value="bed"/>
283 <output name="output_gtf"> 348 <output name="output_bed" ftype="bed">
284 <assert_contents> 349 <assert_contents>
285 <not_has_text text="pseudo" /> 350 <has_n_lines n="42"/>
286 </assert_contents> 351 <has_n_columns n="13"/>
287 </output> 352 </assert_contents>
288 </test> 353 </output>
289 <test> 354 </test>
355 <!-- bed input and test tname -->
356 <test expect_num_outputs="1">
357 <param name="input" ftype="bed" value="Homo_sapiens.GRCh37_19.71.bed"/>
358 <param name="gff_fmt" value="gff"/>
359 <param name="tname" value="track name"/>
360 <output name="output_bed" ftype="gff3">
361 <assert_contents>
362 <has_n_lines n="388"/>
363 <!-- this will work with https://github.com/galaxyproject/galaxy/pull/12528 -->
364 <!-- <has_n_columns n="9" comment="#"/> -->
365 <has_text text="track name"/>
366 </assert_contents>
367 </output>
368 </test>
369 <test expect_num_outputs="1">
290 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/> 370 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
291 <param name="region_filter" value="filter"/> 371 <param name="region_filter" value="filter"/>
292 <param name="range" value="19:496500..504965"/> 372 <param name="range" value="19:496500..504965"/>
293 <param name="gff_fmt" value="gtf"/> 373 <param name="gff_fmt" value="gtf"/>
294 <output name="output_gtf"> 374 <output name="output_gtf">
296 <has_text text="ENST00000587541" /> 376 <has_text text="ENST00000587541" />
297 <has_text text="ENST00000382683" /> 377 <has_text text="ENST00000382683" />
298 </assert_contents> 378 </assert_contents>
299 </output> 379 </output>
300 </test> 380 </test>
301 <test> 381 <test expect_num_outputs="1">
302 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/> 382 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
303 <param name="region_filter" value="filter"/> 383 <param name="region_filter" value="filter"/>
304 <param name="range" value="19:496500..504965"/> 384 <param name="range" value="19:496500..504965"/>
305 <param name="discard_partial" value="true"/> 385 <param name="discard_partial" value="true"/>
306 <param name="gff_fmt" value="gtf"/> 386 <param name="gff_fmt" value="gtf"/>
309 <not_has_text text="ENST00000587541" /> 389 <not_has_text text="ENST00000587541" />
310 <has_text text="ENST00000382683" /> 390 <has_text text="ENST00000382683" />
311 </assert_contents> 391 </assert_contents>
312 </output> 392 </output>
313 </test> 393 </test>
314 <test> 394 <test expect_num_outputs="1">
315 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/> 395 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
316 <param name="filtering" value="-C"/> 396 <param name="filtering" value="-C"/>
317 <param name="region_filter" value="filter"/> 397 <param name="region_filter" value="filter"/>
318 <param name="range" value="19:496500..504965"/> 398 <param name="range" value="19:496500..504965"/>
319 <param name="gff_fmt" value="gtf"/> 399 <param name="gff_fmt" value="gtf"/>
322 <not_has_text text="ENST00000587541" /> 402 <not_has_text text="ENST00000587541" />
323 <has_text text="ENST00000382683" /> 403 <has_text text="ENST00000382683" />
324 </assert_contents> 404 </assert_contents>
325 </output> 405 </output>
326 </test> 406 </test>
327 <test> 407 <test expect_num_outputs="4">
328 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/> 408 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
329 <param name="source" value="history"/> 409 <param name="source" value="history"/>
330 <param name="genome_fasta" ftype="fasta" value="Homo_sapiens.GRCh37.71.dna.chromosome.19.fa"/> 410 <param name="genome_fasta" ftype="fasta" value="Homo_sapiens.GRCh37.71.dna.chromosome.19.fa"/>
331 <param name="fa_outputs" value="-w exons.fa,-x cds.fa,-y pep.fa"/> 411 <param name="fa_outputs" value="-w exons.fa,-x cds.fa,-y pep.fa"/>
332 <param name="region_filter" value="filter"/> 412 <param name="region_filter" value="filter"/>
355 <has_text text="ENST00000346144" /> 435 <has_text text="ENST00000346144" />
356 <has_text text="MDFGLALLLAGLLGLLLGQSLQVKPLQVEPPEPVVAVALGASRQLTCRLACADRGASVQWRGLDTSLGAV" /> 436 <has_text text="MDFGLALLLAGLLGLLLGQSLQVKPLQVEPPEPVVAVALGASRQLTCRLACADRGASVQWRGLDTSLGAV" />
357 </assert_contents> 437 </assert_contents>
358 </output> 438 </output>
359 </test> 439 </test>
360 440 <test expect_num_outputs="1">
441 <param name="input" ftype="gtf" value="stop_codons.gtf"/>
442 <param name="source" value="history"/>
443 <param name="genome_fasta" ftype="fasta" value="Homo_sapiens.GRCh37.71.dna.chromosome.19.fa"/>
444 <param name="fa_outputs" value="-y pep.fa,-S"/>
445 <output name="output_pep">
446 <assert_contents>
447 <has_text text="ENST00000269812" />
448 <has_text text="PLRGLHPRV*LQTPLERCPCWPPAGGTGGCPHCLLHLRLLQSPTPTALSEGGGAGTEAQPVTDVDPGRG*" />
449 </assert_contents>
450 </output>
451 </test>
361 </tests> 452 </tests>
362 <help> 453 <help>
363 <![CDATA[ 454 <![CDATA[
364 **gffread Filters and/or converts GFF3/GTF2 records** 455 **gffread Filters and/or converts GFF3/GTF2 records**
365 456
366 The gffread command is documented with the stringtie_ package. 457 The gffread command is documented with the stringtie_ package.
367 458
368 .. _stringtie: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread 459 .. _stringtie: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread
369 460
370 461 gffread v0.12.7. Usage: ::
371 gffread v0.11.4. Usage: :: 462
372 463 gffread [-g <genomic_seqs_fasta> | <dir>] [-s <seq_info.fsize>]
373 gffread <input_gff> [-g <genomic_seqs_fasta> | <dir>][-s <seq_info.fsize>] 464 [-o <outfile>] [-t <trackname>] [-r [<strand>]<chr>:<start>-<end> [-R]]
374 [-o <outfile>] [-t <trackname>] [-r [[<strand>]<chr>:]<start>..<end> [-R]] 465 [--jmatch <chr>:<start>-<end>] [--no-pseudo]
375 [-CTVNJMKQAFPGUBHZWTOLE] [-w <exons.fa>] [-x <cds.fa>] [-y <tr_cds.fa>] 466 [-CTVNJMKQAFPGUBHZWTOLE] [-w <exons.fa>] [-x <cds.fa>] [-y <tr_cds.fa>]
376 [-i <maxintron>] [--bed] [--table <attrlist>] [--sort-by <refseq_list.txt>] 467 [-j ][--ids <IDs.lst> | --nids <IDs.lst>] [--attrs <attr-list>] [-i <maxintron>]
377 468 [--stream] [--bed | --gtf | --tlf] [--table <attrlist>] [--sort-by <ref.lst>]
469 [<input_gff>]
470
378 Filter, convert or cluster GFF/GTF/BED records, extract the sequence of 471 Filter, convert or cluster GFF/GTF/BED records, extract the sequence of
379 transcripts (exon or CDS) and more. 472 transcripts (exon or CDS) and more.
380 By default (i.e. without -O) only transcripts are processed, discarding any 473 By default (i.e. without -O) only transcripts are processed, discarding any
381 other non-transcript features. Default output is a simplified GFF3 with only 474 other non-transcript features. Default output is a simplified GFF3 with only
382 the basic attributes. 475 the basic attributes.
383 476
384 <input_gff> is a GFF file, use '-' for stdin
385
386 Options: 477 Options:
387 478 --ids discard records/transcripts if their IDs are not listed in <IDs.lst>
479 --nids discard records/transcripts if their IDs are listed in <IDs.lst>
388 -i discard transcripts having an intron larger than <maxintron> 480 -i discard transcripts having an intron larger than <maxintron>
389 -l discard transcripts shorter than <minlen> bases 481 -l discard transcripts shorter than <minlen> bases
390 -r only show transcripts overlapping coordinate range <start>..<end> 482 -r only show transcripts overlapping coordinate range <start>..<end>
391 (on chromosome/contig <chr>, strand <strand> if provided) 483 (on chromosome/contig <chr>, strand <strand> if provided)
392 -R for -r option, discard all transcripts that are not fully 484 -R for -r option, discard all transcripts that are not fully
393 contained within the given range 485 contained within the given range
486 --jmatch only output transcripts matching the given junction
394 -U discard single-exon transcripts 487 -U discard single-exon transcripts
395 -C coding only: discard mRNAs that have no CDS features 488 -C coding only: discard mRNAs that have no CDS features
396 --nc non-coding only: discard mRNAs that have CDS features 489 --nc non-coding only: discard mRNAs that have CDS features
397 --ignore-locus : discard locus features and attributes found in the input 490 --ignore-locus : discard locus features and attributes found in the input
398 -A use the description field from <seq_info.fsize> and add it 491 -A use the description field from <seq_info.fsize> and add it
399 as the value for a 'descr' attribute to the GFF record 492 as the value for a 'descr' attribute to the GFF record
400 -s <seq_info.fsize> is a tab-delimited file providing this info 493 -s <seq_info.fsize> is a tab-delimited file providing this info
401 for each of the mapped sequences: 494 for each of the mapped sequences:
402 <seq-name> <seq-length> <seq-description> 495 <seq-name> <seq-length> <seq-description>
403 (useful for -A option with mRNA/EST/protein mappings) 496 (useful for -A option with mRNA/EST/protein mappings)
404 497 Sorting: (by default, chromosomes are kept in the order they were found)
405 Sorting: (by default, chromosomes are kept in the order they were found)
406 --sort-alpha : chromosomes (reference sequences) are sorted alphabetically 498 --sort-alpha : chromosomes (reference sequences) are sorted alphabetically
407 --sort-by : sort the reference sequences by the order in which their 499 --sort-by : sort the reference sequences by the order in which their
408 names are given in the <refseq.lst> file 500 names are given in the <refseq.lst> file
409
410 Misc options: 501 Misc options:
411 -F preserve all GFF attributes (for non-exon features) 502 -F keep all GFF attributes (for non-exon features)
412 --keep-exon-attrs : for -F option, do not attempt to reduce redundant 503 --keep-exon-attrs : for -F option, do not attempt to reduce redundant
413 exon/CDS attributes 504 exon/CDS attributes
414 -G do not keep exon attributes, move them to the transcript feature 505 -G do not keep exon attributes, move them to the transcript feature
415 (for GFF3 output) 506 (for GFF3 output)
507 --attrs <attr-list> only output the GTF/GFF attributes listed in <attr-list>
508 which is a comma delimited list of attribute names to keep
416 --keep-genes : in transcript-only mode (default), also preserve gene records 509 --keep-genes : in transcript-only mode (default), also preserve gene records
417 --keep-comments: for GFF3 input/output, try to preserve comments 510 --keep-comments: for GFF3 input/output, try to preserve comments
418 -O process other non-transcript GFF records (by default non-transcript 511 -O process other non-transcript GFF records (by default non-transcript
419 records are ignored) 512 records are ignored)
420 -V discard any mRNAs with CDS having in-frame stop codons (requires -g) 513 -V discard any mRNAs with CDS having in-frame stop codons (requires -g)
438 --in-bed: input should be parsed as BED format (automatic if the input 531 --in-bed: input should be parsed as BED format (automatic if the input
439 filename ends with .bed*) 532 filename ends with .bed*)
440 --in-tlf: input GFF-like one-line-per-transcript format without exon/CDS 533 --in-tlf: input GFF-like one-line-per-transcript format without exon/CDS
441 features (see --tlf option below); automatic if the input 534 features (see --tlf option below); automatic if the input
442 filename ends with .tlf) 535 filename ends with .tlf)
443 536 --stream: fast processing of input GFF/BED transcripts as they are received
537 (no sorting, exons must be grouped by transcript in the input data)
444 Clustering: 538 Clustering:
445 -M/--merge : cluster the input transcripts into loci, discarding 539 -M/--merge : cluster the input transcripts into loci, discarding
446 "duplicated" transcripts (those with the same exact introns 540 "redundant" transcripts (those with the same exact introns
447 and fully contained or equal boundaries) 541 and fully contained or equal boundaries)
448 -d <dupinfo> : for -M option, write duplication info to file <dupinfo> 542 -d <dupinfo> : for -M option, write duplication info to file <dupinfo>
449 --cluster-only: same as -M/--merge but without discarding any of the 543 --cluster-only: same as -M/--merge but without discarding any of the
450 "duplicate" transcripts, only create "locus" features 544 "duplicate" transcripts, only create "locus" features
451 -K for -M option: also discard as redundant the shorter, fully contained 545 -K for -M option: also discard as redundant the shorter, fully contained
453 -Q for -M option, no longer require boundary containment when assessing 547 -Q for -M option, no longer require boundary containment when assessing
454 redundancy (can be combined with -K); only introns have to match for 548 redundancy (can be combined with -K); only introns have to match for
455 multi-exon transcripts, and >=80% overlap for single-exon transcripts 549 multi-exon transcripts, and >=80% overlap for single-exon transcripts
456 -Y for -M option, enforce -Q but also discard overlapping single-exon 550 -Y for -M option, enforce -Q but also discard overlapping single-exon
457 transcripts, even on the opposite strand (can be combined with -K) 551 transcripts, even on the opposite strand (can be combined with -K)
458
459 Output options: 552 Output options:
460 --force-exons: make sure that the lowest level GFF features are considered 553 --force-exons: make sure that the lowest level GFF features are considered
461 "exon" features 554 "exon" features
462 --gene2exon: for single-line genes not parenting any transcripts, add an 555 --gene2exon: for single-line genes not parenting any transcripts, add an
463 exon feature spanning the entire gene (treat it as a transcript) 556 exon feature spanning the entire gene (treat it as a transcript)
466 -D decode url encoded characters within attributes 559 -D decode url encoded characters within attributes
467 -Z merge very close exons into a single exon (when intron size<4) 560 -Z merge very close exons into a single exon (when intron size<4)
468 -g full path to a multi-fasta file with the genomic sequences 561 -g full path to a multi-fasta file with the genomic sequences
469 for all input mappings, OR a directory with single-fasta files 562 for all input mappings, OR a directory with single-fasta files
470 (one per genomic sequence, with file names matching sequence names) 563 (one per genomic sequence, with file names matching sequence names)
471 -w write a fasta file with spliced exons for each GFF transcript 564 -j output the junctions and the corresponding transcripts
565 -w write a fasta file with spliced exons for each transcript
566 --w-add <N> for the -w option, extract additional <N> bases
567 both upstream and downstream of the transcript boundaries
568 --w-nocds for -w, disable the output of CDS info in the FASTA file
472 -x write a fasta file with spliced CDS for each GFF transcript 569 -x write a fasta file with spliced CDS for each GFF transcript
473 -y write a protein fasta file with the translation of CDS for each record 570 -y write a protein fasta file with the translation of CDS for each record
474 -W for -w and -x options, write in the FASTA defline the exon 571 -W for -w, -x and -y options, write in the FASTA defline all the exon
475 coordinates projected onto the spliced sequence; 572 coordinates projected onto the spliced sequence;
476 for -y option, write transcript attributes in the FASTA defline
477 -S for -y option, use '*' instead of '.' as stop codon translation 573 -S for -y option, use '*' instead of '.' as stop codon translation
478 -L Ensembl GTF to GFF3 conversion (implies -F; should be used with -m) 574 -L Ensembl GTF to GFF3 conversion, adds version to IDs
479 -m <chr_replace> is a name mapping table for converting reference 575 -m <chr_replace> is a name mapping table for converting reference
480 sequence names, having this 2-column format: 576 sequence names, having this 2-column format:
481 <original_ref_ID> <new_ref_ID> 577 <original_ref_ID> <new_ref_ID>
482 WARNING: all GFF records on reference sequences whose original IDs
483 are not found in the 1st column of this table will be discarded!
484 -t use <trackname> in the 2nd column of each GFF/GTF output line 578 -t use <trackname> in the 2nd column of each GFF/GTF output line
485 -o write the records into <outfile> instead of stdout 579 -o write the output records into <outfile> instead of stdout
486 -T main output will be GTF instead of GFF3 580 -T main output will be GTF instead of GFF3
487 --bed output records in BED format instead of default GFF3 581 --bed output records in BED format instead of default GFF3
488 --tlf output "transcript line format" which is like GFF 582 --tlf output "transcript line format" which is like GFF
489 but exons, CDS features and related data are stored as GFF 583 but with exons and CDS related features stored as GFF
490 attributes in the transcript feature line, like this: 584 attributes in the transcript feature line, like this:
491 exoncount=N;exons=<exons>;CDSphase=<N>;CDS=<CDScoords> 585 exoncount=N;exons=<exons>;CDSphase=<N>;CDS=<CDScoords>
492 <exons> is a comma-delimited list of exon_start-exon_end coordinates; 586 <exons> is a comma-delimited list of exon_start-exon_end coordinates;
493 <CDScoords> is CDS_start:CDS_end coordinates or a list like <exons> 587 <CDScoords> is CDS_start:CDS_end coordinates or a list like <exons>
494 --table output a simple tab delimited format instead of GFF, with columns 588 --table output a simple tab delimited format instead of GFF, with columns
495 having the values of GFF attributes given in <attrlist>; special 589 having the values of GFF attributes given in <attrlist>; special
496 pseudo-attributes (prefixed by @) are recognized: 590 pseudo-attributes (prefixed by @) are recognized:
497 @chr, @start, @end, @strand, @numexons, @exons, @cds, @covlen, @cdslen 591 @id, @geneid, @chr, @start, @end, @strand, @numexons, @exons,
592 @cds, @covlen, @cdslen
593 If any of -w/-y/-x FASTA output files are enabled, the same fields
594 (excluding @id) are appended to the definition line of corresponding
595 FASTA records
498 -v,-E expose (warn about) duplicate transcript IDs and other potential 596 -v,-E expose (warn about) duplicate transcript IDs and other potential
499 problems with the given GFF/GTF records 597 problems with the given GFF/GTF records
598
500 ]]> 599 ]]>
501 </help> 600 </help>
502 <citations> 601 <citations>
503 <citation type="doi">10.1038/nbt.1621</citation> 602 <citation type="doi">10.1038/nbt.1621</citation>
504 </citations> 603 </citations>