comparison gffread.xml @ 1:96c4d0e18546 draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/cufflinks/gffread commit eb18f691975ef9539b5ebd4f118343c8ad967a1f
author devteam
date Tue, 07 Feb 2017 18:39:51 -0500
parents baeea9c2ff0f
children 6562753c6bdc
comparison
equal deleted inserted replaced
0:baeea9c2ff0f 1:96c4d0e18546
1 <tool id="gffread" name="gffread" version="@VERSION@.0"> 1 <tool id="gffread" name="gffread" version="@VERSION@.0">
2 <description>Filters and/or converts GFF3/GTF2 records</description> 2 <description>Filters and/or converts GFF3/GTF2 records</description>
3 <expand macro="requirements" />
4 <expand macro="stdio" />
5 <macros> 3 <macros>
6 <import>cuff_macros.xml</import> 4 <import>cuff_macros.xml</import>
7 <xml name="fasta_output_select"> 5 <xml name="fasta_output_select">
8 <param name="fa_outputs" type="select" display="checkboxes" multiple="true" label="Select fasta outputs"> 6 <param name="fa_outputs" type="select" display="checkboxes" multiple="true" label="Select fasta outputs">
9 <option value="-w exons.fa">fasta file with spliced exons for each GFF transcript (-w exons.fa)</option> 7 <option value="-w exons.fa">fasta file with spliced exons for each GFF transcript (-w exons.fa)</option>
16 <param name="ref_filtering" type="select" display="checkboxes" multiple="true" label="reference based filters"> 14 <param name="ref_filtering" type="select" display="checkboxes" multiple="true" label="reference based filters">
17 <option value="-N">discard multi-exon mRNAs that have any intron with a non-canonical splice site consensus, i.e. not GT-AG, GC-AG or AT-AC (-N)</option> 15 <option value="-N">discard multi-exon mRNAs that have any intron with a non-canonical splice site consensus, i.e. not GT-AG, GC-AG or AT-AC (-N)</option>
18 <option value="-J">discard any mRNAs that either lack initial START codon or the terminal STOP codon, or have an in-frame stop codon (-J)</option> 16 <option value="-J">discard any mRNAs that either lack initial START codon or the terminal STOP codon, or have an in-frame stop codon (-J)</option>
19 <option value="-V">discard any mRNAs with CDS having in-frame stop codons (-V)</option> 17 <option value="-V">discard any mRNAs with CDS having in-frame stop codons (-V)</option>
20 <option value="-H">check and adjust the starting CDS phase if the original phase leads to a translation with an in-frame stop codon (-H with -V)</option> 18 <option value="-H">check and adjust the starting CDS phase if the original phase leads to a translation with an in-frame stop codon (-H with -V)</option>
21 <!-- gffread bug: -B is missing from the params passed to the arg parser 19 <!-- gffread bug: -B is missing from the params passed to the arg parser
22 <option value="-B">single-exon transcripts are also checked on the opposite strand (-B with -V)</option> 20 <option value="-B">single-exon transcripts are also checked on the opposite strand (-B with -V)</option>
23 --> 21 -->
24 </param> 22 </param>
25 </xml> 23 </xml>
26 <xml name="trackname"> 24 <xml name="trackname">
47 <param name="merge_options" type="select" display="checkboxes" multiple="true" label="Cluster options"> 45 <param name="merge_options" type="select" display="checkboxes" multiple="true" label="Cluster options">
48 <expand macro="cluster_opts" /> 46 <expand macro="cluster_opts" />
49 </param> 47 </param>
50 </xml> 48 </xml>
51 </macros> 49 </macros>
50 <expand macro="requirements" />
51 <expand macro="stdio" />
52 <command> 52 <command>
53 <![CDATA[ 53 <![CDATA[
54 #if $reference_genome.source == 'history': 54 #if $reference_genome.source == 'history':
55 ln -s $reference_genome.genome_fasta genomeref.fa && 55 ln -s '$reference_genome.genome_fasta' genomeref.fa &&
56 #end if 56 #end if
57 gffread $input 57 gffread '$input'
58 #if $reference_genome.source == 'cached': 58 #if $reference_genome.source == 'cached':
59 -g "${reference_genome.fasta_indexes.fields.path}" 59 -g '${reference_genome.fasta_indexes.fields.path}'
60 #if $reference_genome.ref_filtering and str($reference_genome.ref_filtering) != '': 60 #if $reference_genome.ref_filtering and str($reference_genome.ref_filtering) != '':
61 #echo ' '.join(str($reference_genome.ref_filtering).split(',')) 61 #echo ' '.join(str($reference_genome.ref_filtering).split(','))
62 #end if 62 #end if
63 #elif $reference_genome.source == 'history': 63 #elif $reference_genome.source == 'history':
64 -g genomeref.fa 64 -g genomeref.fa
72 #end if 72 #end if
73 #if $maxintron and $maxintron > 0: 73 #if $maxintron and $maxintron > 0:
74 -i $maxintron 74 -i $maxintron
75 #end if 75 #end if
76 #if $region.region_filter == 'filter': 76 #if $region.region_filter == 'filter':
77 -r $region.range $region.discard_partial 77 -r '$region.range' $region.discard_partial
78 #end if 78 #end if
79 #if $merging.merge_sel != 'none': 79 #if $merging.merge_sel != 'none':
80 $merging.merge_cmd 80 $merging.merge_cmd
81 #if $merging.merge_options: 81 #if $merging.merge_options:
82 #echo ' '.join(str($merging.merge_options).split(',')) 82 #echo ' '.join(str($merging.merge_options).split(','))
83 #end if 83 #end if
84 #end if 84 #end if
85 #if $chr_replace: 85 #if $chr_replace:
86 -m "$chr_replace" 86 -m '$chr_replace'
87 #end if 87 #end if
88 ## 88 ##
89 ## Although documented, does not appear to be used in the gffread code 89 ## Although documented, does not appear to be used in the gffread code
90 ## #if $seq_info: 90 ## #if $seq_info:
91 ## -A -s "$seq_info" 91 ## -A -s "$seq_info"
97 #echo ' ' + ' '.join(str($reference_genome.fa_outputs).split(',')) 97 #echo ' ' + ' '.join(str($reference_genome.fa_outputs).split(','))
98 #end if 98 #end if
99 #end if 99 #end if
100 #if $gffs.gff_fmt != 'none': 100 #if $gffs.gff_fmt != 'none':
101 #if $gffs.tname: 101 #if $gffs.tname:
102 -t "$gffs.tname" 102 -t '$gffs.tname'
103 #end if 103 #end if
104 #if $gffs.gff_fmt == 'gff': 104 #if $gffs.gff_fmt == 'gff':
105 #if $input.datatype.file_ext == 'gft': 105 #if $input.datatype.file_ext == 'gft':
106 $gffs.ensembl 106 $gffs.ensembl
107 #end if 107 #end if
119 <option value="-U">discard single-exon transcripts (-U)</option> 119 <option value="-U">discard single-exon transcripts (-U)</option>
120 <option value="-C">coding only: discard mRNAs that have no CDS feature (-C)</option> 120 <option value="-C">coding only: discard mRNAs that have no CDS feature (-C)</option>
121 <option value="-G">only parse additional exon attributes from the first exon and move them to the mRNA level (useful for GTF input) (-G)</option> 121 <option value="-G">only parse additional exon attributes from the first exon and move them to the mRNA level (useful for GTF input) (-G)</option>
122 <option value="-O">process also non-transcript GFF records (by default non-transcript records are ignored) (-O)</option> 122 <option value="-O">process also non-transcript GFF records (by default non-transcript records are ignored) (-O)</option>
123 <option value="--no-pseudo">filter out records matching the 'pseudo' keyword (--no-pseudo)</option> 123 <option value="--no-pseudo">filter out records matching the 'pseudo' keyword (--no-pseudo)</option>
124 </param> 124 </param>
125 <conditional name="region"> 125 <conditional name="region">
126 <param name="region_filter" type="select" label="Filter by genome region"> 126 <param name="region_filter" type="select" label="Filter by genome region">
127 <option value="none">No</option> 127 <option value="none">No</option>
128 <option value="filter">Yes</option> 128 <option value="filter">Yes</option>
129 </param> 129 </param>
130 <when value="none"/> 130 <when value="none"/>
131 <when value="filter"> 131 <when value="filter">
132 <param name="range" type="text" value="" label="Only show transcripts overlapping coordinate range"> 132 <param name="range" type="text" value="" label="Only show transcripts overlapping coordinate range">
133 <help><![CDATA[ 133 <help><![CDATA[
134 (-r [['strand']'chr':]'start'..'end') <br> 134 (-r [['strand']'chr':]'start'..'end') <br>
135 examples: <br> 135 examples: <br>
136 1000..500000 <br> 136 1000..500000 <br>
137 chr1:1000..500000 <br> 137 chr1:1000..500000 <br>
138 +chr1:1000..500000 <br> 138 +chr1:1000..500000 <br>
139 -chr1:1000..500000 139 -chr1:1000..500000
140 ]]> 140 ]]>
141 </help> 141 </help>
142 <validator type="regex">(([+-])?(\w+:))?\d+\.\.\d+</validator> 142 <validator type="regex">(([+-])?(\w+:))?\d+\.\.\d+</validator>
143 </param> 143 </param>
144 <param name="discard_partial" type="boolean" truevalue="-R" falsevalue="" check="false" 144 <param name="discard_partial" type="boolean" truevalue="-R" falsevalue="" checked="false"
145 label="discard all transcripts that are not fully contained within the given range" help="(-R)"/> 145 label="Discard all transcripts that are not fully contained within the given range" help="(-R)"/>
146 </when> 146 </when>
147 </conditional> 147 </conditional>
148 <param name="maxintron" type="integer" value="" optional="true" min="0" label="Filter out transcripts with large introns" 148 <param name="maxintron" type="integer" value="" optional="true" min="0" label="Filter out transcripts with large introns"
149 help="If set, discard transcripts having an intron larger (-i max_intron)"/> 149 help="If set, discard transcripts having an intron larger (-i max_intron)"/>
150 <param name="chr_replace" type="data" format="tabular" optional="true" label="Replace reference sequence names" > 150 <param name="chr_replace" type="data" format="tabular" optional="true" label="Replace reference sequence names" >
151 <help><![CDATA[(-m chr_replace) <br> 151 <help><![CDATA[(-m chr_replace) <br>
152 chr_replace is a reference sequence replacement table consisting of 2 columns: "original_ref_ID" "new_ref_ID"<br> 152 chr_replace is a reference sequence replacement table consisting of 2 columns: "original_ref_ID" "new_ref_ID"<br>
153 It is useful for switching between Ensembl and UCSC naming conventions <br> 153 It is useful for switching between Ensembl and UCSC naming conventions <br>
154 NOTE: GFF records on reference sequences that are not found among the "original_ref_ID" entries in this file will be filtered out 154 NOTE: GFF records on reference sequences that are not found among the "original_ref_ID" entries in this file will be filtered out
155 ]]> 155 ]]>
156 </help> 156 </help>
157 </param> 157 </param>
158 158
159 <!-- Although documented, does not appear to be used in the gffread code 159 <!-- Although documented, does not appear to be used in the gffread code
160 <param name="seq_info" type="data" format="tabular" optional="true" label="Use the description field as the value for a 'descr' attribute to the GFF record"> 160 <param name="seq_info" type="data" format="tabular" optional="true" label="Use the description field as the value for a 'descr' attribute to the GFF record">
161 <help> 161 <help>
162 (-s seq_info.fsize -A) useful with mRNA/EST/protein mappings &lt;br&gt; 162 (-s seq_info.fsize -A) useful with mRNA/EST/protein mappings &lt;br&gt;
163 seq_info input file is a 3 column tab-delimited file providing this info for each of the mapped sequences: &lt;br&gt; 163 seq_info input file is a 3 column tab-delimited file providing this info for each of the mapped sequences: &lt;br&gt;
164 "seq-name" "seq-length" "seq-description" &lt;br&gt; 164 "seq-name" "seq-length" "seq-description" &lt;br&gt;
165 </help> 165 </help>
166 </param> 166 </param>
167 --> 167 -->
168 168
169 <!-- merging --> 169 <!-- merging -->
170 <conditional name="merging"> 170 <conditional name="merging">
171 <param name="merge_sel" type="select" label="Transcript merging" help="(-M/--merge or --cluster-only)"> 171 <param name="merge_sel" type="select" label="Transcript merging" help="(-M/--merge or --cluster-only)">
199 </param> 199 </param>
200 <expand macro="ref_filtering_select" /> 200 <expand macro="ref_filtering_select" />
201 <expand macro="fasta_output_select" /> 201 <expand macro="fasta_output_select" />
202 </when> 202 </when>
203 <when value="history"> 203 <when value="history">
204 <param name="genome_fasta" type="data" format="fasta" label="Genome Reference Fasta"/> 204 <param name="genome_fasta" type="data" format="fasta" label="Genome Reference Fasta"/>
205 <expand macro="ref_filtering_select" /> 205 <expand macro="ref_filtering_select" />
206 <expand macro="fasta_output_select" /> 206 <expand macro="fasta_output_select" />
207 </when> 207 </when>
208 </conditional> 208 </conditional>
209 209
210 <!-- outputs --> 210 <!-- outputs -->
211 <conditional name="gffs"> 211 <conditional name="gffs">
212 <param name="gff_fmt" type="select" optional="true" label="Feature File Output" help="(-o output.gff3|output.gtf)"> 212 <param name="gff_fmt" type="select" label="Feature File Output" help="(-o output.gff3|output.gtf)">
213 <option value="none">none</option> 213 <option value="none">none</option>
214 <option value="gff">GFF</option> 214 <option value="gff">GFF</option>
215 <option value="gtf">GTF</option> 215 <option value="gtf">GTF</option>
216 </param> 216 </param>
217 <when value="none"> 217 <when value="none">
218 </when> 218 </when>
219 <when value="gff"> 219 <when value="gff">
220 <param name="output_cmd" type="hidden" value="-o output.gff3"/> 220 <param name="output_cmd" type="hidden" value="-o output.gff3"/>
221 <param name="ensembl" type="boolean" truevalue="-L" falsevalue="" check="false" label="Ensembl GTF to GFF3 conversion" help="(-L)"/> 221 <param name="ensembl" type="boolean" truevalue="-L" falsevalue="" checked="false" label="Ensembl GTF to GFF3 conversion" help="(-L)"/>
222 <expand macro="trackname" /> 222 <expand macro="trackname" />
223 </when> 223 </when>
224 <when value="gtf"> 224 <when value="gtf">
225 <param name="output_cmd" type="hidden" value="-T -o output.gtf"/> 225 <param name="output_cmd" type="hidden" value="-T -o output.gtf"/>
226 <expand macro="trackname" /> 226 <expand macro="trackname" />
227 </when> 227 </when>
228 </conditional> 228 </conditional>
229 229
230 <param name="full_gff_attribute_preservation" type="boolean" truevalue="-F" falsevalue="" check="false" 230 <param name="full_gff_attribute_preservation" type="boolean" truevalue="-F" falsevalue="" checked="false"
231 label="full GFF attribute preservation (all attributes are shown)" help="(-F)"/> 231 label="full GFF attribute preservation (all attributes are shown)" help="(-F)"/>
232 <param name="decode_url" type="boolean" truevalue="-D" falsevalue="" check="false" 232 <param name="decode_url" type="boolean" truevalue="-D" falsevalue="" checked="false"
233 label="decode url encoded characters within attributes" help="(-D)"/> 233 label="decode url encoded characters within attributes" help="(-D)"/>
234 <param name="expose" type="boolean" truevalue="-E" falsevalue="" check="false" 234 <param name="expose" type="boolean" truevalue="-E" falsevalue="" checked="false"
235 label="warn about duplicate transcript IDs and other potential problems with the given GFF/GTF records" help="(-E)"/> 235 label="warn about duplicate transcript IDs and other potential problems with the given GFF/GTF records" help="(-E)"/>
236 236
237 </inputs> 237 </inputs>
238 <outputs> 238 <outputs>
239 <data name="output_gff" format="gff3" metadata_source="input" label="${tool.name} on ${on_string}: gff3" from_work_dir="output.gff3"> 239 <data name="output_gff" format="gff3" metadata_source="input" label="${tool.name} on ${on_string}: gff3" from_work_dir="output.gff3">
257 </outputs> 257 </outputs>
258 <tests> 258 <tests>
259 <test> 259 <test>
260 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/> 260 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
261 <param name="gff_fmt" value="gff"/> 261 <param name="gff_fmt" value="gff"/>
262 <output name="output_gff" file="Homo_sapiens.GRCh37_19.71.gff3" ftype="gff3" /> 262 <output name="output_gff" file="Homo_sapiens.GRCh37_19.71.gff3" ftype="gff3" lines_diff="2" />
263 </test> 263 </test>
264 264
265 <test> 265 <test>
266 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/> 266 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
267 <param name="filtering" value="--no-pseudo"/> 267 <param name="filtering" value="--no-pseudo"/>
268 <param name="gff_fmt" value="gtf"/> 268 <param name="gff_fmt" value="gtf"/>
269 <output name="output_gtf"> 269 <output name="output_gtf">
270 <assert_contents> 270 <assert_contents>
271 <not_has_text text="pseudo" /> 271 <not_has_text text="pseudo" />
272 </assert_contents> 272 </assert_contents>
273 </output> 273 </output>
274 </test> 274 </test>
275 275
276 <test> 276 <test>
277 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/> 277 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
280 <param name="gff_fmt" value="gtf"/> 280 <param name="gff_fmt" value="gtf"/>
281 <output name="output_gtf"> 281 <output name="output_gtf">
282 <assert_contents> 282 <assert_contents>
283 <has_text text="ENST00000587541" /> 283 <has_text text="ENST00000587541" />
284 <has_text text="ENST00000382683" /> 284 <has_text text="ENST00000382683" />
285 </assert_contents> 285 </assert_contents>
286 </output> 286 </output>
287 </test> 287 </test>
288 288
289 <test> 289 <test>
290 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/> 290 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
292 <param name="range" value="19:496500..504965"/> 292 <param name="range" value="19:496500..504965"/>
293 <param name="discard_partial" value="true"/> 293 <param name="discard_partial" value="true"/>
294 <param name="gff_fmt" value="gtf"/> 294 <param name="gff_fmt" value="gtf"/>
295 <output name="output_gtf"> 295 <output name="output_gtf">
296 <assert_contents> 296 <assert_contents>
297 <has_text text="ENST00000587541" /> 297 <not_has_text text="ENST00000587541" />
298 <has_text text="ENST00000382683" /> 298 <has_text text="ENST00000382683" />
299 </assert_contents> 299 </assert_contents>
300 </output> 300 </output>
301 </test> 301 </test>
302 302
303 <test> 303 <test>
304 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/> 304 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
308 <param name="gff_fmt" value="gtf"/> 308 <param name="gff_fmt" value="gtf"/>
309 <output name="output_gtf"> 309 <output name="output_gtf">
310 <assert_contents> 310 <assert_contents>
311 <not_has_text text="ENST00000587541" /> 311 <not_has_text text="ENST00000587541" />
312 <has_text text="ENST00000382683" /> 312 <has_text text="ENST00000382683" />
313 </assert_contents> 313 </assert_contents>
314 </output> 314 </output>
315 </test> 315 </test>
316 316
317 <test> 317 <test>
318 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/> 318 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
319 <param name="source" value="history"/> 319 <param name="source" value="history"/>
320 <param name="genome_fasta" ftype="fasta" value="Homo_sapiens.GRCh37.71.dna.chromosome.19.fa"/> 320 <param name="genome_fasta" ftype="fasta" value="Homo_sapiens.GRCh37.71.dna.chromosome.19.fa"/>
321 <param name="fa_outputs" value="-w exons.f,-x cds.fa,-y pep.fa"/> 321 <param name="fa_outputs" value="-w exons.fa,-x cds.fa,-y pep.fa"/>
322 <param name="region_filter" value="filter"/> 322 <param name="region_filter" value="filter"/>
323 <param name="range" value="19:496500..504965"/> 323 <param name="range" value="19:496500..504965"/>
324 <param name="gff_fmt" value="gtf"/> 324 <param name="gff_fmt" value="gtf"/>
325 <output name="output_gtf"> 325 <output name="output_gtf">
326 <assert_contents> 326 <assert_contents>
327 <not_has_text text="ENST00000587541" /> 327 <not_has_text text="ENST00000587541" />
328 <has_text text="ENST00000382683" /> 328 <has_text text="ENST00000382683" />
329 </assert_contents> 329 </assert_contents>
330 </output> 330 </output>
331 <output name="output_exons"> 331 <output name="output_exons">
332 <assert_contents> 332 <assert_contents>
333 <has_text text="ENST00000346144 gene=MADCAM1 CDS=47-932" /> 333 <has_text text="ENST00000346144 gene=MADCAM1 CDS=47-932" />
334 <has_text text="CTATTTAAGCGGCTTCCCCGCGGCCTCGGGACAGAGGGGACTGAGCATGGATTTCGGACTGGCCCTCCTG" /> 334 <has_text text="CTATTTAAGCGGCTTCCCCGCGGCCTCGGGACAGAGGGGACTGAGCATGGATTTCGGACTGGCCCTCCTG" />
335 </assert_contents> 335 </assert_contents>
336 </output> 336 </output>
337 <output name="output_cds"> 337 <output name="output_cds">
338 <assert_contents> 338 <assert_contents>
339 <has_text text="ENST00000346144 gene=MADCAM1" /> 339 <has_text text="ENST00000346144 gene=MADCAM1" />
340 <has_text text="ATGGATTTCGGACTGGCCCTCCTGCTGGCGGGGCTTCTGGGGCTCCTCCTCGGCCAGTCCCTCCAGGTGA" /> 340 <has_text text="ATGGATTTCGGACTGGCCCTCCTGCTGGCGGGGCTTCTGGGGCTCCTCCTCGGCCAGTCCCTCCAGGTGA" />
341 </assert_contents> 341 </assert_contents>
342 </output> 342 </output>
343 <output name="output_pep"> 343 <output name="output_pep">
344 <assert_contents> 344 <assert_contents>
345 <has_text text="ENST00000346144 gene=MADCAM1" /> 345 <has_text text="ENST00000346144 gene=MADCAM1" />
346 <has_text text="MDFGLALLLAGLLGLLLGQSLQVKPLQVEPPEPVVAVALGASRQLTCRLACADRGASVQWRGLDTSLGAV" /> 346 <has_text text="MDFGLALLLAGLLGLLLGQSLQVKPLQVEPPEPVVAVALGASRQLTCRLACADRGASVQWRGLDTSLGAV" />
347 </assert_contents> 347 </assert_contents>
348 </output> 348 </output>
349 </test> 349 </test>
350 350
351 </tests> 351 </tests>
352 <help> 352 <help>
357 357
358 .. _cufflinks: http://cole-trapnell-lab.github.io/cufflinks/ 358 .. _cufflinks: http://cole-trapnell-lab.github.io/cufflinks/
359 359
360 Usage: :: 360 Usage: ::
361 361
362 gffread "input_gff" [-g "genomic_seqs_fasta" | "dir"][-s "seq_info.fsize"] 362 gffread "input_gff" [-g "genomic_seqs_fasta" | "dir"][-s "seq_info.fsize"]
363 [-o "outfile.gff"] [-t "tname"] [-r [["strand"]"chr":]"start".."end" [-R]] 363 [-o "outfile.gff"] [-t "tname"] [-r [["strand"]"chr":]"start".."end" [-R]]
364 [-CTVNJMKQAFGUBHZWTOLE] [-w "exons.fa"] [-x "cds.fa"] [-y "tr_cds.fa"] 364 [-CTVNJMKQAFGUBHZWTOLE] [-w "exons.fa"] [-x "cds.fa"] [-y "tr_cds.fa"]
365 [-i "maxintron"] 365 [-i "maxintron"]
366 366
367 Options: :: 367 Options: ::
368 368
369 -g full path to a multi-fasta file with the genomic sequences 369 -g full path to a multi-fasta file with the genomic sequences
370 for all input mappings, OR a directory with single-fasta files 370 for all input mappings, OR a directory with single-fasta files
371 (one per genomic sequence, with file names matching sequence names) 371 (one per genomic sequence, with file names matching sequence names)
374 <seq-name> <seq-length> <seq-description> 374 <seq-name> <seq-length> <seq-description>
375 (useful for -A option with mRNA/EST/protein mappings) 375 (useful for -A option with mRNA/EST/protein mappings)
376 -i discard transcripts having an intron larger than <maxintron> 376 -i discard transcripts having an intron larger than <maxintron>
377 -r only show transcripts overlapping coordinate range <start>..<end> 377 -r only show transcripts overlapping coordinate range <start>..<end>
378 (on chromosome/contig <chr>, strand <strand> if provided) 378 (on chromosome/contig <chr>, strand <strand> if provided)
379 -R for -r option, discard all transcripts that are not fully 379 -R for -r option, discard all transcripts that are not fully
380 contained within the given range 380 contained within the given range
381 -U discard single-exon transcripts 381 -U discard single-exon transcripts
382 -C coding only: discard mRNAs that have no CDS feature 382 -C coding only: discard mRNAs that have no CDS feature
383 -F full GFF attribute preservation (all attributes are shown) 383 -F full GFF attribute preservation (all attributes are shown)
384 -G only parse additional exon attributes from the first exon 384 -G only parse additional exon attributes from the first exon
385 and move them to the mRNA level (useful for GTF input) 385 and move them to the mRNA level (useful for GTF input)
386 -A use the description field from <seq_info.fsize> and add it 386 -A use the description field from <seq_info.fsize> and add it
387 as the value for a 'descr' attribute to the GFF record 387 as the value for a 'descr' attribute to the GFF record
388 388
389 -O process also non-transcript GFF records (by default non-transcript 389 -O process also non-transcript GFF records (by default non-transcript
390 records are ignored) 390 records are ignored)
391 -V discard any mRNAs with CDS having in-frame stop codons 391 -V discard any mRNAs with CDS having in-frame stop codons
392 -H for -V option, check and adjust the starting CDS phase 392 -H for -V option, check and adjust the starting CDS phase
393 if the original phase leads to a translation with an 393 if the original phase leads to a translation with an
394 in-frame stop codon 394 in-frame stop codon
395 -B for -V option, single-exon transcripts are also checked on the 395 -B for -V option, single-exon transcripts are also checked on the
396 opposite strand 396 opposite strand
397 -N discard multi-exon mRNAs that have any intron with a non-canonical 397 -N discard multi-exon mRNAs that have any intron with a non-canonical
398 splice site consensus (i.e. not GT-AG, GC-AG or AT-AC) 398 splice site consensus (i.e. not GT-AG, GC-AG or AT-AC)
399 -J discard any mRNAs that either lack initial START codon 399 -J discard any mRNAs that either lack initial START codon
400 or the terminal STOP codon, or have an in-frame stop codon 400 or the terminal STOP codon, or have an in-frame stop codon
401 (only print mRNAs with a full, valid CDS) 401 (only print mRNAs with a full, valid CDS)
402 --no-pseudo: filter out records matching the 'pseudo' keyword 402 --no-pseudo: filter out records matching the 'pseudo' keyword
403 403
404 -M/--merge : cluster the input transcripts into loci, collapsing matching 404 -M/--merge : cluster the input transcripts into loci, collapsing matching
405 transcripts (those with the same exact introns and fully contained) 405 transcripts (those with the same exact introns and fully contained)
406 -d <dupinfo> : for -M option, write collapsing info to file <dupinfo> 406 -d <dupinfo> : for -M option, write collapsing info to file <dupinfo>
407 --cluster-only: same as --merge but without collapsing matching transcripts 407 --cluster-only: same as --merge but without collapsing matching transcripts
408 -K for -M option: also collapse shorter, fully contained transcripts 408 -K for -M option: also collapse shorter, fully contained transcripts
409 with fewer introns than the container 409 with fewer introns than the container
410 -Q for -M option, remove the containment restriction: 410 -Q for -M option, remove the containment restriction:
411 (multi-exon transcripts will be collapsed if just their introns match, 411 (multi-exon transcripts will be collapsed if just their introns match,
412 while single-exon transcripts can partially overlap (80%)) 412 while single-exon transcripts can partially overlap (80%))
413 413
414 --force-exons: make sure that the lowest level GFF features are printed as 414 --force-exons: make sure that the lowest level GFF features are printed as
415 "exon" features 415 "exon" features
416 -E expose (warn about) duplicate transcript IDs and other potential 416 -E expose (warn about) duplicate transcript IDs and other potential
417 problems with the given GFF/GTF records 417 problems with the given GFF/GTF records
418 -D decode url encoded characters within attributes 418 -D decode url encoded characters within attributes
419 -Z merge close exons into a single exon (for intron size<4) 419 -Z merge close exons into a single exon (for intron size<4)
420 -w write a fasta file with spliced exons for each GFF transcript 420 -w write a fasta file with spliced exons for each GFF transcript
421 -x write a fasta file with spliced CDS for each GFF transcript 421 -x write a fasta file with spliced CDS for each GFF transcript