changeset 7:9c298cab341d draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gffread commit f40643d8b80299ebb84faebe92579321ac459746"
author iuc
date Sat, 25 Sep 2021 15:38:01 +0000
parents bba49324f2fa
children
files gffread.xml test-data/Homo_sapiens.GRCh37_19.71.bed test-data/Homo_sapiens.GRCh37_19.71.gff3 test-data/ecoli-k12.processed.gff3 test-data/stop_codons.gtf
diffstat 5 files changed, 251 insertions(+), 96 deletions(-) [+]
line wrap: on
line diff
--- a/gffread.xml	Tue Aug 31 08:29:57 2021 +0000
+++ b/gffread.xml	Sat Sep 25 15:38:01 2021 +0000
@@ -1,16 +1,21 @@
-<tool id="gffread" name="gffread" version="@VERSION@.0">
+<tool id="gffread" name="gffread" version="@GALAXY_TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
     <description>Filters and/or converts GFF3/GTF2 records</description>
     <xrefs>
         <xref type="bio.tools">gffread</xref>
     </xrefs>
     <macros>
-        <token name="@VERSION@">0.11.6</token>
+        <!-- The version of this tool must not be lowered, since 2.x was used in the past.
+            Let's use small increments and hope that gffread catches up one day. -->
+        <token name="@GALAXY_TOOL_VERSION@">2.2.1.3</token>
+        <token name="@TOOL_VERSION@">0.12.7</token>
+        <token name="@VERSION_SUFFIX@">0</token>
         <xml name="fasta_output_select">
             <param name="fa_outputs" type="select" display="checkboxes" multiple="true" label="Select fasta outputs">
-                <option value="-w exons.fa">fasta file with spliced exons for each GFF transcript (-w exons.fa)</option>
-                <option value="-x cds.fa">fasta file with spliced CDS for each GFF transcript (-x cds.fa)</option>
-                <option value="-y pep.fa">protein fasta file with the translation of CDS for each record (-y pep.fa)</option>
+                <option value="-w exons.fa">fasta file with spliced exons for each GFF transcript (-w)</option>
+                <option value="-x cds.fa">fasta file with spliced CDS for each GFF transcript (-x)</option>
+                <option value="-y pep.fa">protein fasta file with the translation of CDS for each record (-y)</option>
                 <option value="-W">for each fasta: record the exon coordinates projected onto the spliced sequence (-W)</option>
+                <option value="-S">for protein fasta: use '*' instead of '.' as stop codon translation (-S)</option>
             </param>
         </xml>
         <xml name="ref_filtering_select">
@@ -25,14 +30,14 @@
             </param>
         </xml>
         <xml name="trackname">
-            <param name="tname" type="text" value="" optional="true" label="Trackname to use in the second column of each GFF output line" help="(-t track_name}">
+            <param argument="-t" name="tname" type="text" value="" optional="true" label="Trackname to use in the second column of each GFF output line" help="">
                 <validator type="regex">\w+</validator>
             </param>
         </xml>
         <xml name="merge_opts">
              <option value="-K">also collapse shorter, fully contained transcripts with fewer introns than the container (-K)</option>
              <option value="-Q">remove the containment restriction: multi-exon transcripts will be collapsed if just their introns match, while single-exon transcripts can partially overlap 80% (-Q)</option>
-             <option value="-d dupinfo">output collapsing info (-d dupinfo)</option>
+             <option value="-d dupinfo">output collapsing info (-d)</option>
         </xml>
         <xml name="cluster_opts">
              <option value="--force-exons"> make sure that the lowest level GFF features are printed as 'exon' features (--force-exons)</option>
@@ -51,14 +56,19 @@
         </xml>
     </macros>
     <requirements>
-        <requirement type="package" version="@VERSION@">gffread</requirement>
+        <requirement type="package" version="@TOOL_VERSION@">gffread</requirement>
     </requirements>
+    <version_command>gffread --version</version_command>
     <command detect_errors="aggressive">
 <![CDATA[
     #if $reference_genome.source == 'history':
         ln -s '$reference_genome.genome_fasta' genomeref.fa &&
     #end if
+
     gffread '$input'
+    #if $input.ext.startswith("bed")
+        --in-bed
+    #end if
     #if $reference_genome.source == 'cached':
         -g '${reference_genome.fasta_indexes.fields.path}'
         #if $reference_genome.ref_filtering and str($reference_genome.ref_filtering) != '':
@@ -107,22 +117,68 @@
         #end if
     #end if
     #if $gffs.gff_fmt != 'none':
-        #if $gffs.tname:
+        #if $gffs.gff_fmt != 'bed' and $gffs.tname:
             -t '$gffs.tname'
         #end if
         #if $gffs.gff_fmt == 'gff':
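+            ## TODO: bug - 'gft' below is a typo for 'gtf', so this check presumably never matches a GTF input and the ensembl option (-L) is never passed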
             #if $input.datatype.file_ext == 'gft':
                 $gffs.ensembl
             #end if
-            $gffs.output_cmd
-        #elif $gffs.gff_fmt == 'gtf':
-            $gffs.output_cmd
+        #end if
+        #if $gffs.gff_fmt == 'gtf'
+            -T
+        #elif $gffs.gff_fmt == 'bed'
+            --bed
         #end if
+        -o output.$gffs.gff_fmt
     #end if
+
+## Missing options
+##
+## --ids
+## --nids
+## -l 
+## --jmatch
+## --nc
+## --ignore-locus
+## -A -s (see above)
+##  --sort-alpha : chromosomes (reference sequences) are sorted alphabetically
+##  --sort-by : sort the reference sequences by the order in which their
+##       names are given in the <refseq.lst> file
+## Misc      
+## --keep-exon-attrs : for -F option, do not attempt to reduce redundant
+## --attrs
+##      --keep-genes : in transcript-only mode (default), also preserve gene records
+##      --keep-comments: for GFF3 input/output, try to preserve comments
+## -B (see above)
+## -P
+##      --add-hasCDS : add a "hasCDS" attribute with value "true" for transcripts
+##           that have CDS features
+##      --adj-stop stop codon adjustment: enables -P and performs automatic
+##           adjustment of the CDS stop coordinate if premature or downstream
+
+##  --in-tlf: input GFF-like one-line-per-transcript format without exon/CDS
+##            features (see --tlf option below); automatic if the input
+##            filename ends with .tlf)
+##  --stream: fast processing of input GFF/BED transcripts as they are received
+##            (no sorting, exons must be grouped by transcript in the input data)
+
+## Clustering
+
+## -Y
+
+## Output 
+
+## --gene2exon
+## --t-adopt
+## -j
+## --w-add
+## --w-nocds
 ]]>
     </command>
     <inputs>
-        <param name="input" type="data" format="gff3,gtf" label="Input GFF3 or GTF feature file"/>
+        <param name="input" type="data" format="bed,gff3,gtf" label="Input BED, GFF3 or GTF feature file"/>
         <!-- filtering -->
         <param name="filtering" type="select" display="checkboxes" multiple="true" label="filters">
             <option value="-U">discard single-exon transcripts (-U)</option>
@@ -138,9 +194,9 @@
             </param>
             <when value="none"/>
             <when value="filter">
-                <param name="range" type="text" value="" label="Only show transcripts overlapping coordinate range">
+                <param argument="-r" name="range" type="text" value="" label="Only show transcripts overlapping coordinate range">
                     <help><![CDATA[
-                    (-r [['strand']'chr':]'start'..'end') <br>
+                    [['strand']'chr':]'start'..'end' <br>
                     examples: <br>
                     1000..500000 <br>
                     chr1:1000..500000 <br>
@@ -150,14 +206,14 @@
                     </help>
                     <validator type="regex">(([+-])?(\w+:))?\d+\.\.\d+</validator>
                 </param>
-                <param name="discard_partial" type="boolean" truevalue="-R" falsevalue="" checked="false"
-                       label="Discard all transcripts that are not fully contained within the given range" help="(-R)"/>
+                <param argument="-R" name="discard_partial" type="boolean" truevalue="-R" falsevalue="" checked="false"
+                       label="Discard all transcripts that are not fully contained within the given range" help=""/>
             </when>
         </conditional>
-        <param name="maxintron" type="integer" value="" optional="true" min="0" label="Filter out transcipts with large introns"
-               help="If set, discard transcripts having an intron larger (-i max_intron)"/>
-        <param name="chr_replace" type="data" format="tabular" optional="true" label="Replace reference sequence names" >
-            <help><![CDATA[(-m chr_replace) <br>
+        <param argument="-i" name="maxintron" type="integer" value="" optional="true" min="0" label="Filter out transcripts with large introns"
+               help="If set, discard transcripts having an intron larger than this value"/>
+        <param argument="-m" name="chr_replace" type="data" format="tabular" optional="true" label="Replace reference sequence names" >
+            <help><![CDATA[
                 chr_replace is a reference sequence replacement table consisting of 2 columns: "original_ref_ID"  "new_ref_ID"<br>
                 It is useful for switching between Ensembl and UCSC naming conventions <br>
                 NOTE: GFF records on reference sequences that are not found among the "original_ref_ID" entries in this file will be filtered out
@@ -177,10 +233,10 @@
 
         <!-- merging -->
         <conditional name="merging">
-            <param name="merge_sel" type="select" label="Transcript merging" help="(-M/--merge or --cluster-only)">
+            <param name="merge_sel" type="select" label="Transcript merging" help="">
                 <option value="none">none</option>
-                <option value="merge">merge: cluster the input transcripts into loci, collapsing matching transcripts</option>
-                <option value="cluster">cluster-only: merge but without collapsing matching transcripts</option>
+                <option value="merge">merge: cluster the input transcripts into loci, collapsing matching transcripts (--merge)</option>
+                <option value="cluster">cluster-only: merge but without collapsing matching transcripts (--cluster-only)</option>
             </param>
             <when value="none"/>
             <when value="merge">
@@ -195,7 +251,7 @@
         <!-- reference sequence file -->
         <!-- Error: -g option is required for options -w, -x, -y, -V, -N, -M -->
         <conditional name="reference_genome">
-            <param name="source" type="select" label="Reference Genome" help="(-g genome.fasta) NOTE: Required for fasta outputs">
+            <param name="source" type="select" label="Reference Genome" help="NOTE: Required for fasta outputs">
                 <option value="none">none</option>
                 <option value="cached"></option>
                 <option value="history">From your history</option>
@@ -203,14 +259,14 @@
             <when value="none">
             </when>
             <when value="cached">
-                <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+                <param argument="-g" name="fasta_indexes" type="select" label="Source FASTA Sequence">
                     <options from_data_table="all_fasta"/>
                 </param>
                 <expand macro="ref_filtering_select" />
                 <expand macro="fasta_output_select" />
             </when>
             <when value="history">
-                <param name="genome_fasta" type="data" format="fasta" label="Genome Reference Fasta"/>
+                <param argument="-g" name="genome_fasta" type="data" format="fasta" label="Genome Reference Fasta"/>
                 <expand macro="ref_filtering_select" />
                 <expand macro="fasta_output_select" />
             </when>
@@ -222,35 +278,39 @@
                 <option value="none">none</option>
                 <option value="gff">GFF</option>
                 <option value="gtf">GTF</option>
+                <option value="bed">BED</option>
             </param>
             <when value="none">
             </when>
             <when value="gff">
-                <param name="output_cmd" type="hidden" value="-o output.gff3"/>
-                <param name="ensembl" type="boolean" truevalue="-L" falsevalue="" checked="false" label="Ensembl GTF to GFF3 conversion" help="(-L)"/>
+                <param argument="-L" name="ensembl" type="boolean" truevalue="-L" falsevalue="" checked="false" label="Ensembl GTF to GFF3 conversion" help=""/>
                 <expand macro="trackname" />
             </when>
             <when value="gtf">
-                <param name="output_cmd" type="hidden" value="-T -o output.gtf"/>
                 <expand macro="trackname" />
             </when>
+            <when value="bed">
+            </when>
         </conditional>
 
-        <param name="full_gff_attribute_preservation" type="boolean" truevalue="-F" falsevalue="" checked="false"
-                       label="full GFF attribute preservation (all attributes are shown)" help="(-F)"/>
-        <param name="decode_url" type="boolean" truevalue="-D" falsevalue="" checked="false"
-                       label="decode url encoded characters within attributes" help="(-D)"/>
-        <param name="expose" type="boolean" truevalue="-E" falsevalue="" checked="false"
-                       label="warn about duplicate transcript IDs and other potential problems with the given GFF/GTF records" help="(-E)"/>
+        <param argument="-F" name="full_gff_attribute_preservation" type="boolean" truevalue="-F" falsevalue="" checked="false"
+                       label="full GFF attribute preservation (all attributes are shown)" help=""/>
+        <param argument="-D" name="decode_url" type="boolean" truevalue="-D" falsevalue="" checked="false"
+                       label="decode url encoded characters within attributes" help=""/>
+        <param argument="-E" name="expose" type="boolean" truevalue="-E" falsevalue="" checked="false"
+                       label="warn about duplicate transcript IDs and other potential problems with the given GFF/GTF records" help=""/>
 
     </inputs>
     <outputs>
-        <data name="output_gff" format="gff3" metadata_source="input" label="${tool.name} on ${on_string}: gff3" from_work_dir="output.gff3">
+        <data name="output_gff" format="gff3" metadata_source="input" label="${tool.name} on ${on_string}: gff3" from_work_dir="output.gff">
             <filter>gffs['gff_fmt'] == 'gff'</filter>
         </data>
         <data name="output_gtf" format="gtf" metadata_source="input" label="${tool.name} on ${on_string}: gtf" from_work_dir="output.gtf">
             <filter>gffs['gff_fmt'] == 'gtf'</filter>
         </data>
+        <data name="output_bed" format="bed" metadata_source="input" label="${tool.name} on ${on_string}: bed" from_work_dir="output.bed">
+            <filter>gffs['gff_fmt'] == 'bed'</filter>
+        </data>
         <data name="output_exons" format="fasta" label="${tool.name} on ${on_string}: exons.fa" from_work_dir="exons.fa">
             <filter>'fa_outputs' in reference_genome and str(reference_genome['fa_outputs']).find('exons.fa') > 0 </filter>
         </data>
@@ -265,28 +325,48 @@
         </data>
     </outputs>
     <tests>
-        <test>
+        <test expect_num_outputs="1">
             <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
             <param name="gff_fmt" value="gff"/>
-            <output name="output_gff" file="Homo_sapiens.GRCh37_19.71.gff3" ftype="gff3" lines_diff="2" />
+            <output name="output_gff" file="Homo_sapiens.GRCh37_19.71.gff3" ftype="gff3" lines_diff="4" />
         </test>
-        <test>
+        <test expect_num_outputs="1">
+            <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
+            <param name="gff_fmt" value="gff"/>
+            <output name="output_gff" file="Homo_sapiens.GRCh37_19.71.gff3" ftype="gff3" lines_diff="4" />
+        </test>
+        <test expect_num_outputs="1">
             <param name="input" ftype="gtf" value="ecoli-k12.gff3"/>
             <param name="gff_fmt" value="gff"/>
             <param name="full_gff_attribute_preservation" value="-F"/>
-            <output name="output_gff" file="ecoli-k12.processed.gff3" ftype="gff3" lines_diff="2" />
+            <output name="output_gff" file="ecoli-k12.processed.gff3" ftype="gff3" lines_diff="4" />
         </test>
-        <test>
-            <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
-            <param name="filtering" value="--no-pseudo"/>
-            <param name="gff_fmt" value="gtf"/>
-            <output name="output_gtf">
+        <!-- bed output -->
+        <test expect_num_outputs="1">
+            <param name="input" ftype="gff3" value="Homo_sapiens.GRCh37_19.71.gff3"/>
+            <param name="gff_fmt" value="bed"/>
+            <output name="output_bed" ftype="bed">
                 <assert_contents>
-                    <not_has_text text="pseudo" />
+                    <has_n_lines n="42"/>
+                    <has_n_columns n="13"/>
                 </assert_contents>
             </output>
         </test>
-        <test>
+        <!-- bed input and test tname: gff_fmt is 'gff', so the result is the output_gff dataset (output_bed only exists when gff_fmt is 'bed') -->
+        <test expect_num_outputs="1">
+            <param name="input" ftype="bed" value="Homo_sapiens.GRCh37_19.71.bed"/>
+            <param name="gff_fmt" value="gff"/>
+            <param name="tname" value="track name"/>
+            <output name="output_gff" ftype="gff3">
+                <assert_contents>
+                    <has_n_lines n="388"/>
+                    <!-- this will work with https://github.com/galaxyproject/galaxy/pull/12528 -->
+                    <!-- <has_n_columns n="9" comment="#"/> -->
+                    <has_text text="track name"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
             <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
             <param name="region_filter" value="filter"/>
             <param name="range" value="19:496500..504965"/>
@@ -298,7 +378,7 @@
                 </assert_contents>
             </output>
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
             <param name="region_filter" value="filter"/>
             <param name="range" value="19:496500..504965"/>
@@ -311,7 +391,7 @@
                 </assert_contents>
             </output>
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
             <param name="filtering" value="-C"/>
             <param name="region_filter" value="filter"/>
@@ -324,7 +404,7 @@
                 </assert_contents>
             </output>
         </test>
-        <test>
+        <test expect_num_outputs="4">
             <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
             <param name="source" value="history"/>
             <param name="genome_fasta" ftype="fasta" value="Homo_sapiens.GRCh37.71.dna.chromosome.19.fa"/>
@@ -357,7 +437,18 @@
                 </assert_contents>
             </output>
         </test>
-
+        <test expect_num_outputs="1">
+            <param name="input" ftype="gtf" value="stop_codons.gtf"/>
+            <param name="source" value="history"/>
+            <param name="genome_fasta" ftype="fasta" value="Homo_sapiens.GRCh37.71.dna.chromosome.19.fa"/>
+            <param name="fa_outputs" value="-y pep.fa,-S"/>
+            <output name="output_pep">
+                <assert_contents>
+                    <has_text text="ENST00000269812" />
+                    <has_text text="PLRGLHPRV*LQTPLERCPCWPPAGGTGGCPHCLLHLRLLQSPTPTALSEGGGAGTEAQPVTDVDPGRG*" />
+                </assert_contents>
+            </output>
+        </test>
     </tests>
     <help>
 <![CDATA[
@@ -367,30 +458,32 @@
 
 .. _stringtie: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread
 
-
-gffread v0.11.4. Usage: ::
+gffread v0.12.7. Usage: ::
 
-    gffread <input_gff> [-g <genomic_seqs_fasta> | <dir>][-s <seq_info.fsize>] 
-     [-o <outfile>] [-t <trackname>] [-r [[<strand>]<chr>:]<start>..<end> [-R]]
+    gffread [-g <genomic_seqs_fasta> | <dir>] [-s <seq_info.fsize>] 
+     [-o <outfile>] [-t <trackname>] [-r [<strand>]<chr>:<start>-<end> [-R]]
+     [--jmatch <chr>:<start>-<end>] [--no-pseudo] 
      [-CTVNJMKQAFPGUBHZWTOLE] [-w <exons.fa>] [-x <cds.fa>] [-y <tr_cds.fa>]
-     [-i <maxintron>] [--bed] [--table <attrlist>] [--sort-by <refseq_list.txt>]
-     
+     [-j ][--ids <IDs.lst> | --nids <IDs.lst>] [--attrs <attr-list>] [-i <maxintron>]
+     [--stream] [--bed | --gtf | --tlf] [--table <attrlist>] [--sort-by <ref.lst>]
+     [<input_gff>] 
+    
      Filter, convert or cluster GFF/GTF/BED records, extract the sequence of
      transcripts (exon or CDS) and more.
      By default (i.e. without -O) only transcripts are processed, discarding any
      other non-transcript features. Default output is a simplified GFF3 with only
      the basic attributes.
      
-     <input_gff> is a GFF file, use '-' for stdin
- 
     Options:
-
+     --ids discard records/transcripts if their IDs are not listed in <IDs.lst>
+     --nids discard records/transcripts if their IDs are listed in <IDs.lst>
      -i   discard transcripts having an intron larger than <maxintron>
      -l   discard transcripts shorter than <minlen> bases
      -r   only show transcripts overlapping coordinate range <start>..<end>
           (on chromosome/contig <chr>, strand <strand> if provided)
      -R   for -r option, discard all transcripts that are not fully 
           contained within the given range
+     --jmatch only output transcripts matching the given junction
      -U   discard single-exon transcripts
      -C   coding only: discard mRNAs that have no CDS features
      --nc non-coding only: discard mRNAs that have CDS features
@@ -401,18 +494,18 @@
           for each of the mapped sequences:
           <seq-name> <seq-length> <seq-description>
           (useful for -A option with mRNA/EST/protein mappings)
-      
-     Sorting: (by default, chromosomes are kept in the order they were found)
+    Sorting: (by default, chromosomes are kept in the order they were found)
      --sort-alpha : chromosomes (reference sequences) are sorted alphabetically
      --sort-by : sort the reference sequences by the order in which their
           names are given in the <refseq.lst> file
-              
     Misc options: 
-     -F   preserve all GFF attributes (for non-exon features)
+     -F   keep all GFF attributes (for non-exon features)
      --keep-exon-attrs : for -F option, do not attempt to reduce redundant
           exon/CDS attributes
      -G   do not keep exon attributes, move them to the transcript feature
           (for GFF3 output)
+     --attrs <attr-list> only output the GTF/GFF attributes listed in <attr-list>,
+        which is a comma-delimited list of attribute names
      --keep-genes : in transcript-only mode (default), also preserve gene records
      --keep-comments: for GFF3 input/output, try to preserve comments
      -O   process other non-transcript GFF records (by default non-transcript
@@ -440,10 +533,11 @@
      --in-tlf: input GFF-like one-line-per-transcript format without exon/CDS
                features (see --tlf option below); automatic if the input
                filename ends with .tlf)
-               
+     --stream: fast processing of input GFF/BED transcripts as they are received
+               (no sorting, exons must be grouped by transcript in the input data)
     Clustering:
      -M/--merge : cluster the input transcripts into loci, discarding
-          "duplicated" transcripts (those with the same exact introns
+          "redundant" transcripts (those with the same exact introns
           and fully contained or equal boundaries)
      -d <dupinfo> : for -M option, write duplication info to file <dupinfo>
      --cluster-only: same as -M/--merge but without discarding any of the
@@ -455,7 +549,6 @@
           multi-exon transcripts, and >=80% overlap for single-exon transcripts
      -Y   for -M option, enforce -Q but also discard overlapping single-exon 
           transcripts, even on the opposite strand (can be combined with -K)
-          
     Output options:
      --force-exons: make sure that the lowest level GFF features are considered
            "exon" features
@@ -468,25 +561,26 @@
      -g   full path to a multi-fasta file with the genomic sequences
           for all input mappings, OR a directory with single-fasta files
           (one per genomic sequence, with file names matching sequence names)
-     -w    write a fasta file with spliced exons for each GFF transcript
+     -j    output the junctions and the corresponding transcripts
+     -w    write a fasta file with spliced exons for each transcript
+     --w-add <N> for the -w option, extract additional <N> bases
+           both upstream and downstream of the transcript boundaries
+     --w-nocds for -w, disable the output of CDS info in the FASTA file
      -x    write a fasta file with spliced CDS for each GFF transcript
      -y    write a protein fasta file with the translation of CDS for each record
-     -W    for -w and -x options, write in the FASTA defline the exon
+     -W    for -w, -x and -y options, write in the FASTA defline all the exon
            coordinates projected onto the spliced sequence;
-           for -y option, write transcript attributes in the FASTA defline
      -S    for -y option, use '*' instead of '.' as stop codon translation
-     -L    Ensembl GTF to GFF3 conversion (implies -F; should be used with -m)
+     -L    Ensembl GTF to GFF3 conversion, adds version to IDs
      -m    <chr_replace> is a name mapping table for converting reference 
            sequence names, having this 2-column format:
            <original_ref_ID> <new_ref_ID>
-           WARNING: all GFF records on reference sequences whose original IDs
-           are not found in the 1st column of this table will be discarded!
      -t    use <trackname> in the 2nd column of each GFF/GTF output line
-     -o    write the records into <outfile> instead of stdout
+     -o    write the output records into <outfile> instead of stdout
      -T    main output will be GTF instead of GFF3
      --bed output records in BED format instead of default GFF3
      --tlf output "transcript line format" which is like GFF
-           but exons, CDS features and related data are stored as GFF 
+           but with exons and CDS related features stored as GFF 
            attributes in the transcript feature line, like this:
              exoncount=N;exons=<exons>;CDSphase=<N>;CDS=<CDScoords> 
            <exons> is a comma-delimited list of exon_start-exon_end coordinates;
@@ -494,9 +588,14 @@
      --table output a simple tab delimited format instead of GFF, with columns
            having the values of GFF attributes given in <attrlist>; special
            pseudo-attributes (prefixed by @) are recognized:
-           @chr, @start, @end, @strand, @numexons, @exons, @cds, @covlen, @cdslen
+           @id, @geneid, @chr, @start, @end, @strand, @numexons, @exons, 
+           @cds, @covlen, @cdslen
+           If any of -w/-y/-x FASTA output files are enabled, the same fields
+           (excluding @id) are appended to the definition line of corresponding
+           FASTA records
      -v,-E expose (warn about) duplicate transcript IDs and other potential
            problems with the given GFF/GTF records
+
 ]]>
     </help>
     <citations>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Homo_sapiens.GRCh37_19.71.bed	Sat Sep 25 15:38:01 2021 +0000
@@ -0,0 +1,42 @@
+19	223157	223261	ENST00000410397	100	-	223157	223261	0,0,0	1	104,	0,	geneID=ENSG00000222329;gene_name=U6
+19	229639	230165	ENST00000587910	100	-	229639	230165	0,0,0	2	70,82,	0,444,	geneID=ENSG00000267600;gene_name=AC098474.1
+19	239144	239247	ENST00000588755	100	-	239144	239247	0,0,0	1	103,	0,	geneID=ENSG00000267305;gene_name=CTD-3113P16.7
+19	279494	280170	ENST00000589981	100	+	279494	280170	0,0,0	1	676,	0,	geneID=ENSG00000267447;gene_name=VN2R11P
+19	281042	291386	ENST00000269812	100	-	281387	291336	0,0,0	6	495,177,58,278,152,102,	0,1091,1709,6431,6977,10242,	CDS=281387:291336;CDSphase=0;geneID=ENSG00000141934;gene_name=PPAP2C
+19	281344	291393	ENST00000434325	100	-	281387	288055	0,0,0	6	193,177,58,278,152,68,	0,789,1407,6129,6675,9981,	CDS=281387:288055;CDSphase=0;geneID=ENSG00000141934;gene_name=PPAP2C
+19	281387	291200	ENST00000327790	100	-	281387	291066	0,0,0	6	150,177,58,278,152,249,	0,746,1364,6086,6632,9564,	CDS=281387:291066;CDSphase=0;geneID=ENSG00000141934;gene_name=PPAP2C
+19	281990	287636	ENST00000586998	100	-	282121	287636	2,0,0	3	320,58,163,	0,761,5483,	CDS=282121:287636;CDSphase=2;geneID=ENSG00000141934;gene_name=PPAP2C
+19	287160	288530	ENST00000589672	100	-	287160	288530	0,0,0	2	591,511,	0,859,	geneID=ENSG00000141934;gene_name=PPAP2C
+19	287473	291382	ENST00000591572	100	-	287473	291336	0,0,0	3	278,170,98,	0,546,3811,	CDS=287473:291336;CDSphase=0;geneID=ENSG00000141934;gene_name=PPAP2C
+19	305572	306467	ENST00000591533	100	+	305572	306467	0,0,0	2	131,411,	0,484,	geneID=ENSG00000267124;gene_name=CTD-3113P16.5
+19	305574	344793	ENST00000264819	100	-	306689	344782	0,0,0	14	1137,418,89,125,95,82,152,70,92,124,126,143,91,20,	0,1544,3002,3226,6270,6616,7917,20060,20932,21558,22289,28825,30508,39199,	CDS=306689:344782;CDSphase=0;geneID=ENSG00000105556;gene_name=MIER2
+19	305578	325706	ENST00000589092	100	+	305578	325706	0,0,0	2	356,83,	0,20045,	geneID=ENSG00000267124;gene_name=CTD-3113P16.5
+19	326606	336178	ENST00000586994	100	-	326606	336178	0,0,0	4	650,126,143,96,	0,1257,7793,9476,	geneID=ENSG00000105556;gene_name=MIER2
+19	327863	340599	ENST00000592722	100	-	327863	340599	0,0,0	5	126,117,143,91,86,	0,2400,6536,8219,12650,	geneID=ENSG00000105556;gene_name=MIER2
+19	334114	344798	ENST00000587966	100	-	334114	344798	0,0,0	2	428,25,	0,10659,	geneID=ENSG00000105556;gene_name=MIER2
+19	361749	376013	ENST00000342640	100	-	362199	375970	0,0,0	8	677,160,118,70,62,72,123,351,	0,5315,9455,10881,11720,12190,12549,13913,	CDS=362199:375970;CDSphase=0;geneID=ENSG00000105549;gene_name=THEG
+19	362057	374620	ENST00000530711	100	-	362057	374620	0,0,0	3	369,160,322,	0,5007,12241,	geneID=ENSG00000105549;gene_name=THEG
+19	362199	375970	ENST00000346878	100	-	362199	375970	0,0,0	7	227,160,118,70,62,123,308,	0,4865,9005,10431,11270,12099,13463,	CDS=362199:375970;CDSphase=0;geneID=ENSG00000105549;gene_name=THEG
+19	367201	374249	ENST00000528213	100	-	367201	374249	0,0,0	5	23,118,70,62,310,	0,4003,5429,6268,6738,	geneID=ENSG00000105549;gene_name=THEG
+19	397588	398941	ENST00000591757	100	+	397588	398941	0,0,0	2	45,252,	0,1101,	geneID=ENSG00000267443;gene_name=AC010641.1
+19	405444	409139	ENST00000332235	100	-	407095	408361	0,0,0	2	2957,134,	0,3561,	CDS=407095:408361;CDSphase=0;geneID=ENSG00000183186;gene_name=C2CD4C
+19	416582	419879	ENST00000587423	100	-	416582	419879	0,0,0	2	740,957,	0,2340,	geneID=ENSG00000129946;gene_name=SHC2
+19	416582	422828	ENST00000588376	100	-	416582	422828	0,0,0	3	740,134,683,	0,2340,5563,	geneID=ENSG00000129946;gene_name=SHC2
+19	416592	460996	ENST00000264554	100	-	418927	460996	0,0,0	13	730,134,311,135,64,157,127,52,54,120,61,71,468,	0,2330,5553,8504,14091,18116,19572,19787,20037,22125,22377,24269,43936,	CDS=418927:460996;CDSphase=0;geneID=ENSG00000129946;gene_name=SHC2
+19	416608	441384	ENST00000589922	100	-	416608	441384	0,0,0	11	714,134,311,135,64,157,127,304,120,61,523,	0,2314,5537,8488,14075,18100,19556,19771,22109,22361,24253,	geneID=ENSG00000129946;gene_name=SHC2
+19	417199	436258	ENST00000590170	100	-	434761	436258	0,0,0	6	123,134,234,64,157,94,	0,1723,4946,13484,17509,18965,	CDS=434761:436258;CDSphase=0;geneID=ENSG00000129946;gene_name=SHC2
+19	417199	436258	ENST00000591948	100	-	417199	436258	0,0,0	6	123,134,234,64,157,94,	0,1723,4946,13484,17509,18965,	geneID=ENSG00000129946;gene_name=SHC2
+19	434701	460571	ENST00000590222	100	-	439397	460571	1,0,0	9	164,127,52,54,120,61,259,71,43,	0,1463,1678,1928,4016,4268,4531,6160,25827,	CDS=439397:460571;CDSphase=1;geneID=ENSG00000129946;gene_name=SHC2
+19	435770	436534	ENST00000591388	100	-	435770	436534	0,0,0	3	191,127,155,	0,394,609,	geneID=ENSG00000129946;gene_name=SHC2
+19	435778	439031	ENST00000590113	100	-	435778	439031	0,0,0	6	183,127,52,54,120,62,	0,386,601,851,2939,3191,	geneID=ENSG00000129946;gene_name=SHC2
+19	453133	453245	ENST00000516730	100	+	453133	453245	0,0,0	1	112,	0,	geneID=ENSG00000252539;gene_name=RNA5SP462
+19	463345	474983	ENST00000315489	100	-	463843	474747	0,0,0	4	1019,114,108,363,	0,4303,9048,11275,	CDS=463843:474747;CDSphase=0;geneID=ENSG00000181781;gene_name=ODF3L2
+19	463466	474880	ENST00000382696	100	-	463843	474747	0,0,0	3	898,114,260,	0,4182,11154,	CDS=463843:474747;CDSphase=0;geneID=ENSG00000181781;gene_name=ODF3L2
+19	464145	472631	ENST00000591681	100	-	464145	472631	0,0,0	3	219,114,238,	0,3503,8248,	geneID=ENSG00000181781;gene_name=ODF3L2
+19	489175	505342	ENST00000587541	100	+	489175	505342	0,0,0	3	864,261,598,	0,12493,15569,	geneID=ENSG00000099866;gene_name=MADCAM1
+19	490045	507813	ENST00000592413	100	-	490045	507813	0,0,0	3	308,84,438,	0,11495,17330,	geneID=ENSG00000266933;gene_name=AC005775.2
+19	496453	505207	ENST00000346144	100	+	496499	504965	0,0,0	4	98,285,330,463,	0,1379,2042,8291,	CDS=496499:504965;CDSphase=0;geneID=ENSG00000099866;gene_name=MADCAM1
+19	496453	505347	ENST00000215637	100	+	496499	504965	0,0,0	5	98,285,330,261,603,	0,1379,2042,5215,8291,	CDS=496499:504965;CDSphase=0;geneID=ENSG00000099866;gene_name=MADCAM1
+19	496499	504965	ENST00000382683	100	+	496499	504965	0,0,0	3	52,330,221,	0,1996,8245,	CDS=496499:504965;CDSphase=0;geneID=ENSG00000099866;gene_name=MADCAM1
+19	507298	519654	ENST00000359315	100	+	507506	519423	0,0,0	2	546,766,	0,11590,	CDS=507506:519423;CDSphase=0;geneID=ENSG00000141933;gene_name=TPGS1
+19	507499	510372	ENST00000588278	100	+	507499	510372	0,0,0	1	2873,	0,	geneID=ENSG00000141933;gene_name=TPGS1
--- a/test-data/Homo_sapiens.GRCh37_19.71.gff3	Tue Aug 31 08:29:57 2021 +0000
+++ b/test-data/Homo_sapiens.GRCh37_19.71.gff3	Sat Sep 25 15:38:01 2021 +0000
@@ -1,6 +1,6 @@
-# gffread /tmp/tmpq6d_yfqc/files/9/2/2/dataset_922cd54b-d77c-48fb-abf7-6fc8d8fdb97c.dat -o output.gff3
-# gffread v0.11.6
 ##gff-version 3
+# gffread v0.12.7
+# gffread /tmp/tmpk_iy6dhb/files/e/1/9/dataset_e191f2e3-7ad2-452e-b21c-edd22b6ba6e2.dat -o output.gff
 19	snRNA	transcript	223158	223261	.	-	.	ID=ENST00000410397;geneID=ENSG00000222329;gene_name=U6
 19	snRNA	exon	223158	223261	.	-	.	Parent=ENST00000410397
 19	unprocessed_pseudogene	transcript	229640	230165	.	-	.	ID=ENST00000587910;geneID=ENSG00000267600;gene_name=AC098474.1
--- a/test-data/ecoli-k12.processed.gff3	Tue Aug 31 08:29:57 2021 +0000
+++ b/test-data/ecoli-k12.processed.gff3	Sat Sep 25 15:38:01 2021 +0000
@@ -1,33 +1,33 @@
-# gffread /tmp/tmpq6d_yfqc/files/2/7/7/dataset_277f6e18-b25a-4b59-b712-49b5c202a183.dat -F -o output.gff3
-# gffread v0.11.6
 ##gff-version 3
-NC_000913.3	RefSeq	gene	190	255	.	+	.	ID=gene-b0001;geneID=gene-b0001;gene_name=thrL;Dbxref=ASAP:ABE-0000006,ECOCYC:EG11277,EcoGene:EG11277,GeneID:944742;Name=thrL;gbkey=Gene;gene=thrL;gene_biotype=protein_coding;gene_synonym=ECK0001;locus_tag=b0001
+# gffread v0.12.7
+# gffread /tmp/tmpk_iy6dhb/files/7/c/b/dataset_7cbb521e-a7fc-4b92-8335-006b4f916f5c.dat -F -o output.gff
+NC_000913.3	RefSeq	gene	190	255	.	+	.	ID=gene-b0001;gene_name=thrL;Dbxref=ASAP:ABE-0000006,ECOCYC:EG11277,EcoGene:EG11277,GeneID:944742;Name=thrL;gbkey=Gene;gene=thrL;gene_biotype=protein_coding;gene_synonym=ECK0001;locus_tag=b0001
 NC_000913.3	RefSeq	CDS	190	255	.	+	0	Parent=gene-b0001;Dbxref=UniProtKB/Swiss-Prot:P0AD86,Genbank:NP_414542.1,ASAP:ABE-0000006,ECOCYC:EG11277,EcoGene:EG11277,GeneID:944742;Name=NP_414542.1;gbkey=CDS;gene=thrL;locus_tag=b0001;orig_transcript_id=gnl|b0001|mrna.b0001;product=thr operon leader peptide;protein_id=NP_414542.1;transl_table=11
-NC_000913.3	RefSeq	gene	337	2799	.	+	.	ID=gene-b0002;geneID=gene-b0002;gene_name=thrA;Dbxref=ASAP:ABE-0000008,ECOCYC:EG10998,EcoGene:EG10998,GeneID:945803;Name=thrA;gbkey=Gene;gene=thrA;gene_biotype=protein_coding;gene_synonym=ECK0002,Hs,thrA1,thrA2,thrD;locus_tag=b0002
+NC_000913.3	RefSeq	gene	337	2799	.	+	.	ID=gene-b0002;gene_name=thrA;Dbxref=ASAP:ABE-0000008,ECOCYC:EG10998,EcoGene:EG10998,GeneID:945803;Name=thrA;gbkey=Gene;gene=thrA;gene_biotype=protein_coding;gene_synonym=ECK0002,Hs,thrA1,thrA2,thrD;locus_tag=b0002
 NC_000913.3	RefSeq	CDS	337	2799	.	+	0	Parent=gene-b0002;Dbxref=UniProtKB/Swiss-Prot:P00561,Genbank:NP_414543.1,ASAP:ABE-0000008,ECOCYC:EG10998,EcoGene:EG10998,GeneID:945803;Name=NP_414543.1;gbkey=CDS;gene=thrA;locus_tag=b0002;orig_transcript_id=gnl|b0002|mrna.b0002;product=fused aspartate kinase/homoserine dehydrogenase 1;protein_id=NP_414543.1;transl_table=11
-NC_000913.3	RefSeq	gene	2801	3733	.	+	.	ID=gene-b0003;geneID=gene-b0003;gene_name=thrB;Dbxref=ASAP:ABE-0000010,ECOCYC:EG10999,EcoGene:EG10999,GeneID:947498;Name=thrB;gbkey=Gene;gene=thrB;gene_biotype=protein_coding;gene_synonym=ECK0003;locus_tag=b0003
+NC_000913.3	RefSeq	gene	2801	3733	.	+	.	ID=gene-b0003;gene_name=thrB;Dbxref=ASAP:ABE-0000010,ECOCYC:EG10999,EcoGene:EG10999,GeneID:947498;Name=thrB;gbkey=Gene;gene=thrB;gene_biotype=protein_coding;gene_synonym=ECK0003;locus_tag=b0003
 NC_000913.3	RefSeq	CDS	2801	3733	.	+	0	Parent=gene-b0003;Dbxref=UniProtKB/Swiss-Prot:P00547,Genbank:NP_414544.1,ASAP:ABE-0000010,ECOCYC:EG10999,EcoGene:EG10999,GeneID:947498;Name=NP_414544.1;gbkey=CDS;gene=thrB;locus_tag=b0003;orig_transcript_id=gnl|b0003|mrna.b0003;product=homoserine kinase;protein_id=NP_414544.1;transl_table=11
-NC_000913.3	RefSeq	gene	3734	5020	.	+	.	ID=gene-b0004;geneID=gene-b0004;gene_name=thrC;Dbxref=ASAP:ABE-0000012,ECOCYC:EG11000,EcoGene:EG11000,GeneID:945198;Name=thrC;gbkey=Gene;gene=thrC;gene_biotype=protein_coding;gene_synonym=ECK0004;locus_tag=b0004
+NC_000913.3	RefSeq	gene	3734	5020	.	+	.	ID=gene-b0004;gene_name=thrC;Dbxref=ASAP:ABE-0000012,ECOCYC:EG11000,EcoGene:EG11000,GeneID:945198;Name=thrC;gbkey=Gene;gene=thrC;gene_biotype=protein_coding;gene_synonym=ECK0004;locus_tag=b0004
 NC_000913.3	RefSeq	CDS	3734	5020	.	+	0	Parent=gene-b0004;Dbxref=UniProtKB/Swiss-Prot:P00934,Genbank:NP_414545.1,ASAP:ABE-0000012,ECOCYC:EG11000,EcoGene:EG11000,GeneID:945198;Name=NP_414545.1;gbkey=CDS;gene=thrC;locus_tag=b0004;orig_transcript_id=gnl|b0004|mrna.b0004;product=threonine synthase;protein_id=NP_414545.1;transl_table=11
-NC_000913.3	RefSeq	gene	5234	5530	.	+	.	ID=gene-b0005;geneID=gene-b0005;gene_name=yaaX;Dbxref=ASAP:ABE-0000015,ECOCYC:G6081,EcoGene:EG14384,GeneID:944747;Name=yaaX;gbkey=Gene;gene=yaaX;gene_biotype=protein_coding;gene_synonym=ECK0005;locus_tag=b0005
+NC_000913.3	RefSeq	gene	5234	5530	.	+	.	ID=gene-b0005;gene_name=yaaX;Dbxref=ASAP:ABE-0000015,ECOCYC:G6081,EcoGene:EG14384,GeneID:944747;Name=yaaX;gbkey=Gene;gene=yaaX;gene_biotype=protein_coding;gene_synonym=ECK0005;locus_tag=b0005
 NC_000913.3	RefSeq	CDS	5234	5530	.	+	0	Parent=gene-b0005;Dbxref=UniProtKB/Swiss-Prot:P75616,Genbank:NP_414546.1,ASAP:ABE-0000015,ECOCYC:G6081,EcoGene:EG14384,GeneID:944747;Name=NP_414546.1;gbkey=CDS;gene=yaaX;locus_tag=b0005;orig_transcript_id=gnl|b0005|mrna.b0005;product=DUF2502 domain-containing protein YaaX;protein_id=NP_414546.1;transl_table=11
-NC_000913.3	RefSeq	gene	5683	6459	.	-	.	ID=gene-b0006;geneID=gene-b0006;gene_name=yaaA;Dbxref=ASAP:ABE-0000018,ECOCYC:EG10011,EcoGene:EG10011,GeneID:944749;Name=yaaA;gbkey=Gene;gene=yaaA;gene_biotype=protein_coding;gene_synonym=ECK0006;locus_tag=b0006
+NC_000913.3	RefSeq	gene	5683	6459	.	-	.	ID=gene-b0006;gene_name=yaaA;Dbxref=ASAP:ABE-0000018,ECOCYC:EG10011,EcoGene:EG10011,GeneID:944749;Name=yaaA;gbkey=Gene;gene=yaaA;gene_biotype=protein_coding;gene_synonym=ECK0006;locus_tag=b0006
 NC_000913.3	RefSeq	CDS	5683	6459	.	-	0	Parent=gene-b0006;Dbxref=UniProtKB/Swiss-Prot:P0A8I3,Genbank:NP_414547.1,ASAP:ABE-0000018,ECOCYC:EG10011,EcoGene:EG10011,GeneID:944749;Name=NP_414547.1;gbkey=CDS;gene=yaaA;locus_tag=b0006;orig_transcript_id=gnl|b0006|mrna.b0006;product=peroxide stress resistance protein YaaA;protein_id=NP_414547.1;transl_table=11
-NC_000913.3	RefSeq	gene	6529	7959	.	-	.	ID=gene-b0007;geneID=gene-b0007;gene_name=yaaJ;Dbxref=ASAP:ABE-0000020,ECOCYC:EG11555,EcoGene:EG11555,GeneID:944745;Name=yaaJ;gbkey=Gene;gene=yaaJ;gene_biotype=protein_coding;gene_synonym=ECK0007;locus_tag=b0007
+NC_000913.3	RefSeq	gene	6529	7959	.	-	.	ID=gene-b0007;gene_name=yaaJ;Dbxref=ASAP:ABE-0000020,ECOCYC:EG11555,EcoGene:EG11555,GeneID:944745;Name=yaaJ;gbkey=Gene;gene=yaaJ;gene_biotype=protein_coding;gene_synonym=ECK0007;locus_tag=b0007
 NC_000913.3	RefSeq	CDS	6529	7959	.	-	0	Parent=gene-b0007;Dbxref=UniProtKB/Swiss-Prot:P30143,Genbank:NP_414548.1,ASAP:ABE-0000020,ECOCYC:EG11555,EcoGene:EG11555,GeneID:944745;Name=NP_414548.1;gbkey=CDS;gene=yaaJ;locus_tag=b0007;orig_transcript_id=gnl|b0007|mrna.b0007;product=putative transporter YaaJ;protein_id=NP_414548.1;transl_table=11
-NC_000913.3	RefSeq	gene	8238	9191	.	+	.	ID=gene-b0008;geneID=gene-b0008;gene_name=talB;Dbxref=ASAP:ABE-0000027,ECOCYC:EG11556,EcoGene:EG11556,GeneID:944748;Name=talB;gbkey=Gene;gene=talB;gene_biotype=protein_coding;gene_synonym=ECK0008,yaaK;locus_tag=b0008
+NC_000913.3	RefSeq	gene	8238	9191	.	+	.	ID=gene-b0008;gene_name=talB;Dbxref=ASAP:ABE-0000027,ECOCYC:EG11556,EcoGene:EG11556,GeneID:944748;Name=talB;gbkey=Gene;gene=talB;gene_biotype=protein_coding;gene_synonym=ECK0008,yaaK;locus_tag=b0008
 NC_000913.3	RefSeq	CDS	8238	9191	.	+	0	Parent=gene-b0008;Dbxref=UniProtKB/Swiss-Prot:P0A870,Genbank:NP_414549.1,ASAP:ABE-0000027,ECOCYC:EG11556,EcoGene:EG11556,GeneID:944748;Name=NP_414549.1;gbkey=CDS;gene=talB;locus_tag=b0008;orig_transcript_id=gnl|b0008|mrna.b0008;product=transaldolase B;protein_id=NP_414549.1;transl_table=11
-NC_000913.3	RefSeq	gene	9306	9893	.	+	.	ID=gene-b0009;geneID=gene-b0009;gene_name=mog;Dbxref=ASAP:ABE-0000030,ECOCYC:EG11511,EcoGene:EG11511,GeneID:944760;Name=mog;gbkey=Gene;gene=mog;gene_biotype=protein_coding;gene_synonym=bisD,chlG,ECK0009,mogA,yaaG;locus_tag=b0009
+NC_000913.3	RefSeq	gene	9306	9893	.	+	.	ID=gene-b0009;gene_name=mog;Dbxref=ASAP:ABE-0000030,ECOCYC:EG11511,EcoGene:EG11511,GeneID:944760;Name=mog;gbkey=Gene;gene=mog;gene_biotype=protein_coding;gene_synonym=bisD,chlG,ECK0009,mogA,yaaG;locus_tag=b0009
 NC_000913.3	RefSeq	CDS	9306	9893	.	+	0	Parent=gene-b0009;Dbxref=UniProtKB/Swiss-Prot:P0AF03,Genbank:NP_414550.1,ASAP:ABE-0000030,ECOCYC:EG11511,EcoGene:EG11511,GeneID:944760;Name=NP_414550.1;gbkey=CDS;gene=mog;locus_tag=b0009;orig_transcript_id=gnl|b0009|mrna.b0009;product=molybdopterin adenylyltransferase;protein_id=NP_414550.1;transl_table=11
-NC_000913.3	RefSeq	gene	9928	10494	.	-	.	ID=gene-b0010;geneID=gene-b0010;gene_name=satP;Dbxref=ASAP:ABE-0000032,ECOCYC:EG11512,EcoGene:EG11512,GeneID:944792;Name=satP;gbkey=Gene;gene=satP;gene_biotype=protein_coding;gene_synonym=ECK0010,yaaH;locus_tag=b0010
+NC_000913.3	RefSeq	gene	9928	10494	.	-	.	ID=gene-b0010;gene_name=satP;Dbxref=ASAP:ABE-0000032,ECOCYC:EG11512,EcoGene:EG11512,GeneID:944792;Name=satP;gbkey=Gene;gene=satP;gene_biotype=protein_coding;gene_synonym=ECK0010,yaaH;locus_tag=b0010
 NC_000913.3	RefSeq	CDS	9928	10494	.	-	0	Parent=gene-b0010;Dbxref=UniProtKB/Swiss-Prot:P0AC98,Genbank:NP_414551.1,ASAP:ABE-0000032,ECOCYC:EG11512,EcoGene:EG11512,GeneID:944792;Name=NP_414551.1;gbkey=CDS;gene=satP;locus_tag=b0010;orig_transcript_id=gnl|b0010|mrna.b0010;product=acetate/succinate:H(+) symporter;protein_id=NP_414551.1;transl_table=11
-NC_000913.3	RefSeq	gene	10643	11356	.	-	.	ID=gene-b0011;geneID=gene-b0011;gene_name=yaaW;Dbxref=ASAP:ABE-0000037,ECOCYC:G6082,EcoGene:EG14340,GeneID:944771;Name=yaaW;gbkey=Gene;gene=yaaW;gene_biotype=protein_coding;gene_synonym=ECK0011;locus_tag=b0011
+NC_000913.3	RefSeq	gene	10643	11356	.	-	.	ID=gene-b0011;gene_name=yaaW;Dbxref=ASAP:ABE-0000037,ECOCYC:G6082,EcoGene:EG14340,GeneID:944771;Name=yaaW;gbkey=Gene;gene=yaaW;gene_biotype=protein_coding;gene_synonym=ECK0011;locus_tag=b0011
 NC_000913.3	RefSeq	CDS	10643	11356	.	-	0	Parent=gene-b0011;Dbxref=UniProtKB/Swiss-Prot:P75617,Genbank:NP_414552.1,ASAP:ABE-0000037,ECOCYC:G6082,EcoGene:EG14340,GeneID:944771;Name=NP_414552.1;gbkey=CDS;gene=yaaW;locus_tag=b0011;orig_transcript_id=gnl|b0011|mrna.b0011;product=putative enzyme-specific chaperone YaaW;protein_id=NP_414552.1;transl_table=11
-NC_000913.3	RefSeq	gene	10830	11315	.	+	.	ID=gene-b0012;geneID=gene-b0012;gene_name=mbiA;Dbxref=ASAP:ABE-0000040,ECOCYC:EG11509,EcoGene:EG11509,GeneID:948295;Name=mbiA;gbkey=Gene;gene=mbiA;gene_biotype=protein_coding;gene_synonym=ECK0012,htgA,htpY;locus_tag=b0012
+NC_000913.3	RefSeq	gene	10830	11315	.	+	.	ID=gene-b0012;gene_name=mbiA;Dbxref=ASAP:ABE-0000040,ECOCYC:EG11509,EcoGene:EG11509,GeneID:948295;Name=mbiA;gbkey=Gene;gene=mbiA;gene_biotype=protein_coding;gene_synonym=ECK0012,htgA,htpY;locus_tag=b0012
 NC_000913.3	RefSeq	CDS	10830	11315	.	+	0	Parent=gene-b0012;Dbxref=UniProtKB/Swiss-Prot:P28697,Genbank:YP_009518733.1,ASAP:ABE-0000040,ECOCYC:EG11509,EcoGene:EG11509,GeneID:948295;Name=YP_009518733.1;gbkey=CDS;gene=mbiA;locus_tag=b0012;orig_transcript_id=gnl|b0012|mrna.CDS13;product=uncharacterized protein MbiA;protein_id=YP_009518733.1;transl_table=11
-NC_000913.3	RefSeq	gene	11382	11786	.	-	.	ID=gene-b0013;geneID=gene-b0013;gene_name=yaaI;Dbxref=ASAP:ABE-0000043,ECOCYC:G8202,EcoGene:EG11513,GeneID:944751;Name=yaaI;gbkey=Gene;gene=yaaI;gene_biotype=protein_coding;gene_synonym=ECK0013;locus_tag=b0013
+NC_000913.3	RefSeq	gene	11382	11786	.	-	.	ID=gene-b0013;gene_name=yaaI;Dbxref=ASAP:ABE-0000043,ECOCYC:G8202,EcoGene:EG11513,GeneID:944751;Name=yaaI;gbkey=Gene;gene=yaaI;gene_biotype=protein_coding;gene_synonym=ECK0013;locus_tag=b0013
 NC_000913.3	RefSeq	CDS	11382	11786	.	-	0	Parent=gene-b0013;Dbxref=UniProtKB/Swiss-Prot:P28696,Genbank:NP_414554.1,ASAP:ABE-0000043,ECOCYC:G8202,EcoGene:EG11513,GeneID:944751;Name=NP_414554.1;gbkey=CDS;gene=yaaI;locus_tag=b0013;orig_transcript_id=gnl|b0013|mrna.b0013;product=DUF2541 domain-containing protein YaaI;protein_id=NP_414554.1;transl_table=11
-NC_000913.3	RefSeq	gene	12163	14079	.	+	.	ID=gene-b0014;geneID=gene-b0014;gene_name=dnaK;Dbxref=ASAP:ABE-0000052,ECOCYC:EG10241,EcoGene:EG10241,GeneID:944750;Name=dnaK;gbkey=Gene;gene=dnaK;gene_biotype=protein_coding;gene_synonym=ECK0014,groPAB,groPC,groPF,grpC,grpF,seg;locus_tag=b0014
+NC_000913.3	RefSeq	gene	12163	14079	.	+	.	ID=gene-b0014;gene_name=dnaK;Dbxref=ASAP:ABE-0000052,ECOCYC:EG10241,EcoGene:EG10241,GeneID:944750;Name=dnaK;gbkey=Gene;gene=dnaK;gene_biotype=protein_coding;gene_synonym=ECK0014,groPAB,groPC,groPF,grpC,grpF,seg;locus_tag=b0014
 NC_000913.3	RefSeq	CDS	12163	14079	.	+	0	Parent=gene-b0014;Dbxref=UniProtKB/Swiss-Prot:P0A6Y8,Genbank:NP_414555.1,ASAP:ABE-0000052,ECOCYC:EG10241,EcoGene:EG10241,GeneID:944750;Name=NP_414555.1;gbkey=CDS;gene=dnaK;locus_tag=b0014;orig_transcript_id=gnl|b0014|mrna.b0014;product=chaperone protein DnaK;protein_id=NP_414555.1;transl_table=11
-NC_000913.3	RefSeq	gene	14168	15298	.	+	.	ID=gene-b0015;geneID=gene-b0015;gene_name=dnaJ;Dbxref=ASAP:ABE-0000054,ECOCYC:EG10240,EcoGene:EG10240,GeneID:944753;Name=dnaJ;gbkey=Gene;gene=dnaJ;gene_biotype=protein_coding;gene_synonym=ECK0015,groP,grpC;locus_tag=b0015
+NC_000913.3	RefSeq	gene	14168	15298	.	+	.	ID=gene-b0015;gene_name=dnaJ;Dbxref=ASAP:ABE-0000054,ECOCYC:EG10240,EcoGene:EG10240,GeneID:944753;Name=dnaJ;gbkey=Gene;gene=dnaJ;gene_biotype=protein_coding;gene_synonym=ECK0015,groP,grpC;locus_tag=b0015
 NC_000913.3	RefSeq	CDS	14168	15298	.	+	0	Parent=gene-b0015;Dbxref=UniProtKB/Swiss-Prot:P08622,Genbank:NP_414556.1,ASAP:ABE-0000054,ECOCYC:EG10240,EcoGene:EG10240,GeneID:944753;Name=NP_414556.1;gbkey=CDS;gene=dnaJ;locus_tag=b0015;orig_transcript_id=gnl|b0015|mrna.b0015;product=chaperone protein DnaJ;protein_id=NP_414556.1;transl_table=11
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/stop_codons.gtf	Sat Sep 25 15:38:01 2021 +0000
@@ -0,0 +1,14 @@
+19	protein_coding	exon	291275	291386	.	-	.	 gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "1"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; exon_id "ENSE00001234447";
+19	protein_coding	CDS	291275	291336	.	-	0	 gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "1"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; protein_id "ENSP00000269812";
+19	protein_coding	start_codon	291334	291336	.	-	0	 gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "1"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001";
+19	protein_coding	exon	288020	288171	.	-	.	 gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "2"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; exon_id "ENSE00003304149";
+19	protein_coding	CDS	288020	288171	.	-	2	 gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "2"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; protein_id "ENSP00000269812";
+19	protein_coding	exon	287474	287751	.	-	.	 gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "3"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; exon_id "ENSE00003352024";
+19	protein_coding	CDS	287474	287751	.	-	0	 gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "3"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; protein_id "ENSP00000269812";
+19	protein_coding	exon	282752	282809	.	-	.	 gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "4"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; exon_id "ENSE00000951309";
+19	protein_coding	CDS	282752	282809	.	-	1	 gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "4"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; protein_id "ENSP00000269812";
+19	protein_coding	exon	282134	282310	.	-	.	 gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "5"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; exon_id "ENSE00000951310";
+19	protein_coding	CDS	282134	282310	.	-	0	 gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "5"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; protein_id "ENSP00000269812";
+19	protein_coding	exon	281043	281537	.	-	.	 gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "6"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; exon_id "ENSE00000951311";
+19	protein_coding	CDS	281391	281537	.	-	0	 gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "6"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; protein_id "ENSP00000269812";
+19	protein_coding	stop_codon	281388	281390	.	-	0	 gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "6"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001";