annotate variant_effect_predictor/Bio/Tools/GFF.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: GFF.pm,v 1.26 2002/11/24 21:35:40 jason Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 # BioPerl module for Bio::Tools::GFF
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 # Cared for by the Bioperl core team
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 # Copyright Matthew Pocock
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 # You may distribute this module under the same terms as perl itself
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 # POD documentation - main docs before the code
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 Bio::Tools::GFF - A Bio::SeqAnalysisParserI compliant GFF format parser
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 use Bio::Tools::GFF;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 # specify input via -fh or -file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 my $gffio = Bio::Tools::GFF->new(-fh => \*STDIN, -gff_version => 2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 my $feature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 # loop over the input stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 while($feature = $gffio->next_feature()) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 # do something with feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 $gffio->close();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 # you can also obtain a GFF parser as a SeqAnalasisParserI in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 # HT analysis pipelines (see Bio::SeqAnalysisParserI and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 # Bio::Factory::SeqAnalysisParserFactory)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 my $factory = Bio::Factory::SeqAnalysisParserFactory->new();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 my $parser = $factory->get_parser(-input => \*STDIN, -method => "gff");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 while($feature = $parser->next_feature()) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 # do something with feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 This class provides a simple GFF parser and writer. In the sense of a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 SeqAnalysisParser, it parses an input file or stream into SeqFeatureI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 objects, but is not in any way specific to a particular analysis
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 program and the output that program produces.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 That is, if you can get your analysis program spit out GFF, here is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 your result parser.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 =head1 FEEDBACK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 =head2 Mailing Lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 User feedback is an integral part of the evolution of this and other
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 Bioperl modules. Send your comments and suggestions preferably to one
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 of the Bioperl mailing lists. Your participation is much appreciated.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 bioperl-l@bioperl.org - General discussion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 http://bio.perl.org/MailList.html - About the mailing lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 =head2 Reporting Bugs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 Report bugs to the Bioperl bug tracking system to help us keep track
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 the bugs and their resolution. Bug reports can be submitted via email
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 or the web:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 bioperl-bugs@bio.perl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 http://bugzilla.bioperl.org/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 =head1 AUTHOR - Matthew Pocock
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 Email mrp@sanger.ac.uk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 =head1 APPENDIX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 # Let the code begin...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 package Bio::Tools::GFF;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 use vars qw(@ISA);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 use Bio::Root::IO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 use Bio::SeqAnalysisParserI;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 use Bio::SeqFeature::Generic;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 @ISA = qw(Bio::Root::Root Bio::SeqAnalysisParserI Bio::Root::IO);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 =head2 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 Title : new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 Usage :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 Function: Creates a new instance. Recognized named parameters are -file, -fh,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 and -gff_version.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 Returns : a new object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 Args : names parameters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 sub new {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 my ($class, @args) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 my $self = $class->SUPER::new(@args);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 my ($gff_version) = $self->_rearrange([qw(GFF_VERSION)],@args);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 # initialize IO
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 $self->_initialize_io(@args);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 $gff_version ||= 2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 if(($gff_version != 1) && ($gff_version != 2)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 $self->throw("Can't build a GFF object with the unknown version ".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 $gff_version);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 $self->gff_version($gff_version);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 return $self;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 =head2 next_feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 Title : next_feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 Usage : $seqfeature = $gffio->next_feature();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 Function: Returns the next feature available in the input file or stream, or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 undef if there are no more features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 Returns : A Bio::SeqFeatureI implementing object, or undef if there are no
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 more features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 sub next_feature {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 my ($self) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 my $gff_string;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 # be graceful about empty lines or comments, and make sure we return undef
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 # if the input's consumed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 while(($gff_string = $self->_readline()) && defined($gff_string)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 next if($gff_string =~ /^\#/ || $gff_string =~ /^\s*$/ ||
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 $gff_string =~ /^\/\//);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 return undef unless $gff_string;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 my $feat = Bio::SeqFeature::Generic->new();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 $self->from_gff_string($feat, $gff_string);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 return $feat;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 =head2 from_gff_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 Title : from_gff_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 Usage : $gff->from_gff_string($feature, $gff_string);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 Function: Sets properties of a SeqFeatureI object from a GFF-formatted
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 string. Interpretation of the string depends on the version
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 that has been specified at initialization.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 This method is used by next_feature(). It actually dispatches to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 one of the version-specific (private) methods.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 Returns : void
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 Args : A Bio::SeqFeatureI implementing object to be initialized
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 The GFF-formatted string to initialize it from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 sub from_gff_string {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 my ($self, $feat, $gff_string) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 if($self->gff_version() == 1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 $self->_from_gff1_string($feat, $gff_string);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 $self->_from_gff2_string($feat, $gff_string);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 =head2 _from_gff1_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 Title : _from_gff1_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 Usage :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 Function:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 Returns : void
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 Args : A Bio::SeqFeatureI implementing object to be initialized
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 The GFF-formatted string to initialize it from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 sub _from_gff1_string {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 my ($gff, $feat, $string) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 chomp $string;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 my ($seqname, $source, $primary, $start, $end, $score, $strand, $frame, @group) = split(/\t/, $string);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 if ( !defined $frame ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 $feat->throw("[$string] does not look like GFF to me");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 $frame = 0 unless( $frame =~ /^\d+$/);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 $feat->seq_id($seqname);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 $feat->source_tag($source);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 $feat->primary_tag($primary);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 $feat->start($start);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 $feat->end($end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 $feat->frame($frame);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 if ( $score eq '.' ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 #$feat->score(undef);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 $feat->score($score);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 if ( $strand eq '-' ) { $feat->strand(-1); }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 if ( $strand eq '+' ) { $feat->strand(1); }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 if ( $strand eq '.' ) { $feat->strand(0); }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 foreach my $g ( @group ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 if ( $g =~ /(\S+)=(\S+)/ ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 my $tag = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 my $value = $2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 $feat->add_tag_value($1, $2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 $feat->add_tag_value('group', $g);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 =head2 _from_gff2_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 Title : _from_gff2_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 Usage :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 Function:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 Returns : void
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 Args : A Bio::SeqFeatureI implementing object to be initialized
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 The GFF2-formatted string to initialize it from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 sub _from_gff2_string {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 my ($gff, $feat, $string) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 chomp($string);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 # according to the Sanger website, GFF2 should be single-tab separated elements, and the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246 # free-text at the end should contain text-translated tab symbols but no "real" tabs,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 # so splitting on \t is safe, and $attribs gets the entire attributes field to be parsed later
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 my ($seqname, $source, $primary, $start, $end, $score, $strand, $frame, @attribs) = split(/\t+/, $string);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 my $attribs = join '', @attribs; # just in case the rule against tab characters has been broken
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 if ( !defined $frame ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 $feat->throw("[$string] does not look like GFF2 to me");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 $feat->seq_id($seqname);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 $feat->source_tag($source);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 $feat->primary_tag($primary);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 $feat->start($start);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 $feat->end($end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 $feat->frame($frame);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 if ( $score eq '.' ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260 #$feat->score(undef);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 $feat->score($score);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 if ( $strand eq '-' ) { $feat->strand(-1); }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 if ( $strand eq '+' ) { $feat->strand(1); }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 if ( $strand eq '.' ) { $feat->strand(0); }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 # <Begin Inefficient Code from Mark Wilkinson>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270 # this routine is necessay to allow the presence of semicolons in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 # quoted text Semicolons are the delimiting character for new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 # tag/value attributes. it is more or less a "state" machine, with
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273 # the "quoted" flag going up and down as we pass thorugh quotes to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 # distinguish free-text semicolon and hash symbols from GFF control
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 # characters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 my $flag = 0; # this could be changed to a bit and just be twiddled
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279 my @parsed;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 # run through each character one at a time and check it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 # NOTE: changed to foreach loop which is more efficient in perl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283 # --jasons
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 foreach my $a ( split //, $attribs ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 # flag up on entering quoted text, down on leaving it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 if( $a eq '"') { $flag = ( $flag == 0 ) ? 1:0 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 elsif( $a eq ';' && $flag ) { $a = "INSERT_SEMICOLON_HERE"}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 elsif( $a eq '#' && ! $flag ) { last }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 push @parsed, $a;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 $attribs = join "", @parsed; # rejoin into a single string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 # <End Inefficient Code>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 # Please feel free to fix this and make it more "perlish"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 my @key_vals = split /;/, $attribs; # attributes are semicolon-delimited
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 foreach my $pair ( @key_vals ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 # replace semicolons that were removed from free-text above.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 $pair =~ s/INSERT_SEMICOLON_HERE/;/g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 # separate the key from the value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 my ($blank, $key, $values) = split /^\s*([\w\d]+)\s/, $pair;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 if( defined $values ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 my @values;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 # free text is quoted, so match each free-text block
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 # and remove it from the $values string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 while ($values =~ s/"(.*?)"//){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 # and push it on to the list of values (tags may have
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 # more than one value... and the value may be undef)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 push @values, $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 # and what is left over should be space-separated
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 # non-free-text values
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 my @othervals = split /\s+/, $values;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 foreach my $othervalue(@othervals){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 # get rid of any empty strings which might
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 # result from the split
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 if (CORE::length($othervalue) > 0) {push @values, $othervalue}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 foreach my $value(@values){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 $feat->add_tag_value($key, $value);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 =head2 write_feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 Title : write_feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 Usage : $gffio->write_feature($feature);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 Function: Writes the specified SeqFeatureI object in GFF format to the stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 associated with this instance.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 Returns : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 Args : An array of Bio::SeqFeatureI implementing objects to be serialized
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 sub write_feature {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 my ($self, @features) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 foreach my $feature ( @features ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 $self->_print($self->gff_string($feature)."\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 =head2 gff_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 Title : gff_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 Usage : $gffstr = $gffio->gff_string($feature);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356 Function: Obtain the GFF-formatted representation of a SeqFeatureI object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 The formatting depends on the version specified at initialization.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 This method is used by write_feature(). It actually dispatches to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 one of the version-specific (private) methods.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 Returns : A GFF-formatted string representation of the SeqFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363 Args : A Bio::SeqFeatureI implementing object to be GFF-stringified
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 sub gff_string{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 my ($self, $feature) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 if($self->gff_version() == 1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 return $self->_gff1_string($feature);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 return $self->_gff2_string($feature);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 =head2 _gff1_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 Title : _gff1_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 Usage : $gffstr = $gffio->_gff1_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 Function:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 Returns : A GFF1-formatted string representation of the SeqFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384 Args : A Bio::SeqFeatureI implementing object to be GFF-stringified
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388 sub _gff1_string{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 my ($gff, $feat) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 my ($str,$score,$frame,$name,$strand);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392 if( $feat->can('score') ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393 $score = $feat->score();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395 $score = '.' unless defined $score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 if( $feat->can('frame') ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398 $frame = $feat->frame();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400 $frame = '.' unless defined $frame;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402 $strand = $feat->strand();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403 if(! $strand) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 $strand = ".";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405 } elsif( $strand == 1 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406 $strand = '+';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407 } elsif ( $feat->strand == -1 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408 $strand = '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411 if( $feat->can('seqname') ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412 $name = $feat->seq_id();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 $name ||= 'SEQ';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 $name = 'SEQ';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419 $str = join("\t",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420 $name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421 $feat->source_tag(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422 $feat->primary_tag(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423 $feat->start(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424 $feat->end(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425 $score,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426 $strand,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427 $frame);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429 foreach my $tag ( $feat->all_tags ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430 foreach my $value ( $feat->each_tag_value($tag) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431 $str .= " $tag=$value";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436 return $str;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 =head2 _gff2_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 Title : _gff2_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442 Usage : $gffstr = $gffio->_gff2_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443 Function:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445 Returns : A GFF2-formatted string representation of the SeqFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 Args : A Bio::SeqFeatureI implementing object to be GFF2-stringified
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 sub _gff2_string{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451 my ($gff, $feat) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 my ($str,$score,$frame,$name,$strand);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454 if( $feat->can('score') ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455 $score = $feat->score();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457 $score = '.' unless defined $score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459 if( $feat->can('frame') ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460 $frame = $feat->frame();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462 $frame = '.' unless defined $frame;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464 $strand = $feat->strand();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465 if(! $strand) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466 $strand = ".";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467 } elsif( $strand == 1 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468 $strand = '+';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469 } elsif ( $feat->strand == -1 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470 $strand = '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473 if( $feat->can('seqname') ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474 $name = $feat->seq_id();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475 $name ||= 'SEQ';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477 $name = 'SEQ';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479 $str = join("\t",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480 $name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481 $feat->source_tag(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482 $feat->primary_tag(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483 $feat->start(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484 $feat->end(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485 $score,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486 $strand,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 $frame);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489 # the routine below is the only modification I made to the original
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490 # ->gff_string routine (above) as on November 17th, 2000, the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491 # Sanger webpage describing GFF2 format reads: "From version 2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
492 # onwards, the attribute field must have a tag value structure
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493 # following the syntax used within objects in a .ace file,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494 # flattened onto one line by semicolon separators. Tags must be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
495 # standard identifiers ([A-Za-z][A-Za-z0-9_]*). Free text values
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
496 # must be quoted with double quotes".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
497
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
498 # MW
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500 my $valuestr;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501 my @all_tags = $feat->all_tags;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502 if (@all_tags) { # only play this game if it is worth playing...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503 $str .= "\t"; # my interpretation of the GFF2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504 # specification suggests the need
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505 # for this additional TAB character...??
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506 foreach my $tag ( @all_tags ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507 my $valuestr; # a string which will hold one or more values
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508 # for this tag, with quoted free text and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509 # space-separated individual values.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510 foreach my $value ( $feat->each_tag_value($tag) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511 if ($value =~ /[^A-Za-z0-9_]/){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512 $value =~ s/\t/\\t/g; # substitute tab and newline
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513 # characters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514 $value =~ s/\n/\\n/g; # to their UNIX equivalents
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515 $value = '"' . $value . '" '} # if the value contains
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516 # anything other than valid
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517 # tag/value characters, then
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518 # quote it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519 $value = "\"\"" unless defined $value;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520 # if it is completely empty,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521 # then just make empty double
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522 # quotes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523 $valuestr .= $value . " "; # with a trailing space in case
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524 # there are multiple values
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525 # for this tag (allowed in GFF2 and .ace format)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
526 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
527 $str .= "$tag $valuestr ; "; # semicolon delimited with no '=' sign
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
528 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
529 chop $str; chop $str # remove the trailing semicolon and space
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
530 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
531 return $str;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534 =head2 gff_version
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536 Title : _gff_version
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537 Usage : $gffversion = $gffio->gff_version
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538 Function:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
539 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
540 Returns : The GFF version this parser will accept and emit.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
541 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
542
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
543 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
544
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
545 sub gff_version {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
546 my ($self, $value) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
547 if(defined $value && (($value == 1) || ($value == 2))) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
548 $self->{'GFF_VERSION'} = $value;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
549 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
550 return $self->{'GFF_VERSION'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
551 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
552
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
553 # Make filehandles
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
554
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
555 =head2 newFh
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
556
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
557 Title : newFh
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
558 Usage : $fh = Bio::Tools::GFF->newFh(-file=>$filename,-format=>'Format')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
559 Function: does a new() followed by an fh()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
560 Example : $fh = Bio::Tools::GFF->newFh(-file=>$filename,-format=>'Format')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
561 $feature = <$fh>; # read a feature object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
562 print $fh $feature ; # write a feature object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
563 Returns : filehandle tied to the Bio::Tools::GFF class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
564 Args :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
565
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
566 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
567
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
568 sub newFh {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
569 my $class = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
570 return unless my $self = $class->new(@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
571 return $self->fh;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
572 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
573
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
574 =head2 fh
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
575
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
576 Title : fh
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
577 Usage : $obj->fh
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
578 Function:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
579 Example : $fh = $obj->fh; # make a tied filehandle
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
580 $feature = <$fh>; # read a feature object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
581 print $fh $feature; # write a feature object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
582 Returns : filehandle tied to Bio::Tools::GFF class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
583 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
584
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
585 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
586
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
587
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
588 sub fh {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
589 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
590 my $class = ref($self) || $self;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
591 my $s = Symbol::gensym;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
592 tie $$s,$class,$self;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
593 return $s;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
594 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
595
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
596 sub DESTROY {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
597 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
598
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
599 $self->close();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
600 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
601
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
602 sub TIEHANDLE {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
603 my ($class,$val) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
604 return bless {'gffio' => $val}, $class;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
605 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
606
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
607 sub READLINE {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
608 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
609 return $self->{'gffio'}->next_feature() unless wantarray;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
610 my (@list, $obj);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
611 push @list, $obj while $obj = $self->{'gffio'}->next_feature();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
612 return @list;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
613 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
614
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
615 sub PRINT {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
616 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
617 $self->{'gffio'}->write_feature(@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
618 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
619
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
620 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
621