annotate variant_effect_predictor/Bio/Tools/GFF.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: GFF.pm,v 1.26 2002/11/24 21:35:40 jason Exp $
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3 # BioPerl module for Bio::Tools::GFF
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
4 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
5 # Cared for by the Bioperl core team
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
6 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
7 # Copyright Matthew Pocock
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
8 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
9 # You may distribute this module under the same terms as perl itself
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
10
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
11 # POD documentation - main docs before the code
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
12
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
13 =head1 NAME
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
14
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
15 Bio::Tools::GFF - A Bio::SeqAnalysisParserI compliant GFF format parser
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
16
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
17 =head1 SYNOPSIS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
18
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
19 use Bio::Tools::GFF;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
20
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
21 # specify input via -fh or -file
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
22 my $gffio = Bio::Tools::GFF->new(-fh => \*STDIN, -gff_version => 2);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
23 my $feature;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
24 # loop over the input stream
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
25 while($feature = $gffio->next_feature()) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
26 # do something with feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
27 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
28 $gffio->close();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
29
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
30 # you can also obtain a GFF parser as a SeqAnalysisParserI in
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
31 # HT analysis pipelines (see Bio::SeqAnalysisParserI and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
32 # Bio::Factory::SeqAnalysisParserFactory)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
33 my $factory = Bio::Factory::SeqAnalysisParserFactory->new();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
34 my $parser = $factory->get_parser(-input => \*STDIN, -method => "gff");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
35 while($feature = $parser->next_feature()) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
36 # do something with feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
37 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
38
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
39 =head1 DESCRIPTION
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
40
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
41 This class provides a simple GFF parser and writer. In the sense of a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
42 SeqAnalysisParser, it parses an input file or stream into SeqFeatureI
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
43 objects, but is not in any way specific to a particular analysis
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
44 program and the output that program produces.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
45
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
46 That is, if you can get your analysis program to spit out GFF, here is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
47 your result parser.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
48
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
49 =head1 FEEDBACK
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
50
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
51 =head2 Mailing Lists
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
52
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
53 User feedback is an integral part of the evolution of this and other
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
54 Bioperl modules. Send your comments and suggestions preferably to one
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
55 of the Bioperl mailing lists. Your participation is much appreciated.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
56
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
57 bioperl-l@bioperl.org - General discussion
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
58 http://bio.perl.org/MailList.html - About the mailing lists
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
59
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
60 =head2 Reporting Bugs
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
61
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
62 Report bugs to the Bioperl bug tracking system to help us keep track
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
63 of the bugs and their resolution. Bug reports can be submitted via email
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
64 or the web:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
65
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
66 bioperl-bugs@bio.perl.org
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
67 http://bugzilla.bioperl.org/
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
68
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
69 =head1 AUTHOR - Matthew Pocock
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
70
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
71 Email mrp@sanger.ac.uk
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
72
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
73 =head1 APPENDIX
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
74
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
75 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
76
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
77 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
78
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
79 # Let the code begin...
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
80
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
81 package Bio::Tools::GFF;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
82
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
83 use vars qw(@ISA);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
84 use strict;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
85
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
86 use Bio::Root::IO;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
87 use Bio::SeqAnalysisParserI;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
88 use Bio::SeqFeature::Generic;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
89
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
90 @ISA = qw(Bio::Root::Root Bio::SeqAnalysisParserI Bio::Root::IO);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
91
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
92 =head2 new
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
93
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
94 Title : new
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
95 Usage :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
96 Function: Creates a new instance. Recognized named parameters are -file, -fh,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
97 and -gff_version.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
98
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
99 Returns : a new object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
100 Args : named parameters
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
101
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
102
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
103 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
104
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub new {
    # Constructor.
    #
    # Args   : named parameters; -gff_version is read here, and the full
    #          argument list is also passed to SUPER::new and _initialize_io.
    # Returns: the new object.
    # Throws : via $self->throw() when the version is neither 1 nor 2.
    my ($class, @args) = @_;

    # Let the parent class build the object, then pull out our own parameter.
    my $self = $class->SUPER::new(@args);
    my ($version) = $self->_rearrange([qw(GFF_VERSION)], @args);

    # Hand the same arguments to the IO initialiser.
    $self->_initialize_io(@args);

    # An absent (or otherwise false) version means GFF2.
    $version = 2 unless $version;

    unless ( $version == 1 || $version == 2 ) {
        $self->throw("Can't build a GFF object with the unknown version ".
                     $version);
    }

    $self->gff_version($version);
    return $self;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
122
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
123 =head2 next_feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
124
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
125 Title : next_feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
126 Usage : $seqfeature = $gffio->next_feature();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
127 Function: Returns the next feature available in the input file or stream, or
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
128 undef if there are no more features.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
129 Example :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
130 Returns : A Bio::SeqFeatureI implementing object, or undef if there are no
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
131 more features.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
132 Args : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
133
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
134 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
135
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub next_feature {
    # Read lines until one that is not a comment, blank line or '//' marker
    # is found, and turn that line into a feature object.
    #
    # Args   : none
    # Returns: a Bio::SeqFeature::Generic object, or undef once _readline()
    #          stops returning a true value.
    my ($self) = @_;

    my $line;
  LINE:
    while ( $line = $self->_readline() ) {
        next LINE if $line =~ /^\#/;      # comment line
        next LINE if $line =~ /^\s*$/;    # empty / whitespace-only line
        next LINE if $line =~ /^\/\//;    # line starting with '//'
        last LINE;                        # anything else is data
    }

    # Either the input ran out or _readline() gave us a false value.
    return undef unless $line;

    my $feature = Bio::SeqFeature::Generic->new();
    $self->from_gff_string($feature, $line);

    return $feature;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
155
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
156 =head2 from_gff_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
157
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
158 Title : from_gff_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
159 Usage : $gff->from_gff_string($feature, $gff_string);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
160 Function: Sets properties of a SeqFeatureI object from a GFF-formatted
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
161 string. Interpretation of the string depends on the version
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
162 that has been specified at initialization.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
163
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
164 This method is used by next_feature(). It actually dispatches to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
165 one of the version-specific (private) methods.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
166 Example :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
167 Returns : void
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
168 Args : A Bio::SeqFeatureI implementing object to be initialized
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
169 The GFF-formatted string to initialize it from
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
170
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
171 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
172
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub from_gff_string {
    # Dispatch to the parser matching this object's GFF version:
    # version 1 goes to _from_gff1_string, anything else to _from_gff2_string.
    #
    # Args   : $feat       - the feature object to initialise
    #          $gff_string - the GFF-formatted line to parse
    my ($self, $feat, $gff_string) = @_;

    my $parser = ( $self->gff_version() == 1 )
               ? '_from_gff1_string'
               : '_from_gff2_string';

    $self->$parser($feat, $gff_string);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
182
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
183 =head2 _from_gff1_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
184
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
185 Title : _from_gff1_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
186 Usage :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
187 Function:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
188 Example :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
189 Returns : void
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
190 Args : A Bio::SeqFeatureI implementing object to be initialized
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
191 The GFF-formatted string to initialize it from
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
192
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
193 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
194
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub _from_gff1_string {
    # Populate a feature object from one line of GFF version 1 text.
    #
    # Args   : $gff    - the invocant (not used in the body)
    #          $feat   - the feature object to initialise
    #          $string - the tab-separated GFF line
    # Throws : via $feat->throw() when the eighth field (frame) is missing.
    my ($gff, $feat, $string) = @_;
    chomp $string;

    my ($seqname, $source, $primary, $start, $end,
        $score, $strand, $frame, @group) = split(/\t/, $string);

    if ( !defined $frame ) {
        $feat->throw("[$string] does not look like GFF to me");
    }

    # Anything that is not a plain non-negative integer (e.g. '.') becomes 0.
    $frame = 0 unless( $frame =~ /^\d+$/);

    $feat->seq_id($seqname);
    $feat->source_tag($source);
    $feat->primary_tag($primary);
    $feat->start($start);
    $feat->end($end);
    $feat->frame($frame);

    # A score of '.' means "no score": leave the feature's score untouched.
    $feat->score($score) unless $score eq '.';

    # Map the GFF strand symbol to a number; any other symbol leaves the
    # strand unset.
    my %strand_for = ( '-' => -1, '+' => 1, '.' => 0 );
    $feat->strand($strand_for{$strand}) if exists $strand_for{$strand};

    # Each remaining field is either "tag=value" (stored under that tag) or
    # free text, which is stored under the tag 'group'.
    foreach my $g ( @group ) {
        if ( my ($tag, $value) = $g =~ /(\S+)=(\S+)/ ) {
            $feat->add_tag_value($tag, $value);
        } else {
            $feat->add_tag_value('group', $g);
        }
    }
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
228
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
229 =head2 _from_gff2_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
230
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
231 Title : _from_gff2_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
232 Usage :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
233 Function:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
234 Example :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
235 Returns : void
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
236 Args : A Bio::SeqFeatureI implementing object to be initialized
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
237 The GFF2-formatted string to initialize it from
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
238
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
239
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
240 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
241
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub _from_gff2_string {
    # Populate a feature object from one line of GFF version 2 text.
    #
    # Args   : $gff    - the invocant (not used in the body)
    #          $feat   - the feature object to initialise
    #          $string - the tab-separated GFF2 line
    # Throws : via $feat->throw() when the eighth field (frame) is missing.
    my ($gff, $feat, $string) = @_;
    chomp($string);

    # According to the Sanger website, GFF2 fields are single-tab separated
    # and the free text at the end contains no "real" tabs, so splitting on
    # tabs is safe.  Should that rule have been broken anyway, the pieces of
    # the attribute field are glued back together.
    my ($seqname, $source, $primary, $start, $end,
        $score, $strand, $frame, @attribs) = split(/\t+/, $string);
    my $attribs = join '', @attribs;

    if ( !defined $frame ) {
        $feat->throw("[$string] does not look like GFF2 to me");
    }

    $feat->seq_id($seqname);
    $feat->source_tag($source);
    $feat->primary_tag($primary);
    $feat->start($start);
    $feat->end($end);
    $feat->frame($frame);

    # A score of '.' means "no score": leave the feature's score untouched.
    $feat->score($score) unless $score eq '.';

    # Map the GFF strand symbol to a number; any other symbol leaves the
    # strand unset.
    my %strand_for = ( '-' => -1, '+' => 1, '.' => 0 );
    $feat->strand($strand_for{$strand}) if exists $strand_for{$strand};

    # Split the attribute field into "key value..." pairs.  Semicolons
    # delimit pairs and '#' starts a trailing comment, but only outside
    # double-quoted text, so walk the characters while tracking whether we
    # are inside quotes.  Pairs are cut directly here (no placeholder text is
    # substituted for quoted semicolons), so quoted values come through
    # exactly as written.
    my @key_vals;
    my $current   = '';
    my $in_quotes = 0;
  CHAR:
    foreach my $char ( split //, $attribs ) {
        if ( $char eq '"' ) {
            $in_quotes = !$in_quotes;      # entering or leaving quoted text
        }
        elsif ( !$in_quotes ) {
            last CHAR if $char eq '#';     # rest of the line is a comment
            if ( $char eq ';' ) {          # end of this key/value pair
                push @key_vals, $current;
                $current = '';
                next CHAR;
            }
        }
        $current .= $char;
    }
    push @key_vals, $current;

    foreach my $pair ( @key_vals ) {
        # Separate the key (leading word) from its values; the first list
        # element is the empty text before the key and is discarded.  Pairs
        # without a key followed by at least one character leave $values
        # undefined and are skipped.
        my (undef, $key, $values) = split /^\s*([\w\d]+)\s/, $pair;
        next unless defined $values;

        # Quoted free-text values are extracted (and removed) first ...
        my @values;
        while ( $values =~ s/"(.*?)"// ) {
            push @values, $1;
        }

        # ... and whatever is left is a whitespace-separated list of plain
        # values; empty strings produced by the split are dropped.
        push @values, grep { CORE::length($_) > 0 } split /\s+/, $values;

        foreach my $value ( @values ) {
            $feat->add_tag_value($key, $value);
        }
    }
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
333
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
334 =head2 write_feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
335
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
336 Title : write_feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
337 Usage : $gffio->write_feature($feature);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
338 Function: Writes the specified SeqFeatureI object in GFF format to the stream
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
339 associated with this instance.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
340 Returns : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
341 Args : An array of Bio::SeqFeatureI implementing objects to be serialized
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
342
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
343 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
344
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub write_feature {
    # Serialise each given feature with gff_string() and pass the result,
    # followed by a newline, to _print().
    #
    # Args   : a list of feature objects
    my $self = shift;

    for my $feat ( @_ ) {
        my $line = $self->gff_string($feat);
        $self->_print($line . "\n");
    }
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
351
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
352 =head2 gff_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
353
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
354 Title : gff_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
355 Usage : $gffstr = $gffio->gff_string($feature);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
356 Function: Obtain the GFF-formatted representation of a SeqFeatureI object.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
357 The formatting depends on the version specified at initialization.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
358
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
359 This method is used by write_feature(). It actually dispatches to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
360 one of the version-specific (private) methods.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
361 Example :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
362 Returns : A GFF-formatted string representation of the SeqFeature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
363 Args : A Bio::SeqFeatureI implementing object to be GFF-stringified
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
364
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
365 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
366
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub gff_string{
    # Return the GFF text for a feature, using _gff1_string when this
    # object's version is 1 and _gff2_string otherwise.
    #
    # Args   : $feature - the feature object to stringify
    # Returns: whatever the selected formatter returns
    my ($self, $feature) = @_;

    my $formatter = ( $self->gff_version() == 1 )
                  ? '_gff1_string'
                  : '_gff2_string';

    return $self->$formatter($feature);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
376
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
=head2 _gff1_string

 Title   : _gff1_string
 Usage   : $gffstr = $gffio->_gff1_string($feature)
 Function: Formats a feature as a GFF version 1 line: eight
           tab-separated columns (sequence name, source tag, primary
           tag, start, end, score, strand, frame) followed by one
           space-prefixed "tag=value" pair for every tag value.
           The sequence name defaults to 'SEQ', and score, frame and
           strand default to '.', when the feature does not supply them.
 Example :
 Returns : A GFF1-formatted string representation of the SeqFeature
 Args    : A Bio::SeqFeatureI implementing object to be GFF-stringified

=cut

sub _gff1_string{
    my ($gff, $feat) = @_;
    my ($str,$score,$frame,$name,$strand);

    # score and frame are optional accessors; '.' marks "not available"
    if( $feat->can('score') ) {
        $score = $feat->score();
    }
    $score = '.' unless defined $score;

    if( $feat->can('frame') ) {
        $frame = $feat->frame();
    }
    $frame = '.' unless defined $frame;

    # translate a numeric strand (1 / -1) into GFF notation; a false
    # strand becomes '.', any other value is passed through unchanged
    $strand = $feat->strand();
    if(! $strand) {
        $strand = ".";
    } elsif( $strand == 1 ) {
        $strand = '+';
    } elsif ( $strand == -1 ) {
        $strand = '-';
    }

    # Probe for seq_id() itself, since that is the accessor being called;
    # seqname() is kept as a fallback for features that only offer it.
    if( $feat->can('seq_id') ) {
        $name = $feat->seq_id();
    } elsif( $feat->can('seqname') ) {
        $name = $feat->seqname();
    }
    $name ||= 'SEQ';

    $str = join("\t",
                $name,
                $feat->source_tag(),
                $feat->primary_tag(),
                $feat->start(),
                $feat->end(),
                $score,
                $strand,
                $frame);

    # append every tag value as " tag=value"
    foreach my $tag ( $feat->all_tags ) {
        foreach my $value ( $feat->each_tag_value($tag) ) {
            $str .= " $tag=$value";
        }
    }

    return $str;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
438
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
=head2 _gff2_string

 Title   : _gff2_string
 Usage   : $gffstr = $gffio->_gff2_string($feature)
 Function: Formats a feature as a GFF version 2 line: eight
           tab-separated columns (sequence name, source tag, primary
           tag, start, end, score, strand, frame) and, if the feature
           has tags, a ninth column of "tag value value ..." groups
           separated by " ; ".  Values containing anything other than
           [A-Za-z0-9_] are double-quoted, with tabs and newlines
           written as \t and \n; undefined or empty values are written
           as "".
 Example :
 Returns : A GFF2-formatted string representation of the SeqFeature
 Args    : A Bio::SeqFeatureI implementing object to be GFF2-stringified

=cut

sub _gff2_string{
    my ($gff, $feat) = @_;
    my ($str,$score,$frame,$name,$strand);

    # score and frame are optional accessors; '.' marks "not available"
    if( $feat->can('score') ) {
        $score = $feat->score();
    }
    $score = '.' unless defined $score;

    if( $feat->can('frame') ) {
        $frame = $feat->frame();
    }
    $frame = '.' unless defined $frame;

    # translate a numeric strand (1 / -1) into GFF notation; a false
    # strand becomes '.', any other value is passed through unchanged
    $strand = $feat->strand();
    if(! $strand) {
        $strand = ".";
    } elsif( $strand == 1 ) {
        $strand = '+';
    } elsif ( $strand == -1 ) {
        $strand = '-';
    }

    # Probe for seq_id() itself, since that is the accessor being called;
    # seqname() is kept as a fallback for features that only offer it.
    if( $feat->can('seq_id') ) {
        $name = $feat->seq_id();
    } elsif( $feat->can('seqname') ) {
        $name = $feat->seqname();
    }
    $name ||= 'SEQ';

    $str = join("\t",
                $name,
                $feat->source_tag(),
                $feat->primary_tag(),
                $feat->start(),
                $feat->end(),
                $score,
                $strand,
                $frame);

    # The attribute column is where this differs from the GFF1 writer.
    # As of November 17th, 2000, the Sanger webpage describing GFF2 reads:
    # "From version 2 onwards, the attribute field must have a tag value
    # structure following the syntax used within objects in a .ace file,
    # flattened onto one line by semicolon separators. Tags must be
    # standard identifiers ([A-Za-z][A-Za-z0-9_]*). Free text values
    # must be quoted with double quotes".  (MW)

    my @all_tags = $feat->all_tags;
    if (@all_tags) {    # only add the ninth column if there is content
        my @groups;
        foreach my $tag ( @all_tags ) {
            my @values;    # formatted values of this tag, in order
            foreach my $raw ( $feat->each_tag_value($tag) ) {
                my $value = $raw;    # format a copy, never the original
                if( !defined $value || !length $value ) {
                    # nothing to print: emit empty double quotes.  This
                    # has to be decided before the value is matched.
                    $value = '""';
                } elsif( $value =~ /[^A-Za-z0-9_]/ ) {
                    # free text: escape tab/newline, then quote
                    $value =~ s/\t/\\t/g;
                    $value =~ s/\n/\\n/g;
                    $value = '"' . $value . '"';
                }
                push @values, $value;
            }
            # tag and its values are space separated, with no '=' sign
            push @groups, join(' ', $tag, @values);
        }
        # groups are semicolon delimited; join() leaves no trailing
        # separator or padding to trim afterwards
        $str .= "\t" . join(' ; ', @groups);
    }
    return $str;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
533
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
=head2 gff_version

 Title   : gff_version
 Usage   : $gffversion = $gffio->gff_version
           $gffio->gff_version(2)
 Function: Gets and, when given 1 or 2, sets the GFF version of this
           object.  Any other argument leaves the stored version
           untouched.
 Example :
 Returns : The GFF version this parser will accept and emit.
 Args    : optional, the version to switch to (1 or 2)

=cut

sub gff_version {
    my $self      = shift;
    my $requested = shift;

    if( defined $requested ) {
        # only the two supported versions are ever stored
        $self->{'GFF_VERSION'} = $requested
            if $requested == 1 || $requested == 2;
    }
    return $self->{'GFF_VERSION'};
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
552
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
553 # Make filehandles
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
554
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
=head2 newFh

 Title   : newFh
 Usage   : $fh = Bio::Tools::GFF->newFh(-file=>$filename,-gff_version=>2)
 Function: does a new() followed by an fh()
 Example : $fh = Bio::Tools::GFF->newFh(-file=>$filename,-gff_version=>2)
           $feature = <$fh>;            # read a feature object
           print $fh $feature ;         # write a feature object
 Returns : filehandle tied to the Bio::Tools::GFF class, or nothing
           if new() returns a false value
 Args    : whatever new() accepts; all arguments are passed through

=cut

sub newFh {
    my ($class, @ctor_args) = @_;

    # hand everything to the constructor; give up quietly if it fails
    my $gffio = $class->new(@ctor_args)
        or return;

    return $gffio->fh;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
573
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
=head2 fh

 Title   : fh
 Usage   : $obj->fh
 Function: Creates a fresh anonymous glob and ties it to the class of
           $obj, passing $obj to TIEHANDLE.
 Example : $fh = $obj->fh;      # make a tied filehandle
           $feature = <$fh>;    # read a feature object
           print $fh $feature;  # write a feature object
 Returns : filehandle tied to Bio::Tools::GFF class
 Args    : none

=cut

sub fh {
    my ($self) = @_;

    # works for an instance as well as for a plain class name
    my $class = ref $self;
    $class = $self unless $class;

    my $glob = Symbol::gensym();
    tie ${$glob}, $class, $self;    # dereferenced globref => TIEHANDLE
    return $glob;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
595
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Destructor: calls close() on the object being destroyed.
sub DESTROY {
    my ($self) = @_;

    $self->close();
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
601
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# tie() constructor: wraps the given value under the 'gffio' key of a
# new hash blessed into $class.  READLINE and PRINT delegate to it.
sub TIEHANDLE {
    my $class = shift;
    my $gffio = shift;

    my %state = ( 'gffio' => $gffio );
    return bless \%state, $class;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
606
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Tied-handle read.  In list context, keeps calling next_feature() on
# the wrapped object until it returns a false value and returns all
# features collected; otherwise returns the result of one call.
sub READLINE {
    my ($self) = @_;
    my $gffio  = $self->{'gffio'};

    if( wantarray ) {
        my @features;
        while( my $feature = $gffio->next_feature() ) {
            push @features, $feature;
        }
        return @features;
    }
    return $gffio->next_feature();
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
614
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Tied-handle write: passes everything printed to the handle on to
# write_feature() of the wrapped object and returns its result.
sub PRINT {
    my ($self, @features) = @_;

    return $self->{'gffio'}->write_feature(@features);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
619
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
620 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
621