annotate variant_effect_predictor/Bio/Tools/GFF.pm @ 1:d6778b5d8382 draft default tip

Deleted selected files
author willmclaren
date Fri, 03 Aug 2012 10:05:43 -0400
parents 21066c0abaf5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 # $Id: GFF.pm,v 1.26 2002/11/24 21:35:40 jason Exp $
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 # BioPerl module for Bio::Tools::GFF
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5 # Cared for by the Bioperl core team
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 # Copyright Matthew Pocock
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 # You may distribute this module under the same terms as perl itself
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 # POD documentation - main docs before the code
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15 Bio::Tools::GFF - A Bio::SeqAnalysisParserI compliant GFF format parser
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17 =head1 SYNOPSIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 use Bio::Tools::GFF;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 # specify input via -fh or -file
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22 my $gffio = Bio::Tools::GFF->new(-fh => \*STDIN, -gff_version => 2);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23 my $feature;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24 # loop over the input stream
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25 while($feature = $gffio->next_feature()) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26 # do something with feature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28 $gffio->close();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30 # you can also obtain a GFF parser as a SeqAnalysisParserI in
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31 # HT analysis pipelines (see Bio::SeqAnalysisParserI and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32 # Bio::Factory::SeqAnalysisParserFactory)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33 my $factory = Bio::Factory::SeqAnalysisParserFactory->new();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34 my $parser = $factory->get_parser(-input => \*STDIN, -method => "gff");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35 while($feature = $parser->next_feature()) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36 # do something with feature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39 =head1 DESCRIPTION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41 This class provides a simple GFF parser and writer. In the sense of a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42 SeqAnalysisParser, it parses an input file or stream into SeqFeatureI
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43 objects, but is not in any way specific to a particular analysis
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44 program and the output that program produces.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46 That is, if you can get your analysis program to spit out GFF, here is
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47 your result parser.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49 =head1 FEEDBACK
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51 =head2 Mailing Lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53 User feedback is an integral part of the evolution of this and other
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54 Bioperl modules. Send your comments and suggestions preferably to one
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55 of the Bioperl mailing lists. Your participation is much appreciated.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
57 bioperl-l@bioperl.org - General discussion
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
58 http://bio.perl.org/MailList.html - About the mailing lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
59
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
60 =head2 Reporting Bugs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
61
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
62 Report bugs to the Bioperl bug tracking system to help us keep track
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
63 of the bugs and their resolution. Bug reports can be submitted via email
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
64 or the web:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
65
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
66 bioperl-bugs@bio.perl.org
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
67 http://bugzilla.bioperl.org/
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
68
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
69 =head1 AUTHOR - Matthew Pocock
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
70
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
71 Email mrp@sanger.ac.uk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
72
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
73 =head1 APPENDIX
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
74
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
75 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
76
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
77 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
78
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
79 # Let the code begin...
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
80
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
81 package Bio::Tools::GFF;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
82
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
83 use vars qw(@ISA);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
84 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
85
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
86 use Bio::Root::IO;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
87 use Bio::SeqAnalysisParserI;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
88 use Bio::SeqFeature::Generic;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
89
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
90 @ISA = qw(Bio::Root::Root Bio::SeqAnalysisParserI Bio::Root::IO);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
91
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
92 =head2 new
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
93
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
94 Title : new
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
95 Usage :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
96 Function: Creates a new instance. Recognized named parameters are -file, -fh,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
97 and -gff_version.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
98
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
99 Returns : a new object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
100 Args : named parameters
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
101
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
102
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
103 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
104
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Constructor.
#
# Builds the object through the parent constructor, extracts the
# -gff_version named parameter, initializes I/O from the same argument
# list, and stores the GFF version on the object.
#
# Args    : named parameters, passed unchanged to SUPER::new(),
#           _rearrange() and _initialize_io()
# Returns : the new object
# Throws  : calls $self->throw() when the requested version is neither
#           1 nor 2; a false or missing version falls back to 2
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);

    my ($gff_version) = $self->_rearrange([qw(GFF_VERSION)], @args);

    # initialize IO
    $self->_initialize_io(@args);

    # default to GFF2 when no (or a false) version was supplied
    $gff_version = 2 unless $gff_version;

    unless ( $gff_version == 1 || $gff_version == 2 ) {
        $self->throw("Can't build a GFF object with the unknown version ".
                     $gff_version);
    }

    $self->gff_version($gff_version);
    return $self;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
122
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
123 =head2 next_feature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
124
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
125 Title : next_feature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
126 Usage : $seqfeature = $gffio->next_feature();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
127 Function: Returns the next feature available in the input file or stream, or
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
128 undef if there are no more features.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
129 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
130 Returns : A Bio::SeqFeatureI implementing object, or undef if there are no
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
131 more features.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
132 Args : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
133
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
134 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
135
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Read lines via $self->_readline() until one is found that is not a
# comment ('#...'), not blank, and not a '//' line, then build a
# Bio::SeqFeature::Generic from it with from_gff_string().
#
# Args    : none
# Returns : the populated feature object, or undef once _readline()
#           returns a false value (input consumed)
sub next_feature {
    my ($self) = @_;

    my $line;

    # A false return from _readline() ends the loop; otherwise keep
    # reading while the line is one of the skippable kinds.
  LINE:
    while ( $line = $self->_readline() ) {
        next LINE if $line =~ /^\#/;       # comment / directive
        next LINE if $line =~ /^\s*$/;     # empty or whitespace-only
        next LINE if $line =~ /^\/\//;     # '//' line
        last LINE;
    }
    return undef unless $line;

    my $feature = Bio::SeqFeature::Generic->new();
    $self->from_gff_string($feature, $line);

    return $feature;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
155
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
156 =head2 from_gff_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
157
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
158 Title : from_gff_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
159 Usage : $gff->from_gff_string($feature, $gff_string);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
160 Function: Sets properties of a SeqFeatureI object from a GFF-formatted
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
161 string. Interpretation of the string depends on the version
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
162 that has been specified at initialization.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
163
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
164 This method is used by next_feature(). It actually dispatches to
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
165 one of the version-specific (private) methods.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
166 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
167 Returns : void
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
168 Args : A Bio::SeqFeatureI implementing object to be initialized
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
169 The GFF-formatted string to initialize it from
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
170
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
171 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
172
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Dispatch to the version-specific parser: _from_gff1_string() when
# gff_version() is 1, _from_gff2_string() otherwise.
#
# Args    : $feat       - feature object to be initialized
#           $gff_string - the GFF-formatted line to initialize it from
# Returns : whatever the selected parser returns
sub from_gff_string {
    my ($self, $feat, $gff_string) = @_;

    my $parser = $self->gff_version() == 1
               ? '_from_gff1_string'
               : '_from_gff2_string';

    return $self->$parser($feat, $gff_string);
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
182
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
183 =head2 _from_gff1_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
184
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
185 Title : _from_gff1_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
186 Usage :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
187 Function:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
188 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
189 Returns : void
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
190 Args : A Bio::SeqFeatureI implementing object to be initialized
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
191 The GFF-formatted string to initialize it from
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
192
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
193 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
194
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Populate $feat from one GFF (version 1) line.
#
# Args    : $gff    - this parser object (not used in the body)
#           $feat   - feature object; receives seq_id, source_tag,
#                     primary_tag, start, end, frame, score, strand and
#                     add_tag_value calls
#           $string - one line of GFF text; a trailing newline is chomped
# Throws  : calls $feat->throw() when the line has fewer than eight
#           tab-separated columns
sub _from_gff1_string {
    my ($gff, $feat, $string) = @_;
    chomp $string;

    my ($seqname, $source, $primary, $start, $end,
        $score, $strand, $frame, @group) = split(/\t/, $string);

    # the eighth column must exist for this to be a GFF line
    $feat->throw("[$string] does not look like GFF to me")
        unless defined $frame;

    # any non-integer frame (e.g. '.') is stored as 0
    $frame = 0 if $frame !~ /^\d+$/;

    $feat->seq_id($seqname);
    $feat->source_tag($source);
    $feat->primary_tag($primary);
    $feat->start($start);
    $feat->end($end);
    $feat->frame($frame);

    # a score of '.' means "no score": leave the feature's score untouched
    $feat->score($score) unless $score eq '.';

    # only the three recognized strand symbols set a strand
    my %strand_for = ( '-' => -1, '+' => 1, '.' => 0 );
    $feat->strand($strand_for{$strand}) if exists $strand_for{$strand};

    # remaining columns are either tag=value pairs or free 'group' text
    foreach my $entry ( @group ) {
        my ($tag, $value) = $entry =~ /(\S+)=(\S+)/;
        if ( defined $tag ) {
            $feat->add_tag_value($tag, $value);
        }
        else {
            $feat->add_tag_value('group', $entry);
        }
    }
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
228
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
229 =head2 _from_gff2_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
230
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
231 Title : _from_gff2_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
232 Usage :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
233 Function:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
234 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
235 Returns : void
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
236 Args : A Bio::SeqFeatureI implementing object to be initialized
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
237 The GFF2-formatted string to initialize it from
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
238
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
239
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
240 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
241
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Populate $feat from one GFF2-formatted line.
#
# Args    : $gff    - this parser object (not used in the body)
#           $feat   - feature object; receives seq_id, source_tag,
#                     primary_tag, start, end, frame, score, strand and
#                     add_tag_value calls
#           $string - one line of GFF2 text; a trailing newline is chomped
# Throws  : calls $feat->throw() when the line has fewer than eight
#           tab-separated columns
sub _from_gff2_string {
    my ($gff, $feat, $string) = @_;
    chomp($string);

    # According to the Sanger website, GFF2 should be single-tab separated
    # elements, and the free text at the end should contain text-translated
    # tab symbols but no "real" tabs, so splitting on tabs is safe. Anything
    # after the eighth column is rejoined into one attribute string, just in
    # case the rule against tab characters has been broken.
    my ($seqname, $source, $primary, $start, $end,
        $score, $strand, $frame, @attribs) = split(/\t+/, $string);
    my $attribs = join '', @attribs;

    if ( !defined $frame ) {
        $feat->throw("[$string] does not look like GFF2 to me");
    }

    $feat->seq_id($seqname);
    $feat->source_tag($source);
    $feat->primary_tag($primary);
    $feat->start($start);
    $feat->end($end);
    $feat->frame($frame);

    # a score of '.' means "no score": leave the feature's score untouched
    $feat->score($score) unless $score eq '.';

    if ( $strand eq '-' ) { $feat->strand(-1); }
    if ( $strand eq '+' ) { $feat->strand(1); }
    if ( $strand eq '.' ) { $feat->strand(0); }

    # Split the attribute field into tag/value groups with a small state
    # machine. Semicolons delimit groups and '#' starts a trailing comment,
    # but only outside double-quoted free text. Each group is accumulated
    # directly, so no placeholder token is ever inserted into the data and
    # quoted text is preserved exactly as written.
    my @key_vals  = ('');
    my $in_quotes = 0;
    foreach my $char ( split //, $attribs ) {
        if ( $char eq '"' ) {
            # flag up on entering quoted text, down on leaving it
            $in_quotes = $in_quotes ? 0 : 1;
        }
        elsif ( !$in_quotes ) {
            last if $char eq '#';          # rest of the line is a comment
            if ( $char eq ';' ) {          # end of the current group
                push @key_vals, '';
                next;
            }
        }
        $key_vals[-1] .= $char;
    }

    foreach my $pair ( @key_vals ) {
        # separate the key from the value(s); groups that do not start with
        # "<word><whitespace>" (including empty groups) yield no $values and
        # are skipped
        my (undef, $key, $values) = split /^\s*([\w\d]+)\s/, $pair;
        next unless defined $values;

        my @values;

        # free text is quoted, so match each free-text block and remove it
        # from the $values string (a tag may have more than one value)
        while ( $values =~ s/"(.*?)"// ) {
            push @values, $1;
        }

        # what is left over should be space-separated non-free-text values;
        # drop any empty strings which might result from the split
        push @values, grep { CORE::length($_) > 0 } split /\s+/, $values;

        foreach my $value ( @values ) {
            $feat->add_tag_value($key, $value);
        }
    }
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
333
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
334 =head2 write_feature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
335
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
336 Title : write_feature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
337 Usage : $gffio->write_feature($feature);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
338 Function: Writes the specified SeqFeatureI object in GFF format to the stream
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
339 associated with this instance.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
340 Returns : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
341 Args : An array of Bio::SeqFeatureI implementing objects to be serialized
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
342
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
343 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
344
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Serialize each given feature with gff_string() and hand the result,
# followed by a newline, to $self->_print().
#
# Args    : a list of feature objects to be written
# Returns : nothing meaningful
sub write_feature {
    my ($self, @features) = @_;

    for my $feat ( @features ) {
        my $record = $self->gff_string($feat) . "\n";
        $self->_print($record);
    }
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
351
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
352 =head2 gff_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
353
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
354 Title : gff_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
355 Usage : $gffstr = $gffio->gff_string($feature);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
356 Function: Obtain the GFF-formatted representation of a SeqFeatureI object.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
357 The formatting depends on the version specified at initialization.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
358
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
359 This method is used by write_feature(). It actually dispatches to
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
360 one of the version-specific (private) methods.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
361 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
362 Returns : A GFF-formatted string representation of the SeqFeature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
363 Args : A Bio::SeqFeatureI implementing object to be GFF-stringified
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
364
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
365 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
366
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Return the GFF text for one feature by delegating to the formatter that
# matches the configured version: _gff1_string() when gff_version() is 1,
# _gff2_string() for anything else.
#
# Args    : a Bio::SeqFeatureI implementing object
# Returns : the GFF-formatted string for that feature
sub gff_string {
    my ( $self, $feature ) = @_;

    return $self->gff_version() == 1
        ? $self->_gff1_string($feature)
        : $self->_gff2_string($feature);
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
376
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
377 =head2 _gff1_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
378
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
379 Title : _gff1_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
380 Usage : $gffstr = $gffio->_gff1_string($feature);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
381 Function:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
382 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
383 Returns : A GFF1-formatted string representation of the SeqFeature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
384 Args : A Bio::SeqFeatureI implementing object to be GFF-stringified
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
385
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
386 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
387
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Build the GFF version 1 line for a single feature.
#
# The result is eight tab-separated columns (sequence name, source tag,
# primary tag, start, end, score, strand, frame) followed by every tag
# value of the feature, each appended as " tag=value".
#
# Args    : $gff  - the invocant (not used inside this routine)
#           $feat - a Bio::SeqFeatureI implementing object
# Returns : the GFF1-formatted string, without a trailing newline
sub _gff1_string {
    my ( $gff, $feat ) = @_;

    # score() and frame() are optional on the feature; a missing method or
    # an undefined value is written as '.'.
    my $score = $feat->can('score') ? $feat->score() : undef;
    $score = '.' unless defined $score;

    my $frame = $feat->can('frame') ? $feat->frame() : undef;
    $frame = '.' unless defined $frame;

    # Map the numeric strand onto the GFF symbol.  strand() is read once so
    # that every branch tests the same value; any value other than
    # 0/undef, 1 or -1 is passed through unchanged.
    my $strand = $feat->strand();
    if ( !$strand ) {
        $strand = '.';
    }
    elsif ( $strand == 1 ) {
        $strand = '+';
    }
    elsif ( $strand == -1 ) {
        $strand = '-';
    }

    # Probe for the accessor that is actually invoked.  Testing
    # can('seqname') and then calling seq_id() dropped the name of features
    # that only offer seq_id() and died on features that only offer
    # seqname(), so seq_id() is preferred and seqname() is the fallback.
    my $name;
    if ( $feat->can('seq_id') ) {
        $name = $feat->seq_id();
    }
    elsif ( $feat->can('seqname') ) {
        $name = $feat->seqname();
    }
    $name ||= 'SEQ';

    my $str = join( "\t",
        $name,
        $feat->source_tag(),
        $feat->primary_tag(),
        $feat->start(),
        $feat->end(),
        $score,
        $strand,
        $frame );

    # Tag values are appended space-separated in "tag=value" form.
    foreach my $tag ( $feat->all_tags ) {
        foreach my $value ( $feat->each_tag_value($tag) ) {
            $str .= " $tag=$value";
        }
    }

    return $str;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
438
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
439 =head2 _gff2_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
440
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
441 Title : _gff2_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
442 Usage : $gffstr = $gffio->_gff2_string($feature);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
443 Function:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
444 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
445 Returns : A GFF2-formatted string representation of the SeqFeature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
446 Args : A Bio::SeqFeatureI implementing object to be GFF2-stringified
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
447
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
448 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
449
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Build the GFF version 2 line for a single feature.
#
# The first eight columns (sequence name, source tag, primary tag, start,
# end, score, strand, frame) are tab-separated exactly as for GFF1.  When
# the feature carries tags, a ninth tab-separated attribute column is
# added in the GFF2 tag/value syntax: "tag value value ; tag value ...",
# with no '=' sign and with free-text values enclosed in double quotes.
#
# Args    : $gff  - the invocant (not used inside this routine)
#           $feat - a Bio::SeqFeatureI implementing object
# Returns : the GFF2-formatted string, without a trailing newline
sub _gff2_string {
    my ( $gff, $feat ) = @_;

    # score() and frame() are optional on the feature; a missing method or
    # an undefined value is written as '.'.
    my $score = $feat->can('score') ? $feat->score() : undef;
    $score = '.' unless defined $score;

    my $frame = $feat->can('frame') ? $feat->frame() : undef;
    $frame = '.' unless defined $frame;

    # Map the numeric strand onto the GFF symbol.  strand() is read once so
    # that every branch tests the same value; any value other than
    # 0/undef, 1 or -1 is passed through unchanged.
    my $strand = $feat->strand();
    if ( !$strand ) {
        $strand = '.';
    }
    elsif ( $strand == 1 ) {
        $strand = '+';
    }
    elsif ( $strand == -1 ) {
        $strand = '-';
    }

    # Probe for the accessor that is actually invoked.  Testing
    # can('seqname') and then calling seq_id() dropped the name of features
    # that only offer seq_id() and died on features that only offer
    # seqname(), so seq_id() is preferred and seqname() is the fallback.
    my $name;
    if ( $feat->can('seq_id') ) {
        $name = $feat->seq_id();
    }
    elsif ( $feat->can('seqname') ) {
        $name = $feat->seqname();
    }
    $name ||= 'SEQ';

    my $str = join( "\t",
        $name,
        $feat->source_tag(),
        $feat->primary_tag(),
        $feat->start(),
        $feat->end(),
        $score,
        $strand,
        $frame );

    # GFF2 attribute column.  Per the GFF2 description quoted by the
    # original author (MW): the attribute field has a tag/value structure
    # flattened onto one line with semicolon separators, tags are standard
    # identifiers ([A-Za-z][A-Za-z0-9_]*), and free-text values must be
    # quoted with double quotes.
    my @all_tags = $feat->all_tags;
    if (@all_tags) {    # only add the column when there is something in it
        $str .= "\t";
        foreach my $tag (@all_tags) {

            # All values of this tag, each followed by a space.  Starts as
            # '' so a tag without values does not interpolate undef below.
            my $valuestr = '';
            foreach my $raw ( $feat->each_tag_value($tag) ) {
                my $value = $raw;    # work on a copy, never on the loop alias

                if ( !defined $value ) {

                    # Checked before the pattern match so that an undefined
                    # value is never matched against the regex; it is
                    # written as empty double quotes.
                    $value = '""';
                }
                elsif ( $value =~ /[^A-Za-z0-9_]/ ) {

                    # Anything beyond plain identifier characters is free
                    # text: escape tabs/newlines and quote it.  The space
                    # inside the closing literal is kept so the emitted
                    # bytes stay exactly as before.
                    $value =~ s/\t/\\t/g;
                    $value =~ s/\n/\\n/g;
                    $value = '"' . $value . '" ';
                }

                # Trailing space separates multiple values of one tag
                # (allowed in GFF2 and .ace format).
                $valuestr .= $value . " ";
            }
            $str .= "$tag $valuestr ; ";    # semicolon delimited, no '='
        }

        # Drop the "; " left behind by the last tag.
        $str =~ s/; \z//;
    }
    return $str;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
533
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
534 =head2 gff_version
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
535
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
536 Title : gff_version
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
537 Usage : $gffversion = $gffio->gff_version
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
538 Function:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
539 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
540 Returns : The GFF version this parser will accept and emit.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
541 Args : optional GFF version to set (1 or 2); any other value is ignored
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
542
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
543 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
544
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Combined getter/setter for the GFF version stored under the
# 'GFF_VERSION' key of the object.  A new value is stored only when it is
# defined and numerically equal to 1 or 2; anything else leaves the stored
# version untouched.
#
# Args    : optional version number
# Returns : the currently stored version
sub gff_version {
    my $self  = shift;
    my $value = shift;

    if ( defined $value ) {
        $self->{'GFF_VERSION'} = $value if $value == 1 || $value == 2;
    }

    return $self->{'GFF_VERSION'};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
552
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
553 # Make filehandles
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
554
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
555 =head2 newFh
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
556
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
557 Title : newFh
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
558 Usage : $fh = Bio::Tools::GFF->newFh(-file=>$filename,-format=>'Format')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
559 Function: does a new() followed by an fh()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
560 Example : $fh = Bio::Tools::GFF->newFh(-file=>$filename,-format=>'Format')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
561 $feature = <$fh>; # read a feature object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
562 print $fh $feature ; # write a feature object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
563 Returns : filehandle tied to the Bio::Tools::GFF class
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
564 Args :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
565
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
566 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
567
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Convenience constructor: build an object with new(), passing all
# arguments through, and hand back the tied filehandle from its fh()
# method.  Returns nothing (empty list / undef) when new() gives a false
# value.
sub newFh {
    my $class = shift;
    my $self  = $class->new(@_);
    return $self ? $self->fh : ();
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
573
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
574 =head2 fh
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
575
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
576 Title : fh
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
577 Usage : $obj->fh
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
578 Function:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
579 Example : $fh = $obj->fh; # make a tied filehandle
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
580 $feature = <$fh>; # read a feature object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
581 print $fh $feature; # write a feature object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
582 Returns : filehandle tied to Bio::Tools::GFF class
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
583 Args : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
584
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
585 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
586
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
587
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Create a fresh anonymous glob and tie it to this object's class, passing
# the object itself to TIEHANDLE.  The returned glob reference can then be
# used as a filehandle whose reads and prints are routed through the tie
# methods of that class.
sub fh {
    my ($self) = @_;
    my $class  = ref($self) || $self;
    my $handle = Symbol::gensym();
    tie $$handle, $class, $self;
    return $handle;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
595
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Destructor: call close() on the object when it is destroyed.
sub DESTROY {
    my ($self) = @_;
    $self->close();
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
601
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# tie() constructor: wrap the supplied value under the 'gffio' key of a
# new hash blessed into the given class.
sub TIEHANDLE {
    my $class = shift;
    my $gffio = shift;

    my %state = ( 'gffio' => $gffio );
    return bless \%state, $class;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
606
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# tie() read hook.  Outside list context it returns the result of a single
# next_feature() call on the wrapped 'gffio' object; in list context it
# keeps calling next_feature() until a false value comes back and returns
# everything collected.
sub READLINE {
    my $self  = shift;
    my $gffio = $self->{'gffio'};

    return $gffio->next_feature() unless wantarray;

    my @features;
    while ( my $feature = $gffio->next_feature() ) {
        push @features, $feature;
    }
    return @features;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
614
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# tie() print hook: forward everything printed to the handle to
# write_feature() of the wrapped 'gffio' object.
sub PRINT {
    my $tied  = shift;
    my $gffio = $tied->{'gffio'};
    return $gffio->write_feature(@_);
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
619
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
620 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
621