0
|
1 # $Id: Generic.pm,v 1.74.2.1 2003/09/09 20:12:37 lstein Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::SeqFeature::Generic
|
|
4 #
|
|
5 # Cared for by Ewan Birney <birney@sanger.ac.uk>
|
|
6 #
|
|
7 # Copyright Ewan Birney
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::SeqFeature::Generic - Generic SeqFeature
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 $feat = new Bio::SeqFeature::Generic ( -start => 10, -end => 100,
|
|
20 -strand => -1, -primary => 'repeat',
|
|
21 -source_tag => 'repeatmasker',
|
|
22 -score => 1000,
|
|
23 -tag => {
|
|
24 new => 1,
|
|
25 author => 'someone',
|
|
26 sillytag => 'this is silly!' } );
|
|
27
|
|
28 $feat = new Bio::SeqFeature::Generic ( -gff_string => $string );
|
|
29 # if you want explicitly GFF1
|
|
30 $feat = new Bio::SeqFeature::Generic ( -gff1_string => $string );
|
|
31
|
|
32 # add it to an annotated sequence
|
|
33
|
|
34 $annseq->add_SeqFeature($feat);
|
|
35
|
|
36
|
|
37
|
|
38 =head1 DESCRIPTION
|
|
39
|
|
40 Bio::SeqFeature::Generic is a generic implementation for the
|
|
41 Bio::SeqFeatureI interface, providing a simple object to provide all
|
|
42 the information for a feature on a sequence.
|
|
43
|
|
44 For many Features, this is all you will need to use (for example, this
|
|
45 is fine for Repeats in DNA sequence or Domains in protein
|
|
46 sequence). For other features, which have more structure, this is a
|
|
47 good base class to extend using inheritence to have new things: this
|
|
48 is what is done in the Bio::SeqFeature::Gene,
|
|
49 Bio::SeqFeature::Transcript and Bio::SeqFeature::Exon, which provide
|
|
50 well coordinated classes to represent genes on DNA sequence (for
|
|
51 example, you can get the protein sequence out from a transcript
|
|
52 class).
|
|
53
|
|
54 For many Features, you want to add some piece of information, for
|
|
55 example a common one is that this feature is 'new' whereas other
|
|
56 features are 'old'. The tag system, which here is implemented using a
|
|
57 hash can be used here. You can use the tag system to extend the
|
|
58 SeqFeature::Generic programmatically: that is, you know that you have
|
|
59 read in more information into the tag 'mytag' which you can then
|
|
60 retrieve. This means you do not need to know how to write inherieted
|
|
61 Perl to provide more complex information on a feature, and/or, if you
|
|
62 do know but you do not want to write a new class every time you need
|
|
63 some extra piece of information, you can use the tag system to easily
|
|
64 store and then retrieve information.
|
|
65
|
|
66 The tag system can be written in/out of GFF format, and also into EMBL
|
|
67 format via the SeqIO system
|
|
68
|
|
69 =head1 Implemented Interfaces
|
|
70
|
|
71 This class implementes the following interfaces.
|
|
72
|
|
73 =over 4
|
|
74
|
|
75 =item Bio::SeqFeatureI
|
|
76
|
|
77 Note that this includes implementing Bio::RangeI.
|
|
78
|
|
79 =item Bio::AnnotatableI
|
|
80
|
|
81 =item Bio::FeatureHolderI
|
|
82
|
|
83 Features held by a feature are essentially sub-features.
|
|
84
|
|
85 =back
|
|
86
|
|
87 =head1 FEEDBACK
|
|
88
|
|
89 =head2 Mailing Lists
|
|
90
|
|
91 User feedback is an integral part of the evolution of this and other
|
|
92 Bioperl modules. Send your comments and suggestions preferably to one
|
|
93 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
94
|
|
95 bioperl-l@bioperl.org - General discussion
|
|
96 http://bio.perl.org/MailList.html - About the mailing lists
|
|
97
|
|
98 =head2 Reporting Bugs
|
|
99
|
|
100 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
101 the bugs and their resolution. Bug reports can be submitted via email
|
|
102 or the web:
|
|
103
|
|
104 bioperl-bugs@bio.perl.org
|
|
105 http://bugzilla.bioperl.org/
|
|
106
|
|
107 =head1 AUTHOR - Ewan Birney
|
|
108
|
|
109 Ewan Birney E<lt>birney@sanger.ac.ukE<gt>
|
|
110
|
|
111 =head1 DEVELOPERS
|
|
112
|
|
113 This class has been written with an eye out of inheritence. The fields
|
|
114 the actual object hash are:
|
|
115
|
|
116 _gsf_tag_hash = reference to a hash for the tags
|
|
117 _gsf_sub_array = reference to an array for subfeatures
|
|
118
|
|
119 =head1 APPENDIX
|
|
120
|
|
121 The rest of the documentation details each of the object
|
|
122 methods. Internal methods are usually preceded with a _
|
|
123
|
|
124 =cut
|
|
125
|
|
126
|
|
127 # Let the code begin...
|
|
128
|
|
129
|
|
130 package Bio::SeqFeature::Generic;
|
|
131 use vars qw(@ISA);
|
|
132 use strict;
|
|
133
|
|
134 use Bio::Root::Root;
|
|
135 use Bio::SeqFeatureI;
|
|
136 use Bio::AnnotatableI;
|
|
137 use Bio::FeatureHolderI;
|
|
138 use Bio::Annotation::Collection;
|
|
139 use Bio::Location::Simple;
|
|
140 use Bio::Tools::GFF;
|
|
141 #use Tie::IxHash;
|
|
142
|
|
143 @ISA = qw(Bio::Root::Root Bio::SeqFeatureI
|
|
144 Bio::AnnotatableI Bio::FeatureHolderI);
|
|
145
|
|
146 sub new {
|
|
147 my ( $caller, @args) = @_;
|
|
148 my ($self) = $caller->SUPER::new(@args);
|
|
149
|
|
150 $self->{'_parse_h'} = {};
|
|
151 $self->{'_gsf_tag_hash'} = {};
|
|
152 # tie %{$self->{'_gsf_tag_hash'}}, "Tie::IxHash";
|
|
153
|
|
154 # bulk-set attributes
|
|
155 $self->set_attributes(@args);
|
|
156
|
|
157 # done - we hope
|
|
158 return $self;
|
|
159 }
|
|
160
|
|
161
|
|
162 =head2 set_attributes
|
|
163
|
|
164 Title : set_attributes
|
|
165 Usage :
|
|
166 Function: Sets a whole array of parameters at once.
|
|
167 Example :
|
|
168 Returns : none
|
|
169 Args : Named parameters, in the form as they would otherwise be passed
|
|
170 to new(). Currently recognized are:
|
|
171
|
|
172 -start start position
|
|
173 -end end position
|
|
174 -strand strand
|
|
175 -primary primary tag
|
|
176 -source source tag
|
|
177 -frame frame
|
|
178 -score score value
|
|
179 -tag a reference to a tag/value hash
|
|
180 -gff_string GFF v.2 string to initialize from
|
|
181 -gff1_string GFF v.1 string to initialize from
|
|
182 -seq_id the display name of the sequence
|
|
183 -annotation the AnnotationCollectionI object
|
|
184 -location the LocationI object
|
|
185
|
|
186 =cut
|
|
187
|
|
188 sub set_attributes {
|
|
189 my ($self,@args) = @_;
|
|
190 my ($start, $end, $strand, $primary_tag, $source_tag, $primary, $source, $frame,
|
|
191 $score, $tag, $gff_string, $gff1_string,
|
|
192 $seqname, $seqid, $annot, $location,$display_name) =
|
|
193 $self->_rearrange([qw(START
|
|
194 END
|
|
195 STRAND
|
|
196 PRIMARY_TAG
|
|
197 SOURCE_TAG
|
|
198 PRIMARY
|
|
199 SOURCE
|
|
200 FRAME
|
|
201 SCORE
|
|
202 TAG
|
|
203 GFF_STRING
|
|
204 GFF1_STRING
|
|
205 SEQNAME
|
|
206 SEQ_ID
|
|
207 ANNOTATION
|
|
208 LOCATION
|
|
209 DISPLAY_NAME
|
|
210 )], @args);
|
|
211 $location && $self->location($location);
|
|
212 $gff_string && $self->_from_gff_string($gff_string);
|
|
213 $gff1_string && do {
|
|
214 $self->gff_format(Bio::Tools::GFF->new('-gff_version' => 1));
|
|
215 $self->_from_gff_stream($gff1_string);
|
|
216 };
|
|
217 $primary_tag && $self->primary_tag($primary_tag);
|
|
218 $source_tag && $self->source_tag($source_tag);
|
|
219 $primary && $self->primary_tag($primary);
|
|
220 $source && $self->source_tag($source);
|
|
221 defined $start && $self->start($start);
|
|
222 defined $end && $self->end($end);
|
|
223 defined $strand && $self->strand($strand);
|
|
224 defined $frame && $self->frame($frame);
|
|
225 $score && $self->score($score);
|
|
226 $annot && $self->annotation($annot);
|
|
227 defined $display_name && $self->display_name($display_name);
|
|
228 if($seqname) {
|
|
229 $self->warn("-seqname is deprecated. Please use -seq_id instead.");
|
|
230 $seqid = $seqname unless $seqid;
|
|
231 }
|
|
232 $seqid && $self->seq_id($seqid);
|
|
233 $tag && do {
|
|
234 foreach my $t ( keys %$tag ) {
|
|
235 $self->add_tag_value($t,$tag->{$t});
|
|
236 }
|
|
237 };
|
|
238 }
|
|
239
|
|
240
|
|
241 =head2 direct_new
|
|
242
|
|
243 Title : direct_new
|
|
244 Usage : my $obj = Bio::SeqFeature::Generic->direct_new
|
|
245 Function: create a blessed hash - for performance improvement in
|
|
246 object creation
|
|
247 Returns : Bio::SeqFeature::Generic object
|
|
248 Args : none
|
|
249
|
|
250
|
|
251 =cut
|
|
252
|
|
253 sub direct_new {
|
|
254 my ( $class) = @_;
|
|
255 my ($self) = {};
|
|
256
|
|
257 bless $self,$class;
|
|
258
|
|
259 return $self;
|
|
260 }
|
|
261
|
|
262 =head2 location
|
|
263
|
|
264 Title : location
|
|
265 Usage : my $location = $seqfeature->location()
|
|
266 Function: returns a location object suitable for identifying location
|
|
267 of feature on sequence or parent feature
|
|
268 Returns : Bio::LocationI object
|
|
269 Args : [optional] Bio::LocationI object to set the value to.
|
|
270
|
|
271
|
|
272 =cut
|
|
273
|
|
274 sub location {
|
|
275 my($self, $value ) = @_;
|
|
276
|
|
277 if (defined($value)) {
|
|
278 unless (ref($value) and $value->isa('Bio::LocationI')) {
|
|
279 $self->throw("object $value pretends to be a location but ".
|
|
280 "does not implement Bio::LocationI");
|
|
281 }
|
|
282 $self->{'_location'} = $value;
|
|
283 }
|
|
284 elsif (! $self->{'_location'}) {
|
|
285 # guarantees a real location object is returned every time
|
|
286 $self->{'_location'} = Bio::Location::Simple->new();
|
|
287 }
|
|
288 return $self->{'_location'};
|
|
289 }
|
|
290
|
|
291
|
|
292 =head2 start
|
|
293
|
|
294 Title : start
|
|
295 Usage : $start = $feat->start
|
|
296 $feat->start(20)
|
|
297 Function: Get/set on the start coordinate of the feature
|
|
298 Returns : integer
|
|
299 Args : none
|
|
300
|
|
301
|
|
302 =cut
|
|
303
|
|
304 sub start {
|
|
305 my ($self,$value) = @_;
|
|
306 return $self->location->start($value);
|
|
307 }
|
|
308
|
|
309 =head2 end
|
|
310
|
|
311 Title : end
|
|
312 Usage : $end = $feat->end
|
|
313 $feat->end($end)
|
|
314 Function: get/set on the end coordinate of the feature
|
|
315 Returns : integer
|
|
316 Args : none
|
|
317
|
|
318
|
|
319 =cut
|
|
320
|
|
321 sub end {
|
|
322 my ($self,$value) = @_;
|
|
323 return $self->location->end($value);
|
|
324 }
|
|
325
|
|
326 =head2 length
|
|
327
|
|
328 Title : length
|
|
329 Usage :
|
|
330 Function:
|
|
331 Example :
|
|
332 Returns :
|
|
333 Args :
|
|
334
|
|
335
|
|
336 =cut
|
|
337
|
|
338 sub length {
|
|
339 my ($self) = @_;
|
|
340 return $self->end - $self->start() + 1;
|
|
341 }
|
|
342
|
|
343 =head2 strand
|
|
344
|
|
345 Title : strand
|
|
346 Usage : $strand = $feat->strand()
|
|
347 $feat->strand($strand)
|
|
348 Function: get/set on strand information, being 1,-1 or 0
|
|
349 Returns : -1,1 or 0
|
|
350 Args : none
|
|
351
|
|
352
|
|
353 =cut
|
|
354
|
|
355 sub strand {
|
|
356 my ($self,$value) = @_;
|
|
357 return $self->location->strand($value);
|
|
358 }
|
|
359
|
|
360 =head2 score
|
|
361
|
|
362 Title : score
|
|
363 Usage : $score = $feat->score()
|
|
364 $feat->score($score)
|
|
365 Function: get/set on score information
|
|
366 Returns : float
|
|
367 Args : none if get, the new value if set
|
|
368
|
|
369
|
|
370 =cut
|
|
371
|
|
372 sub score {
|
|
373 my ($self,$value) = @_;
|
|
374
|
|
375 if (defined($value)) {
|
|
376 if ( $value !~ /^[+-]?\d+\.?\d*(e-\d+)?/ ) {
|
|
377 $self->throw("'$value' is not a valid score");
|
|
378 }
|
|
379 $self->{'_gsf_score'} = $value;
|
|
380 }
|
|
381
|
|
382 return $self->{'_gsf_score'};
|
|
383 }
|
|
384
|
|
385 =head2 frame
|
|
386
|
|
387 Title : frame
|
|
388 Usage : $frame = $feat->frame()
|
|
389 $feat->frame($frame)
|
|
390 Function: get/set on frame information
|
|
391 Returns : 0,1,2, '.'
|
|
392 Args : none if get, the new value if set
|
|
393
|
|
394
|
|
395 =cut
|
|
396
|
|
397 sub frame {
|
|
398 my ($self,$value) = @_;
|
|
399
|
|
400 if ( defined $value ) {
|
|
401 if ( $value !~ /^[0-2.]$/ ) {
|
|
402 $self->throw("'$value' is not a valid frame");
|
|
403 }
|
|
404 if( $value eq '.' ) { $value = '.'; }
|
|
405 $self->{'_gsf_frame'} = $value;
|
|
406 }
|
|
407 return $self->{'_gsf_frame'};
|
|
408 }
|
|
409
|
|
410 =head2 primary_tag
|
|
411
|
|
412 Title : primary_tag
|
|
413 Usage : $tag = $feat->primary_tag()
|
|
414 $feat->primary_tag('exon')
|
|
415 Function: get/set on the primary tag for a feature,
|
|
416 eg 'exon'
|
|
417 Returns : a string
|
|
418 Args : none
|
|
419
|
|
420
|
|
421 =cut
|
|
422
|
|
423 sub primary_tag {
|
|
424 my ($self,$value) = @_;
|
|
425 if ( defined $value ) {
|
|
426 $self->{'_primary_tag'} = $value;
|
|
427 }
|
|
428 return $self->{'_primary_tag'};
|
|
429 }
|
|
430
|
|
431 =head2 source_tag
|
|
432
|
|
433 Title : source_tag
|
|
434 Usage : $tag = $feat->source_tag()
|
|
435 $feat->source_tag('genscan');
|
|
436 Function: Returns the source tag for a feature,
|
|
437 eg, 'genscan'
|
|
438 Returns : a string
|
|
439 Args : none
|
|
440
|
|
441
|
|
442 =cut
|
|
443
|
|
444 sub source_tag {
|
|
445 my ($self,$value) = @_;
|
|
446
|
|
447 if( defined $value ) {
|
|
448 $self->{'_source_tag'} = $value;
|
|
449 }
|
|
450 return $self->{'_source_tag'};
|
|
451 }
|
|
452
|
|
453 =head2 has_tag
|
|
454
|
|
455 Title : has_tag
|
|
456 Usage : $value = $self->has_tag('some_tag')
|
|
457 Function: Tests wether a feature contaings a tag
|
|
458 Returns : TRUE if the SeqFeature has the tag,
|
|
459 and FALSE otherwise.
|
|
460 Args : The name of a tag
|
|
461
|
|
462
|
|
463 =cut
|
|
464
|
|
465 sub has_tag {
|
|
466 my ($self, $tag) = @_;
|
|
467 return exists $self->{'_gsf_tag_hash'}->{$tag};
|
|
468 }
|
|
469
|
|
470 =head2 add_tag_value
|
|
471
|
|
472 Title : add_tag_value
|
|
473 Usage : $self->add_tag_value('note',"this is a note");
|
|
474 Returns : TRUE on success
|
|
475 Args : tag (string) and value (any scalar)
|
|
476
|
|
477
|
|
478 =cut
|
|
479
|
|
480 sub add_tag_value {
|
|
481 my ($self, $tag, $value) = @_;
|
|
482 $self->{'_gsf_tag_hash'}->{$tag} ||= [];
|
|
483 push(@{$self->{'_gsf_tag_hash'}->{$tag}},$value);
|
|
484 }
|
|
485
|
|
486
|
|
487 =head2 get_tag_values
|
|
488
|
|
489 Title : get_tag_values
|
|
490 Usage : @values = $gsf->get_tag_values('note');
|
|
491 Function: Returns a list of all the values stored
|
|
492 under a particular tag.
|
|
493 Returns : A list of scalars
|
|
494 Args : The name of the tag
|
|
495
|
|
496
|
|
497 =cut
|
|
498
|
|
499 sub get_tag_values {
|
|
500 my ($self, $tag) = @_;
|
|
501
|
|
502 if( ! defined $tag ) { return (); }
|
|
503 if ( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) {
|
|
504 $self->throw("asking for tag value that does not exist $tag");
|
|
505 }
|
|
506 return @{$self->{'_gsf_tag_hash'}->{$tag}};
|
|
507 }
|
|
508
|
|
509
|
|
510 =head2 get_all_tags
|
|
511
|
|
512 Title : get_all_tags
|
|
513 Usage : @tags = $feat->get_all_tags()
|
|
514 Function: Get a list of all the tags in a feature
|
|
515 Returns : An array of tag names
|
|
516 Args : none
|
|
517
|
|
518
|
|
519 =cut
|
|
520
|
|
521 sub get_all_tags {
|
|
522 my ($self, @args) = @_;
|
|
523 return keys %{ $self->{'_gsf_tag_hash'}};
|
|
524 }
|
|
525
|
|
526 =head2 remove_tag
|
|
527
|
|
528 Title : remove_tag
|
|
529 Usage : $feat->remove_tag('some_tag')
|
|
530 Function: removes a tag from this feature
|
|
531 Returns : the array of values for this tag before removing it
|
|
532 Args : tag (string)
|
|
533
|
|
534
|
|
535 =cut
|
|
536
|
|
537 sub remove_tag {
|
|
538 my ($self, $tag) = @_;
|
|
539
|
|
540 if ( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) {
|
|
541 $self->throw("trying to remove a tag that does not exist: $tag");
|
|
542 }
|
|
543 my @vals = @{$self->{'_gsf_tag_hash'}->{$tag}};
|
|
544 delete $self->{'_gsf_tag_hash'}->{$tag};
|
|
545 return @vals;
|
|
546 }
|
|
547
|
|
548 =head2 attach_seq
|
|
549
|
|
550 Title : attach_seq
|
|
551 Usage : $sf->attach_seq($seq)
|
|
552 Function: Attaches a Bio::Seq object to this feature. This
|
|
553 Bio::Seq object is for the *entire* sequence: ie
|
|
554 from 1 to 10000
|
|
555 Example :
|
|
556 Returns : TRUE on success
|
|
557 Args : a Bio::PrimarySeqI compliant object
|
|
558
|
|
559
|
|
560 =cut
|
|
561
|
|
562 sub attach_seq {
|
|
563 my ($self, $seq) = @_;
|
|
564
|
|
565 if ( ! ($seq && ref($seq) && $seq->isa("Bio::PrimarySeqI")) ) {
|
|
566 $self->throw("Must attach Bio::PrimarySeqI objects to SeqFeatures");
|
|
567 }
|
|
568
|
|
569 $self->{'_gsf_seq'} = $seq;
|
|
570
|
|
571 # attach to sub features if they want it
|
|
572 foreach ( $self->sub_SeqFeature() ) {
|
|
573 $_->attach_seq($seq);
|
|
574 }
|
|
575
|
|
576 return 1;
|
|
577 }
|
|
578
|
|
579 =head2 seq
|
|
580
|
|
581 Title : seq
|
|
582 Usage : $tseq = $sf->seq()
|
|
583 Function: returns the truncated sequence (if there) for this
|
|
584 Example :
|
|
585 Returns : sub seq (a Bio::PrimarySeqI compliant object) on attached sequence
|
|
586 bounded by start & end, or undef if there is no sequence attached
|
|
587 Args : none
|
|
588
|
|
589
|
|
590 =cut
|
|
591
|
|
592 sub seq {
|
|
593 my ($self, $arg) = @_;
|
|
594
|
|
595 if ( defined $arg ) {
|
|
596 $self->throw("Calling SeqFeature::Generic->seq with an argument. You probably want attach_seq");
|
|
597 }
|
|
598
|
|
599 if ( ! exists $self->{'_gsf_seq'} ) {
|
|
600 return undef;
|
|
601 }
|
|
602
|
|
603 # assumming our seq object is sensible, it should not have to yank
|
|
604 # the entire sequence out here.
|
|
605
|
|
606 my $seq = $self->{'_gsf_seq'}->trunc($self->start(), $self->end());
|
|
607
|
|
608
|
|
609 if ( $self->strand == -1 ) {
|
|
610
|
|
611 # ok. this does not work well (?)
|
|
612 #print STDERR "Before revcom", $seq->str, "\n";
|
|
613 $seq = $seq->revcom;
|
|
614 #print STDERR "After revcom", $seq->str, "\n";
|
|
615 }
|
|
616
|
|
617 return $seq;
|
|
618 }
|
|
619
|
|
620 =head2 entire_seq
|
|
621
|
|
622 Title : entire_seq
|
|
623 Usage : $whole_seq = $sf->entire_seq()
|
|
624 Function: gives the entire sequence that this seqfeature is attached to
|
|
625 Example :
|
|
626 Returns : a Bio::PrimarySeqI compliant object, or undef if there is no
|
|
627 sequence attached
|
|
628 Args :
|
|
629
|
|
630
|
|
631 =cut
|
|
632
|
|
633 sub entire_seq {
|
|
634 my ($self) = @_;
|
|
635
|
|
636 return $self->{'_gsf_seq'};
|
|
637 }
|
|
638
|
|
639
|
|
640 =head2 seq_id
|
|
641
|
|
642 Title : seq_id
|
|
643 Usage : $obj->seq_id($newval)
|
|
644 Function: There are many cases when you make a feature that you
|
|
645 do know the sequence name, but do not know its actual
|
|
646 sequence. This is an attribute such that you can store
|
|
647 the ID (e.g., display_id) of the sequence.
|
|
648
|
|
649 This attribute should *not* be used in GFF dumping, as
|
|
650 that should come from the collection in which the seq
|
|
651 feature was found.
|
|
652 Returns : value of seq_id
|
|
653 Args : newvalue (optional)
|
|
654
|
|
655
|
|
656 =cut
|
|
657
|
|
658 sub seq_id {
|
|
659 my ($obj,$value) = @_;
|
|
660 if ( defined $value ) {
|
|
661 $obj->{'_gsf_seq_id'} = $value;
|
|
662 }
|
|
663 return $obj->{'_gsf_seq_id'};
|
|
664 }
|
|
665
|
|
666 =head2 display_name
|
|
667
|
|
668 Title : display_name
|
|
669 Usage : $featname = $obj->display_name
|
|
670 Function: Implements the display_name() method, which is a human-readable
|
|
671 name for the feature.
|
|
672 Returns : value of display_name (a string)
|
|
673 Args : Optionally, on set the new value or undef
|
|
674
|
|
675 =cut
|
|
676
|
|
677 sub display_name{
|
|
678 my $self = shift;
|
|
679
|
|
680 return $self->{'display_name'} = shift if @_;
|
|
681 return $self->{'display_name'};
|
|
682 }
|
|
683
|
|
684 =head1 Methods for implementing Bio::AnnotatableI
|
|
685
|
|
686 =cut
|
|
687
|
|
688 =head2 annotation
|
|
689
|
|
690 Title : annotation
|
|
691 Usage : $obj->annotation($annot_obj)
|
|
692 Function: Get/set the annotation collection object for annotating this
|
|
693 feature.
|
|
694
|
|
695 Example :
|
|
696 Returns : A Bio::AnnotationCollectionI object
|
|
697 Args : newvalue (optional)
|
|
698
|
|
699
|
|
700 =cut
|
|
701
|
|
702 sub annotation {
|
|
703 my ($obj,$value) = @_;
|
|
704
|
|
705 # we are smart if someone references the object and there hasn't been
|
|
706 # one set yet
|
|
707 if(defined $value || ! defined $obj->{'annotation'} ) {
|
|
708 $value = new Bio::Annotation::Collection unless ( defined $value );
|
|
709 $obj->{'annotation'} = $value;
|
|
710 }
|
|
711 return $obj->{'annotation'};
|
|
712 }
|
|
713
|
|
714 =head1 Methods to implement Bio::FeatureHolderI
|
|
715
|
|
716 This includes methods for retrieving, adding, and removing
|
|
717 features. Since this is already a feature, features held by this
|
|
718 feature holder are essentially sub-features.
|
|
719
|
|
720 =cut
|
|
721
|
|
722 =head2 get_SeqFeatures
|
|
723
|
|
724 Title : get_SeqFeatures
|
|
725 Usage : @feats = $feat->get_SeqFeatures();
|
|
726 Function: Returns an array of sub Sequence Features
|
|
727 Returns : An array
|
|
728 Args : none
|
|
729
|
|
730
|
|
731 =cut
|
|
732
|
|
733 sub get_SeqFeatures {
|
|
734 my ($self) = @_;
|
|
735
|
|
736 if ($self->{'_gsf_sub_array'}) {
|
|
737 return @{$self->{'_gsf_sub_array'}};
|
|
738 } else {
|
|
739 return;
|
|
740 }
|
|
741 }
|
|
742
|
|
743 =head2 add_SeqFeature
|
|
744
|
|
745 Title : add_SeqFeature
|
|
746 Usage : $feat->add_SeqFeature($subfeat);
|
|
747 $feat->add_SeqFeature($subfeat,'EXPAND')
|
|
748 Function: adds a SeqFeature into the subSeqFeature array.
|
|
749 with no 'EXPAND' qualifer, subfeat will be tested
|
|
750 as to whether it lies inside the parent, and throw
|
|
751 an exception if not.
|
|
752
|
|
753 If EXPAND is used, the parent's start/end/strand will
|
|
754 be adjusted so that it grows to accommodate the new
|
|
755 subFeature
|
|
756 Returns : nothing
|
|
757 Args : An object which has the SeqFeatureI interface
|
|
758
|
|
759
|
|
760 =cut
|
|
761
|
|
762 #'
|
|
763 sub add_SeqFeature{
|
|
764 my ($self,$feat,$expand) = @_;
|
|
765
|
|
766 if ( !$feat->isa('Bio::SeqFeatureI') ) {
|
|
767 $self->warn("$feat does not implement Bio::SeqFeatureI. Will add it anyway, but beware...");
|
|
768 }
|
|
769
|
|
770 if($expand && ($expand eq 'EXPAND')) {
|
|
771 $self->_expand_region($feat);
|
|
772 } else {
|
|
773 if ( !$self->contains($feat) ) {
|
|
774 $self->throw("$feat is not contained within parent feature, and expansion is not valid");
|
|
775 }
|
|
776 }
|
|
777
|
|
778 $self->{'_gsf_sub_array'} = [] unless exists($self->{'_gsf_sub_array'});
|
|
779 push(@{$self->{'_gsf_sub_array'}},$feat);
|
|
780
|
|
781 }
|
|
782
|
|
783 =head2 remove_SeqFeatures
|
|
784
|
|
785 Title : remove_SeqFeatures
|
|
786 Usage : $sf->remove_SeqFeatures
|
|
787 Function: Removes all sub SeqFeatures
|
|
788
|
|
789 If you want to remove only a subset, remove that subset from the
|
|
790 returned array, and add back the rest.
|
|
791
|
|
792 Example :
|
|
793 Returns : The array of Bio::SeqFeatureI implementing sub-features that was
|
|
794 deleted from this feature.
|
|
795 Args : none
|
|
796
|
|
797
|
|
798 =cut
|
|
799
|
|
800 sub remove_SeqFeatures {
|
|
801 my ($self) = @_;
|
|
802
|
|
803 my @subfeats = @{$self->{'_gsf_sub_array'}};
|
|
804 $self->{'_gsf_sub_array'} = []; # zap the array implicitly.
|
|
805 return @subfeats;
|
|
806 }
|
|
807
|
|
808 =head1 GFF-related methods
|
|
809
|
|
810 =cut
|
|
811
|
|
812 =head2 gff_format
|
|
813
|
|
814 Title : gff_format
|
|
815 Usage : # get:
|
|
816 $gffio = $feature->gff_format();
|
|
817 # set (change the default version of GFF2):
|
|
818 $feature->gff_format(Bio::Tools::GFF->new(-gff_version => 1));
|
|
819 Function: Get/set the GFF format interpreter. This object is supposed to
|
|
820 format and parse GFF. See Bio::Tools::GFF for the interface.
|
|
821
|
|
822 If this method is called as class method, the default for all
|
|
823 newly created instances will be changed. Otherwise only this
|
|
824 instance will be affected.
|
|
825 Example :
|
|
826 Returns : a Bio::Tools::GFF compliant object
|
|
827 Args : On set, an instance of Bio::Tools::GFF or a derived object.
|
|
828
|
|
829
|
|
830 =cut
|
|
831
|
|
832 sub gff_format {
|
|
833 my ($self, $gffio) = @_;
|
|
834
|
|
835 if(defined($gffio)) {
|
|
836 if(ref($self)) {
|
|
837 $self->{'_gffio'} = $gffio;
|
|
838 } else {
|
|
839 $Bio::SeqFeatureI::static_gff_formatter = $gffio;
|
|
840 }
|
|
841 }
|
|
842 return (ref($self) && exists($self->{'_gffio'}) ?
|
|
843 $self->{'_gffio'} : $self->_static_gff_formatter);
|
|
844 }
|
|
845
|
|
846 =head2 gff_string
|
|
847
|
|
848 Title : gff_string
|
|
849 Usage : $str = $feat->gff_string;
|
|
850 $str = $feat->gff_string($gff_formatter);
|
|
851 Function: Provides the feature information in GFF format.
|
|
852
|
|
853 We override this here from Bio::SeqFeatureI in order to use the
|
|
854 formatter returned by gff_format().
|
|
855
|
|
856 Returns : A string
|
|
857 Args : Optionally, an object implementing gff_string().
|
|
858
|
|
859
|
|
860 =cut
|
|
861
|
|
862 sub gff_string{
|
|
863 my ($self,$formatter) = @_;
|
|
864
|
|
865 $formatter = $self->gff_format() unless $formatter;
|
|
866 return $formatter->gff_string($self);
|
|
867 }
|
|
868
|
|
869 # =head2 slurp_gff_file
|
|
870 #
|
|
871 # Title : slurp_file
|
|
872 # Usage : @features = Bio::SeqFeature::Generic::slurp_gff_file(\*FILE);
|
|
873 # Function: Sneaky function to load an entire file as in memory objects.
|
|
874 # Beware of big files.
|
|
875 #
|
|
876 # This method is deprecated. Use Bio::Tools::GFF instead, which can
|
|
877 # also handle large files.
|
|
878 #
|
|
879 # Example :
|
|
880 # Returns :
|
|
881 # Args :
|
|
882 #
|
|
883 # =cut
|
|
884
|
|
885 sub slurp_gff_file {
|
|
886 my ($f) = @_;
|
|
887 my @out;
|
|
888 if ( !defined $f ) {
|
|
889 die "Must have a filehandle";
|
|
890 }
|
|
891
|
|
892 Bio::Root::Root->warn("deprecated method slurp_gff_file() called in Bio::SeqFeature::Generic. Use Bio::Tools::GFF instead.");
|
|
893
|
|
894 while(<$f>) {
|
|
895
|
|
896 my $sf = Bio::SeqFeature::Generic->new('-gff_string' => $_);
|
|
897 push(@out, $sf);
|
|
898 }
|
|
899
|
|
900 return @out;
|
|
901
|
|
902 }
|
|
903
|
|
904 =head2 _from_gff_string
|
|
905
|
|
906 Title : _from_gff_string
|
|
907 Usage :
|
|
908 Function: Set feature properties from GFF string.
|
|
909
|
|
910 This method uses the object returned by gff_format() for the
|
|
911 actual interpretation of the string. Set a different GFF format
|
|
912 interpreter first if you need a specific version, like GFF1. (The
|
|
913 default is GFF2.)
|
|
914 Example :
|
|
915 Returns :
|
|
916 Args : a GFF-formatted string
|
|
917
|
|
918
|
|
919 =cut
|
|
920
|
|
921 sub _from_gff_string {
|
|
922 my ($self, $string) = @_;
|
|
923
|
|
924 $self->gff_format()->from_gff_string($self, $string);
|
|
925 }
|
|
926
|
|
927
|
|
928 =head2 _expand_region
|
|
929
|
|
930 Title : _expand_region
|
|
931 Usage : $self->_expand_region($feature);
|
|
932 Function: Expand the total region covered by this feature to
|
|
933 accomodate for the given feature.
|
|
934
|
|
935 May be called whenever any kind of subfeature is added to this
|
|
936 feature. add_sub_SeqFeature() already does this.
|
|
937 Returns :
|
|
938 Args : A Bio::SeqFeatureI implementing object.
|
|
939
|
|
940
|
|
941 =cut
|
|
942
|
|
943 sub _expand_region {
|
|
944 my ($self, $feat) = @_;
|
|
945 if(! $feat->isa('Bio::SeqFeatureI')) {
|
|
946 $self->warn("$feat does not implement Bio::SeqFeatureI");
|
|
947 }
|
|
948 # if this doesn't have start/end set - forget it!
|
|
949 if((! defined($self->start())) && (! defined $self->end())) {
|
|
950 $self->start($feat->start());
|
|
951 $self->end($feat->end());
|
|
952 $self->strand($feat->strand) unless defined($self->strand());
|
|
953 } else {
|
|
954 my $range = $self->union($feat);
|
|
955 $self->start($range->start);
|
|
956 $self->end($range->end);
|
|
957 $self->strand($range->strand);
|
|
958 }
|
|
959 }
|
|
960
|
|
961 =head2 _parse
|
|
962
|
|
963 Title : _parse
|
|
964 Usage :
|
|
965 Function: Parsing hints
|
|
966 Example :
|
|
967 Returns :
|
|
968 Args :
|
|
969
|
|
970
|
|
971 =cut
|
|
972
|
|
973 sub _parse {
|
|
974 my ($self) = @_;
|
|
975
|
|
976 return $self->{'_parse_h'};
|
|
977 }
|
|
978
|
|
979 =head2 _tag_value
|
|
980
|
|
981 Title : _tag_value
|
|
982 Usage :
|
|
983 Function: For internal use only. Convenience method for those tags that
|
|
984 may only have a single value.
|
|
985 Returns :
|
|
986 Args :
|
|
987
|
|
988
|
|
989 =cut
|
|
990
|
|
991 sub _tag_value {
|
|
992 my ($self, $tag, $value) = @_;
|
|
993
|
|
994 if(defined($value) || (! $self->has_tag($tag))) {
|
|
995 $self->remove_tag($tag) if($self->has_tag($tag));
|
|
996 $self->add_tag_value($tag, $value);
|
|
997 }
|
|
998 return ($self->each_tag_value($tag))[0];
|
|
999 }
|
|
1000
|
|
1001 #######################################################################
|
|
1002 # aliases for methods that changed their names in an attempt to make #
|
|
1003 # bioperl names more consistent #
|
|
1004 #######################################################################
|
|
1005
|
|
1006 sub seqname {
|
|
1007 my $self = shift;
|
|
1008 $self->warn("SeqFeatureI::seqname() is deprecated. Please use seq_id() instead.");
|
|
1009 return $self->seq_id(@_);
|
|
1010 }
|
|
1011
|
|
1012 sub display_id {
|
|
1013 my $self = shift;
|
|
1014 $self->warn("SeqFeatureI::display_id() is deprecated. Please use display_name() instead.");
|
|
1015 return $self->display_name(@_);
|
|
1016 }
|
|
1017
|
|
1018 # this is towards consistent naming
|
|
1019 sub each_tag_value { return shift->get_tag_values(@_); }
|
|
1020 sub all_tags { return shift->get_all_tags(@_); }
|
|
1021
|
|
1022 # we revamped the feature containing property to implementing
|
|
1023 # Bio::FeatureHolderI
|
|
1024 *sub_SeqFeature = \&get_SeqFeatures;
|
|
1025 *add_sub_SeqFeature = \&add_SeqFeature;
|
|
1026 *flush_sub_SeqFeatures = \&remove_SeqFeatures;
|
|
1027 # this one is because of inconsistent naming ...
|
|
1028 *flush_sub_SeqFeature = \&remove_SeqFeatures;
|
|
1029
|
|
1030
|
|
1031 1;
|