0
|
1 =head1 NAME
|
|
2
|
|
3 Bio::DB::GFF::Feature -- A relative segment identified by a feature type
|
|
4
|
|
5 =head1 SYNOPSIS
|
|
6
|
|
7 See L<Bio::DB::GFF>.
|
|
8
|
|
9 =head1 DESCRIPTION
|
|
10
|
|
11 Bio::DB::GFF::Feature is a stretch of sequence that corresponding to a
|
|
12 single annotation in a GFF database. It inherits from
|
|
13 Bio::DB::GFF::RelSegment, and so has all the support for relative
|
|
14 addressing of this class and its ancestors. It also inherits from
|
|
15 Bio::SeqFeatureI and so has the familiar start(), stop(),
|
|
16 primary_tag() and location() methods (it implements Bio::LocationI
|
|
17 too, if needed).
|
|
18
|
|
19 Bio::DB::GFF::Feature adds new methods to retrieve the annotation's
|
|
20 type, group, and other GFF attributes. Annotation types are
|
|
21 represented by Bio::DB::GFF::Typename objects, a simple class that has
|
|
22 two methods called method() and source(). These correspond to the
|
|
23 method and source fields of a GFF file.
|
|
24
|
|
25 Annotation groups serve the dual purpose of giving the annotation a
|
|
26 human-readable name, and providing higher-order groupings of
|
|
27 subfeatures into features. The groups returned by this module are
|
|
28 objects of the Bio::DB::GFF::Featname class.
|
|
29
|
|
30 Bio::DB::GFF::Feature inherits from and implements the abstract
|
|
31 methods of Bio::SeqFeatureI, allowing it to interoperate with other
|
|
32 Bioperl modules.
|
|
33
|
|
34 Generally, you will not create or manipulate Bio::DB::GFF::Feature
|
|
35 objects directly, but use those that are returned by the
|
|
36 Bio::DB::GFF::RelSegment-E<gt>features() method.
|
|
37
|
|
38 =head2 Important note about start() vs end()
|
|
39
|
|
40 If features are derived from segments that use relative addressing
|
|
41 (which is the default), then start() will be less than end() if the
|
|
42 feature is on the opposite strand from the reference sequence. This
|
|
43 breaks Bio::SeqI compliance, but is necessary to avoid having the real
|
|
44 genomic locations designated by start() and end() swap places when
|
|
45 changing reference points.
|
|
46
|
|
47 To avoid this behavior, call $segment-E<gt>absolute(1) before fetching
|
|
48 features from it. This will force everything into absolute
|
|
49 coordinates.
|
|
50
|
|
51 For example:
|
|
52
|
|
53 my $segment = $db->segment('CHROMOSOME_I');
|
|
54 $segment->absolute(1);
|
|
55 my @features = $segment->features('transcript');
|
|
56
|
|
57 =head1 API
|
|
58
|
|
59 The remainder of this document describes the public and private
|
|
60 methods implemented by this module.
|
|
61
|
|
62 =cut
|
|
63
|
|
64 package Bio::DB::GFF::Feature;
|
|
65
|
|
66 use strict;
|
|
67
|
|
68 use Bio::DB::GFF::Util::Rearrange;
|
|
69 use Bio::DB::GFF::RelSegment;
|
|
70 use Bio::DB::GFF::Featname;
|
|
71 use Bio::DB::GFF::Typename;
|
|
72 use Bio::DB::GFF::Homol;
|
|
73 use Bio::SeqFeatureI;
|
|
74 use Bio::Root::Root;
|
|
75 use Bio::LocationI;
|
|
76
|
|
77 use vars qw(@ISA $AUTOLOAD);
|
|
78 @ISA = qw(Bio::DB::GFF::RelSegment Bio::SeqFeatureI
|
|
79 Bio::Root::Root);
|
|
80
|
|
81 #'
|
|
82
|
|
83 *segments = \&sub_SeqFeature;
|
|
84 my %CONSTANT_TAGS = (method=>1, source=>1, score=>1, phase=>1, notes=>1, id=>1, group=>1);
|
|
85
|
|
86 =head2 new_from_parent
|
|
87
|
|
88 Title : new_from_parent
|
|
89 Usage : $f = Bio::DB::GFF::Feature->new_from_parent(@args);
|
|
90 Function: create a new feature object
|
|
91 Returns : new Bio::DB::GFF::Feature object
|
|
92 Args : see below
|
|
93 Status : Internal
|
|
94
|
|
95 This method is called by Bio::DB::GFF to create a new feature using
|
|
96
|
|
97 information obtained from the GFF database. It is one of two similar
|
|
98 constructors. This one is called when the feature is generated from a
|
|
99 RelSegment object, and should inherit that object's coordinate system.
|
|
100
|
|
101 The 13 arguments are positional (sorry):
|
|
102
|
|
103 $parent a Bio::DB::GFF::RelSegment object (or descendent)
|
|
104 $start start of this feature
|
|
105 $stop stop of this feature
|
|
106 $method this feature's GFF method
|
|
107 $source this feature's GFF source
|
|
108 $score this feature's score
|
|
109 $fstrand this feature's strand (relative to the source
|
|
110 sequence, which has its own strandedness!)
|
|
111 $phase this feature's phase
|
|
112 $group this feature's group (a Bio::DB::GFF::Featname object)
|
|
113 $db_id this feature's internal database ID
|
|
114 $group_id this feature's internal group database ID
|
|
115 $tstart this feature's target start
|
|
116 $tstop this feature's target stop
|
|
117
|
|
118 tstart and tstop aren't used for anything at the moment, since the
|
|
119 information is embedded in the group object.
|
|
120
|
|
121 =cut
|
|
122
|
|
123 # this is called for a feature that is attached to a parent sequence,
|
|
124 # in which case it inherits its coordinate reference system and strandedness
|
|
125 sub new_from_parent {
|
|
126 my $package = shift;
|
|
127 my ($parent,
|
|
128 $start,$stop,
|
|
129 $method,$source,$score,
|
|
130 $fstrand,$phase,
|
|
131 $group,$db_id,$group_id,
|
|
132 $tstart,$tstop) = @_;
|
|
133
|
|
134 ($start,$stop) = ($stop,$start) if defined($fstrand) and $fstrand eq '-';
|
|
135 my $class = $group ? $group->class : $parent->class;
|
|
136
|
|
137 my $self = bless {
|
|
138 factory => $parent->{factory},
|
|
139 sourceseq => $parent->{sourceseq},
|
|
140 strand => $parent->{strand},
|
|
141 ref => $parent->{ref},
|
|
142 refstart => $parent->{refstart},
|
|
143 refstrand => $parent->{refstrand},
|
|
144 absolute => $parent->{absolute},
|
|
145 start => $start,
|
|
146 stop => $stop,
|
|
147 type => Bio::DB::GFF::Typename->new($method,$source),
|
|
148 fstrand => $fstrand,
|
|
149 score => $score,
|
|
150 phase => $phase,
|
|
151 group => $group,
|
|
152 db_id => $db_id,
|
|
153 group_id => $group_id,
|
|
154 class => $class,
|
|
155 },$package;
|
|
156 $self;
|
|
157 }
|
|
158
|
|
159 =head2 new
|
|
160
|
|
161 Title : new
|
|
162 Usage : $f = Bio::DB::GFF::Feature->new(@args);
|
|
163 Function: create a new feature object
|
|
164 Returns : new Bio::DB::GFF::Feature object
|
|
165 Args : see below
|
|
166 Status : Internal
|
|
167
|
|
168 This method is called by Bio::DB::GFF to create a new feature using
|
|
169 information obtained from the GFF database. It is one of two similar
|
|
170 constructors. This one is called when the feature is generated
|
|
171 without reference to a RelSegment object, and should therefore use its
|
|
172 default coordinate system (relative to itself).
|
|
173
|
|
174 The 11 arguments are positional:
|
|
175
|
|
176 $factory a Bio::DB::GFF adaptor object (or descendent)
|
|
177 $srcseq the source sequence
|
|
178 $start start of this feature
|
|
179 $stop stop of this feature
|
|
180 $method this feature's GFF method
|
|
181 $source this feature's GFF source
|
|
182 $score this feature's score
|
|
183 $fstrand this feature's strand (relative to the source
|
|
184 sequence, which has its own strandedness!)
|
|
185 $phase this feature's phase
|
|
186 $group this feature's group
|
|
187 $db_id this feature's internal database ID
|
|
188
|
|
189 =cut
|
|
190
|
|
191 # 'This is called when creating a feature from scratch. It does not have
|
|
192 # an inherited coordinate system.
|
|
193 sub new {
|
|
194 my $package = shift;
|
|
195 my ($factory,
|
|
196 $srcseq,
|
|
197 $start,$stop,
|
|
198 $method,$source,
|
|
199 $score,$fstrand,$phase,
|
|
200 $group,$db_id,$group_id,
|
|
201 $tstart,$tstop) = @_;
|
|
202
|
|
203 my $self = bless { },$package;
|
|
204 ($start,$stop) = ($stop,$start) if defined($fstrand) and $fstrand eq '-';
|
|
205
|
|
206 my $class = $group ? $group->class : 'Sequence';
|
|
207
|
|
208 @{$self}{qw(factory sourceseq start stop strand class)} =
|
|
209 ($factory,$srcseq,$start,$stop,$fstrand,$class);
|
|
210
|
|
211 # if the target start and stop are defined, then we use this information to create
|
|
212 # the reference sequence
|
|
213 # THIS SHOULD BE BUILT INTO RELSEGMENT
|
|
214 if (0 && $tstart ne '' && $tstop ne '') {
|
|
215 if ($tstart < $tstop) {
|
|
216 @{$self}{qw(ref refstart refstrand)} = ($group,$start - $tstart + 1,'+');
|
|
217 } else {
|
|
218 @{$self}{'start','stop'} = @{$self}{'stop','start'};
|
|
219 @{$self}{qw(ref refstart refstrand)} = ($group,$tstop + $stop - 1,'-');
|
|
220 }
|
|
221
|
|
222 } else {
|
|
223 @{$self}{qw(ref refstart refstrand)} = ($srcseq,1,'+');
|
|
224 }
|
|
225
|
|
226 @{$self}{qw(type fstrand score phase group db_id group_id absolute)} =
|
|
227 (Bio::DB::GFF::Typename->new($method,$source),$fstrand,$score,$phase,
|
|
228 $group,$db_id,$group_id,$factory->{absolute});
|
|
229
|
|
230 $self;
|
|
231 }
|
|
232
|
|
233 =head2 type
|
|
234
|
|
235 Title : type
|
|
236 Usage : $type = $f->type([$newtype])
|
|
237 Function: get or set the feature type
|
|
238 Returns : a Bio::DB::GFF::Typename object
|
|
239 Args : a new Typename object (optional)
|
|
240 Status : Public
|
|
241
|
|
242 This method gets or sets the type of the feature. The type is a
|
|
243 Bio::DB::GFF::Typename object, which encapsulates the feature method
|
|
244 and source.
|
|
245
|
|
246 The method() and source() methods described next provide shortcuts to
|
|
247 the individual fields of the type.
|
|
248
|
|
249 =cut
|
|
250
|
|
251 sub type {
|
|
252 my $self = shift;
|
|
253 my $d = $self->{type};
|
|
254 $self->{type} = shift if @_;
|
|
255 $d;
|
|
256 }
|
|
257
|
|
258 =head2 method
|
|
259
|
|
260 Title : method
|
|
261 Usage : $method = $f->method([$newmethod])
|
|
262 Function: get or set the feature method
|
|
263 Returns : a string
|
|
264 Args : a new method (optional)
|
|
265 Status : Public
|
|
266
|
|
267 This method gets or sets the feature method. It is a convenience
|
|
268 feature that delegates the task to the feature's type object.
|
|
269
|
|
270 =cut
|
|
271
|
|
272 sub method {
|
|
273 my $self = shift;
|
|
274 my $d = $self->{type}->method;
|
|
275 $self->{type}->method(shift) if @_;
|
|
276 $d;
|
|
277 }
|
|
278
|
|
279 =head2 source
|
|
280
|
|
281 Title : source
|
|
282 Usage : $source = $f->source([$newsource])
|
|
283 Function: get or set the feature source
|
|
284 Returns : a string
|
|
285 Args : a new source (optional)
|
|
286 Status : Public
|
|
287
|
|
288 This method gets or sets the feature source. It is a convenience
|
|
289 feature that delegates the task to the feature's type object.
|
|
290
|
|
291 =cut
|
|
292
|
|
293 sub source {
|
|
294 my $self = shift;
|
|
295 my $d = $self->{type}->source;
|
|
296 $self->{type}->source(shift) if @_;
|
|
297 $d;
|
|
298 }
|
|
299
|
|
300 =head2 score
|
|
301
|
|
302 Title : score
|
|
303 Usage : $score = $f->score([$newscore])
|
|
304 Function: get or set the feature score
|
|
305 Returns : a string
|
|
306 Args : a new score (optional)
|
|
307 Status : Public
|
|
308
|
|
309 This method gets or sets the feature score.
|
|
310
|
|
311 =cut
|
|
312
|
|
313 sub score {
|
|
314 my $self = shift;
|
|
315 my $d = $self->{score};
|
|
316 $self->{score} = shift if @_;
|
|
317 $d;
|
|
318 }
|
|
319
|
|
320 =head2 phase
|
|
321
|
|
322 Title : phase
|
|
323 Usage : $phase = $f->phase([$phase])
|
|
324 Function: get or set the feature phase
|
|
325 Returns : a string
|
|
326 Args : a new phase (optional)
|
|
327 Status : Public
|
|
328
|
|
329 This method gets or sets the feature phase.
|
|
330
|
|
331 =cut
|
|
332
|
|
333 sub phase {
|
|
334 my $self = shift;
|
|
335 my $d = $self->{phase};
|
|
336 $self->{phase} = shift if @_;
|
|
337 $d;
|
|
338 }
|
|
339
|
|
340 =head2 strand
|
|
341
|
|
342 Title : strand
|
|
343 Usage : $strand = $f->strand
|
|
344 Function: get the feature strand
|
|
345 Returns : +1, 0 -1
|
|
346 Args : none
|
|
347 Status : Public
|
|
348
|
|
349 Returns the strand of the feature. Unlike the other methods, the
|
|
350 strand cannot be changed once the object is created (due to coordinate
|
|
351 considerations).
|
|
352
|
|
353 =cut
|
|
354
|
|
355 sub strand {
|
|
356 my $self = shift;
|
|
357 return 0 unless $self->{fstrand};
|
|
358 if ($self->absolute) {
|
|
359 return Bio::DB::GFF::RelSegment::_to_strand($self->{fstrand});
|
|
360 }
|
|
361 return $self->SUPER::strand;
|
|
362 }
|
|
363
|
|
364 =head2 group
|
|
365
|
|
366 Title : group
|
|
367 Usage : $group = $f->group([$new_group])
|
|
368 Function: get or set the feature group
|
|
369 Returns : a Bio::DB::GFF::Featname object
|
|
370 Args : a new group (optional)
|
|
371 Status : Public
|
|
372
|
|
373 This method gets or sets the feature group. The group is a
|
|
374 Bio::DB::GFF::Featname object, which has an ID and a class.
|
|
375
|
|
376 =cut
|
|
377
|
|
378 sub group {
|
|
379 my $self = shift;
|
|
380 my $d = $self->{group};
|
|
381 $self->{group} = shift if @_;
|
|
382 $d;
|
|
383 }
|
|
384
|
|
385 =head2 display_id
|
|
386
|
|
387 Title : display_id
|
|
388 Usage : $display_id = $f->display_id([$display_id])
|
|
389 Function: get or set the feature display id
|
|
390 Returns : a Bio::DB::GFF::Featname object
|
|
391 Args : a new display_id (optional)
|
|
392 Status : Public
|
|
393
|
|
394 This method is an alias for group(). It is provided for
|
|
395 Bio::SeqFeatureI compatibility.
|
|
396
|
|
397 =cut
|
|
398
|
|
399 =head2 info
|
|
400
|
|
401 Title : info
|
|
402 Usage : $info = $f->info([$new_info])
|
|
403 Function: get or set the feature group
|
|
404 Returns : a Bio::DB::GFF::Featname object
|
|
405 Args : a new group (optional)
|
|
406 Status : Public
|
|
407
|
|
408 This method is an alias for group(). It is provided for AcePerl
|
|
409 compatibility.
|
|
410
|
|
411 =cut
|
|
412
|
|
413 *info = \&group;
|
|
414 *display_id = \&group;
|
|
415 *display_name = \&group;
|
|
416
|
|
417 =head2 target
|
|
418
|
|
419 Title : target
|
|
420 Usage : $target = $f->target([$new_target])
|
|
421 Function: get or set the feature target
|
|
422 Returns : a Bio::DB::GFF::Featname object
|
|
423 Args : a new group (optional)
|
|
424 Status : Public
|
|
425
|
|
426 This method works like group(), but only returns the group if it
|
|
427 implements the start() method. This is typical for
|
|
428 similarity/assembly features, where the target encodes the start and stop
|
|
429 location of the alignment.
|
|
430
|
|
431 =cut
|
|
432
|
|
433 sub target {
|
|
434 my $self = shift;
|
|
435 my $group = $self->group or return;
|
|
436 return unless $group->can('start');
|
|
437 $group;
|
|
438 }
|
|
439
|
|
440 =head2 hit
|
|
441
|
|
442 Title : hit
|
|
443 Usage : $hit = $f->hit([$new_hit])
|
|
444 Function: get or set the feature hit
|
|
445 Returns : a Bio::DB::GFF::Featname object
|
|
446 Args : a new group (optional)
|
|
447 Status : Public
|
|
448
|
|
449 This is the same as target(), for compatibility with
|
|
450 Bio::SeqFeature::SimilarityPair.
|
|
451
|
|
452 =cut
|
|
453
|
|
454 *hit = \⌖
|
|
455
|
|
456 =head2 id
|
|
457
|
|
458 Title : id
|
|
459 Usage : $id = $f->id
|
|
460 Function: get the feature ID
|
|
461 Returns : a database identifier
|
|
462 Args : none
|
|
463 Status : Public
|
|
464
|
|
465 This method retrieves the database identifier for the feature. It
|
|
466 cannot be changed.
|
|
467
|
|
468 =cut
|
|
469
|
|
470 sub id { shift->{db_id} }
|
|
471
|
|
472 =head2 group_id
|
|
473
|
|
474 Title : group_id
|
|
475 Usage : $id = $f->group_id
|
|
476 Function: get the feature ID
|
|
477 Returns : a database identifier
|
|
478 Args : none
|
|
479 Status : Public
|
|
480
|
|
481 This method retrieves the database group identifier for the feature.
|
|
482 It cannot be changed. Often the group identifier is more useful than
|
|
483 the feature identifier, since it is used to refer to a complex object
|
|
484 containing subparts.
|
|
485
|
|
486 =cut
|
|
487
|
|
488 sub group_id { shift->{group_id} }
|
|
489
|
|
490 =head2 clone
|
|
491
|
|
492 Title : clone
|
|
493 Usage : $feature = $f->clone
|
|
494 Function: make a copy of the feature
|
|
495 Returns : a new Bio::DB::GFF::Feature object
|
|
496 Args : none
|
|
497 Status : Public
|
|
498
|
|
499 This method returns a copy of the feature.
|
|
500
|
|
501 =cut
|
|
502
|
|
503 sub clone {
|
|
504 my $self = shift;
|
|
505 my $clone = $self->SUPER::clone;
|
|
506
|
|
507 if (ref(my $t = $clone->type)) {
|
|
508 my $type = $t->can('clone') ? $t->clone : bless {%$t},ref $t;
|
|
509 $clone->type($type);
|
|
510 }
|
|
511
|
|
512 if (ref(my $g = $clone->group)) {
|
|
513 my $group = $g->can('clone') ? $g->clone : bless {%$g},ref $g;
|
|
514 $clone->group($group);
|
|
515 }
|
|
516
|
|
517 if (my $merged = $self->{merged_segs}) {
|
|
518 $clone->{merged_segs} = { %$merged };
|
|
519 }
|
|
520
|
|
521 $clone;
|
|
522 }
|
|
523
|
|
524 =head2 compound
|
|
525
|
|
526 Title : compound
|
|
527 Usage : $flag = $f->compound([$newflag])
|
|
528 Function: get or set the compound flag
|
|
529 Returns : a boolean
|
|
530 Args : a new flag (optional)
|
|
531 Status : Public
|
|
532
|
|
533 This method gets or sets a flag indicated that the feature is not a
|
|
534 primary one from the database, but the result of aggregation.
|
|
535
|
|
536 =cut
|
|
537
|
|
538 sub compound {
|
|
539 my $self = shift;
|
|
540 my $d = $self->{compound};
|
|
541 $self->{compound} = shift if @_;
|
|
542 $d;
|
|
543 }
|
|
544
|
|
545 =head2 sub_SeqFeature
|
|
546
|
|
547 Title : sub_SeqFeature
|
|
548 Usage : @feat = $feature->sub_SeqFeature([$method])
|
|
549 Function: get subfeatures
|
|
550 Returns : a list of Bio::DB::GFF::Feature objects
|
|
551 Args : a feature method (optional)
|
|
552 Status : Public
|
|
553
|
|
554 This method returns a list of any subfeatures that belong to the main
|
|
555 feature. For those features that contain heterogeneous subfeatures,
|
|
556 you can retrieve a subset of the subfeatures by providing a method
|
|
557 name to filter on.
|
|
558
|
|
559 For AcePerl compatibility, this method may also be called as
|
|
560 segments().
|
|
561
|
|
562 =cut
|
|
563
|
|
564 sub sub_SeqFeature {
|
|
565 my $self = shift;
|
|
566 my $type = shift;
|
|
567 my $subfeat = $self->{subfeatures} or return;
|
|
568 $self->sort_features;
|
|
569 my @a;
|
|
570 if ($type) {
|
|
571 my $features = $subfeat->{lc $type} or return;
|
|
572 @a = @{$features};
|
|
573 } else {
|
|
574 @a = map {@{$_}} values %{$subfeat};
|
|
575 }
|
|
576 return @a;
|
|
577 }
|
|
578
|
|
579 =head2 add_subfeature
|
|
580
|
|
581 Title : add_subfeature
|
|
582 Usage : $feature->add_subfeature($feature)
|
|
583 Function: add a subfeature to the feature
|
|
584 Returns : nothing
|
|
585 Args : a Bio::DB::GFF::Feature object
|
|
586 Status : Public
|
|
587
|
|
588 This method adds a new subfeature to the object. It is used
|
|
589 internally by aggregators, but is available for public use as well.
|
|
590
|
|
591 =cut
|
|
592
|
|
593 sub add_subfeature {
|
|
594 my $self = shift;
|
|
595 my $feature = shift;
|
|
596 my $type = $feature->method;
|
|
597 my $subfeat = $self->{subfeatures}{lc $type} ||= [];
|
|
598 push @{$subfeat},$feature;
|
|
599 }
|
|
600
|
|
601 =head2 attach_seq
|
|
602
|
|
603 Title : attach_seq
|
|
604 Usage : $sf->attach_seq($seq)
|
|
605 Function: Attaches a Bio::Seq object to this feature. This
|
|
606 Bio::Seq object is for the *entire* sequence: ie
|
|
607 from 1 to 10000
|
|
608 Example :
|
|
609 Returns : TRUE on success
|
|
610 Args : a Bio::PrimarySeqI compliant object
|
|
611
|
|
612 =cut
|
|
613
|
|
614 sub attach_seq { }
|
|
615
|
|
616
|
|
617 =head2 location
|
|
618
|
|
619 Title : location
|
|
620 Usage : my $location = $seqfeature->location()
|
|
621 Function: returns a location object suitable for identifying location
|
|
622 of feature on sequence or parent feature
|
|
623 Returns : Bio::LocationI object
|
|
624 Args : none
|
|
625
|
|
626 =cut
|
|
627
|
|
628 sub location {
|
|
629 my $self = shift;
|
|
630 require Bio::Location::Split unless Bio::Location::Split->can('new');
|
|
631 require Bio::Location::Simple unless Bio::Location::Simple->can('new');
|
|
632
|
|
633 my $location;
|
|
634 if (my @segments = $self->segments) {
|
|
635 $location = Bio::Location::Split->new(-seq_id => $self->seq_id);
|
|
636 foreach (@segments) {
|
|
637 $location->add_sub_Location($_->location);
|
|
638 }
|
|
639 } else {
|
|
640 $location = Bio::Location::Simple->new(-start => $self->start,
|
|
641 -end => $self->stop,
|
|
642 -strand => $self->strand,
|
|
643 -seq_id => $self->seq_id);
|
|
644 }
|
|
645 $location;
|
|
646 }
|
|
647
|
|
648 =head2 entire_seq
|
|
649
|
|
650 Title : entire_seq
|
|
651 Usage : $whole_seq = $sf->entire_seq()
|
|
652 Function: gives the entire sequence that this seqfeature is attached to
|
|
653 Example :
|
|
654 Returns : a Bio::PrimarySeqI compliant object, or undef if there is no
|
|
655 sequence attached
|
|
656 Args : none
|
|
657
|
|
658
|
|
659 =cut
|
|
660
|
|
661 sub entire_seq {
|
|
662 my $self = shift;
|
|
663 $self->factory->segment($self->sourceseq);
|
|
664 }
|
|
665
|
|
666 =head2 merged_segments
|
|
667
|
|
668 Title : merged_segments
|
|
669 Usage : @segs = $feature->merged_segments([$method])
|
|
670 Function: get merged subfeatures
|
|
671 Returns : a list of Bio::DB::GFF::Feature objects
|
|
672 Args : a feature method (optional)
|
|
673 Status : Public
|
|
674
|
|
675 This method acts like sub_SeqFeature, except that it merges
|
|
676 overlapping segments of the same time into contiguous features. For
|
|
677 those features that contain heterogeneous subfeatures, you can
|
|
678 retrieve a subset of the subfeatures by providing a method name to
|
|
679 filter on.
|
|
680
|
|
681 A side-effect of this method is that the features are returned in
|
|
682 sorted order by their start tposition.
|
|
683
|
|
684 =cut
|
|
685
|
|
686 #'
|
|
687
|
|
688 sub merged_segments {
|
|
689 my $self = shift;
|
|
690 my $type = shift;
|
|
691 $type ||= ''; # prevent uninitialized variable warnings
|
|
692
|
|
693 my $truename = overload::StrVal($self);
|
|
694
|
|
695 return @{$self->{merged_segs}{$type}} if exists $self->{merged_segs}{$type};
|
|
696 my @segs = map { $_->[0] }
|
|
697 sort { $a->[1] <=> $b->[1] ||
|
|
698 $a->[2] cmp $b->[2] }
|
|
699 map { [$_, $_->start, $_->type] } $self->sub_SeqFeature($type);
|
|
700
|
|
701 # attempt to merge overlapping segments
|
|
702 my @merged = ();
|
|
703 for my $s (@segs) {
|
|
704 my $previous = $merged[-1] if @merged;
|
|
705 my ($pscore,$score) = (eval{$previous->score}||0,eval{$s->score}||0);
|
|
706 if (defined($previous)
|
|
707 && $previous->stop+1 >= $s->start
|
|
708 && (!defined($s->score) || $previous->score == $s->score)
|
|
709 && $previous->method eq $s->method
|
|
710 ) {
|
|
711 if ($self->absolute && $self->strand < 0) {
|
|
712 $previous->{start} = $s->{start};
|
|
713 } else {
|
|
714 $previous->{stop} = $s->{stop};
|
|
715 }
|
|
716 # fix up the target too
|
|
717 my $g = $previous->{group};
|
|
718 if ( ref($g) && $g->isa('Bio::DB::GFF::Homol')) {
|
|
719 my $cg = $s->{group};
|
|
720 $g->{stop} = $cg->{stop};
|
|
721 }
|
|
722 } elsif (defined($previous)
|
|
723 && $previous->start == $s->start
|
|
724 && $previous->stop == $s->stop) {
|
|
725 next;
|
|
726 } else {
|
|
727 my $copy = $s->clone;
|
|
728 push @merged,$copy;
|
|
729 }
|
|
730 }
|
|
731 $self->{merged_segs}{$type} = \@merged;
|
|
732 @merged;
|
|
733 }
|
|
734
|
|
735 =head2 sub_types
|
|
736
|
|
737 Title : sub_types
|
|
738 Usage : @methods = $feature->sub_types
|
|
739 Function: get methods of all sub-seqfeatures
|
|
740 Returns : a list of method names
|
|
741 Args : none
|
|
742 Status : Public
|
|
743
|
|
744 For those features that contain subfeatures, this method will return a
|
|
745 unique list of method names of those subfeatures, suitable for use
|
|
746 with sub_SeqFeature().
|
|
747
|
|
748 =cut
|
|
749
|
|
750 sub sub_types {
|
|
751 my $self = shift;
|
|
752 my $subfeat = $self->{subfeatures} or return;
|
|
753 return keys %$subfeat;
|
|
754 }
|
|
755
|
|
756 =head2 attributes
|
|
757
|
|
758 Title : attributes
|
|
759 Usage : @attributes = $feature->attributes($name)
|
|
760 Function: get the "attributes" on a particular feature
|
|
761 Returns : an array of string
|
|
762 Args : feature ID
|
|
763 Status : public
|
|
764
|
|
765 Some GFF version 2 files use the groups column to store a series of
|
|
766 attribute/value pairs. In this interpretation of GFF, the first such
|
|
767 pair is treated as the primary group for the feature; subsequent pairs
|
|
768 are treated as attributes. Two attributes have special meaning:
|
|
769 "Note" is for backward compatibility and is used for unstructured text
|
|
770 remarks. "Alias" is considered as a synonym for the feature name.
|
|
771
|
|
772 @gene_names = $feature->attributes('Gene');
|
|
773 @aliases = $feature->attributes('Alias');
|
|
774
|
|
775 If no name is provided, then attributes() returns a flattened hash, of
|
|
776 attribute=E<gt>value pairs. This lets you do:
|
|
777
|
|
778 %attributes = $db->attributes;
|
|
779
|
|
780 =cut
|
|
781
|
|
782 sub attributes {
|
|
783 my $self = shift;
|
|
784 my $factory = $self->factory;
|
|
785 defined(my $id = $self->id) or return;
|
|
786 $factory->attributes($id,@_)
|
|
787 }
|
|
788
|
|
789
|
|
790 =head2 notes
|
|
791
|
|
792 Title : notes
|
|
793 Usage : @notes = $feature->notes
|
|
794 Function: get the "notes" on a particular feature
|
|
795 Returns : an array of string
|
|
796 Args : feature ID
|
|
797 Status : public
|
|
798
|
|
799 Some GFF version 2 files use the groups column to store various notes
|
|
800 and remarks. Adaptors can elect to store the notes in the database,
|
|
801 or just ignore them. For those adaptors that store the notes, the
|
|
802 notes() method will return them as a list.
|
|
803
|
|
804 =cut
|
|
805
|
|
806 sub notes {
|
|
807 my $self = shift;
|
|
808 $self->attributes('Note');
|
|
809 }
|
|
810
|
|
811 =head2 aliases
|
|
812
|
|
813 Title : aliases
|
|
814 Usage : @aliases = $feature->aliases
|
|
815 Function: get the "aliases" on a particular feature
|
|
816 Returns : an array of string
|
|
817 Args : feature ID
|
|
818 Status : public
|
|
819
|
|
820 This method will return a list of attributes of type 'Alias'.
|
|
821
|
|
822 =cut
|
|
823
|
|
824 sub aliases {
|
|
825 my $self = shift;
|
|
826 $self->attributes('Alias');
|
|
827 }
|
|
828
|
|
829
|
|
830
|
|
831 =head2 Autogenerated Methods
|
|
832
|
|
833 Title : AUTOLOAD
|
|
834 Usage : @subfeat = $feature->Method
|
|
835 Function: Return subfeatures using autogenerated methods
|
|
836 Returns : a list of Bio::DB::GFF::Feature objects
|
|
837 Args : none
|
|
838 Status : Public
|
|
839
|
|
840 Any method that begins with an initial capital letter will be passed
|
|
841 to AUTOLOAD and treated as a call to sub_SeqFeature with the method
|
|
842 name used as the method argument. For instance, this call:
|
|
843
|
|
844 @exons = $feature->Exon;
|
|
845
|
|
846 is equivalent to this call:
|
|
847
|
|
848 @exons = $feature->sub_SeqFeature('exon');
|
|
849
|
|
850 =cut
|
|
851
|
|
852 =head2 SeqFeatureI methods
|
|
853
|
|
854 The following Bio::SeqFeatureI methods are implemented:
|
|
855
|
|
856 primary_tag(), source_tag(), all_tags(), has_tag(), each_tag_value() [renamed get_tag_values()].
|
|
857
|
|
858 =cut
|
|
859
|
|
860 *primary_tag = \&method;
|
|
861 *source_tag = \&source;
|
|
862 sub all_tags {
|
|
863 my $self = shift;
|
|
864 my @tags = keys %CONSTANT_TAGS;
|
|
865 # autogenerated methods
|
|
866 if (my $subfeat = $self->{subfeatures}) {
|
|
867 push @tags,keys %$subfeat;
|
|
868 }
|
|
869 @tags;
|
|
870 }
|
|
871 *get_all_tags = \&all_tags;
|
|
872
|
|
873 sub has_tag {
|
|
874 my $self = shift;
|
|
875 my $tag = shift;
|
|
876 my %tags = map {$_=>1} $self->all_tags;
|
|
877 return $tags{$tag};
|
|
878 }
|
|
879
|
|
880 *each_tag_value = \&get_tag_values;
|
|
881
|
|
882 sub get_tag_values {
|
|
883 my $self = shift;
|
|
884 my $tag = shift;
|
|
885 return $self->$tag() if $CONSTANT_TAGS{$tag};
|
|
886 $tag = ucfirst $tag;
|
|
887 return $self->$tag(); # try autogenerated tag
|
|
888 }
|
|
889
|
|
890 sub AUTOLOAD {
|
|
891 my($pack,$func_name) = $AUTOLOAD=~/(.+)::([^:]+)$/;
|
|
892 my $sub = $AUTOLOAD;
|
|
893 my $self = $_[0];
|
|
894
|
|
895 # ignore DESTROY calls
|
|
896 return if $func_name eq 'DESTROY';
|
|
897
|
|
898 # fetch subfeatures if func_name has an initial cap
|
|
899 # return sort {$a->start <=> $b->start} $self->sub_SeqFeature($func_name) if $func_name =~ /^[A-Z]/;
|
|
900 return $self->sub_SeqFeature($func_name) if $func_name =~ /^[A-Z]/;
|
|
901
|
|
902 # error message of last resort
|
|
903 $self->throw(qq(Can't locate object method "$func_name" via package "$pack"));
|
|
904 }#'
|
|
905
|
|
906 =head2 adjust_bounds
|
|
907
|
|
908 Title : adjust_bounds
|
|
909 Usage : $feature->adjust_bounds
|
|
910 Function: adjust the bounds of a feature
|
|
911 Returns : ($start,$stop,$strand)
|
|
912 Args : none
|
|
913 Status : Public
|
|
914
|
|
915 This method adjusts the boundaries of the feature to enclose all its
|
|
916 subfeatures. It returns the new start, stop and strand of the
|
|
917 enclosing feature.
|
|
918
|
|
919 =cut
|
|
920
|
|
921 # adjust a feature so that its boundaries are synched with its subparts' boundaries.
|
|
922 # this works recursively, so subfeatures can contain other features
|
|
923 sub adjust_bounds {
|
|
924 my $self = shift;
|
|
925 my $g = $self->{group};
|
|
926
|
|
927 if (my $subfeat = $self->{subfeatures}) {
|
|
928 for my $list (values %$subfeat) {
|
|
929 for my $feat (@$list) {
|
|
930
|
|
931 # fix up our bounds to hold largest subfeature
|
|
932 my($start,$stop,$strand) = $feat->adjust_bounds;
|
|
933 $self->{fstrand} = $strand unless defined $self->{fstrand};
|
|
934 my ($low,$high) = $start < $stop ? ($start,$stop) : ($stop,$start);
|
|
935 if ($self->{fstrand} ne '-') {
|
|
936 $self->{start} = $low if !defined($self->{start}) || $low < $self->{start};
|
|
937 $self->{stop} = $high if !defined($self->{stop}) || $high > $self->{stop};
|
|
938 } else {
|
|
939 $self->{start} = $high if !defined($self->{start}) || $high > $self->{start};
|
|
940 $self->{stop} = $low if !defined($self->{stop}) || $low < $self->{stop};
|
|
941 }
|
|
942
|
|
943 # fix up endpoints of targets too (for homologies only)
|
|
944 my $h = $feat->group;
|
|
945 next unless $h && $h->isa('Bio::DB::GFF::Homol');
|
|
946 next unless $g && $g->isa('Bio::DB::GFF::Homol');
|
|
947 ($start,$stop) = ($h->{start},$h->{stop});
|
|
948 if ($start <= $stop) {
|
|
949 $g->{start} = $start if !defined($g->{start}) || $start < $g->{start};
|
|
950 $g->{stop} = $stop if !defined($g->{stop}) || $stop > $g->{stop};
|
|
951 } else {
|
|
952 $g->{start} = $start if !defined($g->{start}) || $start > $g->{start};
|
|
953 $g->{stop} = $stop if !defined($g->{stop}) || $stop < $g->{stop};
|
|
954 }
|
|
955 }
|
|
956 }
|
|
957 }
|
|
958
|
|
959 ($self->{start},$self->{stop},$self->strand);
|
|
960 }
|
|
961
|
|
962 =head2 sort_features
|
|
963
|
|
964 Title : sort_features
|
|
965 Usage : $feature->sort_features
|
|
966 Function: sort features
|
|
967 Returns : nothing
|
|
968 Args : none
|
|
969 Status : Public
|
|
970
|
|
971 This method sorts subfeatures in ascending order by their start
|
|
972 position. For reverse strand features, it sorts subfeatures in
|
|
973 descending order. After this is called sub_SeqFeature will return the
|
|
974 features in order.
|
|
975
|
|
976 This method is called internally by merged_segments().
|
|
977
|
|
978 =cut
|
|
979
|
|
980 # sort features
|
|
981 sub sort_features {
|
|
982 my $self = shift;
|
|
983 return if $self->{sorted}++;
|
|
984 my $strand = $self->strand or return;
|
|
985 my $subfeat = $self->{subfeatures} or return;
|
|
986 for my $type (keys %$subfeat) {
|
|
987 $subfeat->{$type} = [map { $_->[0] }
|
|
988 sort {$a->[1] <=> $b->[1] }
|
|
989 map { [$_,$_->start] }
|
|
990 @{$subfeat->{$type}}] if $strand > 0;
|
|
991 $subfeat->{$type} = [map { $_->[0] }
|
|
992 sort {$b->[1] <=> $a->[1]}
|
|
993 map { [$_,$_->start] }
|
|
994 @{$subfeat->{$type}}] if $strand < 0;
|
|
995 }
|
|
996 }
|
|
997
|
|
998 =head2 asString
|
|
999
|
|
1000 Title : asString
|
|
1001 Usage : $string = $feature->asString
|
|
1002 Function: return human-readabled representation of feature
|
|
1003 Returns : a string
|
|
1004 Args : none
|
|
1005 Status : Public
|
|
1006
|
|
1007 This method returns a human-readable representation of the feature and
|
|
1008 is called by the overloaded "" operator.
|
|
1009
|
|
1010 =cut
|
|
1011
|
|
1012 sub asString {
|
|
1013 my $self = shift;
|
|
1014 my $type = $self->type;
|
|
1015 my $name = $self->group;
|
|
1016 return "$type($name)" if $name;
|
|
1017 return $type;
|
|
1018 # my $type = $self->method;
|
|
1019 # my $id = $self->group || 'unidentified';
|
|
1020 # return join '/',$id,$type,$self->SUPER::asString;
|
|
1021 }
|
|
1022
|
|
1023 sub name {
|
|
1024 my $self =shift;
|
|
1025 return $self->group || $self->SUPER::name;
|
|
1026 }
|
|
1027
|
|
1028 sub gff_string {
|
|
1029 my $self = shift;
|
|
1030 my ($start,$stop) = ($self->start,$self->stop);
|
|
1031
|
|
1032 # the defined() tests prevent uninitialized variable warnings, when dealing with clone objects
|
|
1033 # whose endpoints may be undefined
|
|
1034 ($start,$stop) = ($stop,$start) if defined($start) && defined($stop) && $start > $stop;
|
|
1035
|
|
1036 my ($class,$name) = ('','');
|
|
1037 my @group;
|
|
1038 if (my $t = $self->target) {
|
|
1039 my $class = $t->class;
|
|
1040 my $name = $t->name;
|
|
1041 my $start = $t->start;
|
|
1042 my $stop = $t->stop;
|
|
1043 push @group,qq(Target "$class:$name" $start $stop);
|
|
1044 }
|
|
1045
|
|
1046 elsif (my $g = $self->group) {
|
|
1047 $class = $g->class || '';
|
|
1048 $name = $g->name || '';
|
|
1049 push @group,"$class $name";
|
|
1050 }
|
|
1051 push @group,map {qq(Note "$_")} $self->notes;
|
|
1052
|
|
1053 my $group_field = join ' ; ',@group;
|
|
1054 my $strand = ('-','.','+')[$self->strand+1];
|
|
1055 my $ref = $self->refseq;
|
|
1056 my $n = ref($ref) ? $ref->name : $ref;
|
|
1057 my $phase = $self->phase;
|
|
1058 $phase = '.' unless defined $phase;
|
|
1059 return join("\t",$n,$self->source,$self->method,$start||'.',$stop||'.',$self->score||'.',$strand||'.',$phase,$group_field);
|
|
1060 }
|
|
1061
|
|
1062 =head1 A Note About Similarities
|
|
1063
|
|
1064 The current default aggregator for GFF "similarity" features creates a
|
|
1065 composite Bio::DB::GFF::Feature object of type "gapped_alignment".
|
|
1066 The target() method for the feature as a whole will return a
|
|
1067 RelSegment object that is as long as the extremes of the similarity
|
|
1068 hit target, but will not necessarily be the same length as the query
|
|
1069 sequence. The length of each "similarity" subfeature will be exactly
|
|
1070 the same length as its target(). These subfeatures are essentially
|
|
1071 the HSPs of the match.
|
|
1072
|
|
1073 The following illustrates this:
|
|
1074
|
|
1075 @similarities = $segment->feature('similarity:BLASTN');
|
|
1076 $sim = $similarities[0];
|
|
1077
|
|
1078 print $sim->type; # yields "gapped_similarity:BLASTN"
|
|
1079
|
|
1080 $query_length = $sim->length;
|
|
1081 $target_length = $sim->target->length; # $query_length != $target_length
|
|
1082
|
|
1083 @matches = $sim->Similarity; # use autogenerated method
|
|
1084 $query1_length = $matches[0]->length;
|
|
1085 $target1_length = $matches[0]->target->length; # $query1_length == $target1_length
|
|
1086
|
|
1087 If you merge segments by calling merged_segments(), then the length of
|
|
1088 the query sequence segments will no longer necessarily equal the
|
|
1089 length of the targets, because the alignment information will have
|
|
1090 been lost. Nevertheless, the targets are adjusted so that the first
|
|
1091 and last base pairs of the query match the first and last base pairs
|
|
1092 of the target.
|
|
1093
|
|
1094 =cut
|
|
1095
|
|
1096 1;
|
|
1097
|
|
1098 =head1 BUGS
|
|
1099
|
|
1100 This module is still under development.
|
|
1101
|
|
1102 =head1 SEE ALSO
|
|
1103
|
|
1104 L<bioperl>, L<Bio::DB::GFF>, L<Bio::DB::RelSegment>
|
|
1105
|
|
1106 =head1 AUTHOR
|
|
1107
|
|
1108 Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
|
|
1109
|
|
1110 Copyright (c) 2001 Cold Spring Harbor Laboratory.
|
|
1111
|
|
1112 This library is free software; you can redistribute it and/or modify
|
|
1113 it under the same terms as Perl itself.
|
|
1114
|
|
1115 =cut
|
|
1116
|