comparison variant_effect_predictor/Bio/SeqFeatureI.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: SeqFeatureI.pm,v 1.43.2.5 2003/08/28 19:29:34 jason Exp $
2 #
3 # BioPerl module for Bio::SeqFeatureI
4 #
5 # Cared for by Ewan Birney <birney@sanger.ac.uk>
6 #
7 # Copyright Ewan Birney
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::SeqFeatureI - Abstract interface of a Sequence Feature
16
17 =head1 SYNOPSIS
18
19 # get a seqfeature somehow, eg,
20
21 foreach $feat ( $seq->top_SeqFeatures() ) {
22 print "Feature from ", $feat->start, "to ",
23 $feat->end, " Primary tag ", $feat->primary_tag,
24 ", produced by ", $feat->source_tag(), "\n";
25
26 if( $feat->strand == 0 ) {
27 print "Feature applicable to either strand\n";
28 } else {
29 print "Feature on strand ", $feat->strand,"\n"; # -1,1
30 }
31
32 foreach $tag ( $feat->all_tags() ) {
33 print "Feature has tag ", $tag, "with values, ",
34 join(' ',$feat->each_tag_value($tag)), "\n";
35 }
36 print "new feature\n" if $feat->has_tag('new');
37 # features can have sub features
38 my @subfeat = $feat->get_SeqFeatures();
39 }
40
41 =head1 DESCRIPTION
42
43 This interface is the functions one can expect for any Sequence
44 Feature, whatever its implementation or whether it is a more complex
45 type (eg, a Gene). This object doesn\'t actually provide any
46 implemention, it just provides the definitions of what methods one can
47 call. See Bio::SeqFeature::Generic for a good standard implementation
48 of this object
49
50 =head1 FEEDBACK
51
52 User feedback is an integral part of the evolution of this and other
53 Bioperl modules. Send your comments and suggestions preferably to one
54 of the Bioperl mailing lists. Your participation is much appreciated.
55
56 bioperl-l@bioperl.org - General discussion
57 http://bio.perl.org/MailList.html - About the mailing lists
58
59 =head2 Reporting Bugs
60
61 Report bugs to the Bioperl bug tracking system to help us keep track
62 the bugs and their resolution. Bug reports can be submitted via email
63 or the web:
64
65 bioperl-bugs@bio.perl.org
66 http://bugzilla.bioperl.org/
67
68 =head1 APPENDIX
69
70 The rest of the documentation details each of the object
71 methods. Internal methods are usually preceded with a _
72
73 =cut
74
75
76 # Let the code begin...
77
78
79 package Bio::SeqFeatureI;
80 use vars qw(@ISA $HasInMemory);
81 use strict;
82
83 BEGIN {
84 eval { require Bio::DB::InMemoryCache };
85 if( $@ ) { $HasInMemory = 0 }
86 else { $HasInMemory = 1 }
87 }
88
89 use Bio::RangeI;
90 use Bio::Seq;
91
92 use Carp;
93
94 @ISA = qw(Bio::RangeI);
95
96 =head1 SeqFeatureI specific methods
97
98 New method interfaces.
99
100 =cut
101
102 =head2 get_SeqFeatures
103
104 Title : get_SeqFeatures
105 Usage : @feats = $feat->get_SeqFeatures();
106 Function: Returns an array of sub Sequence Features
107 Returns : An array
108 Args : none
109
110
111 =cut
112
113 sub get_SeqFeatures{
114 my ($self,@args) = @_;
115
116 $self->throw_not_implemented();
117 }
118
119 =head2 display_name
120
121 Title : display_name
122 Usage : $name = $feat->display_name()
123 Function: Returns the human-readable name of the feature for displays.
124 Returns : a string
125 Args : none
126
127 =cut
128
129 sub display_name {
130 shift->throw_not_implemented();
131 }
132
133 =head2 primary_tag
134
135 Title : primary_tag
136 Usage : $tag = $feat->primary_tag()
137 Function: Returns the primary tag for a feature,
138 eg 'exon'
139 Returns : a string
140 Args : none
141
142
143 =cut
144
145 sub primary_tag{
146 my ($self,@args) = @_;
147
148 $self->throw_not_implemented();
149
150 }
151
152 =head2 source_tag
153
154 Title : source_tag
155 Usage : $tag = $feat->source_tag()
156 Function: Returns the source tag for a feature,
157 eg, 'genscan'
158 Returns : a string
159 Args : none
160
161
162 =cut
163
164 sub source_tag{
165 my ($self,@args) = @_;
166
167 $self->throw_not_implemented();
168 }
169
170 =head2 has_tag
171
172 Title : has_tag
173 Usage : $tag_exists = $self->has_tag('some_tag')
174 Function:
175 Returns : TRUE if the specified tag exists, and FALSE otherwise
176 Args :
177
178
179 =cut
180
181 sub has_tag{
182 my ($self,@args) = @_;
183
184 $self->throw_not_implemented();
185
186 }
187
188 =head2 get_tag_values
189
190 Title : get_tag_values
191 Usage : @values = $self->get_tag_values('some_tag')
192 Function:
193 Returns : An array comprising the values of the specified tag.
194 Args :
195
196
197 =cut
198
199 sub get_tag_values {
200 shift->throw_not_implemented();
201 }
202
203 =head2 get_all_tags
204
205 Title : get_all_tags
206 Usage : @tags = $feat->get_all_tags()
207 Function: gives all tags for this feature
208 Returns : an array of strings
209 Args : none
210
211
212 =cut
213
214 sub get_all_tags{
215 shift->throw_not_implemented();
216 }
217
218 =head2 attach_seq
219
220 Title : attach_seq
221 Usage : $sf->attach_seq($seq)
222 Function: Attaches a Bio::Seq object to this feature. This
223 Bio::Seq object is for the *entire* sequence: ie
224 from 1 to 10000
225
226 Note that it is not guaranteed that if you obtain a feature
227 from an object in bioperl, it will have a sequence
228 attached. Also, implementors of this interface can choose
229 to provide an empty implementation of this method. I.e.,
230 there is also no guarantee that if you do attach a
231 sequence, seq() or entire_seq() will not return undef.
232
233 The reason that this method is here on the interface is to
234 enable you to call it on every SeqFeatureI compliant
235 object, and that it will be implemented in a useful way and
236 set to a useful value for the great majority of use
237 cases. Implementors who choose to ignore the call are
238 encouraged to specifically state this in their
239 documentation.
240
241 Example :
242 Returns : TRUE on success
243 Args : a Bio::PrimarySeqI compliant object
244
245
246 =cut
247
248 sub attach_seq {
249 shift->throw_not_implemented();
250 }
251
252 =head2 seq
253
254 Title : seq
255 Usage : $tseq = $sf->seq()
256 Function: returns the truncated sequence (if there is a sequence attached)
257 for this feature
258 Example :
259 Returns : sub seq (a Bio::PrimarySeqI compliant object) on attached sequence
260 bounded by start & end, or undef if there is no sequence attached
261 Args : none
262
263
264 =cut
265
266 sub seq {
267 shift->throw_not_implemented();
268 }
269
270 =head2 entire_seq
271
272 Title : entire_seq
273 Usage : $whole_seq = $sf->entire_seq()
274 Function: gives the entire sequence that this seqfeature is attached to
275 Example :
276 Returns : a Bio::PrimarySeqI compliant object, or undef if there is no
277 sequence attached
278 Args : none
279
280
281 =cut
282
283 sub entire_seq {
284 shift->throw_not_implemented();
285 }
286
287
288 =head2 seq_id
289
290 Title : seq_id
291 Usage : $obj->seq_id($newval)
292 Function: There are many cases when you make a feature that you
293 do know the sequence name, but do not know its actual
294 sequence. This is an attribute such that you can store
295 the ID (e.g., display_id) of the sequence.
296
297 This attribute should *not* be used in GFF dumping, as
298 that should come from the collection in which the seq
299 feature was found.
300 Returns : value of seq_id
301 Args : newvalue (optional)
302
303
304 =cut
305
306 sub seq_id {
307 shift->throw_not_implemented();
308 }
309
310 =head2 gff_string
311
312 Title : gff_string
313 Usage : $str = $feat->gff_string;
314 $str = $feat->gff_string($gff_formatter);
315 Function: Provides the feature information in GFF format.
316
317 The implementation provided here returns GFF2 by default. If you
318 want a different version, supply an object implementing a method
319 gff_string() accepting a SeqFeatureI object as argument. E.g., to
320 obtain GFF1 format, do the following:
321
322 my $gffio = Bio::Tools::GFF->new(-gff_version => 1);
323 $gff1str = $feat->gff_string($gff1io);
324
325 Returns : A string
326 Args : Optionally, an object implementing gff_string().
327
328
329 =cut
330
331 sub gff_string{
332 my ($self,$formatter) = @_;
333
334 $formatter = $self->_static_gff_formatter unless $formatter;
335 return $formatter->gff_string($self);
336 }
337
338 my $static_gff_formatter = undef;
339
340 =head2 _static_gff_formatter
341
342 Title : _static_gff_formatter
343 Usage :
344 Function:
345 Example :
346 Returns :
347 Args :
348
349
350 =cut
351
352 sub _static_gff_formatter{
353 my ($self,@args) = @_;
354
355 if( !defined $static_gff_formatter ) {
356 $static_gff_formatter = Bio::Tools::GFF->new('-gff_version' => 2);
357 }
358 return $static_gff_formatter;
359 }
360
361 =head1 Bio::RangeI methods
362
363 List of interfaces inherited from Bio::RangeI (see L<Bio::RangeI>
364 for details).
365
366 =cut
367
368 =head2 start
369
370 Title : start
371 Usage : $start = $feat->start
372 Function: Returns the start coordinate of the feature
373 Returns : integer
374 Args : none
375
376
377 =head2 end
378
379 Title : end
380 Usage : $end = $feat->end
381 Function: Returns the end coordinate of the feature
382 Returns : integer
383 Args : none
384
385 =head2 strand
386
387 Title : strand
388 Usage : $strand = $feat->strand()
389 Function: Returns strand information, being 1,-1 or 0
390 Returns : -1,1 or 0
391 Args : none
392
393
394 =cut
395
396 =head1 Decorating methods
397
398 These methods have an implementation provided by Bio::SeqFeatureI,
399 but can be validly overwritten by subclasses
400
401 =head2 spliced_seq
402
403 Title : spliced_seq
404
405 Usage : $seq = $feature->spliced_seq()
406 $seq = $feature_with_remote_locations->spliced_seq($db_for_seqs)
407
408 Function: Provides a sequence of the feature which is the most
409 semantically "relevant" feature for this sequence. A
410 default implementation is provided which for simple cases
411 returns just the sequence, but for split cases, loops over
412 the split location to return the sequence. In the case of
413 split locations with remote locations, eg
414
415 join(AB000123:5567-5589,80..1144)
416
417 in the case when a database object is passed in, it will
418 attempt to retrieve the sequence from the database object,
419 and "Do the right thing", however if no database object is
420 provided, it will generate the correct number of N's (DNA)
421 or X's (protein, though this is unlikely).
422
423 This function is deliberately "magical" attempting to
424 second guess what a user wants as "the" sequence for this
425 feature
426
427 Implementing classes are free to override this method with
428 their own magic if they have a better idea what the user
429 wants
430
431 Args : [optional] A Bio::DB::RandomAccessI compliant object
432 Returns : A Bio::Seq
433
434 =cut
435
436 sub spliced_seq {
437 my $self = shift;
438 my $db = shift;
439
440 if( ! $self->location->isa("Bio::Location::SplitLocationI") ) {
441 return $self->seq(); # nice and easy!
442 }
443
444 # redundant test, but the above ISA is probably not ideal.
445 if( ! $self->location->isa("Bio::Location::SplitLocationI") ) {
446 $self->throw("not atomic, not split, yikes, in trouble!");
447 }
448
449 my $seqstr;
450 my $seqid = $self->entire_seq->display_id;
451 # This is to deal with reverse strand features
452 # so we are really sorting features 5' -> 3' on their strand
453 # i.e. rev strand features will be sorted largest to smallest
454 # as this how revcom CDSes seem to be annotated in genbank.
455 # Might need to eventually allow this to be programable?
456 # (can I mention how much fun this is NOT! --jason)
457
458 my ($mixed,$mixedloc,$fstrand) = (0);
459
460 if( defined $db &&
461 ref($db) && !$db->isa('Bio::DB::RandomAccessI') ) {
462 $self->warn("Must pass in a valid Bio::DB::RandomAccessI object for access to remote locations for spliced_seq");
463 $db = undef;
464 } elsif( defined $db &&
465 $HasInMemory && ! $db->isa('Bio::DB::InMemoryCache') ) {
466 $db = new Bio::DB::InMemoryCache(-seqdb => $db);
467 }
468
469 if( $self->isa('Bio::Das::SegmentI') &&
470 ! $self->absolute ) {
471 $self->warn("Calling spliced_seq with a Bio::Das::SegmentI ".
472 "which does have absolute set to 1 -- be warned ".
473 "you may not be getting things on the correct strand");
474 }
475
476 my @locs = map { $_->[0] }
477 # sort so that most negative is first basically to order
478 # the features on the opposite strand 5'->3' on their strand
479 # rather than they way most are input which is on the fwd strand
480
481 sort { $a->[1] <=> $b->[1] } # Yes Tim, Schwartzian transformation
482 map {
483 $fstrand = $_->strand unless defined $fstrand;
484 $mixed = 1 if defined $_->strand && $fstrand != $_->strand;
485 if( defined $_->seq_id ) {
486 $mixedloc = 1 if( $_->seq_id ne $seqid );
487 }
488 [ $_, $_->start* ($_->strand || 1)];
489 } $self->location->each_Location;
490
491 if ( $mixed ) {
492 $self->warn("Mixed strand locations, spliced seq using the input ".
493 "order rather than trying to sort");
494 @locs = $self->location->each_Location;
495 } elsif( $mixedloc ) {
496 # we'll use the prescribed location order
497 @locs = $self->location->each_Location;
498 }
499
500
501 foreach my $loc ( @locs ) {
502 if( ! $loc->isa("Bio::Location::Atomic") ) {
503 $self->throw("Can only deal with one level deep locations");
504 }
505 my $called_seq;
506 if( $fstrand != $loc->strand ) {
507 $self->warn("feature strand is different from location strand!");
508 }
509 # deal with remote sequences
510 if( defined $loc->seq_id &&
511 $loc->seq_id ne $seqid ) {
512 if( defined $db ) {
513 my $sid = $loc->seq_id;
514 $sid =~ s/\.\d+$//g;
515 eval {
516 $called_seq = $db->get_Seq_by_acc($sid);
517 };
518 if( $@ ) {
519 $self->warn("In attempting to join a remote location, sequence $sid was not in database. Will provide padding N's. Full exception \n\n$@");
520 $called_seq = undef;
521 }
522 } else {
523 $self->warn( "cannot get remote location for ".$loc->seq_id ." without a valid Bio::DB::RandomAccessI database handle (like Bio::DB::GenBank)");
524 $called_seq = undef;
525 }
526 if( !defined $called_seq ) {
527 $seqstr .= 'N' x $self->length;
528 next;
529 }
530 } else {
531 $called_seq = $self->entire_seq;
532 }
533
534 if( $self->isa('Bio::Das::SegmentI') ) {
535 my ($s,$e) = ($loc->start,$loc->end);
536 $seqstr .= $called_seq->subseq($s,$e)->seq();
537 } else {
538 # This is dumb subseq should work on locations...
539 if( $loc->strand == 1 ) {
540 $seqstr .= $called_seq->subseq($loc->start,$loc->end);
541 } else {
542 $seqstr .= $called_seq->trunc($loc->start,$loc->end)->revcom->seq();
543 }
544 }
545 }
546 my $out = Bio::Seq->new( -id => $self->entire_seq->display_id . "_spliced_feat",
547 -seq => $seqstr);
548
549 return $out;
550 }
551
552 =head1 RangeI methods
553
554 These methods are inherited from RangeI and can be used
555 directly from a SeqFeatureI interface. Remember that a
556 SeqFeature is-a RangeI, and so wherever you see RangeI you
557 can use a feature ($r in the below documentation).
558
559 =head2 overlaps
560
561 Title : overlaps
562 Usage : if($feat->overlaps($r)) { do stuff }
563 if($feat->overlaps(200)) { do stuff }
564 Function: tests if $feat overlaps $r
565 Args : a RangeI to test for overlap with, or a point
566 Returns : true if the Range overlaps with the feature, false otherwise
567
568
569 =head2 contains
570
571 Title : contains
572 Usage : if($feat->contains($r) { do stuff }
573 Function: tests whether $feat totally contains $r
574 Args : a RangeI to test for being contained
575 Returns : true if the argument is totaly contained within this range
576
577
578 =head2 equals
579
580 Title : equals
581 Usage : if($feat->equals($r))
582 Function: test whether $feat has the same start, end, strand as $r
583 Args : a RangeI to test for equality
584 Returns : true if they are describing the same range
585
586
587 =head1 Geometrical methods
588
589 These methods do things to the geometry of ranges, and return
590 triplets (start, stop, strand) from which new ranges could be built.
591
592 =head2 intersection
593
594 Title : intersection
595 Usage : ($start, $stop, $strand) = $feat->intersection($r)
596 Function: gives the range that is contained by both ranges
597 Args : a RangeI to compare this one to
598 Returns : nothing if they do not overlap, or the range that they do overlap
599
600 =head2 union
601
602 Title : union
603 Usage : ($start, $stop, $strand) = $feat->union($r);
604 : ($start, $stop, $strand) = Bio::RangeI->union(@ranges);
605 Function: finds the minimal range that contains all of the ranges
606 Args : a range or list of ranges to find the union of
607 Returns : the range containing all of the ranges
608
609 =cut
610
611 =head2 location
612
613 Title : location
614 Usage : my $location = $seqfeature->location()
615 Function: returns a location object suitable for identifying location
616 of feature on sequence or parent feature
617 Returns : Bio::LocationI object
618 Args : none
619
620
621 =cut
622
623 sub location {
624 my ($self) = @_;
625
626 $self->throw_not_implemented();
627 }
628
629
630 1;