comparison variant_effect_predictor/Bio/DB/GFF/Segment.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:21066c0abaf5
1 =head1 NAME
2
3 Bio::DB::GFF::Segment -- Simple DNA segment object
4
5 =head1 SYNOPSIS
6
7 See L<Bio::DB::GFF>.
8
9 =head1 DESCRIPTION
10
11 Bio::DB::GFF::Segment provides the basic representation of a range of
12 DNA contained in a GFF database. It is the base class from which the
13 Bio::DB::GFF::RelSegment and Bio::DB::GFF::Feature classes are
14 derived.
15
16 Generally, you will not create or manipulate Bio::DB::GFF::Segment
17 objects directly, but use those that are returned by the Bio::DB::GFF
18 module.
19
20 =cut
21
22 package Bio::DB::GFF::Segment;
23
24 use strict;
25 use Bio::Root::Root;
26 use Bio::Annotation::Collection;
27 use Bio::RangeI;
28 use Bio::Das::SegmentI;
29 use Bio::SeqI;
30
31 use vars qw(@ISA);
32 @ISA = qw(Bio::Root::Root Bio::RangeI Bio::SeqI Bio::Das::SegmentI);
33
34 use overload
35 '""' => 'asString',
36 eq => 'equals',
37 fallback => 1;
38
39 =head1 API
40
41 The remainder of this document describes the API for
42 Bio::DB::GFF::Segment.
43
44 =cut
45
46 =head2 new
47
48 Title : new
49 Usage : $s = Bio::DB::GFF::Segment->new(@args)
50 Function: create a new segment
51 Returns : a new Bio::DB::GFF::Segment object
52 Args : see below
53 Status : Public
54
55 This method creates a new Bio::DB::GFF::Segment object. Generally
56 this is called automatically by the Bio::DB::GFF module and
57 derivatives.
58
59 There are five positional arguments:
60
61 $factory a Bio::DB::GFF::Adaptor to use for database access
62 $sourceseq ID of the source sequence
63 $sourceclass class of the source sequence
64 $start start of the desired segment relative to source sequence
65 $stop stop of the desired segment relative to source sequence
66
67 =cut
68
69 sub new {
70 my $class = shift;
71 my ($factory,$segclass,$segname,$start,$stop) = @_;
72 $segclass = $segname->class if ref($segname) && $segname->can('class');
73 $segclass ||= 'Sequence';
74
75 $factory or $class->throw("->new(): provide a factory argument");
76 $class = ref $class if ref $class;
77 return bless { factory => $factory,
78 sourceseq => $segname,
79 class => $segclass,
80 start => $start,
81 stop => $stop,
82 strand => 0,
83 },$class;
84 }
85
86 # read-only accessors
87
88 =head2 factory
89
90 Title : factory
91 Usage : $s->factory
92 Function: get the factory object
93 Returns : a Bio::DB::GFF::Adaptor
94 Args : none
95 Status : Public
96
97 This is a read-only accessor for the Bio::DB::GFF::Adaptor object used
98 to create the segment.
99
100 =cut
101
102 sub factory { shift->{factory} }
103
104 # start, stop, length
105
106 =head2 start
107
108 Title : start
109 Usage : $s->start
110 Function: start of segment
111 Returns : integer
112 Args : none
113 Status : Public
114
115 This is a read-only accessor for the start of the segment.
116
117 =cut
118
119 sub start { shift->{start} }
120
121 =head2 end
122
123 Title : end
124 Usage : $s->end
125 Function: end of segment
126 Returns : integer
127 Args : none
128 Status : Public
129
130 This is a read-only accessor for the end of the segment.
131
132 =cut
133
134 sub end { shift->{stop} }
135
136 =head2 stop
137
138 Title : stop
139 Usage : $s->stop
140 Function: stop of segment
141 Returns : integer
142 Args : none
143 Status : Public
144
145 This is an alias for end(), provided for AcePerl compatibility.
146
147 =cut
148
149 *stop = \&end;
150
151 =head2 length
152
153 Title : length
154 Usage : $s->length
155 Function: length of segment
156 Returns : integer
157 Args : none
158 Status : Public
159
160 Returns the length of the segment. Always a positive number.
161
162 =cut
163
164 sub length { abs($_[0]->{start} - $_[0]->{stop})+1 }
165
166
167 =head2 strand
168
169 Title : strand
170 Usage : $s->strand
171 Function: strand of segment
172 Returns : +1,0,-1
173 Args : none
174 Status : Public
175
176 Returns the strand on which the segment resides, either +1, 0 or -1.
177
178 =cut
179
180 sub strand {
181 my $self = shift;
182 0;
183 }
184
185 =head2 low
186
187 Title : low
188 Usage : $s->low
189 Function: return lower coordinate
190 Returns : lower coordinate
191 Args : none
192 Status : Public
193
194 Returns the lower coordinate, either start or end.
195
196 =cut
197
198 sub low {
199 my $self = shift;
200 my ($start,$stop) = ($self->start,$self->stop);
201 return $start < $stop ? $start : $stop;
202 }
203 *abs_low = \&low;
204
205 =head2 high
206
207 Title : high
208 Usage : $s->high
209 Function: return higher coordinate
210 Returns : higher coordinate
211 Args : none
212 Status : Public
213
214 Returns the higher coordinate, either start or end.
215
216 =cut
217
218 sub high {
219 my $self = shift;
220 my ($start,$stop) = ($self->start,$self->stop);
221 return $start > $stop ? $start : $stop;
222 }
223 *abs_high = \&high;
224
225 =head2 sourceseq
226
227 Title : sourceseq
228 Usage : $s->sourceseq
229 Function: get the segment source
230 Returns : a string
231 Args : none
232 Status : Public
233
234 Returns the name of the source sequence for this segment.
235
236 =cut
237
238 sub sourceseq { shift->{sourceseq} }
239
240 =head2 class
241
242 Title : class
243 Usage : $s->class([$newclass])
244 Function: get the source sequence class
245 Returns : a string
246 Args : new class (optional)
247 Status : Public
248
249 Gets or sets the class for the source sequence for this segment.
250
251 =cut
252
253 sub class {
254 my $self = shift;
255 my $d = $self->{class};
256 $self->{class} = shift if @_;
257 $d;
258 }
259
260 =head2 subseq
261
262 Title : subseq
263 Usage : $s->subseq($start,$stop)
264 Function: generate a subsequence
265 Returns : a Bio::DB::GFF::Segment object
266 Args : start and end of subsequence
267 Status : Public
268
269 This method generates a new segment from the start and end positions
270 given in the arguments. If stop E<lt> start, then the strand is reversed.
271
272 =cut
273
274 sub subseq {
275 my $self = shift;
276 my ($newstart,$newstop) = @_;
277 my ($refseq,$start,$stop,$class) = ($self->{sourceseq},
278 $self->{start},$self->{stop},
279 $self->class);
280
281 # We deliberately force subseq to return objects of type RelSegment
282 # Otherwise, when we get a subsequence from a Feature object,
283 # its method and source go along for the ride, which is incorrect.
284 my $new = $self->new_from_segment($self);
285 if ($start <= $stop) {
286 @{$new}{qw(start stop)} = ($start + $newstart - 1, $start + $newstop - 1);
287 } else {
288 @{$new}{qw(start stop)} = ($start - ($newstart - 1), $start - ($newstop - 1)),
289
290 }
291
292 $new;
293 }
294
295 =head2 seq
296
297 Title : seq
298 Usage : $s->seq
299 Function: get the sequence string for this segment
300 Returns : a string
301 Args : none
302 Status : Public
303
304 Returns the sequence for this segment as a simple string. (-) strand
305 segments are automatically reverse complemented
306
307 This method is also called dna() and protein() for backward
308 compatibility with AceDB.
309
310 =cut
311
312 sub seq {
313 my $self = shift;
314 my ($ref,$class,$start,$stop,$strand)
315 = @{$self}{qw(sourceseq class start stop strand)};
316 # ($start,$stop) = ($stop,$start) if $strand eq '-';
317 $self->factory->dna($ref,$start,$stop,$class);
318 }
319
320 *protein = *dna = \&seq;
321
322
323 =head2 primary_seq
324
325 Title : primary_seq
326 Usage : $s->primary_seq
327 Function: returns a Bio::PrimarySeqI compatible object
328 Returns : a Bio::PrimarySeqI object
329 Args : none
330 Status : Public
331
332 This is for compatibility with BioPerl's separation of SeqI
333 from PrimarySeqI. It just returns itself.
334
335 =cut
336
337 #'
338
339 sub primary_seq { shift }
340
341 =head2 type
342
343 Title : type
344 Usage : $s->type
345 Function: return the string "feature"
346 Returns : the string "feature"
347 Args : none
348 Status : Public
349
350 This is for future sequence ontology-compatibility and
351 represents the default type of a feature on the genome
352
353 =cut
354
355 sub type { "feature" }
356
357 =head2 equals
358
359 Title : equals
360 Usage : $s->equals($d)
361 Function: segment equality
362 Returns : true, if two segments are equal
363 Args : another segment
364 Status : Public
365
366 Returns true if the two segments have the same source sequence, start and stop.
367
368 =cut
369
370 sub equals {
371 my $self = shift;
372 my $peer = shift;
373 return unless defined $peer;
374 return $self->asString eq $peer unless ref($peer) && $peer->isa('Bio::DB::GFF::Segment');
375 return $self->{start} eq $peer->{start}
376 && $self->{stop} eq $peer->{stop}
377 && $self->{sourceseq} eq $peer->{sourceseq};
378 }
379
380 =head2 asString
381
382 Title : asString
383 Usage : $s->asString
384 Function: human-readable string for segment
385 Returns : a string
386 Args : none
387 Status : Public
388
389 Returns a human-readable string representing this sequence. Format
390 is:
391
392 sourceseq/start,stop
393
394 =cut
395
396 sub asString {
397 my $self = shift;
398 my $label = $self->refseq;
399 my $start = $self->start;
400 my $stop = $self->stop;
401 return "$label:$start,$stop";
402 }
403
404 =head2 clone
405
406 Title : clone
407 Usage : $copy = $s->clone
408 Function: make a copy of this segment
409 Returns : a Bio::DB::GFF::Segment object
410 Args : none
411 Status : Public
412
413 This method creates a copy of the segment and returns it.
414
415 =cut
416
417 # deep copy of the thing
418 sub clone {
419 my $self = shift;
420 my %h = %$self;
421 return bless \%h,ref($self);
422 }
423
424 =head2 error
425
426 Title : error
427 Usage : $error = $s->error([$new_error])
428 Function: get or set the last error
429 Returns : a string
430 Args : an error message (optional)
431 Status : Public
432
433 In case of a fault, this method can be used to obtain the last error
434 message. Internally it is called to set the error message.
435
436 =cut
437
438 sub error {
439 my $self = shift;
440 my $g = $self->{error};
441 $self->{error} = shift if @_;
442 $g;
443 }
444
445 =head1 Relative Addressing Methods
446
447 The following methods are provided for compatibility with
448 Bio::DB::GFF::RelSegment, which provides relative addressing
449 functions.
450
451 =head2 abs_start
452
453 Title : abs_start
454 Usage : $s->abs_start
455 Function: the absolute start of the segment
456 Returns : an integer
457 Args : none
458 Status : Public
459
460 This is an alias to start(), and provided for API compatibility with
461 Bio::DB::GFF::RelSegment.
462
463 =cut
464
465 *abs_start = \&start;
466
467 =head2 abs_end
468
469 Title : abs_end
470 Usage : $s->abs_end
471 Function: the absolute stop of the segment
472 Returns : an integer
473 Args : none
474 Status : Public
475
476 This is an alias to stop(), and provided for API compatibility with
477 Bio::DB::GFF::RelSegment.
478
479 =cut
480
481 *abs_stop = \&stop;
482 *abs_end = \&stop;
483
484 =head2 abs_strand
485
486 Title : abs_strand
487 Usage : $s->abs_strand
488 Function: the absolute strand of the segment
489 Returns : +1,0,-1
490 Args : none
491 Status : Public
492
493 This is an alias to strand(), and provided for API compatibility with
494 Bio::DB::GFF::RelSegment.
495
496 =cut
497
498 sub abs_strand {
499 my $self = shift;
500 return $self->abs_end <=> $self->abs_start;
501 }
502
503 =head2 abs_ref
504
505 Title : abs_ref
506 Usage : $s->abs_ref
507 Function: the reference sequence for this segment
508 Returns : a string
509 Args : none
510 Status : Public
511
512 This is an alias to sourceseq(), and is here to provide API
513 compatibility with Bio::DB::GFF::RelSegment.
514
515 =cut
516
517 *abs_ref = \&sourceseq;
518
519 =head2 refseq
520
521 Title : refseq
522 Usage : $s->refseq
523 Function: get or set the reference sequence
524 Returns : a string
525 Args : none
526 Status : Public
527
528 Examine or change the reference sequence. This is an alias to
529 sourceseq(), provided here for API compatibility with
530 Bio::DB::GFF::RelSegment.
531
532 =cut
533
534 *refseq = \&sourceseq;
535
536 =head2 ref
537
538 Title : ref
539 Usage : $s->refseq
540 Function: get or set the reference sequence
541 Returns : a string
542 Args : none
543 Status : Public
544
545 An alias for refseq()
546
547 =cut
548
549 sub ref { shift->refseq(@_) }
550
551 =head2 seq_id
552
553 Title : seq_id
554 Usage : $ref = $s->seq_id
555 Function: get the reference sequence in a LocationI-compatible way
556 Returns : a string
557 Args : none
558 Status : Public
559
560 An alias for refseq() but only allows reading.
561
562 =cut
563
564 sub seq_id { shift->refseq }
565
566 =head2 truncated
567
568 Title : truncated
569 Usage : $truncated = $s->truncated
570 Function: Flag indicating that the segment was truncated during creation
571 Returns : A boolean flag
572 Args : none
573 Status : Public
574
575 This indicates that the sequence was truncated during creation. The
576 returned flag is undef if no truncation occured. If truncation did
577 occur, the flag is actually an array ref in which the first element is
578 true if truncation occurred on the left, and the second element
579 occurred if truncation occurred on the right.
580
581 =cut
582
583 sub truncated {
584 my $self = shift;
585 my $hash = $self->{truncated} or return;
586 CORE::ref($hash) eq 'HASH' or return [1,1]; # paranoia -- not that this would ever happen ;-)
587 return [$hash->{start},$hash->{stop}];
588 }
589
590 =head2 Bio::RangeI Methods
591
592 The following Bio::RangeI methods are supported:
593
594 overlaps(), contains(), equals(),intersection(),union(),overlap_extent()
595
596 =cut
597
598 sub overlaps {
599 my $self = shift;
600 my($other,$so) = @_;
601 if ($other->isa('Bio::DB::GFF::RelSegment')) {
602 return if $self->abs_ref ne $other->abs_ref;
603 }
604 $self->SUPER::overlaps(@_);
605 }
606
607 sub contains {
608 my $self = shift;
609 my($other,$so) = @_;
610 if ($other->isa('Bio::DB::GFF::RelSegment')) {
611 return if $self->abs_ref ne $other->abs_ref;
612 }
613 $self->SUPER::contains(@_);
614 }
615 #sub equals {
616 # my $self = shift;
617 # my($other,$so) = @_;
618 # if ($other->isa('Bio::DB::GFF::RelSegment')) {
619 # return if $self->abs_ref ne $other->abs_ref;
620 # }
621 # $self->SUPER::equals(@_);
622 #}
623 sub intersection {
624 my $self = shift;
625 my($other,$so) = @_;
626 if ($other->isa('Bio::DB::GFF::RelSegment')) {
627 return if $self->abs_ref ne $other->abs_ref;
628 }
629 $self->SUPER::intersection(@_);
630 }
631 sub union {
632 my $self = shift;
633 my($other) = @_;
634 if ($other->isa('Bio::DB::GFF::RelSegment')) {
635 return if $self->abs_ref ne $other->abs_ref;
636 }
637 $self->SUPER::union(@_);
638 }
639
640 sub overlap_extent {
641 my $self = shift;
642 my($other) = @_;
643 if ($other->isa('Bio::DB::GFF::RelSegment')) {
644 return if $self->abs_ref ne $other->abs_ref;
645 }
646 $self->SUPER::overlap_extent(@_);
647 }
648
649
650 =head2 Bio::SeqI implementation
651
652 =cut
653
654 =head2 primary_id
655
656 Title : primary_id
657 Usage : $unique_implementation_key = $obj->primary_id;
658 Function: Returns the unique id for this object in this
659 implementation. This allows implementations to manage their
660 own object ids in a way the implementaiton can control
661 clients can expect one id to map to one object.
662
663 For sequences with no accession number, this method should
664 return a stringified memory location.
665
666 Returns : A string
667 Args : None
668 Status : Virtual
669
670
671 =cut
672
673 sub primary_id {
674 my ($obj,$value) = @_;
675
676 if( defined $value) {
677 $obj->{'primary_id'} = $value;
678 }
679 if( ! exists $obj->{'primary_id'} ) {
680 return "$obj";
681 }
682 return $obj->{'primary_id'};
683 }
684
685
686 =head2 display_name
687
688 Title : display_name
689 Usage : $id = $obj->display_name or $obj->display_name($newid);
690 Function: Gets or sets the display id, also known as the common name of
691 the Seq object.
692
693 The semantics of this is that it is the most likely string
694 to be used as an identifier of the sequence, and likely to
695 have "human" readability. The id is equivalent to the LOCUS
696 field of the GenBank/EMBL databanks and the ID field of the
697 Swissprot/sptrembl database. In fasta format, the >(\S+) is
698 presumed to be the id, though some people overload the id
699 to embed other information. Bioperl does not use any
700 embedded information in the ID field, and people are
701 encouraged to use other mechanisms (accession field for
702 example, or extending the sequence object) to solve this.
703
704 Notice that $seq->id() maps to this function, mainly for
705 legacy/convenience issues.
706 Returns : A string
707 Args : None or a new id
708
709 Note, this used to be called display_id(), and this name is preserved for
710 backward compatibility. The default is to return the seq_id().
711
712 =cut
713
714 sub display_name { shift->seq_id }
715 *display_id = \&display_name;
716
717 =head2 accession_number
718
719 Title : accession_number
720 Usage : $unique_biological_key = $obj->accession_number;
721 Function: Returns the unique biological id for a sequence, commonly
722 called the accession_number. For sequences from established
723 databases, the implementors should try to use the correct
724 accession number. Notice that primary_id() provides the
725 unique id for the implemetation, allowing multiple objects
726 to have the same accession number in a particular implementation.
727
728 For sequences with no accession number, this method should return
729 "unknown".
730 Returns : A string
731 Args : None
732
733
734 =cut
735
736 sub accession_number {
737 return 'unknown';
738 }
739
740 =head2 alphabet
741
742 Title : alphabet
743 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
744 Function: Returns the type of sequence being one of
745 'dna', 'rna' or 'protein'. This is case sensitive.
746
747 This is not called <type> because this would cause
748 upgrade problems from the 0.5 and earlier Seq objects.
749
750 Returns : a string either 'dna','rna','protein'. NB - the object must
751 make a call of the type - if there is no type specified it
752 has to guess.
753 Args : none
754 Status : Virtual
755
756
757 =cut
758
759 sub alphabet{
760 return 'dna'; # no way this will be anything other than dna!
761 }
762
763 =head2 desc
764
765 Title : desc
766 Usage : $seqobj->desc($string) or $seqobj->desc()
767 Function: Sets or gets the description of the sequence
768 Example :
769 Returns : The description
770 Args : The description or none
771
772
773 =cut
774
775 sub desc { shift->asString }
776
777 =head2 species
778
779 Title : species
780 Usage : $species = $seq->species() or $seq->species($species)
781 Function: Gets or sets the species
782 Example :
783 Returns : Bio::Species object
784 Args : None or Bio::Species object
785
786 See L<Bio::Species> for more information
787
788 =cut
789
790 sub species {
791 my ($self, $species) = @_;
792 if ($species) {
793 $self->{'species'} = $species;
794 } else {
795 return $self->{'species'};
796 }
797 }
798
799 =head2 annotation
800
801 Title : annotation
802 Usage : $ann = $seq->annotation or $seq->annotation($annotation)
803 Function: Gets or sets the annotation
804 Example :
805 Returns : Bio::Annotation object
806 Args : None or Bio::Annotation object
807
808 See L<Bio::Annotation> for more information
809
810 =cut
811
812 sub annotation {
813 my ($obj,$value) = @_;
814 if( defined $value || ! defined $obj->{'annotation'} ) {
815 $value = new Bio::Annotation::Collection() unless defined $value;
816 $obj->{'annotation'} = $value;
817 }
818 return $obj->{'annotation'};
819
820 }
821
822 =head2 is_circular
823
824 Title : is_circular
825 Usage : if( $obj->is_circular) { /Do Something/ }
826 Function: Returns true if the molecule is circular
827 Returns : Boolean value
828 Args : none
829
830 =cut
831
832 sub is_circular{
833 return 0;
834 }
835
836
837 1;
838 __END__
839
840 =head1 BUGS
841
842 Report them please.
843
844 =head1 SEE ALSO
845
846 L<bioperl>
847
848 =head1 AUTHOR
849
850 Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
851
852 Copyright (c) 2001 Cold Spring Harbor Laboratory.
853
854 This library is free software; you can redistribute it and/or modify
855 it under the same terms as Perl itself.
856
857 =head1 CONTRIBUTORS
858
859 Jason Stajich E<lt>jason@bioperl.orgE<gt>.
860
861 =cut
862