diff variant_effect_predictor/Bio/DB/GFF/Segment.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/DB/GFF/Segment.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,862 @@
+=head1 NAME
+
+Bio::DB::GFF::Segment -- Simple DNA segment object
+
+=head1 SYNOPSIS
+
+See L<Bio::DB::GFF>.
+
+=head1 DESCRIPTION
+
+Bio::DB::GFF::Segment provides the basic representation of a range of
+DNA contained in a GFF database.  It is the base class from which the
+Bio::DB::GFF::RelSegment and Bio::DB::GFF::Feature classes are
+derived.
+
+Generally, you will not create or manipulate Bio::DB::GFF::Segment
+objects directly, but use those that are returned by the Bio::DB::GFF
+module.
+
+=cut
+
+package Bio::DB::GFF::Segment;
+
+use strict;
+use Bio::Root::Root;
+use Bio::Annotation::Collection;
+use Bio::RangeI;
+use Bio::Das::SegmentI;
+use Bio::SeqI;
+
+use vars qw(@ISA);
+@ISA = qw(Bio::Root::Root Bio::RangeI Bio::SeqI Bio::Das::SegmentI);
+
+use overload 
+  '""'     => 'asString',
+  eq       => 'equals',
+  fallback => 1;
+
+=head1 API
+
+The remainder of this document describes the API for
+Bio::DB::GFF::Segment.
+
+=cut
+
+=head2 new
+
+ Title   : new
+ Usage   : $s = Bio::DB::GFF::Segment->new(@args)
+ Function: create a new segment
+ Returns : a new Bio::DB::GFF::Segment object
+ Args    : see below
+ Status  : Public
+
+This method creates a new Bio::DB::GFF::Segment object.  Generally
+this is called automatically by the Bio::DB::GFF module and
+derivatives.
+
+There are five positional arguments:
+
+ $factory      a Bio::DB::GFF::Adaptor to use for database access
+ $sourceseq    ID of the source sequence
+ $sourceclass  class of the source sequence
+ $start        start of the desired segment relative to source sequence
+ $stop         stop of the desired segment relative to source sequence
+
+=cut
+
+sub new {
+  my $class = shift;
+  my ($factory,$segclass,$segname,$start,$stop) = @_;
+  $segclass = $segname->class if ref($segname) && $segname->can('class');
+  $segclass ||= 'Sequence';
+
+  $factory or $class->throw("->new(): provide a factory argument");
+  $class = ref $class if ref $class;
+  return bless { factory   => $factory,
+		 sourceseq => $segname,
+		 class     => $segclass,
+		 start     => $start,
+		 stop      => $stop,
+		 strand    => 0,
+	       },$class;
+}
+
+# read-only accessors
+
+=head2 factory
+
+ Title   : factory
+ Usage   : $s->factory
+ Function: get the factory object
+ Returns : a Bio::DB::GFF::Adaptor
+ Args    : none
+ Status  : Public
+
+This is a read-only accessor for the Bio::DB::GFF::Adaptor object used 
+to create the segment.
+
+=cut
+
+sub factory { shift->{factory} }
+
+# start, stop, length
+
+=head2 start
+
+ Title   : start
+ Usage   : $s->start
+ Function: start of segment
+ Returns : integer
+ Args    : none
+ Status  : Public
+
+This is a read-only accessor for the start of the segment.
+
+=cut
+
+sub start  { shift->{start} }
+
+=head2 end
+
+ Title   : end
+ Usage   : $s->end
+ Function: end of segment
+ Returns : integer
+ Args    : none
+ Status  : Public
+
+This is a read-only accessor for the end of the segment.
+
+=cut
+
+sub end   { shift->{stop}  }
+
+=head2 stop
+
+ Title   : stop
+ Usage   : $s->stop
+ Function: stop of segment
+ Returns : integer
+ Args    : none
+ Status  : Public
+
+This is an alias for end(), provided for AcePerl compatibility.
+
+=cut
+
+*stop = \&end;
+
+=head2 length
+
+ Title   : length
+ Usage   : $s->length
+ Function: length of segment
+ Returns : integer
+ Args    : none
+ Status  : Public
+
+Returns the length of the segment.  Always a positive number.
+
+=cut
+
+sub length { abs($_[0]->{start} - $_[0]->{stop})+1 }
+
+
+=head2 strand
+
+ Title   : strand
+ Usage   : $s->strand
+ Function: strand of segment
+ Returns : +1,0,-1
+ Args    : none
+ Status  : Public
+
+Returns the strand on which the segment resides, either +1, 0 or -1.
+
+=cut
+
+sub strand {
+  my $self = shift;
+  0;
+}
+
+=head2 low
+
+ Title   : low
+ Usage   : $s->low
+ Function: return lower coordinate
+ Returns : lower coordinate
+ Args    : none
+ Status  : Public
+
+Returns the lower coordinate, either start or end.
+
+=cut
+
+sub low {
+  my $self = shift;
+  my ($start,$stop) = ($self->start,$self->stop);
+  return $start < $stop ? $start : $stop;
+}
+*abs_low = \&low;
+
+=head2 high
+
+ Title   : high
+ Usage   : $s->high
+ Function: return higher coordinate
+ Returns : higher coordinate
+ Args    : none
+ Status  : Public
+
+Returns the higher coordinate, either start or end.
+
+=cut
+
+sub high {
+  my $self = shift;
+  my ($start,$stop) = ($self->start,$self->stop);
+  return $start > $stop ? $start : $stop;
+}
+*abs_high = \&high;
+
+=head2 sourceseq
+
+ Title   : sourceseq
+ Usage   : $s->sourceseq
+ Function: get the segment source
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+Returns the name of the source sequence for this segment.
+
+=cut
+
+sub sourceseq { shift->{sourceseq} }
+
+=head2 class
+
+ Title   : class
+ Usage   : $s->class([$newclass])
+ Function: get the source sequence class
+ Returns : a string
+ Args    : new class (optional)
+ Status  : Public
+
+Gets or sets the class for the source sequence for this segment.
+
+=cut
+
+sub class     { 
+  my $self = shift;
+  my $d = $self->{class};
+  $self->{class} = shift if @_;
+  $d;
+}
+
+=head2 subseq
+
+ Title   : subseq
+ Usage   : $s->subseq($start,$stop)
+ Function: generate a subsequence
+ Returns : a Bio::DB::GFF::Segment object
+ Args    : start and end of subsequence
+ Status  : Public
+
+This method generates a new segment from the start and end positions
+given in the arguments.  If stop E<lt> start, then the strand is reversed.
+
+=cut
+
+sub subseq {
+  my $self = shift;
+  my ($newstart,$newstop) = @_;
+  my ($refseq,$start,$stop,$class) = ($self->{sourceseq},
+				      $self->{start},$self->{stop},
+				      $self->class);
+
+  # We deliberately force subseq to return objects of type RelSegment
+  # Otherwise, when we get a subsequence from a Feature object,
+  # its method and source go along for the ride, which is incorrect.
+  my $new = $self->new_from_segment($self);
+  if ($start <= $stop) {
+    @{$new}{qw(start stop)} = ($start + $newstart - 1, $start + $newstop  - 1);
+  } else {
+    @{$new}{qw(start stop)} = ($start - ($newstart - 1), $start - ($newstop  - 1)),
+
+  }
+
+  $new;
+}
+
+=head2 seq
+
+ Title   : seq
+ Usage   : $s->seq
+ Function: get the sequence string for this segment
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+Returns the sequence for this segment as a simple string.  (-) strand
+segments are automatically reverse complemented
+
+This method is also called dna() and protein() for backward
+compatibility with AceDB.
+
+=cut
+
+sub seq {
+  my $self = shift;
+  my ($ref,$class,$start,$stop,$strand) 
+    = @{$self}{qw(sourceseq class start stop strand)};
+#  ($start,$stop) = ($stop,$start) if $strand eq '-';
+  $self->factory->dna($ref,$start,$stop,$class);
+}
+
+*protein = *dna = \&seq;
+
+
+=head2 primary_seq
+
+ Title   : primary_seq
+ Usage   : $s->primary_seq
+ Function: returns a Bio::PrimarySeqI compatible object
+ Returns : a Bio::PrimarySeqI object
+ Args    : none
+ Status  : Public
+
+This is for compatibility with BioPerl's separation of SeqI
+from PrimarySeqI.  It just returns itself.
+
+=cut
+
+#'
+
+sub primary_seq { shift }
+
+=head2 type
+
+ Title   : type
+ Usage   : $s->type
+ Function: return the string "feature"
+ Returns : the string "feature"
+ Args    : none
+ Status  : Public
+
+This is for future sequence ontology-compatibility and
+represents the default type of a feature on the genome
+
+=cut
+
+sub type { "feature" }
+
+=head2 equals
+
+ Title   : equals
+ Usage   : $s->equals($d)
+ Function: segment equality
+ Returns : true, if two segments are equal
+ Args    : another segment
+ Status  : Public
+
+Returns true if the two segments have the same source sequence, start and stop.
+
+=cut
+
+sub equals {
+  my $self = shift;
+  my $peer = shift;
+  return unless defined $peer;
+  return $self->asString eq $peer unless ref($peer) && $peer->isa('Bio::DB::GFF::Segment');
+  return $self->{start} eq $peer->{start}
+         && $self->{stop}  eq $peer->{stop}
+         && $self->{sourceseq} eq $peer->{sourceseq};
+}
+
+=head2 asString
+
+ Title   : asString
+ Usage   : $s->asString
+ Function: human-readable string for segment
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+Returns a human-readable string representing this sequence.  Format
+is:
+
+   sourceseq/start,stop
+
+=cut
+
+sub asString {
+  my $self = shift;
+  my $label = $self->refseq;
+  my $start = $self->start;
+  my $stop  = $self->stop;
+  return "$label:$start,$stop";
+}
+
+=head2 clone
+
+ Title   : clone
+ Usage   : $copy = $s->clone
+ Function: make a copy of this segment
+ Returns : a Bio::DB::GFF::Segment object
+ Args    : none
+ Status  : Public
+
+This method creates a copy of the segment and returns it.
+
+=cut
+
+# deep copy of the thing
+sub clone {
+  my $self = shift;
+  my %h = %$self;
+  return bless \%h,ref($self);
+}
+
+=head2 error
+
+ Title   : error
+ Usage   : $error = $s->error([$new_error])
+ Function: get or set the last error
+ Returns : a string
+ Args    : an error message (optional)
+ Status  : Public
+
+In case of a fault, this method can be used to obtain the last error
+message.  Internally it is called to set the error message.
+
+=cut
+
+sub error {
+  my $self = shift;
+  my $g = $self->{error};
+  $self->{error} = shift if @_;
+  $g;
+}
+
+=head1 Relative Addressing Methods
+
+The following methods are provided for compatibility with
+Bio::DB::GFF::RelSegment, which provides relative addressing
+functions.
+
+=head2 abs_start
+
+ Title   : abs_start
+ Usage   : $s->abs_start
+ Function: the absolute start of the segment
+ Returns : an integer
+ Args    : none
+ Status  : Public
+
+This is an alias to start(), and provided for API compatibility with
+Bio::DB::GFF::RelSegment.
+
+=cut
+
+*abs_start  = \&start;
+
+=head2 abs_end
+
+ Title   : abs_end
+ Usage   : $s->abs_end
+ Function: the absolute stop of the segment
+ Returns : an integer
+ Args    : none
+ Status  : Public
+
+This is an alias to stop(), and provided for API compatibility with
+Bio::DB::GFF::RelSegment.
+
+=cut
+
+*abs_stop   = \&stop;
+*abs_end    = \&stop;
+
+=head2 abs_strand
+
+ Title   : abs_strand
+ Usage   : $s->abs_strand
+ Function: the absolute strand of the segment
+ Returns : +1,0,-1
+ Args    : none
+ Status  : Public
+
+This is an alias to strand(), and provided for API compatibility with
+Bio::DB::GFF::RelSegment.
+
+=cut
+
+sub abs_strand {
+  my $self = shift;
+  return $self->abs_end <=> $self->abs_start;
+}
+
+=head2 abs_ref
+
+ Title   : abs_ref
+ Usage   : $s->abs_ref
+ Function: the reference sequence for this segment
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+This is an alias to sourceseq(), and is here to provide API
+compatibility with Bio::DB::GFF::RelSegment.
+
+=cut
+
+*abs_ref    = \&sourceseq;
+
+=head2 refseq
+
+ Title   : refseq
+ Usage   : $s->refseq
+ Function: get or set the reference sequence
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+Examine or change the reference sequence. This is an alias to
+sourceseq(), provided here for API compatibility with
+Bio::DB::GFF::RelSegment.
+
+=cut
+
+*refseq     = \&sourceseq;
+
+=head2 ref
+
+ Title   : ref
+ Usage   : $s->refseq
+ Function: get or set the reference sequence
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+An alias for refseq()
+
+=cut
+
+sub ref { shift->refseq(@_) }
+
+=head2 seq_id
+
+ Title   : seq_id
+ Usage   : $ref = $s->seq_id
+ Function: get the reference sequence in a LocationI-compatible way
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+An alias for refseq() but only allows reading.
+
+=cut
+
+sub seq_id { shift->refseq }
+
+=head2 truncated
+
+ Title   : truncated
+ Usage   : $truncated = $s->truncated
+ Function: Flag indicating that the segment was truncated during creation
+ Returns : A boolean flag
+ Args    : none
+ Status  : Public
+
+This indicates that the sequence was truncated during creation.  The
+returned flag is undef if no truncation occured.  If truncation did
+occur, the flag is actually an array ref in which the first element is
+true if truncation occurred on the left, and the second element
+occurred if truncation occurred on the right.
+
+=cut
+
+sub truncated {
+  my $self = shift;
+  my $hash = $self->{truncated} or return;
+  CORE::ref($hash) eq 'HASH' or return [1,1];  # paranoia -- not that this would ever happen ;-)
+  return [$hash->{start},$hash->{stop}];
+}
+
+=head2 Bio::RangeI Methods
+
+The following Bio::RangeI methods are supported:
+
+overlaps(), contains(), equals(),intersection(),union(),overlap_extent()
+
+=cut
+
+sub overlaps {
+  my $self  = shift;
+  my($other,$so) = @_;
+  if ($other->isa('Bio::DB::GFF::RelSegment')) {
+    return if $self->abs_ref ne $other->abs_ref;
+  }
+  $self->SUPER::overlaps(@_);
+}
+
+sub contains {
+  my $self  = shift;
+  my($other,$so) = @_;
+  if ($other->isa('Bio::DB::GFF::RelSegment')) {
+    return if $self->abs_ref ne $other->abs_ref;
+  }
+  $self->SUPER::contains(@_);
+}
+#sub equals {
+#  my $self  = shift;
+#  my($other,$so) = @_;
+#  if ($other->isa('Bio::DB::GFF::RelSegment')) {
+#    return if $self->abs_ref ne $other->abs_ref;
+#  }
+#  $self->SUPER::equals(@_);
+#}
+sub intersection {
+  my $self  = shift;
+  my($other,$so) = @_;
+  if ($other->isa('Bio::DB::GFF::RelSegment')) {
+    return if $self->abs_ref ne $other->abs_ref;
+  }
+  $self->SUPER::intersection(@_);
+}
+sub union {
+  my $self  = shift;
+  my($other) = @_;
+  if ($other->isa('Bio::DB::GFF::RelSegment')) {
+    return if $self->abs_ref ne $other->abs_ref;
+  }
+  $self->SUPER::union(@_);
+}
+
+sub overlap_extent {
+  my $self  = shift;
+  my($other) = @_;
+  if ($other->isa('Bio::DB::GFF::RelSegment')) {
+    return if $self->abs_ref ne $other->abs_ref;
+  }
+  $self->SUPER::overlap_extent(@_);
+}
+
+
+=head2 Bio::SeqI implementation
+
+=cut
+
+=head2 primary_id
+
+ Title   : primary_id
+ Usage   : $unique_implementation_key = $obj->primary_id;
+ Function: Returns the unique id for this object in this
+           implementation. This allows implementations to manage their
+           own object ids in a way the implementaiton can control
+           clients can expect one id to map to one object.
+
+           For sequences with no accession number, this method should
+           return a stringified memory location.
+
+ Returns : A string
+ Args    : None
+ Status  : Virtual
+
+
+=cut
+
+sub primary_id {
+   my ($obj,$value) = @_;
+
+   if( defined $value) {
+      $obj->{'primary_id'} = $value;
+    }
+   if( ! exists $obj->{'primary_id'} ) {
+       return "$obj";
+   }
+   return $obj->{'primary_id'};
+}
+
+
+=head2 display_name
+
+ Title   : display_name
+ Usage   : $id = $obj->display_name or $obj->display_name($newid);
+ Function: Gets or sets the display id, also known as the common name of
+           the Seq object.
+
+           The semantics of this is that it is the most likely string
+           to be used as an identifier of the sequence, and likely to
+           have "human" readability.  The id is equivalent to the LOCUS
+           field of the GenBank/EMBL databanks and the ID field of the
+           Swissprot/sptrembl database. In fasta format, the >(\S+) is
+           presumed to be the id, though some people overload the id
+           to embed other information. Bioperl does not use any
+           embedded information in the ID field, and people are
+           encouraged to use other mechanisms (accession field for
+           example, or extending the sequence object) to solve this.
+
+           Notice that $seq->id() maps to this function, mainly for
+           legacy/convenience issues.
+ Returns : A string
+ Args    : None or a new id
+
+Note, this used to be called display_id(), and this name is preserved for
+backward compatibility.  The default is to return the seq_id().
+
+=cut
+
+sub display_name { shift->seq_id }
+*display_id = \&display_name;
+
+=head2 accession_number
+
+ Title   : accession_number
+ Usage   : $unique_biological_key = $obj->accession_number;
+ Function: Returns the unique biological id for a sequence, commonly
+           called the accession_number. For sequences from established
+           databases, the implementors should try to use the correct
+           accession number. Notice that primary_id() provides the
+           unique id for the implemetation, allowing multiple objects
+           to have the same accession number in a particular implementation.
+
+           For sequences with no accession number, this method should return
+           "unknown".
+ Returns : A string
+ Args    : None
+
+
+=cut
+
+sub accession_number {
+    return 'unknown';
+}
+
+=head2 alphabet
+
+ Title   : alphabet
+ Usage   : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
+ Function: Returns the type of sequence being one of
+           'dna', 'rna' or 'protein'. This is case sensitive.
+
+           This is not called <type> because this would cause
+           upgrade problems from the 0.5 and earlier Seq objects.
+
+ Returns : a string either 'dna','rna','protein'. NB - the object must
+           make a call of the type - if there is no type specified it
+           has to guess.
+ Args    : none
+ Status  : Virtual
+
+
+=cut
+
+sub alphabet{
+    return 'dna'; # no way this will be anything other than dna!
+}
+
+=head2 desc
+
+ Title   : desc
+ Usage   : $seqobj->desc($string) or $seqobj->desc()
+ Function: Sets or gets the description of the sequence
+ Example :
+ Returns : The description
+ Args    : The description or none
+
+
+=cut
+
+sub desc { shift->asString }
+
+=head2 species
+
+ Title   : species
+ Usage   : $species = $seq->species() or $seq->species($species)
+ Function: Gets or sets the species
+ Example :
+ Returns : Bio::Species object
+ Args    : None or Bio::Species object
+
+See L<Bio::Species> for more information
+
+=cut
+
+sub species {
+    my ($self, $species) = @_;
+    if ($species) {
+        $self->{'species'} = $species;
+    } else {
+        return $self->{'species'};
+    }
+}
+
+=head2 annotation
+
+ Title   : annotation
+ Usage   : $ann = $seq->annotation or $seq->annotation($annotation)
+ Function: Gets or sets the annotation
+ Example :
+ Returns : Bio::Annotation object
+ Args    : None or Bio::Annotation object
+
+See L<Bio::Annotation> for more information
+
+=cut
+
+sub annotation {
+   my ($obj,$value) = @_;
+   if( defined $value || ! defined $obj->{'annotation'} ) {
+       $value = new Bio::Annotation::Collection() unless defined $value;
+      $obj->{'annotation'} = $value;
+    }
+    return $obj->{'annotation'};
+
+}
+
+=head2 is_circular
+
+ Title   : is_circular
+ Usage   : if( $obj->is_circular) { /Do Something/ }
+ Function: Returns true if the molecule is circular
+ Returns : Boolean value
+ Args    : none
+
+=cut
+
+sub is_circular{
+    return 0;
+}
+
+
+1;
+__END__
+
+=head1 BUGS
+
+Report them please.
+
+=head1 SEE ALSO
+
+L<bioperl>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein@cshl.orgE<gt>.  
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 CONTRIBUTORS
+
+Jason Stajich E<lt>jason@bioperl.orgE<gt>.
+
+=cut
+