diff variant_effect_predictor/Bio/DB/GFF/RelSegment.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/DB/GFF/RelSegment.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,1099 @@
+=head1 NAME
+
+Bio::DB::GFF::RelSegment -- Sequence segment with relative coordinate support
+
+=head1 SYNOPSIS
+
+See L<Bio::DB::GFF>.
+
+=head1 DESCRIPTION
+
+Bio::DB::GFF::RelSegment is a stretch of sequence that can handle
+relative coordinate addressing.  It inherits from
+Bio::DB::GFF::Segment, and is the base class for
+Bio::DB::GFF::Feature.
+
+In addition to the source sequence, a relative segment has a
+"reference sequence", which is used as the basis for its coordinate
+system.  The reference sequence can be changed at will, allowing you
+freedom to change the "frame of reference" for features contained
+within the segment.  For example, by setting a segment's reference
+sequence to the beginning of a gene, you can view all other features
+in gene-relative coordinates.
+
+The reference sequence and the source sequence must be on the same
+physical stretch of DNA, naturally.  However, they do not have to be
+on the same strand.  The strandedness of the reference sequence
+determines whether coordinates increase to the right or the left.
+
+Generally, you will not create or manipulate Bio::DB::GFF::RelSeg0ment
+objects directly, but use those that are returned by the Bio::DB::GFF
+module.
+
+=head2 An Example
+
+To understand how relative coordinates work, consider the following
+example from the C. elegans database.  First we create the appropriate
+GFF accessor object (the factory):
+
+   my $db = Bio::DB::GFF->new(-dsn => 'dbi:mysql:elegans',
+                              -adaptor=>'dbi:mysqlopt');
+
+Now we fetch out a segment based on cosmid clone ZK909:
+
+  my $seg = $db->segment('ZK909');
+
+If we call the segment's refseq() method, we see that the base of the
+coordinate system is the sequence "ZK154", and that its start and
+stop positions are 1 and the length of the cosmid:
+
+  print $seg->refseq;
+  => ZK909
+
+  print $seg->start,' - ',$seg->stop;
+  => 1 - 33782
+
+As a convenience, the "" operator is overloaded in this class, to give
+the reference sequence, and start and stop positions:
+
+  print $seg;
+  => ZK909:1,33782
+
+Internally, Bio::DB::GFF::RelSegment has looked up the absolute
+coordinates of this segment and maintains the source sequence and the
+absolute coordinates relative to the source sequence.  We can see this 
+information using sourceseq() (inherited from Bio::DB::GFF::Segment)
+and the abs_start() and abs_end() methods:
+
+  print $seg->sourceseq;
+  => CHROMOSOME_I
+
+  print $seg->abs_start,' - ',$seg->abs_end;
+  => 14839545 - 14873326
+
+We can also put the segment into absolute mode, so that it behaves
+like Bio::DB::Segment, and always represents coordinates on the source
+sequence.  This is done by passing a true value to the absolute()
+method:
+
+  $seq->absolute(1);
+  print $seg;
+  => CHROMOSOME_I:14839545,14873326
+
+We can change the reference sequence at any time.  One way is to call
+the segment's ref() method, giving it the ID (and optionally the
+class) of another landmark on the genome.  For example, if we know
+that cosmid ZK337 is adjacent to ZK909, then we can view ZK909 in
+ZK337-relative coordinates:
+
+  $seg->refseq('ZK337');
+  print $seg;
+  => ZK337:-33670,111
+
+We can call the segment's features() method in order to get the list
+of contigs that overlap this segment (in the C. elegans database,
+contigs have feature type "Sequence:Link"):
+
+  @links = $seg->features('Sequence:Link');
+
+We can now set the reference sequence to the first of these contigs like so:
+
+  $seg->refseq($links[0]);
+  print $seg;
+  => Sequence:Link(LINK_Y95D11A):3997326,4031107
+
+=cut
+
+package Bio::DB::GFF::RelSegment;
+
+use strict;
+
+use Bio::DB::GFF::Feature;
+use Bio::DB::GFF::Util::Rearrange;
+use Bio::DB::GFF::Segment;
+use Bio::RangeI;
+
+use vars qw(@ISA);
+@ISA = qw(Bio::DB::GFF::Segment);
+
+use overload '""' => 'asString',
+             'bool' => sub { overload::StrVal(shift) },
+             fallback=>1;
+
+=head1 API
+
+The remainder of this document describes the API for
+Bio::DB::GFF::Segment.
+
+=cut
+
+=head2 new
+
+ Title   : new
+ Usage   : $s = Bio::DB::GFF::RelSegment->new(@args)
+ Function: create a new relative segment
+ Returns : a new Bio::DB::GFF::RelSegment object
+ Args    : see below
+ Status  : Public
+
+This method creates a new Bio::DB::GFF::RelSegment object.  Generally
+this is called automatically by the Bio::DB::GFF module and
+derivatives.
+
+This function uses a named-argument style:
+
+ -factory      a Bio::DB::GFF::Adaptor to use for database access
+ -seq          ID of the source sequence
+ -class        class of the source sequence
+ -start        start of the desired segment relative to source sequence
+ -stop         stop of the desired segment relative to source sequence
+ -ref          ID of the reference sequence
+ -refclass     class of the reference sequence
+ -offset       0-based offset from source sequence to start of segment
+ -length       length of desired segment
+ -absolute, -force_absolute
+               use absolute coordinates, rather than coordinates relative
+               to the start of self or the reference sequence
+
+The -seq argument accepts the ID of any landmark in the database.  The
+stored source sequence becomes whatever the GFF file indicates is the
+proper sequence for this landmark.  A class of "Sequence" is assumed
+unless otherwise specified in the -class argument.
+
+If the argument to -seq is a Bio::GFF::Featname object (such as
+returned by the group() method), then the class is taken from that.
+
+The optional -start and -stop arguments specify the end points for the
+retrieved segment.  For those who do not like 1-based indexing,
+-offset and -length are provided.  If both -start/-stop and
+-offset/-length are provided, the latter overrides the former.
+Generally it is not a good idea to mix metaphors.
+
+-ref and -refclass together indicate a sequence to be used for
+relative coordinates.  If not provided, the source sequence indicated
+by -seq is used as the reference sequence.  If the argument to -ref is
+a Bio::GFF::Featname object (such as returned by the group() method),
+then the class is taken from that.
+
+-force_absolute should be used if you wish to skip the lookup of the
+absolute position of the source sequence that ordinarily occurs when
+you create a relative segment.  In this case, the source sequence must
+be a sequence that has been specified as the "source" in the GFF file.
+
+=cut
+
+# Create a new Bio::DB::GFF::RelSegment Object
+# arguments are:
+#      -factory    => factory and DBI interface
+#      -seq        => $sequence_name
+#      -start      => $start_relative_to_sequence
+#      -stop       => $stop_relative_to_sequence
+#      -ref        => $sequence which establishes coordinate system
+#      -offset     => 0-based offset relative to sequence
+#      -length     => length of segment
+#      -nocheck    => turn off checking, force segment to be constructed
+#      -absolute   => use absolute coordinate addressing
+#' 
+sub new {
+  my $package = shift;
+  my ($factory,$name,$start,$stop,$refseq,$class,$refclass,$offset,$length,$force_absolute,$nocheck) =
+    rearrange([
+	       'FACTORY',
+	       [qw(NAME SEQ SEQUENCE SOURCESEQ)],
+	       [qw(START BEGIN)],
+	       [qw(STOP END)],
+	       [qw(REFSEQ REF REFNAME)],
+	       [qw(CLASS SEQCLASS)],
+	       qw(REFCLASS),
+	       [qw(OFFSET OFF)],
+	       [qw(LENGTH LEN)],
+	       [qw(ABSOLUTE)],
+	       [qw(NOCHECK FORCE)],
+	     ],@_);
+
+  $package = ref $package if ref $package;
+  $factory or $package->throw("new(): provide a -factory argument");
+
+  # to allow people to use segments as sources
+  if (ref($name) && $name->isa('Bio::DB::GFF::Segment')) {
+    $start = 1              unless defined $start;
+    $stop  = $name->length  unless defined $stop;
+    return $name->subseq($start,$stop);
+  }
+
+  my @object_results;
+
+  # support for Featname objects
+  if (ref($name) && $name->can('class')) {
+    $class = $name->class;
+    $name  = $name->name;
+  }
+  # if the class of the landmark is not specified then default to 'Sequence'
+  $class ||= eval{$factory->default_class} || 'Sequence';
+
+  # confirm that indicated sequence is actually in the database!
+  my @abscoords;
+
+  # abscoords() will now return an array ref, each element of which is
+  # ($absref,$absclass,$absstart,$absstop,$absstrand)
+
+  if ($nocheck) {
+    $force_absolute++;
+    $start = 1;
+  }
+
+  if ($force_absolute && defined($start)) { # absolute position is given to us
+    @abscoords = ([$name,$class,$start,$stop,'+']);
+  } else {
+    my $result = $factory->abscoords($name,$class,$force_absolute ? $name : ()) or return;
+    @abscoords = @$result;
+  }
+
+  foreach (@abscoords) {
+    my ($absref,$absclass,$absstart,$absstop,$absstrand,$sname) = @$_;
+    $sname = $name unless defined $sname;
+    my ($this_start,$this_stop,$this_length) = ($start,$stop,$length);
+
+    # partially fill in object
+    my $self = bless { factory => $factory },$package;
+
+    $absstrand ||= '+';
+
+    # an explicit length overrides start and stop
+    if (defined $offset) {
+      warn "new(): bad idea to call new() with both a start and an offset"
+	if defined $this_start;
+      $this_start = $offset+1;
+    }
+    if (defined $this_length) {
+      warn "new(): bad idea to call new() with both a stop and a length"
+	if defined $this_stop;
+      $this_stop = $this_start + $length - 1;
+    }
+
+    # this allows a SQL optimization way down deep
+    $self->{whole}++ if $absref eq $sname and !defined($this_start) and !defined($this_stop);
+
+    $this_start     = 1                    if !defined $this_start;
+    $this_stop      = $absstop-$absstart+1 if !defined $this_stop;
+    $this_length = $this_stop - $this_start + 1;
+
+    # now offset to correct subsegment based on desired start and stop
+    if ($force_absolute) {
+      ($this_start,$this_stop) = ($absstart,$absstop);
+      $self->absolute(1);
+    } elsif ($absstrand eq '+') {
+      $this_start =  $absstart   + $this_start - 1;
+      $this_stop  =  $this_start + $this_length - 1;
+    } else {
+      $this_start =  $absstop - ($this_start - 1);
+      $this_stop  =  $absstop - ($this_stop - 1);
+    }
+
+    # handle truncation in either direction
+    # This only happens if the segment runs off the end of
+    # the reference sequence
+    if ($factory->strict_bounds_checking &&
+	(($this_start < $absstart) || ($this_stop > $absstop))) {
+      # return empty if we are completely off the end of the ref se
+      next unless $this_start<=$absstop && $this_stop>=$absstart;
+      if (my $a = $factory->abscoords($absref,'Sequence')) {
+	my $refstart = $a->[0][2];
+	my $refstop  = $a->[0][3];
+	if ($this_start < $refstart) {
+	  $this_start = $refstart;
+	  $self->{truncated}{start}++;
+	}
+	if ($this_stop > $refstop) {
+	  $this_stop = $absstop;
+	  $self->{truncated}{stop}++;
+	}
+      }
+    }
+
+    @{$self}{qw(sourceseq start stop strand class)}
+      = ($absref,$this_start,$this_stop,$absstrand,$absclass);
+
+    # handle reference sequence
+    if (defined $refseq) {
+      $refclass = $refseq->class if $refseq->can('class');
+      $refclass ||= 'Sequence';
+      my ($refref,$refstart,$refstop,$refstrand) = $factory->abscoords($refseq,$refclass);
+      unless ($refref eq $absref) {
+	$self->error("reference sequence is on $refref but source sequence is on $absref");
+	return;
+      }
+      $refstart = $refstop if $refstrand eq '-';
+      @{$self}{qw(ref refstart refstrand)} = ($refseq,$refstart,$refstrand);
+    } else {
+      $absstart = $absstop if $absstrand eq '-';
+      @{$self}{qw(ref refstart refstrand)} = ($sname,$absstart,$absstrand);
+    }
+    push @object_results,$self;
+  }
+
+  return wantarray ? @object_results : $object_results[0];
+}
+
+# overridden methods
+# start, stop, length
+sub start {
+  my $self = shift;
+  return $self->strand < 0 ? $self->{stop} : $self->{start} if $self->absolute;
+  $self->_abs2rel($self->{start});
+}
+sub end {
+  my $self = shift;
+  return $self->strand < 0 ? $self->{start} : $self->{stop} if $self->absolute;
+  $self->_abs2rel($self->{stop});
+}
+*stop = \&end;
+
+sub length {
+  my $self = shift;
+  return unless defined $self->abs_end;
+  abs($self->abs_end - $self->abs_start) + 1;
+}
+
+sub abs_start {
+  my $self = shift;
+  if ($self->absolute) {
+    my ($a,$b) = ($self->SUPER::abs_start,$self->SUPER::abs_end);
+    return ($a<$b) ? $a : $b;
+  }
+  else {
+    return $self->SUPER::abs_start(@_);
+  }
+}
+sub abs_end {
+  my $self = shift;
+  if ($self->absolute) {
+    my ($a,$b) = ($self->SUPER::abs_start,$self->SUPER::abs_end);
+    return ($a>$b) ? $a : $b;
+  }
+
+  else {
+    return $self->SUPER::abs_end(@_);
+  }
+}
+
+=head2 refseq
+
+ Title   : refseq
+ Usage   : $ref = $s->refseq([$newseq] [,$newseqclass])
+ Function: get/set reference sequence
+ Returns : current reference sequence
+ Args    : new reference sequence and class (optional)
+ Status  : Public
+
+This method will get or set the reference sequence.  Called with no
+arguments, it returns the current reference sequence.  Called with
+either a sequence ID and class, a Bio::DB::GFF::Segment object (or
+subclass) or a Bio::DB::GFF::Featname object, it will set the current
+reference sequence and return the previous one.
+
+The method will generate an exception if you attempt to set the
+reference sequence to a sequence that isn't contained in the database,
+or one that has a different source sequence from the segment.
+
+=cut
+
+#'
+sub refseq {
+  my $self = shift;
+  my $g    = $self->{ref};
+  if (@_) {
+    my ($newref,$newclass);
+    if (@_ == 2) {
+      $newclass = shift;
+      $newref   = shift;
+    } else {
+      $newref   = shift;
+      $newclass = 'Sequence';
+    }
+
+    defined $newref or $self->throw('refseq() called with an undef reference sequence');
+
+    # support for Featname objects
+    $newclass = $newref->class if ref($newref) && $newref->can('class');
+
+    # $self->throw("Cannot define a segment's reference sequence in terms of itself!")
+    # if ref($newref) and overload::StrVal($newref) eq overload::StrVal($self);
+
+    my ($refsource,undef,$refstart,$refstop,$refstrand);
+    if ($newref->isa('Bio::DB::GFF::RelSegment')) {
+      ($refsource,undef,$refstart,$refstop,$refstrand) =
+	($newref->sourceseq,undef,$newref->abs_start,$newref->abs_end,$newref->abs_strand >= 0 ? '+' : '-');
+    } else {
+      my $coords = $self->factory->abscoords($newref,$newclass);
+      foreach (@$coords) { # find the appropriate one
+	($refsource,undef,$refstart,$refstop,$refstrand) = @$_;
+	last if $refsource eq $self->{sourceseq};
+      }
+	
+    }
+    $self->throw("can't set reference sequence: $newref and $self are on different sequence segments")
+      unless $refsource eq $self->{sourceseq};
+
+    @{$self}{qw(ref refstart refstrand)} = ($newref,$refstart,$refstrand);
+    $self->absolute(0);
+  }
+  return $self->absolute ? $self->sourceseq : $g;
+}
+
+
+=head2 abs_low
+
+ Title   : abs_low
+ Usage   : $s->abs_low
+ Function: the absolute lowest coordinate of the segment
+ Returns : an integer
+ Args    : none
+ Status  : Public
+
+This is for GadFly compatibility, and returns the low coordinate in
+absolute coordinates;
+
+=cut
+
+sub abs_low {
+  my $self = shift;
+  my ($a,$b) = ($self->abs_start,$self->abs_end);
+  return ($a<$b) ? $a : $b;
+}
+
+=head2 abs_high
+
+ Title   : abs_high
+ Usage   : $s->abs_high
+ Function: the absolute highest coordinate of the segment
+ Returns : an integer
+ Args    : none
+ Status  : Public
+
+This is for GadFly compatibility, and returns the high coordinate in
+absolute coordinates;
+
+=cut
+
+sub abs_high {
+  my $self = shift;
+  my ($a,$b) = ($self->abs_start,$self->abs_end);
+  return ($a>$b) ? $a : $b;
+}
+
+
+=head2 asString
+
+ Title   : asString
+ Usage   : $s->asString
+ Function: human-readable representation of the segment
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+This method will return a human-readable representation of the
+segment.  It is the overloaded method call for the "" operator.
+
+Currently the format is:
+
+  refseq:start,stop
+
+=cut
+
+sub asString {
+  my $self = shift;
+  return $self->SUPER::asString if $self->absolute;
+  my $label = $self->{ref};
+  my $start = $self->start || '';
+  my $stop  = $self->stop  || '';
+  if (ref($label) && overload::StrVal($self) eq overload::StrVal($label->ref)) {
+    $label = $self->abs_ref;
+    $start = $self->abs_start;
+    $stop  = $self->abs_end;
+  }
+  return "$label:$start,$stop";
+}
+
+sub name { shift->asString }
+
+=head2 absolute
+
+ Title   : absolute
+ Usage   : $abs = $s->absolute([$abs])
+ Function: get/set absolute coordinates
+ Returns : a boolean flag
+ Args    : new setting for flag (optional)
+ Status  : Public
+
+Called with a boolean flag, this method controls whether to display
+relative coordinates (relative to the reference sequence) or absolute
+coordinates (relative to the source sequence).  It will return the
+previous value of the setting.
+
+=cut
+
+sub absolute {
+  my $self = shift;
+  my $g = $self->{absolute};
+  $self->{absolute} = shift if @_;
+  $g;
+}
+
+=head2 features
+
+ Title   : features
+ Usage   : @features = $s->features(@args)
+ Function: get features that overlap this segment
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : see below
+ Status  : Public
+
+This method will find all features that overlap the segment and return
+a list of Bio::DB::GFF::Feature objects.  The features will use
+coordinates relative to the reference sequence in effect at the time
+that features() was called.
+
+The returned list can be limited to certain types of feature by
+filtering on their method and/or source.  In addition, it is possible
+to obtain an iterator that will step through a large number of
+features sequentially.
+
+Arguments can be provided positionally or using the named arguments
+format.  In the former case, the arguments are a list of feature types
+in the format "method:source".  Either method or source can be
+omitted, in which case the missing component is treated as a wildcard.
+If no colon is present, then the type is treated as a method name.
+Multiple arguments are ORed together.
+
+Examples:
+
+ @f = $s->features('exon:curated');           # all curated exons
+ @f = $s->features('exon:curated','intron');  # curated exons and all introns
+ @f = $s->features('similarity:.*EST.*');     # all similarities
+                                              # having something to do
+                                              # with ESTs
+
+The named parameter form gives you control over a few options:
+
+  -types      an array reference to type names in the format
+	      "method:source"
+
+  -merge     Whether to apply aggregators to the generated features (default yes)
+
+  -rare      Turn on an optimization suitable for a relatively rare feature type,
+             where it will be faster to filter by feature type first
+             and then by position, rather than vice versa.
+
+  -attributes a hashref containing a set of attributes to match
+
+  -iterator  Whether to return an iterator across the features.
+
+  -binsize   A true value will create a set of artificial features whose
+             start and stop positions indicate bins of the given size, and
+             whose scores are the number of features in the bin.  The
+             class and method of the feature will be set to "bin",
+             its source to "method:source", and its group to "bin:method:source".
+             This is a handy way of generating histograms of feature density.
+
+-merge is a boolean flag that controls whether the adaptor's
+aggregators wll be applied to the features returned by this method.
+
+If -iterator is true, then the method returns a single scalar value
+consisting of a Bio::SeqIO object.  You can call next_seq() repeatedly
+on this object to fetch each of the features in turn.  If iterator is
+false or absent, then all the features are returned as a list.
+
+The -attributes argument is a hashref containing one or more
+attributes to match against:
+
+  -attributes => { Gene => 'abc-1',
+                   Note => 'confirmed' }
+
+Attribute matching is simple string matching, and multiple attributes
+are ANDed together.
+
+=cut
+
+#'
+
+# return all features that overlap with this segment;
+# optionally modified by a list of types to filter on
+sub features {
+  my $self = shift;
+  my @args = $self->_process_feature_args(@_);
+  return $self->factory->overlapping_features(@args);
+}
+
+=head2 top_SeqFeatures
+
+ Title   : top_SeqFeatures
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+Alias for features().  Provided for Bio::SeqI compatibility.
+
+=cut
+
+=head2 all_SeqFeatures
+
+ Title   : all_SeqFeatures
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+Alias for features().  Provided for Bio::SeqI compatibility.
+
+=cut
+
+=head2 sub_SeqFeatures
+
+ Title   : sub_SeqFeatures
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+Alias for features().  Provided for Bio::SeqI compatibility.
+
+=cut
+
+*top_SeqFeatures = *all_SeqFeatures = \&features;
+
+=head2 get_feature_stream
+
+ Title   : features
+ Usage   : $stream = $s->get_feature_stream(@args)
+ Function: get a stream of features that overlap this segment
+ Returns : a Bio::SeqIO::Stream-compliant stream
+ Args    : see below
+ Status  : Public
+
+This is the same as features(), but returns a stream.  Use like this:
+
+ $stream = $s->get_feature_stream('exon');
+ while (my $exon = $stream->next_seq) {
+    print $exon->start,"\n";
+ }
+
+=cut
+
+sub get_feature_stream {
+  my $self = shift;
+  my @args = $_[0] =~ /^-/ ? (@_,-iterator=>1) : (-types=>\@_,-iterator=>1);
+  $self->features(@args);
+}
+
+=head2 get_seq_stream
+
+ Title   : get_seq_stream
+ Usage   : $stream = $s->get_seq_stream(@args)
+ Function: get a stream of features that overlap this segment
+ Returns : a Bio::SeqIO::Stream-compliant stream
+ Args    : see below
+ Status  : Public
+
+This is the same as feature_stream(), and is provided for Bioperl
+compatibility.  Use like this:
+
+ $stream = $s->get_seq_stream('exon');
+ while (my $exon = $stream->next_seq) {
+    print $exon->start,"\n";
+ }
+
+=cut
+
+*get_seq_stream = \&get_feature_stream;
+
+
+=head2 overlapping_features
+
+ Title   : overlapping_features
+ Usage   : @features = $s->overlapping_features(@args)
+ Function: get features that overlap this segment
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : see features()
+ Status  : Public
+
+This is an alias for the features() method, and takes the same
+arguments.
+
+=cut
+
+*overlapping_features = \&features;
+
+=head2 contained_features
+
+ Title   : contained_features
+ Usage   : @features = $s->contained_features(@args)
+ Function: get features that are contained by this segment
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : see features()
+ Status  : Public
+
+This is identical in behavior to features() except that it returns
+only those features that are completely contained within the segment,
+rather than any that overlap.
+
+=cut 
+
+# return all features completely contained within this segment
+sub contained_features {
+  my $self = shift;
+  local $self->{whole} = 0;
+  my @args = $self->_process_feature_args(@_);
+  return $self->factory->contained_features(@args);
+}
+
+# *contains = \&contained_features;
+
+=head2 contained_in
+
+ Title   : contained_in
+ Usage   : @features = $s->contained_in(@args)
+ Function: get features that contain this segment
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : see features()
+ Status  : Public
+
+This is identical in behavior to features() except that it returns
+only those features that completely contain the segment.
+
+=cut
+
+# return all features completely contained within this segment
+sub contained_in {
+  my $self = shift;
+  local $self->{whole} = 0;
+  my @args = $self->_process_feature_args(@_);
+  return $self->factory->contained_in(@args);
+}
+
+=head2 _process_feature_args
+
+ Title   : _process_feature_args
+ Usage   : @args = $s->_process_feature_args(@args)
+ Function: preprocess arguments passed to features, 
+           contained_features, and overlapping_features
+ Returns : a list of parsed arguents
+ Args    : see feature()
+ Status  : Internal
+
+This is an internal method that is used to check and format the
+arguments to features() before passing them on to the adaptor.
+
+=cut 
+
+sub _process_feature_args {
+  my $self       = shift;
+
+  my ($ref,$class,$start,$stop,$strand,$whole)
+    = @{$self}{qw(sourceseq class start stop strand whole)};
+
+  ($start,$stop) = ($stop,$start) if $strand eq '-';
+
+  my @args = (-ref=>$ref,-class=>$class);
+
+  # indicating that we are fetching the whole segment allows certain
+  # SQL optimizations.
+  push @args,(-start=>$start,-stop=>$stop) unless $whole;
+
+  if (@_) {
+    if ($_[0] =~ /^-/) {
+      push @args,@_;
+    } else {
+      my @types = @_;
+      push @args,-types=>\@types;
+    }
+  }
+  push @args,-parent=>$self;
+  @args;
+}
+
+=head2 types
+
+ Title   : types
+ Usage   : @types = $s->types([-enumerate=>1])
+ Function: list feature types that overlap this segment
+ Returns : a list of Bio::DB::GFF::Typename objects or a hash
+ Args    : see below
+ Status  : Public
+
+The types() method will return a list of Bio::DB::GFF::Typename
+objects, each corresponding to a feature that overlaps the segment.
+If the optional -enumerate parameter is set to a true value, then the
+method will return a hash in which the keys are the type names and the 
+values are the number of times a feature of that type is present on
+the segment.  For example:
+
+  %count = $s->types(-enumerate=>1);
+
+=cut 
+
+# wrapper for lower-level types() call.
+sub types {
+  my $self = shift;
+  my ($ref,$class,$start,$stop,$strand) = @{$self}{qw(sourceseq class start stop strand)};
+  ($start,$stop) = ($stop,$start) if $strand eq '-';
+
+  my @args;
+  if (@_ && $_[0] !~ /^-/) {
+    @args = (-type => \@_)
+  } else {
+    @args = @_;
+  }
+  $self->factory->types(-ref  => $ref,
+			-class => $class,
+			-start=> $start,
+			-stop => $stop,
+			@args);
+}
+
+=head1 Internal Methods
+
+The following are internal methods and should not be called directly.
+
+=head2 new_from_segment
+
+ Title   : new_from_segment
+ Usage   : $s = $segment->new_from_segment(@args)
+ Function: create a new relative segment
+ Returns : a new Bio::DB::GFF::RelSegment object
+ Args    : see below
+ Status  : Internal
+
+This constructor is used internally by the subseq() method.  It forces
+the new segment into the Bio::DB::GFF::RelSegment package, regardless
+of the package that it is called from.  This causes subclass-specfic
+information, such as feature types, to be dropped when a subsequence
+is created.
+
+=cut
+
+sub new_from_segment {
+  my $package   = shift;
+  $package      = ref $package if ref $package;
+  my $segment   = shift;
+  my $new = {};
+  @{$new}{qw(factory sourceseq start stop strand class ref refstart refstrand)}
+    = @{$segment}{qw(factory sourceseq start stop strand class ref refstart refstrand)};
+  return bless $new,__PACKAGE__;
+}
+
+=head2 _abs2rel
+
+ Title   : _abs2rel
+ Usage   : @coords = $s->_abs2rel(@coords)
+ Function: convert absolute coordinates into relative coordinates
+ Returns : a list of relative coordinates
+ Args    : a list of absolute coordinates
+ Status  : Internal
+
+This is used internally to map from absolute to relative
+coordinates. It does not take the offset of the reference sequence
+into account, so please use abs2rel() instead.
+
+=cut
+
+sub _abs2rel {
+  my $self = shift;
+  my @result;
+  return unless defined $_[0];
+
+  if ($self->absolute) {
+    @result = @_;
+  } else {
+    my ($refstart,$refstrand) = @{$self}{qw(refstart refstrand)};
+    @result = defined($refstrand) && $refstrand eq '-' ? map { $refstart - $_ + 1 } @_
+                                                       : map { $_ - $refstart + 1 } @_;
+  }
+  # if called with a single argument, caller will expect a single scalar reply
+  # not the size of the returned array!
+  return $result[0] if @result == 1 and !wantarray;
+  @result;
+}
+
+=head2 rel2abs
+
+ Title   : rel2abs
+ Usage   : @coords = $s->rel2abs(@coords)
+ Function: convert relative coordinates into absolute coordinates
+ Returns : a list of absolute coordinates
+ Args    : a list of relative coordinates
+ Status  : Public
+
+This function takes a list of positions in relative coordinates to the
+segment, and converts them into absolute coordinates.
+
+=cut
+
+sub rel2abs {
+  my $self = shift;
+  my @result;
+
+  if ($self->absolute) {
+    @result = @_;
+  } else {
+    my ($abs_start,$abs_strand) = ($self->abs_start,$self->abs_strand);
+    @result = $abs_strand < 0 ? map { $abs_start - $_ + 1 } @_
+                              : map { $_ + $abs_start - 1 } @_;
+  }
+  # if called with a single argument, caller will expect a single scalar reply
+  # not the size of the returned array!
+  return $result[0] if @result == 1 and !wantarray;
+  @result;
+}
+
+=head2 abs2rel
+
+ Title   : abs2rel
+ Usage   : @rel_coords = $s-abs2rel(@abs_coords)
+ Function: convert absolute coordinates into relative coordinates
+ Returns : a list of relative coordinates
+ Args    : a list of absolutee coordinates
+ Status  : Public
+
+This function takes a list of positions in absolute coordinates
+and returns a list expressed in relative coordinates.
+
+=cut
+
+sub abs2rel {
+  my $self = shift;
+  my @result;
+
+  if ($self->absolute) {
+    @result = @_;
+  } else {
+    my ($abs_start,$abs_strand) = ($self->abs_start,$self->abs_strand);
+    @result = $abs_strand < 0 ? map { $abs_start - $_ + 1 } @_
+                              : map { $_ - $abs_start + 1 } @_;
+  }
+  # if called with a single argument, caller will expect a single scalar reply
+  # not the size of the returned array!
+  return $result[0] if @result == 1 and !wantarray;
+  @result;
+}
+
+sub subseq {
+  my $self = shift;
+  my $obj  = $self->SUPER::subseq(@_);
+  bless $obj,__PACKAGE__;    # always bless into the generic RelSegment package
+}
+
+sub strand {
+  my $self = shift;
+  if ($self->absolute) {
+    return _to_strand($self->{strand});
+  }
+  return $self->stop <=> $self->start;
+}
+
+sub _to_strand {
+  my $s = shift;
+  return -1 if $s eq '-';
+  return +1 if $s eq '+';
+  return 0;
+}
+
+=head2 Bio::RangeI Methods
+
+The following Bio::RangeI methods are supported:
+
+overlaps(), contains(), equals(),intersection(),union(),overlap_extent()
+
+=cut
+
+sub intersection {
+  my $self     = shift;
+  my (@ranges) = @_;
+  unshift @ranges,$self if ref $self;
+  $ranges[0]->isa('Bio::DB::GFF::RelSegment')
+    or return $self->SUPER::intersection(@_);
+
+  my $ref = $ranges[0]->abs_ref;
+  my ($low,$high);
+  foreach (@ranges) {
+    return unless $_->can('abs_ref');
+    $ref eq $_->abs_ref or return;
+    $low  = $_->abs_low   if !defined($low)  or $low  < $_->abs_low;
+    $high = $_->abs_high  if !defined($high) or $high > $_->abs_high;
+  }
+  return unless $low < $high;
+  $self->new(-factory=> $self->factory,
+	     -seq    => $ref,
+	     -start  => $low,
+	     -stop   => $high);
+}
+
+sub overlaps {
+  my $self = shift;
+  my($other,$so) = @_;
+  return $self->SUPER::overlaps(@_) unless $other->isa('Bio::DB::GFF::RelSegment');
+  return if $self->abs_ref ne $other->abs_ref;
+  return if $self->abs_low  > $other->abs_high;
+  return if $self->abs_high < $other->abs_low;
+  1;
+}
+
+sub contains {
+  my $self = shift;
+  my($other,$so) = @_;
+  return $self->SUPER::overlaps(@_) unless $other->isa('Bio::DB::GFF::RelSegment');
+  return if $self->abs_ref ne $other->abs_ref;
+  return unless $self->abs_low <= $other->abs_low;
+  return unless $self->abs_high >= $other->abs_high;
+  1;
+}
+
+sub union {
+  my $self     = shift;
+  my (@ranges) = @_;
+  unshift @ranges,$self if ref $self;
+  $ranges[0]->isa('Bio::DB::GFF::RelSegment')
+    or return $self->SUPER::union(@_);
+
+  my $ref = $ranges[0]->abs_ref;
+  my ($low,$high);
+  foreach (@ranges) {
+    return unless $_->can('abs_ref');
+    $ref eq $_->abs_ref or return;
+    $low  = $_->abs_low  if !defined($low)  or $low  > $_->abs_low;
+    $high = $_->abs_high if !defined($high) or $high < $_->abs_high;
+  }
+  $self->new(-factory=> $self->factory,
+	     -seq    => $ref,
+	     -start  => $low,
+	     -stop   => $high);
+}
+
+
+1;
+
+__END__
+
+=head1 BUGS
+
+Schemas need some work.
+
+=head1 SEE ALSO
+
+L<bioperl>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein@cshl.orgE<gt>.  
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+