diff variant_effect_predictor/Bio/EnsEMBL/MappedSlice.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/MappedSlice.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,668 @@
+=head1 LICENSE
+
+  Copyright (c) 1999-2012 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <dev@ensembl.org>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+=head1 NAME
+
+Bio::EnsEMBL::MappedSlice - an object representing a mapped slice
+
+=head1 SYNOPSIS
+
+  # get a reference slice
+  my $slice =
+    $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 );
+
+  # create MappedSliceContainer based on the reference slice
+  my $msc = Bio::EnsEMBL::MappedSliceContainer->new( -SLICE => $slice );
+
+  # set the adaptor for fetching AssemblySlices
+  my $asa = $slice->adaptor->db->get_AssemblySliceAdaptor;
+  $msc->set_AssemblySliceAdaptor($asa);
+
+  # add an AssemblySlice to your MappedSliceContainer
+  $msc->attach_AssemblySlice('NCBIM36');
+
+  foreach my $mapped_slice ( @{ $msc->get_all_MappedSlices } ) {
+    print $mapped_slice->name, "\n";
+
+    foreach my $sf ( @{ $mapped_slice->get_all_SimpleFeatures } ) {
+      print "  ", &to_string($sf), "\n";
+    }
+  }
+
+=head1 DESCRIPTION
+
+NOTE: this code is under development and not fully functional nor tested
+yet.  Use only for development.
+
+This object represents a mapped slice, i.e. a slice that's attached
+to a reference slice and a mapper to convert coordinates to/from the
+reference. The attachment is done via a MappedSliceContainer which
+has the reference slice and the "container slice" defining the common
+coordinate system for all MappedSlices.
+
+A MappedSlice is supposed to behave as close to a Bio::EnsEMBL::Slice
+as possible. Most Slice methods are implemented in MappedSlice and will
+return an equivalent value to what Slice does. There are some exceptions
+of unimplemented methods, either because there is no useful equivalent
+for a MappedSlice to do, or they are too complicated.
+
+Not supported Bio::EnsEMBL::Slice methods:
+
+  All deprecated methods
+  All Bio::PrimarySeqI compliance methods
+  expand
+  get_generic_features
+  get_seq_region_id
+  seq_region_Slice
+
+Not currently supported but maybe should/could:
+
+  calculate_pi
+  calculate_theta
+  get_base_count
+  get_by_Individual
+  get_by_strain
+  invert
+
+Internally, a MappedSlice is a collection of Bio::EnsEMBL::Slices and
+associated Bio::EnsEMBL::Mappers which map the slices to the common
+container coordinate system.
+
+MappedSlices are usually created and attached to a MappedSliceContainer
+by an adaptor/factory.
+
+=head1 METHODS
+
+  new
+  add_Slice_Mapper_pair
+  get_all_Slice_Mapper_pairs
+  adaptor
+  container
+  name
+  seq_region_name
+  start
+  end
+  strand
+  length
+  seq_region_length
+  centrepoint
+  coord_system
+  coord_system_name
+  is_toplevel
+  seq (not implemented yet)
+  subseq (not implemented yet)
+  get_repeatmasked_seq (not implemented yet)
+  sub_MappedSlice (not implemented yet)
+  project (not implemented yet)
+
+=head1 RELATED MODULES
+
+  Bio::EnsEMBL::MappedSlice
+  Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
+  Bio::EnsEMBL::Compara::AlignSlice
+  Bio::EnsEMBL::Compara::AlignSlice::Slice
+  Bio::EnsEMBL::AlignStrainSlice
+  Bio::EnsEMBL::StrainSlice
+
+=cut
+
+package Bio::EnsEMBL::MappedSlice;
+
+use strict;
+use warnings;
+no warnings 'uninitialized';
+
+use Bio::EnsEMBL::Utils::Argument qw(rearrange);
+use Bio::EnsEMBL::Utils::Exception qw(throw warning);
+use Bio::EnsEMBL::Mapper;
+use Scalar::Util qw(weaken);
+
+use vars qw($AUTOLOAD);
+
+
+=head2 new
+
+  Arg [ADAPTOR]   : Adaptor $adaptor - an adaptor of the appropriate type
+  Arg [CONTAINER] : Bio::EnsEMBL::MappedSliceContainer $container - the
+                    container this MappedSlice is attached to
+  Arg [NAME]      : String $name - name
+  Example     : my $mapped_slice = Bio::EnsEMBL::MappedSlice->new(
+                  -ADAPTOR   => $adaptor,
+                  -CONTAINER => $container,
+                  -NAME      => $name,
+                );
+  Description : Constructor. Usually you won't call this method manually, but
+                the MappedSlice will be constructed by an adaptor/factory.
+  Return type : Bio::EnsEMBL::MappedSlice
+  Exceptions  : thrown on wrong or missing arguments
+  Caller      : general, MappedSlice adaptors
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub new {
+  my $caller = shift;
+  my $class = ref($caller) || $caller;
+
+  my ($adaptor, $container, $name) =
+    rearrange([qw(ADAPTOR CONTAINER NAME)], @_);
+
+  # arguement check
+  unless ($container and ref($container) and
+          $container->isa('Bio::EnsEMBL::MappedSliceContainer')) {
+    throw("Need a MappedSliceContainer.");
+  }
+
+  my $self = {};
+  bless ($self, $class);
+
+  #
+  # initialise object
+  #
+  
+  # need to weaken reference to prevent circular reference
+  weaken($self->{'container'} = $container);
+
+  $self->adaptor($adaptor) if (defined($adaptor));
+  $self->{'name'} = $name if (defined($name));
+
+  $self->{'slice_mapper_pairs'} = [];
+
+  return $self;
+}
+
+
+=head2 add_Slice_Mapper_pair 
+
+  Arg[1]      : Bio::EnsEMBL::Slice $slice - slice to add
+  Arg[2]      : Bio::EnsEMBL::Mapper $mapper - the mapper for this slice
+  Example     : $mapped_slice->add_Slice_Mapper_pair($slice, $mapper);
+  Description : Adds a native slice and a corresponding mapper to map to/from
+                the artificial container coord system.
+  Return type : listref of Bio::EnsEMBL::MappedSlice
+  Exceptions  : thrown on wrong or missing arguments
+  Caller      : general, MappedSlice adaptors
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub add_Slice_Mapper_pair {
+  my $self = shift;
+  my $slice = shift;
+  my $mapper = shift;
+
+  # argument check
+  unless ($slice and ref($slice) and ($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
+    throw("You must provide a slice.");
+  }
+
+  unless ($mapper and ref($mapper) and $mapper->isa('Bio::EnsEMBL::Mapper')) {
+    throw("You must provide a mapper.");
+  }
+
+  push @{ $self->{'slice_mapper_pairs'} }, [ $slice, $mapper ];
+  
+  return $self->{'slice_mapper_pairs'};
+}
+
+
+=head2 get_all_Slice_Mapper_pairs 
+
+  Example     : foreach my $pair (@{ $self->get_all_Slice_Mapper_pairs }) {
+                  my ($slice, $mapper) = @$pair;
+
+                  # get container coordinates
+                  my @coords = $mapper->map_coordinates(
+                    $slice->seq_region_name,
+                    $slice->start,
+                    $slice->end,
+                    $slice->strand,
+                    'mapped_slice'
+                  );
+
+                  # ....
+                }
+  Description : Gets all Slice/Mapper pairs this MappedSlice is composed of.
+                Each slice (and features on it) can be mapped onto the
+                artificial container coord system using the mapper.
+  Return type : listref of listref of a Bio::EnsEMBL::Slice and
+                Bio::EnsEMBL::Mapper pair
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub get_all_Slice_Mapper_pairs {
+  my $self = shift;
+  return $self->{'slice_mapper_pairs'};
+}
+
+
+=head2 adaptor
+
+  Arg[1]      : (optional) Adaptor $adaptor - the adaptor/factory for this
+                object
+  Example     : $mapped_slice->adaptor($assembly_slice_adaptor);
+  Description : Getter/setter for the adaptor/factory for this object.
+  Return type : Adaptor of appropriate type
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub adaptor {
+  my $self = shift;
+  weaken($self->{'adaptor'} = shift) if (@_);
+  return $self->{'adaptor'};
+}
+
+
+=head2 container
+
+  Arg[1]      : (optional) Bio::EnsEMBL::MappedSliceContainer - the container
+                this object is attached to
+  Example     : my $container = $mapped_slice->container;
+                print $container->ref_slice->name, "\n";
+  Description : Getter/setter for the container this object is attached to. The
+                container will give you access to the reference slice, a common
+                artificial container slice, and a mapper to map to it from the
+                container coord system.
+
+                The implementation uses a weak reference to attach the container
+                since the container holds a list of MappedSlices itself.
+  Return type : Bio::EnsEMBL::MappedSliceContainer
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub container {
+  my $self = shift;
+  weaken($self->{'container'} = shift) if (@_);
+  return $self->{'container'};
+}
+
+
+=head2 name
+
+  Arg[1]      : String - the name of this object
+  Example     : my $name = $mapped_slice->container->ref_slice->name .
+                  ":mapped_" . $ident_string;
+                $mapped_slice->name($name);
+  Description : Getter/setter for this object's name
+  Return type : String
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub name {
+  my $self = shift;
+  $self->{'name'} = shift if (@_);
+  return $self->{'name'};
+}
+
+
+=head2 seq_region_name
+
+  Example     : my $sr_name = $mapped_slice->seq_region_name;
+  Description : Returns the seq_region name of the reference slice.
+  Return type : String
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub seq_region_name {
+  my $self = shift;
+  return $self->container->ref_slice->seq_region_name;
+}
+
+
+=head2 start
+
+  Example     : my $start = $mapped_slice->start;
+  Description : Returns the start of the container slice.
+  Return type : Int
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub start {
+  my $self = shift;
+  return $self->container->container_slice->start;
+}
+
+
+=head2 end
+
+  Example     : my $end = $mapped_slice->end;
+  Description : Returns the end of the container slice.
+  Return type : Int
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub end {
+  my $self = shift;
+  return $self->container->container_slice->end;
+}
+
+
+=head2 strand
+
+  Example     : my $strand = $mapped_slice->strand;
+  Description : Returns the strand of the container slice.
+  Return type : Int
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub strand {
+  my $self = shift;
+  return $self->container->container_slice->strand;
+}
+
+
+=head2 length
+
+  Example     : my $length = $mapped_slice->length;
+  Description : Returns the length of the container slice
+  Return type : Int
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub length {
+  my $self = shift;
+  return $self->container->container_slice->length;
+}
+
+
+=head2 seq_region_length
+
+  Example     : my $sr_length = $mapped_slice->seq_region_length;
+  Description : Returns the seq_region length of the reference slice.
+  Return type : Int
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub seq_region_length {
+  my $self = shift;
+  return $self->container->ref_slice->seq_region_length;
+}
+
+
+=head2 centrepoint
+
+  Example     : my $centrepoint = $mapped_slice->centrepoint;
+  Description : Returns the centrepoint of the container slice.
+  Return type : Int
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub centrepoint {
+  my $self = shift;
+  return $self->container->container_slice->centrepoint;
+}
+
+
+=head2 coord_system
+
+  Example     : my $cs = $mapped_slice->coord_system;
+  Description : Returns the coord system of the container slice.
+  Return type : Bio::EnsEMBL::CoordSystem
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub coord_system {
+  my $self = shift;
+  return $self->container->container_slice->coord_system;
+}
+
+=head2 coord_system_name
+
+  Example     : my $cs_name = $mapped_slice->coord_system_name;
+  Description : Returns the coord system name of the container slice.
+  Return type : Int
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub coord_system_name {
+  my $self = shift;
+  return $self->container->container_slice->coord_system_name;
+}
+
+=head2 is_toplevel
+
+  Example     : my $toplevel_flag = $mapped_slice->is_toplevel;
+  Description : Returns weather the container slice is toplevel.
+  Return type : Int
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub is_toplevel {
+  my $self = shift;
+  return $self->container->container_slice->is_toplevel;
+}
+
+
+=head2 seq
+
+  Example     : my $seq = $mapped_slice->seq()
+  Description : Retrieves the expanded sequence of this mapped slice,
+                including "-" characters where there are inserts in any other
+                mapped slices. This will align with the sequence returned by
+                the container's seq() method.
+  Return type : String
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub seq {
+  my $self = shift;
+  
+  # create an empty string
+  my $ms_seq = '';
+  
+  # this coord represents the current position in the MS sequence
+  my $start = 0;
+
+  # get slice/mapper pairs from mapped slice (usually only one anyway)
+  foreach my $pair(@{$self->get_all_Slice_Mapper_pairs()}) {
+    my ($s, $m) = @$pair;
+    
+    # make sure to send extra args
+    # eg strain slices might need read coverage filtering
+    my $seq = $s->seq(@_);
+    
+    # project from mapped slice to reference slice using the mapper
+    foreach my $ref_coord($m->map_coordinates('mapped_slice', 1, CORE::length($seq), $s->strand, 'mapped_slice')) {
+    
+      # normal coord
+      if(!$ref_coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate') && !$ref_coord->isa('Bio::EnsEMBL::Mapper::Gap')) {
+      
+        # project from reference slice to container slice using the container's mapper
+        foreach my $ms_coord($self->container->mapper->map_coordinates($self->container->ref_slice->seq_region_name, $ref_coord->start, $ref_coord->end, $ref_coord->strand, 'ref_slice')) {
+          
+          # normal coord
+          if(!$ms_coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate') && !$ms_coord->isa('Bio::EnsEMBL::Mapper::Gap')) {
+            $ms_seq .= substr($seq, $start, $ms_coord->length);
+            
+            $start += $ms_coord->length();
+          }
+          
+          # indel coord
+          else {
+            $ms_seq .= '-' x $ms_coord->length();
+          }
+        }
+      }
+      
+      # indel / gap
+      else {
+      
+        # if there's a gap here aswell, add corresponding sequence
+        if($ref_coord->gap_length > 0) {
+          $ms_seq .= substr($seq, $start, $ref_coord->gap_length);
+          $start += $ref_coord->gap_length;
+        }
+        
+        # add "-" to the sequence
+        $ms_seq .= '-' x ($ref_coord->length() - $ref_coord->gap_length());
+      }
+    }
+  }
+  
+  return $ms_seq;
+}
+
+sub subseq {
+}
+
+sub get_repeatmasked_seq {
+}
+
+sub sub_MappedSlice {
+}
+
+sub project {
+}
+
+
+=head2 AUTOLOAD
+
+  Arg[1..N]   : Arguments passed on to the calls on the underlying slices.
+  Example     : my @simple_features = @{ $mapped_slice->get_all_SimpleFeatures };
+  Description : Aggregate data gathered from composing Slices.
+                This will call Slice->get_all_* and combine the results.
+                Coordinates will be transformed to be on the container slice
+                coordinate system.
+
+                Calls involving DAS features are skipped since the DAS adaptor
+                handles coordinate conversions natively.
+  Return type : listref of features (same type as corresponding Slice method)
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub AUTOLOAD {
+  my $self = shift;
+  
+  my $method = $AUTOLOAD;
+  $method =~ s/.*:://;
+  
+  # AUTOLOAD should only deal with get_all_* methods
+  return unless ($method =~ /^get_all_/);
+
+  # skip DAS methods
+  return if ($method =~ /DAS/);
+  
+  my @mapped_features = ();
+
+  foreach my $pair (@{ $self->get_all_Slice_Mapper_pairs }) {
+    my ($slice, $mapper) = @$pair;
+    #warn $slice->name;
+
+    # call $method on each native slice composing the MappedSlice
+    my @features = @{ $slice->$method(@_) };
+    
+    # map features onto the artificial container coordinate system
+    foreach my $f (@features) {
+      
+      my @coords = $mapper->map_coordinates(
+          $f->slice->seq_region_name,
+          $f->start,
+          $f->end,
+          $f->strand,
+          'mapped_slice'
+      );
+
+      # sanity check
+      if (scalar(@coords) > 1) {
+        warning("Got more than one Coordinate returned, expected only one!\n");
+      }
+
+      $f->start($coords[0]->start);
+      $f->end($coords[0]->end);
+      $f->strand($coords[0]->strand);
+      $f->slice($self->container->container_slice);
+
+      push @mapped_features, $f;
+    }
+
+  }
+
+  return \@mapped_features;
+}
+
+
+1;
+