view variant_effect_predictor/Bio/EnsEMBL/MappedSlice.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line source

=head1 LICENSE

  Copyright (c) 1999-2012 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <dev@ensembl.org>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut

=head1 NAME

Bio::EnsEMBL::MappedSlice - an object representing a mapped slice

=head1 SYNOPSIS

  # get a reference slice
  my $slice =
    $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 );

  # create MappedSliceContainer based on the reference slice
  my $msc = Bio::EnsEMBL::MappedSliceContainer->new( -SLICE => $slice );

  # set the adaptor for fetching AssemblySlices
  my $asa = $slice->adaptor->db->get_AssemblySliceAdaptor;
  $msc->set_AssemblySliceAdaptor($asa);

  # add an AssemblySlice to your MappedSliceContainer
  $msc->attach_AssemblySlice('NCBIM36');

  foreach my $mapped_slice ( @{ $msc->get_all_MappedSlices } ) {
    print $mapped_slice->name, "\n";

    foreach my $sf ( @{ $mapped_slice->get_all_SimpleFeatures } ) {
      print "  ", &to_string($sf), "\n";
    }
  }

=head1 DESCRIPTION

NOTE: this code is under development and not fully functional nor tested
yet.  Use only for development.

This object represents a mapped slice, i.e. a slice that's attached
to a reference slice and a mapper to convert coordinates to/from the
reference. The attachment is done via a MappedSliceContainer which
has the reference slice and the "container slice" defining the common
coordinate system for all MappedSlices.

A MappedSlice is supposed to behave as close to a Bio::EnsEMBL::Slice
as possible. Most Slice methods are implemented in MappedSlice and will
return an equivalent value to what Slice does. There are some exceptions
of unimplemented methods, either because there is no useful equivalent
for a MappedSlice to do, or they are too complicated.

Not supported Bio::EnsEMBL::Slice methods:

  All deprecated methods
  All Bio::PrimarySeqI compliance methods
  expand
  get_generic_features
  get_seq_region_id
  seq_region_Slice

Not currently supported but maybe should/could:

  calculate_pi
  calculate_theta
  get_base_count
  get_by_Individual
  get_by_strain
  invert

Internally, a MappedSlice is a collection of Bio::EnsEMBL::Slices and
associated Bio::EnsEMBL::Mappers which map the slices to the common
container coordinate system.

MappedSlices are usually created and attached to a MappedSliceContainer
by an adaptor/factory.

=head1 METHODS

  new
  add_Slice_Mapper_pair
  get_all_Slice_Mapper_pairs
  adaptor
  container
  name
  seq_region_name
  start
  end
  strand
  length
  seq_region_length
  centrepoint
  coord_system
  coord_system_name
  is_toplevel
  seq (not implemented yet)
  subseq (not implemented yet)
  get_repeatmasked_seq (not implemented yet)
  sub_MappedSlice (not implemented yet)
  project (not implemented yet)

=head1 RELATED MODULES

  Bio::EnsEMBL::MappedSlice
  Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
  Bio::EnsEMBL::Compara::AlignSlice
  Bio::EnsEMBL::Compara::AlignSlice::Slice
  Bio::EnsEMBL::AlignStrainSlice
  Bio::EnsEMBL::StrainSlice

=cut

package Bio::EnsEMBL::MappedSlice;

use strict;
use warnings;
no warnings 'uninitialized';

use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Mapper;
use Scalar::Util qw(weaken);

use vars qw($AUTOLOAD);


=head2 new

  Arg [ADAPTOR]   : Adaptor $adaptor - an adaptor of the appropriate type
  Arg [CONTAINER] : Bio::EnsEMBL::MappedSliceContainer $container - the
                    container this MappedSlice is attached to
  Arg [NAME]      : String $name - name
  Example     : my $mapped_slice = Bio::EnsEMBL::MappedSlice->new(
                  -ADAPTOR   => $adaptor,
                  -CONTAINER => $container,
                  -NAME      => $name,
                );
  Description : Constructor. Usually you won't call this method manually, but
                the MappedSlice will be constructed by an adaptor/factory.
  Return type : Bio::EnsEMBL::MappedSlice
  Exceptions  : thrown on wrong or missing arguments
  Caller      : general, MappedSlice adaptors
  Status      : At Risk
              : under development

=cut

sub new {
  my $caller = shift;
  my $class = ref($caller) || $caller;

  my ($adaptor, $container, $name) =
    rearrange([qw(ADAPTOR CONTAINER NAME)], @_);

  # arguement check
  unless ($container and ref($container) and
          $container->isa('Bio::EnsEMBL::MappedSliceContainer')) {
    throw("Need a MappedSliceContainer.");
  }

  my $self = {};
  bless ($self, $class);

  #
  # initialise object
  #
  
  # need to weaken reference to prevent circular reference
  weaken($self->{'container'} = $container);

  $self->adaptor($adaptor) if (defined($adaptor));
  $self->{'name'} = $name if (defined($name));

  $self->{'slice_mapper_pairs'} = [];

  return $self;
}


=head2 add_Slice_Mapper_pair 

  Arg[1]      : Bio::EnsEMBL::Slice $slice - slice to add
  Arg[2]      : Bio::EnsEMBL::Mapper $mapper - the mapper for this slice
  Example     : $mapped_slice->add_Slice_Mapper_pair($slice, $mapper);
  Description : Adds a native slice and a corresponding mapper to map to/from
                the artificial container coord system.
  Return type : listref of Bio::EnsEMBL::MappedSlice
  Exceptions  : thrown on wrong or missing arguments
  Caller      : general, MappedSlice adaptors
  Status      : At Risk
              : under development

=cut

sub add_Slice_Mapper_pair {
  my $self = shift;
  my $slice = shift;
  my $mapper = shift;

  # argument check
  unless ($slice and ref($slice) and ($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
    throw("You must provide a slice.");
  }

  unless ($mapper and ref($mapper) and $mapper->isa('Bio::EnsEMBL::Mapper')) {
    throw("You must provide a mapper.");
  }

  push @{ $self->{'slice_mapper_pairs'} }, [ $slice, $mapper ];
  
  return $self->{'slice_mapper_pairs'};
}


=head2 get_all_Slice_Mapper_pairs 

  Example     : foreach my $pair (@{ $self->get_all_Slice_Mapper_pairs }) {
                  my ($slice, $mapper) = @$pair;

                  # get container coordinates
                  my @coords = $mapper->map_coordinates(
                    $slice->seq_region_name,
                    $slice->start,
                    $slice->end,
                    $slice->strand,
                    'mapped_slice'
                  );

                  # ....
                }
  Description : Gets all Slice/Mapper pairs this MappedSlice is composed of.
                Each slice (and features on it) can be mapped onto the
                artificial container coord system using the mapper.
  Return type : listref of listref of a Bio::EnsEMBL::Slice and
                Bio::EnsEMBL::Mapper pair
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub get_all_Slice_Mapper_pairs {
  my $self = shift;
  return $self->{'slice_mapper_pairs'};
}


=head2 adaptor

  Arg[1]      : (optional) Adaptor $adaptor - the adaptor/factory for this
                object
  Example     : $mapped_slice->adaptor($assembly_slice_adaptor);
  Description : Getter/setter for the adaptor/factory for this object.
  Return type : Adaptor of appropriate type
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub adaptor {
  my $self = shift;
  weaken($self->{'adaptor'} = shift) if (@_);
  return $self->{'adaptor'};
}


=head2 container

  Arg[1]      : (optional) Bio::EnsEMBL::MappedSliceContainer - the container
                this object is attached to
  Example     : my $container = $mapped_slice->container;
                print $container->ref_slice->name, "\n";
  Description : Getter/setter for the container this object is attached to. The
                container will give you access to the reference slice, a common
                artificial container slice, and a mapper to map to it from the
                container coord system.

                The implementation uses a weak reference to attach the container
                since the container holds a list of MappedSlices itself.
  Return type : Bio::EnsEMBL::MappedSliceContainer
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub container {
  my $self = shift;
  weaken($self->{'container'} = shift) if (@_);
  return $self->{'container'};
}


=head2 name

  Arg[1]      : String - the name of this object
  Example     : my $name = $mapped_slice->container->ref_slice->name .
                  ":mapped_" . $ident_string;
                $mapped_slice->name($name);
  Description : Getter/setter for this object's name
  Return type : String
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub name {
  my $self = shift;
  $self->{'name'} = shift if (@_);
  return $self->{'name'};
}


=head2 seq_region_name

  Example     : my $sr_name = $mapped_slice->seq_region_name;
  Description : Returns the seq_region name of the reference slice.
  Return type : String
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub seq_region_name {
  my $self = shift;
  return $self->container->ref_slice->seq_region_name;
}


=head2 start

  Example     : my $start = $mapped_slice->start;
  Description : Returns the start of the container slice.
  Return type : Int
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub start {
  my $self = shift;
  return $self->container->container_slice->start;
}


=head2 end

  Example     : my $end = $mapped_slice->end;
  Description : Returns the end of the container slice.
  Return type : Int
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub end {
  my $self = shift;
  return $self->container->container_slice->end;
}


=head2 strand

  Example     : my $strand = $mapped_slice->strand;
  Description : Returns the strand of the container slice.
  Return type : Int
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub strand {
  my $self = shift;
  return $self->container->container_slice->strand;
}


=head2 length

  Example     : my $length = $mapped_slice->length;
  Description : Returns the length of the container slice
  Return type : Int
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub length {
  my $self = shift;
  return $self->container->container_slice->length;
}


=head2 seq_region_length

  Example     : my $sr_length = $mapped_slice->seq_region_length;
  Description : Returns the seq_region length of the reference slice.
  Return type : Int
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub seq_region_length {
  my $self = shift;
  return $self->container->ref_slice->seq_region_length;
}


=head2 centrepoint

  Example     : my $centrepoint = $mapped_slice->centrepoint;
  Description : Returns the centrepoint of the container slice.
  Return type : Int
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub centrepoint {
  my $self = shift;
  return $self->container->container_slice->centrepoint;
}


=head2 coord_system

  Example     : my $cs = $mapped_slice->coord_system;
  Description : Returns the coord system of the container slice.
  Return type : Bio::EnsEMBL::CoordSystem
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub coord_system {
  my $self = shift;
  return $self->container->container_slice->coord_system;
}

=head2 coord_system_name

  Example     : my $cs_name = $mapped_slice->coord_system_name;
  Description : Returns the coord system name of the container slice.
  Return type : Int
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub coord_system_name {
  my $self = shift;
  return $self->container->container_slice->coord_system_name;
}

=head2 is_toplevel

  Example     : my $toplevel_flag = $mapped_slice->is_toplevel;
  Description : Returns weather the container slice is toplevel.
  Return type : Int
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub is_toplevel {
  my $self = shift;
  return $self->container->container_slice->is_toplevel;
}


=head2 seq

  Example     : my $seq = $mapped_slice->seq()
  Description : Retrieves the expanded sequence of this mapped slice,
                including "-" characters where there are inserts in any other
                mapped slices. This will align with the sequence returned by
                the container's seq() method.
  Return type : String
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub seq {
  my $self = shift;
  
  # create an empty string
  my $ms_seq = '';
  
  # this coord represents the current position in the MS sequence
  my $start = 0;

  # get slice/mapper pairs from mapped slice (usually only one anyway)
  foreach my $pair(@{$self->get_all_Slice_Mapper_pairs()}) {
    my ($s, $m) = @$pair;
    
    # make sure to send extra args
    # eg strain slices might need read coverage filtering
    my $seq = $s->seq(@_);
    
    # project from mapped slice to reference slice using the mapper
    foreach my $ref_coord($m->map_coordinates('mapped_slice', 1, CORE::length($seq), $s->strand, 'mapped_slice')) {
    
      # normal coord
      if(!$ref_coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate') && !$ref_coord->isa('Bio::EnsEMBL::Mapper::Gap')) {
      
        # project from reference slice to container slice using the container's mapper
        foreach my $ms_coord($self->container->mapper->map_coordinates($self->container->ref_slice->seq_region_name, $ref_coord->start, $ref_coord->end, $ref_coord->strand, 'ref_slice')) {
          
          # normal coord
          if(!$ms_coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate') && !$ms_coord->isa('Bio::EnsEMBL::Mapper::Gap')) {
            $ms_seq .= substr($seq, $start, $ms_coord->length);
            
            $start += $ms_coord->length();
          }
          
          # indel coord
          else {
            $ms_seq .= '-' x $ms_coord->length();
          }
        }
      }
      
      # indel / gap
      else {
      
        # if there's a gap here aswell, add corresponding sequence
        if($ref_coord->gap_length > 0) {
          $ms_seq .= substr($seq, $start, $ref_coord->gap_length);
          $start += $ref_coord->gap_length;
        }
        
        # add "-" to the sequence
        $ms_seq .= '-' x ($ref_coord->length() - $ref_coord->gap_length());
      }
    }
  }
  
  return $ms_seq;
}

sub subseq {
}

sub get_repeatmasked_seq {
}

sub sub_MappedSlice {
}

sub project {
}


=head2 AUTOLOAD

  Arg[1..N]   : Arguments passed on to the calls on the underlying slices.
  Example     : my @simple_features = @{ $mapped_slice->get_all_SimpleFeatures };
  Description : Aggregate data gathered from composing Slices.
                This will call Slice->get_all_* and combine the results.
                Coordinates will be transformed to be on the container slice
                coordinate system.

                Calls involving DAS features are skipped since the DAS adaptor
                handles coordinate conversions natively.
  Return type : listref of features (same type as corresponding Slice method)
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub AUTOLOAD {
  my $self = shift;
  
  my $method = $AUTOLOAD;
  $method =~ s/.*:://;
  
  # AUTOLOAD should only deal with get_all_* methods
  return unless ($method =~ /^get_all_/);

  # skip DAS methods
  return if ($method =~ /DAS/);
  
  my @mapped_features = ();

  foreach my $pair (@{ $self->get_all_Slice_Mapper_pairs }) {
    my ($slice, $mapper) = @$pair;
    #warn $slice->name;

    # call $method on each native slice composing the MappedSlice
    my @features = @{ $slice->$method(@_) };
    
    # map features onto the artificial container coordinate system
    foreach my $f (@features) {
      
      my @coords = $mapper->map_coordinates(
          $f->slice->seq_region_name,
          $f->start,
          $f->end,
          $f->strand,
          'mapped_slice'
      );

      # sanity check
      if (scalar(@coords) > 1) {
        warning("Got more than one Coordinate returned, expected only one!\n");
      }

      $f->start($coords[0]->start);
      $f->end($coords[0]->end);
      $f->strand($coords[0]->strand);
      $f->slice($self->container->container_slice);

      push @mapped_features, $f;
    }

  }

  return \@mapped_features;
}


1;