view variant_effect_predictor/Bio/EnsEMBL/AlignStrainSlice.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
line wrap: on
line source

=head1 LICENSE

  Copyright (c) 1999-2012 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <dev@ensembl.org>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut

=head1 NAME

Bio::EnsEMBL::AlignStrainSlice - Represents the slice of the genome aligned with certain strains (applying the variations/indels)

=head1 SYNOPSIS

  $sa = $db->get_SliceAdaptor;

  $slice =
    $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );

  $strainSlice1 = $slice->get_by_Strain($strain_name1);
  $strainSlice2 = $slice->get_by_Strain($strain_name2);

  my @strainSlices;
  push @strainSlices, $strainSlice1;
  push @strainSlices, $strainSlice2;

  $alignSlice = Bio::EnsEMBL::AlignStrainSlice->new(
    -SLICE   => $slice,
    -STRAINS => \@strainSlices
  );

  # Get coordinates of variation in alignSlice
  my $alleleFeatures = $strainSlice1->get_all_AlleleFeature_Slice();

  foreach my $af ( @{$alleleFeatures} ) {
    my $new_feature = $alignSlice->alignFeature( $af, $strainSlice1 );
    print( "Coordinates of the feature in AlignSlice are: ",
      $new_feature->start, "-", $new_feature->end, "\n" );
  }

=head1 DESCRIPTION

A AlignStrainSlice object represents a region of a genome align for
certain strains.  It can be used to align certain strains to a reference
slice.

=head1 METHODS

=cut

package Bio::EnsEMBL::AlignStrainSlice;
use strict;

use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Mapper;
use Bio::EnsEMBL::Mapper::RangeRegistry;
use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);

=head2 new

    Arg[1]      : Bio::EnsEMBL::Slice $Slice
    Arg[2]      : listref of Bio::EnsEMBL::StrainSlice $strainSlice
    Example     : push @strainSlices, $strainSlice1;
                  push @strainSlices, $strainSlice2;
                  .....
                  push @strainSlices, $strainSliceN;
                  $alignStrainSlice = Bio::EnsEMBL::AlignStrainSlice->new(-SLICE => $slice,
									  -STRAIN => \@strainSlices);
    Description : Creates a new Bio::EnsEMBL::AlignStrainSlice object that will contain a mapper between
                  the Slice object, plus all the indels from the different Strains
    ReturnType  : Bio::EnsEMBL::AlignStrainSlice
    Exceptions  : none
    Caller      : general

=cut

sub new{
    my $caller = shift;
    my $class = ref($caller) || $caller;

    my ($slice, $strainSlices) = rearrange([qw(SLICE STRAINS)],@_);

    #check that both StrainSlice and Slice are identical (must have been defined in the same slice)
    foreach my $strainSlice (@{$strainSlices}){
	if (($strainSlice->start != $slice->start) || ($strainSlice->end != $slice->end) || ($strainSlice->seq_region_name ne $slice->seq_region_name)){
	    warning("Not possible to create Align object from different Slices");
	    return [];
	}
    }

    return bless{'slice' => $slice,
		 'strains' => $strainSlices}, $class;
}

=head2 alignFeature

    Arg[1]      : Bio::EnsEMBL::Feature $feature
    Arg[2]      : Bio::EnsEMBL::StrainSlice $strainSlice
    Example     : $new_feature = $alignSlice->alignFeature($feature, $strainSlice);
    Description : Creates a new Bio::EnsEMBL::Feature object that aligned to 
                  the AlignStrainSlice object.
    ReturnType  : Bio::EnsEMBL::Feature
    Exceptions  : none
    Caller      : general

=cut

sub alignFeature{
    my $self = shift;
    my $feature = shift;

    #check that the object is a Feature
    if (!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')){	
	throw("Bio::EnsEMBL::Feature object expected");
    }
    #and align it to the AlignStrainSlice object
    my $mapper_strain = $self->mapper();

    my @results;
  
    if ($feature->start > $feature->end){
	#this is an Indel, map it with the special method
	@results = $mapper_strain->map_indel('Slice',$feature->start, $feature->end, $feature->strand,'Slice');
	#and modify the coordinates according to the length of the indel
	$results[0]->end($results[0]->start + $feature->length_diff -1);
    }
    else{
	@results = $mapper_strain->map_coordinates('Slice',$feature->start, $feature->end, $feature->strand,'Slice');
     }
    #get need start and end of the new feature, aligned ot AlignStrainSlice
    my @results_ordered = sort {$a->start <=> $b->start} @results;

    my %new_feature = %$feature; #make a shallow copy of the Feature
    $new_feature{'start'}= $results_ordered[0]->start();
    $new_feature{'end'} = $results_ordered[-1]->end();  #get last element of the array, the end of the slice

    return bless \%new_feature, ref($feature);
    
}


#getter for the mapper between the Slice and the different StrainSlice objects
sub mapper{
    my $self = shift;
    
    if (!defined $self->{'mapper'}){
	#get the alleleFeatures in all the strains
	if (!defined $self->{'indels'}){
	    #when the list of indels is not defined, get them
	    $self->{'indels'} = $self->_get_indels();
	}
	my $indels = $self->{'indels'}; #gaps in reference slice
	my $mapper = Bio::EnsEMBL::Mapper->new('Slice', 'AlignStrainSlice');
	my $start_slice = 1;
	my $end_slice;
	my $start_align = 1;
	my $end_align;
	my $length_indel = 0;
	my $length_acum_indel = 0;
	foreach my $indel (@{$indels}){
	    $end_slice = $indel->[0] - 1;
	    $end_align = $indel->[0] - 1 + $length_acum_indel; #we must consider length previous indels

	    $length_indel = $indel->[1] - $indel->[0] + 1;
	    

	    $mapper->add_map_coordinates('Slice',$start_slice,$end_slice,1,'AlignStrainSlice',$start_align,$end_align);
	    
	    $mapper->add_indel_coordinates('Slice',$end_slice + 1,$end_slice,1,'AlignStrainSlice',$end_align + 1,$end_align + $length_indel);
	    $start_slice = $end_slice + 1;
	    $start_align = $indel->[1] + 1 + $length_acum_indel; #we must consider legnth previous indels
	    
	    $length_acum_indel += $length_indel;
	}
	if ($start_slice <= $self->length){
	    $mapper->add_map_coordinates('Slice',$start_slice,$self->length,1,'AlignStrainSlice',$start_align,$start_align + $self->length - $start_slice)
	}
	$self->{'mapper'} = $mapper;
	
    }
    return $self->{'mapper'};
}

#returns the length of the AlignSlice: length of the Slice plus the gaps
sub length{
    my $self = shift;
    my $length;
    if (!defined $self->{'indels'}){
	#when the list of indels is not defined, get them
	$self->{'indels'} = $self->_get_indels();	
    }
    $length = $self->{'slice'}->length;
    map {$length += ($_->[1] - $_->[0] + 1)} @{$self->{'indels'}};
    return $length;
}

=head2 strains

  Args       : None
  Description: Returns list with all strains used to
               define this AlignStrainSlice object
  Returntype : listref of Bio::EnsEMBL::StrainSlice objects
  Exceptions : none
  Caller     : general

=cut

sub strains{
    my $self = shift;

    return $self->{'strains'};
}

=head2 Slice

  Args       : None
  Description: Returns slice where the AlignStrainSlice
               is defined
  Returntype : Bio::EnsEMBL::Slice object
  Exceptions : none
  Caller     : general

=cut

sub Slice{
    my $self = shift;
    return $self->{'slice'};
}
#method to retrieve, in order, a list with all the indels in the different strains
sub _get_indels{
    my $self = shift;
    
    #go throuh all the strains getting ONLY the indels (length_diff <> 0)
    my @indels;
    foreach my $strainSlice (@{$self->strains}){
	my $differences = $strainSlice->get_all_AlleleFeatures_Slice(); #need to check there are differences....
	foreach my $af (@{$differences}){
	    #if length is 0, but is a -, it is still a gap in the strain
	    if (($af->length_diff != 0) || ($af->length_diff == 0 && $af->allele_string =~ /-/)){
		push @indels, $af;
	    }
	}
    }
    #need to overlap the gaps using the RangeRegistry module
    my $range_registry = Bio::EnsEMBL::Mapper::RangeRegistry->new();
    foreach my $indel (@indels){
	#in the reference and the strain there is a gap
	$range_registry->check_and_register(1,$indel->start,$indel->start) if ($indel->length_diff == 0);
	#deletion in reference slice
	$range_registry->check_and_register(1,$indel->start, $indel->end ) if ($indel->length_diff < 0);
	#insertion in reference slice
	$range_registry->check_and_register(1,$indel->start,$indel->start + $indel->length_diff - 1) if ($indel->length_diff > 0);
    }
    #and return all the gap coordinates....
    return $range_registry->get_ranges(1);
}

=head2 get_all_Slices

  Args       : none
  Description: This Slice is made of several Bio::EnsEMBL::StrainSlices
               sequence. This method returns these StrainSlices (or part of
               them) with the original coordinates 
  Returntype : listref of Bio::EnsEMBL::StrainSlice objects
  Exceptions : end should be at least as big as start
  Caller     : general

=cut

sub get_all_Slices {
  my $self = shift;

  my @strains;
  #add the reference strain
  my $dbVar = $self->Slice->adaptor->db->get_db_adaptor('variation');
  unless($dbVar) {
	warning("Variation database must be attached to core database to " .
		"retrieve variation information" );
	return '';
    }
  my $indAdaptor = $dbVar->get_IndividualAdaptor();
  my $ref_name =  $indAdaptor->get_reference_strain_name;
  my $ref_strain = Bio::EnsEMBL::StrainSlice->new(
					  -START   => $self->Slice->{'start'},
					  -END     => $self->Slice->{'end'},
					  -STRAND  => $self->Slice->{'strand'},
					  -ADAPTOR => $self->Slice->adaptor(),
					  -SEQ    => $self->Slice->{'seq'},
					  -SEQ_REGION_NAME => $self->Slice->{'seq_region_name'},
					  -SEQ_REGION_LENGTH => $self->Slice->{'seq_region_length'},
					  -COORD_SYSTEM    => $self->Slice->{'coord_system'},
					  -STRAIN_NAME     => $ref_name,
					   );
  #this is a fake reference alisce, should not contain any alleleFeature
  undef $ref_strain->{'alleleFeatures'};
  
  push @strains, @{$self->strains};
  my $new_feature;
  my $indel;
  my $aligned_features;
  my $indels = (); #reference to a hash containing indels in the different strains
  #we need to realign all Features in the different Slices and add '-' in the reference Slice
  foreach my $strain (@{$self->strains}){
      foreach my $af (@{$strain->get_all_AlleleFeatures_Slice()}){
	  $new_feature = $self->alignFeature($af); #align feature in AlignSlice coordinates
	  push @{$aligned_features},$new_feature if($new_feature->seq_region_start <= $strain->end); #some features might map outside slice
	  if ($af->start != $af->end){ #an indel, need to add to the reference, and realign in the strain	     
              #make a shallow copy of the indel - clear it first!
          $indel = undef;
	      %{$indel} = %{$new_feature};
	      bless $indel, ref($new_feature);
	      $indel->allele_string('-');
	      push @{$indels},$indel; #and include in the list of potential indels
	  }
      }
      next if (!defined $aligned_features);
      undef $strain->{'alleleFeatures'}; #remove all features before adding new aligned features
      push @{$strain->{'alleleFeatures'}}, @{$aligned_features};
      undef $aligned_features;
  }  
  push @strains, $ref_strain;
  #need to add indels in the different strains, if not present
  if (defined $indels){
      foreach my $strain (@strains){
	  #inlcude the indels in the StrainSlice object
	  push @{$strain->{'alignIndels'}},@{$indels};
      }
  }
  return \@strains;
}

1;