view variant_effect_predictor/Bio/EnsEMBL/StrainSlice.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
line wrap: on
line source

=head1 LICENSE

  Copyright (c) 1999-2012 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <dev@ensembl.org>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut

=head1 NAME

Bio::EnsEMBL::StrainSlice - SubClass of the Slice. Represents the slice
of the genome for a certain strain (applying the variations)

=head1 SYNOPSIS

  $sa = $db->get_SliceAdaptor;

  $slice =
    $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );

  $strainSlice = $slice->get_by_strain($strain_name);

  # get the sequence from the Strain Slice
  my $seq = $strainSlice->seq();
  print $seq;

  # get allele features between this StrainSlice and the reference
  my $afs = $strainSlice->get_all_AlleleFeatures_Slice();
  foreach my $af ( @{$afs} ) {
    print "AlleleFeature in position ", $af->start, "-", $af->end,
      " in strain with allele ", $af->allele_string, "\n";
  }

  # compare a strain against another strain
  my $strainSlice_2 = $slice->get_by_strain($strain_name_2);
  my $differences =
    $strainSlice->get_all_differences_StrainSlice($strainSlice_2);

  foreach my $difference ( @{$differences} ) {
    print "Difference in position ", $difference->start, "-",
      $difference->end(),           " in strain with allele ",
      $difference->allele_string(), "\n";
  }

=head1 DESCRIPTION

A StrainSlice object represents a region of a genome for a certain
strain.  It can be used to retrieve sequence or features from a strain.

=head1 METHODS

=cut

package Bio::EnsEMBL::StrainSlice;
use vars qw(@ISA);
use strict;

use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
use Bio::EnsEMBL::Slice;
use Bio::EnsEMBL::Mapper;
use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);

@ISA = qw(Bio::EnsEMBL::Slice);


=head2 new

    Arg[1]      : Bio::EnsEMBL::Slice $slice
    Arg[2]      : string $strain_name
    Example     : $strainSlice = Bio::EnsEMBL::StrainSlice->new(-.... => ,
							      -strain_name => $strain_name);
    Description : Creates a new Bio::EnsEMBL::StrainSlice object that will contain a shallow copy of the
                  Slice object, plus additional information such as the Strain this Slice refers to
                  and listref of Bio::EnsEMBL::Variation::AlleleFeatures of differences with the
                  reference sequence
    ReturnType  : Bio::EnsEMBL::StrainSlice
    Exceptions  : none
    Caller      : general

=cut

sub new{
    my $caller = shift;
    my $class = ref($caller) || $caller;

    my ($strain_name) = rearrange(['STRAIN_NAME'],@_);

    my $self = $class->SUPER::new(@_);

    $self->{'strain_name'} = $strain_name;

    if(!$self->adaptor()) {
	warning('Cannot get new StrainSlice features without attached adaptor');
	return '';
    }
    my $variation_db = $self->adaptor->db->get_db_adaptor('variation');

    unless($variation_db) {
	warning("Variation database must be attached to core database to " .
		"retrieve variation information" );
	return '';
    }
    
    my $af_adaptor = $variation_db->get_AlleleFeatureAdaptor;
    
    if( $af_adaptor ) {
	#get the Individual for the given strain
	my $ind_adaptor = $variation_db->get_IndividualAdaptor;

	if ($ind_adaptor){
	    my $individual = shift @{$ind_adaptor->fetch_all_by_name($self->{'strain_name'})}; #the name should be unique for a strain
	    #check that the individua returned isin the database

            if (defined $individual){
                my $allele_features = $af_adaptor->fetch_all_by_Slice($self,$individual);
                #warning("No strain genotype data available for Slice ".$self->name." and Strain ".$individual->name) if ! defined $allele_features->[0];
                my $vf_ids = {}; #hash containing the relation vf_id->af
		$self->{'_strain'} = $individual;		
		map {defined $_->{'_variation_feature_id'} ? $vf_ids->{$_->{'_variation_feature_id'}} = $_ : ''
} @{$allele_features};
#		my $new_allele_features = $self->_filter_af_by_coverage($allele_features);
#		$self->{'alleleFeatures'} = $new_allele_features;
		$self->{'alleleFeatures'} = $allele_features || [];
		$self->{'_vf_ids'} = $vf_ids;
		return $self;
	    }
	    else{ 
		warning("Strain ($self->{strain_name}) not in the database");
		return $self;
	    }
	}
	else{
	    warning("Not possible to retrieve IndividualAdaptor from the variation database");
	    return '';
	}
    } else {
	warning("Not possible to retrieve VariationFeatureAdaptor from variation database");
	return '';
    }
}

=head2 _filter_af_by_coverage

    Arg [1]     : listref to Bio::EnsEMBL::Variation::AlleleFeatures  $allele_features
    Example     : my $new_list_allele_features = $strainSlice->_filter_af_by_coverage($allele_features);
    Description : For a list of allele features, gets a new list where they are filter depending on coverage
    ReturnType  : listref of Bio::EnsEMBL::Variation::AlleleFeature
    Exceptions  : none
    Caller      : internal function

=cut

sub _filter_af_by_coverage{
    my $self = shift;
    my $allele_features = shift;

    my $variation_db = $self->adaptor->db->get_db_adaptor('variation');

    unless($variation_db) {
	warning("Variation database must be attached to core database to " .
		"retrieve variation information" );
	return '';
    }
    
    my $rc_adaptor = $variation_db->get_ReadCoverageAdaptor();
    #this is ugly, but ReadCoverage is always defined in the positive strand

### EK : - it looks like the arguments to fetch_all_by_Slice_Sample_depth have changed
###  passing 1 will only get you the coverage of level 1
###  by omitting the parameter we take into account all coverage regions 
#    my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'},1);
    my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'});
    my $new_af;
    foreach my $af (@{$allele_features}){
	foreach my $rc (@{$rcs}){
	    if ($af->start <= $rc->end and $af->start >= $rc->start){
		push @{$new_af}, $af;
		last;
	    }
	}
    }
    
    return $new_af;
}


=head2 strain_name

    Arg [1]     : (optional) string $strain_name
    Example     : my $strain_name = $strainSlice->strain_name();
    Description : Getter/Setter for the name of the strain
    ReturnType  : string
    Exceptions  : none
    Caller      : general

=cut

sub strain_name{
   my $self = shift;
   if (@_){
       $self->{'strain_name'} = shift @_;
   }
   return $self->{'strain_name'};
}


=head2 display_Slice_name

    Args        : none
    Example     : my $strain_name = $strainSlice->display_Slice_name();
    Description : Getter for the name of the strain
    ReturnType  : string
    Exceptions  : none
    Caller      : webteam

=cut

sub display_Slice_name{
    my $self = shift;

    return $self->strain_name;
}

=head2 seq

  Arg [1]    : int $with_coverage (optional)
  Example    : print "SEQUENCE = ", $strainSlice->seq();
  Description: Returns the sequence of the region represented by this
               slice formatted as a string in the strain. If flag with_coverage
               is set to 1, returns sequence if there is coverage in the region
  Returntype : string
  Exceptions : none
  Caller     : general

=cut

sub seq {
  my $self = shift;
  my $with_coverage = shift;

  $with_coverage ||= 0;

  # special case for in-between (insert) coordinates
  return '' if($self->start() == $self->end() + 1);

  return $self->{'seq'} if($self->{'seq'});

  if($self->adaptor()) {
    my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
    my $reference_sequence = $seqAdaptor->fetch_by_Slice_start_end_strand($self,1,undef,1); #get the reference sequence for that slice
	
    #apply all differences to the reference sequence
    #first, in case there are any indels, create the new sequence (containing the '-' bases)
   # sort edits in reverse order to remove complication of
    # adjusting downstream edits
    my @indels_ordered = sort {$b->start() <=> $a->start()} @{$self->{'alignIndels'}} if (defined $self->{'alignIndels'});

    foreach my $vf (@indels_ordered){
	$vf->apply_edit($reference_sequence); #change, in the reference sequence, the vf
    }
	
    #need to find coverage information if diffe
    # sort edits in reverse order to remove complication of
    # adjusting downstream edits
    my @variation_features_ordered = sort {$b->start() <=> $a->start()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'});

    foreach my $vf (@variation_features_ordered){
	$vf->apply_edit($reference_sequence); #change, in the reference sequence, the vf
    }
	
    #need to find coverage information if different from reference
    my $indAdaptor = $self->adaptor->db->get_db_adaptor('variation')->get_IndividualAdaptor;
    my $ref_strain = $indAdaptor->get_reference_strain_name;
    $self->_add_coverage_information($reference_sequence) if ($with_coverage == 1 && $self->strain_name ne $ref_strain);
    return substr(${$reference_sequence},0,1) if ($self->length == 1); 
    return substr(${$reference_sequence},0,$self->expanded_length); #returns the reference sequence, applying the variationFeatures. Remove additional bases added due to indels
  }

  # no attached sequence, and no db, so just return Ns
  return 'N' x $self->length();
}

sub expanded_length() {
	my $self = shift;
	
	my $length = $self->SUPER::length();
	
	foreach my $af(@{$self->{'alleleFeatures'}}) {
		$length += $af->length_diff() if $af->length_diff > 0;
	}
	
	return $length;
}



sub _add_coverage_information{
    my $self = shift;
    my $reference_sequence = shift;

    my $variation_db = $self->adaptor->db->get_db_adaptor('variation');

    unless($variation_db) {
	warning("Variation database must be attached to core database to " .
		"retrieve variation information" );
	return '';
    }
    
    my $rc_adaptor = $variation_db->get_ReadCoverageAdaptor();
### EK : - it looks like the arguments to fetch_all_by_Slice_Sample_depth have changed
###  passing 1 will only get you the coverage of level 1
###  by omitting the parameter we take into account all coverage regions 
#    my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'},1);
    my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'});
    my $rcs_sorted;
    @{$rcs_sorted} = sort {$a->start <=> $b->start} @{$rcs} if ($self->strand == -1);
    $rcs = $rcs_sorted if ($self->strand == -1);
    my $start = 1;
	
	
	# wm2 - new code to mask sequence, instead starts with masked string
	# and unmasks seq where there is read coverage
	
	# get all length-changing vars
	my @indels_ordered = sort {$a->start() <=> $b->start()} @{$self->{'alignIndels'}} if (defined $self->{'alignIndels'});
	
	my $masked_seq = '~' x length($$reference_sequence);
	
	foreach my $rc(@{$rcs}) {
		my ($start, $end) = ($rc->start, $rc->end);
		
		# adjust region for indels
		foreach my $indel(@indels_ordered) {
			next if $rc->start > $end;
			
			# if within RC region, only need adjust the end
			$start += $indel->length_diff unless $indel->start > $start;
			$end += $indel->length_diff;
		}
		
		# adjust coords for seq boundaries
		$start = 1 if $start < 1;
		$end = CORE::length($masked_seq) if $end > CORE::length($masked_seq);
		
		# now unmask the sequence using $$reference_sequence
		substr($masked_seq, $start - 1, $end - $start + 1) = substr($$reference_sequence, $start - 1, $end - $start + 1);
	}
	
	# wm2 - old code, starts with sequence and masks regions between read coverage - BUGGY
#    foreach my $rc (@{$rcs}){
#		$rc->start(1) if ($rc->start < 0); #if the region lies outside the boundaries of the slice
#		$rc->end($self->end - $self->start + 1) if ($rc->end + $self->start > $self->end);
#		
#		warn "Adjusted: ", $rc->start, "-", $rc->end;
#		
#		warn "Covering from ", $start, " over ", ($rc->start - $start - 1), " bases";
#		
#        substr($$reference_sequence, $start-1,($rc->start - $start - 1),'~' x ($rc->start - $start - 1)) if ($rc->start - 1 > $start);
#        $start = $rc->end;
#
#    }
#    substr($$reference_sequence, $start, ($self->length - $start) ,'~' x ($self->length - $start)) if ($self->length -1 > $start);
	
	# copy the masked sequence to the reference sequence
	$$reference_sequence = $masked_seq;
}


=head2 get_AlleleFeature

    Arg[1]      : Bio::EnsEMBL::Variation::VariationFeature $vf
    Example     : my $af = $strainSlice->get_AlleleFeature($vf);
    Description : Returns the AlleleFeature object associated with the VariationFeature (if any)
    ReturnType  : Bio::EnsEMBL::Variation::AlleleFeature
    Exceptions  : none
    Caller      : general

=cut

sub get_AlleleFeature{
    my $self = shift;
    my $vf = shift;
    
    my $af;
    #look at the hash containing the relation vf_id->alleleFeature, if present, return object, otherwise, undef
    $af = $self->{'_vf_ids'}->{$vf->dbID} if (defined $self->{'_vf_ids'}->{$vf->dbID});
    return $af;
}


=head2 get_all_AlleleFeatures_Slice

    Arg[1]      : int $with_coverage (optional)
    Example     : my $af = $strainSlice->get_all_AlleleFeatures_Slice()
    Description : Gets all AlleleFeatures between the StrainSlice object and the Slice is defined.
                  If argument $with_coverage set to 1, returns only AF if they have coverage information
    ReturnType  : listref of Bio::EnsEMBL::Variation::AlleleFeature
    Exceptions  : none
    Caller      : general

=cut

sub get_all_AlleleFeatures_Slice{
    my $self = shift;
    my $with_coverage = shift;

    my $variation_db = $self->adaptor->db->get_db_adaptor('variation');

    unless($variation_db) {
	warning("Variation database must be attached to core database to " .
		"retrieve variation information" );
	return '';
    }
    my $indAdaptor = $variation_db->get_IndividualAdaptor();
    my $ref_name =  $indAdaptor->get_reference_strain_name;
    return [] if ($self->strain_name eq $ref_name);
    $with_coverage ||= 0; #by default, get all AlleleFeatures
    if ($with_coverage == 1){
	my $new_allele_features = $self->_filter_af_by_coverage($self->{'alleleFeatures'});
	return $new_allele_features || [];
    }

    return $self->{'alleleFeatures'} || [];
}

=head2 get_all_differences_StrainSlice

    Arg[1]      : Bio::EnsEMBL::StrainSlice $ss
    Example     : my $differences = $strainSlice->get_all_differences_StrainSlice($ss)
    Description : Gets differences between 2 StrainSlice objects
    ReturnType  : listref of Bio::EnsEMBL::Variation::AlleleFeature
    Exceptions  : thrown on bad argument
    Caller      : general

=cut

sub get_all_differences_StrainSlice{
    my $self = shift;
    my $strainSlice = shift;

    if (!ref($strainSlice) || !$strainSlice->isa('Bio::EnsEMBL::StrainSlice')){
	throw('Bio::EnsEMBL::StrainSlice arg expected');
    }
    if ( @{$self->{'alleleFeatures'}} == 0 && @{$strainSlice->{'alleleFeatures'}} == 0){
	return undef; #there are no differences in any of the Strains
	
    }
    my $differences; #differences between strains
    if (@{$strainSlice->{'alleleFeatures'}} == 0){
	#need to create a copy of VariationFeature
	foreach my $difference (@{$self->{'alleleFeatures'}}){
	    my %vf = %$difference;
	    push @{$differences},bless \%vf,ref($difference);
	}
    }
    elsif (@{$self->{'alleleFeatures'}} == 0){
	#need to create a copy of VariationFeature, but changing the allele by the allele in the reference
	foreach my $difference (@{$strainSlice->{'alleleFeatures'}}){
	    push @{$differences}, $strainSlice->_convert_difference($difference);
	}
    }
    else{
	#both strains have differences
	#create a hash with the differences in the self strain slice
	my %variation_features_self = map {$_->start => $_} @{$self->{'alleleFeatures'}};
	foreach my $difference (@{$strainSlice->{'alleleFeatures'}}){
	    #there is no difference in the other strain slice, convert the allele
	    if (!defined $variation_features_self{$difference->start}){
		push @{$differences},$strainSlice->_convert_difference($difference);
	    }		
	    else{
		#if it is defined and have the same allele, delete from the hash
		if ($variation_features_self{$difference->start}->allele_string eq $difference->allele_string){
		    delete $variation_features_self{$difference->start};
		}
	    }
	}	
	#and copy the differences that in the self
	foreach my $difference (values %variation_features_self){
	    my %vf = %$difference;
	    push @{$differences},bless \%vf,ref($difference);
	}

    }
    #need to map differences to the self 
    my $mapper = $self->mapper(); #now that we have the differences, map them in the StrainSlice
#    print Dumper($mapper);
    my @results;
    foreach my $difference (@{$differences}){	
	@results = $mapper->map_coordinates('Slice',$difference->start,$difference->end,$difference->strand,'Slice');
	#we can have 3 possibilities:
	#the difference is an insertion and when mapping returns the boundaries of the insertion in the StrainSlice
	if (@results == 2){
	    #the first position in the result is the beginning of the insertion
	    if($results[0]->start < $results[1]->start){
		$difference->start($results[0]->end+1);
		$difference->end($results[1]->start-1);
	    }
	    else{
		$difference->start($results[1]->end+1);
		$difference->end($results[0]->start-1);
	    }
	    $difference->strand($results[0]->strand);
	}
	else{
	    #it can be either a SNP or a deletion, and we have the coordinates in the result, etither a Bio::EnsEMBL::Mapper::Coordinate
	    # or a Bio::EnsEMBL::Mapper::IndelCoordinate
#	    print "Difference: ",$difference->start, "-", $difference->end,"strand ",$difference->strand,"\n";
	    $difference->start($results[0]->start);
	    $difference->end($results[0]->end);
	    $difference->strand($results[0]->strand);
	}
    }

    return $differences;
}

#for a given VariationFeatures, converts the allele into the reference allele and returns a new list with
#the converted VariationFeatures
sub _convert_difference{
    my $self = shift;
    my $difference = shift;
    my %new_vf = %$difference; #make a copy of the variationFeature
    #and change the allele with the one from the reference Slice
    $new_vf{'allele_string'} = $self->SUPER::subseq($difference->start,$difference->end,$difference->strand);
    return bless \%new_vf,ref($difference);
}

=head2 sub_Slice

  Arg   1    : int $start
  Arg   2    : int $end
  Arge [3]   : int $strand
  Example    : none
  Description: Makes another StrainSlice that covers only part of this slice
               with the appropriate differences to the reference Slice
               If a slice is requested which lies outside of the boundaries
               of this function will return undef.  This means that
               behaviour will be consistant whether or not the slice is
               attached to the database (i.e. if there is attached sequence
               to the slice).  Alternatively the expand() method or the
               SliceAdaptor::fetch_by_region method can be used instead.
  Returntype : Bio::EnsEMBL::StrainSlice or undef if arguments are wrong
  Exceptions : thrown when trying to get the subSlice in the middle of a
               insertion
  Caller     : general

=cut

sub sub_Slice {
  my ( $self, $start, $end, $strand ) = @_;
  my $mapper = $self->mapper();
  #finally map from the Slice to the Strain
  my @results = $mapper->map_coordinates('StrainSlice',$start,$end,$strand,'StrainSlice');
  my $new_start;
  my $new_end;
  my $new_strand;
  my $new_seq;
 
  #Get need start and end for the subSlice of the StrainSlice
  my @results_ordered = sort {$a->start <=> $b->start} @results;
  $new_start = $results_ordered[0]->start();
  $new_strand = $results_ordered[0]->strand() if (ref($results_ordered[0]) eq 'Bio::EnsEMBL::Mapper::Coordinate');
  $new_strand = $results_ordered[-1]->strand() if (ref($results_ordered[-1]) eq 'Bio::EnsEMBL::Mapper::Coordinate');
  $new_end = $results_ordered[-1]->end();  #get last element of the array, the end of the slice

  my $subSlice = $self->SUPER::sub_Slice($new_start,$new_end,$new_strand);
  $subSlice->{'strain_name'} = $self->{'strain_name'};

  my $new_variations; #reference to an array that will contain the variationFeatures in the new subSlice
  #update the VariationFeatures in the sub_Slice of the Strain
  my $vf_start;
  my $vf_end;
  my $offset = $subSlice->start - $self->start;

  foreach my $variationFeature (@{$self->{'alleleFeatures'}}){
      #calculate the new position of the variation_feature in the subSlice
      $vf_start = $variationFeature->start - $offset;
      $vf_end = $variationFeature->end - $offset;
      if ($vf_start  >= 1 and $vf_end <= $subSlice->length){
	  #copy the variationFeature
	  my %new_vf;
	  %new_vf = %$variationFeature;
	  #and shift to the new coordinates
	  $new_vf{'start'} = $vf_start;
	  $new_vf{'end'} = $vf_end;	
	  my $test = bless \%new_vf, ref($variationFeature);
	  push @{$new_variations}, $test;
      }
  }
  $subSlice->{'alleleFeatures'} = $new_variations;
  return $subSlice;

}

=head2 ref_subseq

  Arg  [1]   : int $startBasePair
               relative to start of slice, which is 1.
  Arg  [2]   : int $endBasePair
               relative to start of slice.
  Arg  [3]   : (optional) int $strand
               The strand of the slice to obtain sequence from. Default
               value is 1.
  Description: returns string of dna from reference sequence
  Returntype : txt
  Exceptions : end should be at least as big as start
               strand must be set
  Caller     : general

=cut

sub ref_subseq{
  my $self = shift;
  my $start = shift;
  my $end = shift;
  my $strand = shift;
  # special case for in-between (insert) coordinates
  return '' if($start == $end + 1);

  my $subseq;
  if($self->adaptor){
    my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
    $subseq = ${$seqAdaptor->fetch_by_Slice_start_end_strand
      ( $self, $start,
        $end, $strand )};
  } else {
    ## check for gap at the beginning and pad it with Ns
    if ($start < 1) {
      $subseq = "N" x (1 - $start);
      $start = 1;
    }
    $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
    ## check for gap at the end and pad it with Ns
    if ($end > $self->length()) {
      $subseq .= "N" x ($end - $self->length());
    }
    reverse_comp(\$subseq) if($strand == -1);
  }
  return $subseq;
}

=head2 subseq

  Arg  [1]   : int $startBasePair
               relative to start of slice, which is 1.
  Arg  [2]   : int $endBasePair
               relative to start of slice.
  Arg  [3]   : (optional) int $strand
               The strand of the slice to obtain sequence from. Default
               value is 1.
  Description: returns string of dna sequence
  Returntype : txt
  Exceptions : end should be at least as big as start
               strand must be set
  Caller     : general

=cut

sub subseq {
  my ( $self, $start, $end, $strand ) = @_;

  if ( $end+1 < $start ) {
    throw("End coord + 1 is less than start coord");
  }

  # handle 'between' case for insertions
  return '' if( $start == $end + 1);

  $strand = 1 unless(defined $strand);

  if ( $strand != -1 && $strand != 1 ) {
    throw("Invalid strand [$strand] in call to Slice::subseq.");
  }

  my $subseq;
  my $seq;
  if($self->adaptor){


      $seq = $self->seq;
      reverse_comp(\$seq) if ($strand == -1);
      $subseq = substr($seq,$start-1,$end - $start + 1);
  } 
  else {
      ## check for gap at the beginning and pad it with Ns
      if ($start < 1) {
	  $subseq = "N" x (1 - $start);
	  $start = 1;
      }
      $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
      ## check for gap at the end and pad it with Ns
    if ($end > $self->length()) {
	$subseq .= "N" x ($end - $self->length());
    }
      reverse_comp(\$subseq) if($strand == -1);
  }
  return $subseq;
  
}


sub mapper{
    my $self = shift;
   
    if (@_) {
	delete $self->{'mapper'};
    }
    if(!defined $self->{'mapper'}){
	#create the mapper between the Slice and StrainSlice
	my $mapper = Bio::EnsEMBL::Mapper->new('Slice','StrainSlice');
	#align with Slice
	#get all the VariationFeatures in the strain Slice, from start to end in the Slice
	my @variation_features_ordered = sort {$a->start() <=> $b->start()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'});
	
	my $start_slice = 1;
	my $end_slice;
	my $start_strain = 1;
	my $end_strain;
	my $length_allele;
	foreach my $variation_feature (@variation_features_ordered){
	    #we have a insertion/deletion: marks the beginning of new slice move coordinates	    
	    if ($variation_feature->length_diff != 0){
		$length_allele = $variation_feature->length + $variation_feature->length_diff();	    
		$end_slice = $variation_feature->start() - 1;

		if ($end_slice >= $start_slice){
		    $end_strain = $end_slice - $start_slice + $start_strain;
		    #add the sequence that maps
		    $mapper->add_map_coordinates('Slice',$start_slice,$end_slice,1,'StrainSlice',$start_strain,$end_strain);
		    #add the indel
		    $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $variation_feature->length,1,'StrainSlice',$end_strain+1,$end_strain + $length_allele);
		    $start_strain = $end_strain + $length_allele + 1;
		}
		else{
		    #add the indel
		    $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $variation_feature->length,1,'StrainSlice',$end_strain+1,$end_strain + $length_allele);
		    $start_strain += $length_allele;
		}
		$start_slice = $end_slice + $variation_feature->length+ 1;
	    }
	}
	if ($start_slice <= $self->length){
	    $mapper->add_map_coordinates('Slice',$start_slice,$self->length,1,'StrainSlice',$start_strain,$start_strain + $self->length - $start_slice);
	}
	$self->{'mapper'} = $mapper;
    }
    return $self->{'mapper'};
}

=head2 get_all_differences_Slice

    Description : DEPRECATED use get_all_AlleleFeatures instead
   
=cut

sub get_all_differences_Slice{
    my $self = shift;
    
    deprecate('Use get_all_differences_Slice instead');
    return $self->get_all_AlleleFeatures_Slice(@_);
}

=head2 get_all_VariationFeatures

    Arg[1]     : int $with_coverage (optional)
    Description :returns all alleleFeatures features on this slice. 
    ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
    Exceptions : none
    Caller     : contigview, snpview

=cut

sub get_all_VariationFeatures {
  my $self = shift;
  my $with_coverage = shift;
  $with_coverage ||= 0;
  return $self->get_all_AlleleFeatures_Slice($with_coverage);
}

=head2 get_original_seq_region_position

  Arg  [1]   : int $position
               relative to start of slice, which is 1.
  Description: Placeholder method - this method has no explicit use beyond
			   providiing compatibility with AlignSlice. To map positions
			   between the StrainSlice and the reference slice, use the
			   mapper and its methods.
  Returntype : ($strainSlice, $seq_region_position), an array where the first
               element is a Bio::EnsEMBL::StrainSlice and the second one is the
               requested seq_region_position.
  Exceptions : none
  Caller     : general

=cut

sub get_original_seq_region_position {
    my $self = shift;
    my $position = shift;
    #coordinates in the AlignSlice and Slice are the same, so far will return the same Slice
    #and coordinate
    return ($self,$position);
}


=head2 remove_indels

    Args        : none
    Example     : $strainSlice->remove_indels();
    Description : Removes insertions and deletions from the allele features
				  of this object
    ReturnType  : none
    Exceptions  : none
    Caller      : webteam

=cut

sub remove_indels {
	my $self = shift;
	
	my @new_afs = grep { $_->variation->var_class ne 'in-del' } @{$self->{'alleleFeatures'}};
	
	$self->{'alleleFeatures'} = \@new_afs;
}

1;