Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/AlignStrainSlice.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/AlignStrainSlice.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,344 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=cut + +=head1 NAME + +Bio::EnsEMBL::AlignStrainSlice - Represents the slice of the genome aligned with certain strains (applying the variations/indels) + +=head1 SYNOPSIS + + $sa = $db->get_SliceAdaptor; + + $slice = + $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 ); + + $strainSlice1 = $slice->get_by_Strain($strain_name1); + $strainSlice2 = $slice->get_by_Strain($strain_name2); + + my @strainSlices; + push @strainSlices, $strainSlice1; + push @strainSlices, $strainSlice2; + + $alignSlice = Bio::EnsEMBL::AlignStrainSlice->new( + -SLICE => $slice, + -STRAINS => \@strainSlices + ); + + # Get coordinates of variation in alignSlice + my $alleleFeatures = $strainSlice1->get_all_AlleleFeature_Slice(); + + foreach my $af ( @{$alleleFeatures} ) { + my $new_feature = $alignSlice->alignFeature( $af, $strainSlice1 ); + print( "Coordinates of the feature in AlignSlice are: ", + $new_feature->start, "-", $new_feature->end, "\n" ); + } + +=head1 DESCRIPTION + +A AlignStrainSlice object represents a region of a genome align for +certain strains. It can be used to align certain strains to a reference +slice. + +=head1 METHODS + +=cut + +package Bio::EnsEMBL::AlignStrainSlice; +use strict; + +use Bio::EnsEMBL::Utils::Argument qw(rearrange); +use Bio::EnsEMBL::Mapper; +use Bio::EnsEMBL::Mapper::RangeRegistry; +use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning); + +=head2 new + + Arg[1] : Bio::EnsEMBL::Slice $Slice + Arg[2] : listref of Bio::EnsEMBL::StrainSlice $strainSlice + Example : push @strainSlices, $strainSlice1; + push @strainSlices, $strainSlice2; + ..... + push @strainSlices, $strainSliceN; + $alignStrainSlice = Bio::EnsEMBL::AlignStrainSlice->new(-SLICE => $slice, + -STRAIN => \@strainSlices); + Description : Creates a new Bio::EnsEMBL::AlignStrainSlice object that will contain a mapper between + the Slice object, plus all the indels from the different Strains + ReturnType : Bio::EnsEMBL::AlignStrainSlice + Exceptions : none + Caller : general + +=cut + +sub new{ + my $caller = shift; + my $class = ref($caller) || $caller; + + my ($slice, $strainSlices) = rearrange([qw(SLICE STRAINS)],@_); + + #check that both StrainSlice and Slice are identical (must have been defined in the same slice) + foreach my $strainSlice (@{$strainSlices}){ + if (($strainSlice->start != $slice->start) || ($strainSlice->end != $slice->end) || ($strainSlice->seq_region_name ne $slice->seq_region_name)){ + warning("Not possible to create Align object from different Slices"); + return []; + } + } + + return bless{'slice' => $slice, + 'strains' => $strainSlices}, $class; +} + +=head2 alignFeature + + Arg[1] : Bio::EnsEMBL::Feature $feature + Arg[2] : Bio::EnsEMBL::StrainSlice $strainSlice + Example : $new_feature = $alignSlice->alignFeature($feature, $strainSlice); + Description : Creates a new Bio::EnsEMBL::Feature object that aligned to + the AlignStrainSlice object. + ReturnType : Bio::EnsEMBL::Feature + Exceptions : none + Caller : general + +=cut + +sub alignFeature{ + my $self = shift; + my $feature = shift; + + #check that the object is a Feature + if (!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')){ + throw("Bio::EnsEMBL::Feature object expected"); + } + #and align it to the AlignStrainSlice object + my $mapper_strain = $self->mapper(); + + my @results; + + if ($feature->start > $feature->end){ + #this is an Indel, map it with the special method + @results = $mapper_strain->map_indel('Slice',$feature->start, $feature->end, $feature->strand,'Slice'); + #and modify the coordinates according to the length of the indel + $results[0]->end($results[0]->start + $feature->length_diff -1); + } + else{ + @results = $mapper_strain->map_coordinates('Slice',$feature->start, $feature->end, $feature->strand,'Slice'); + } + #get need start and end of the new feature, aligned ot AlignStrainSlice + my @results_ordered = sort {$a->start <=> $b->start} @results; + + my %new_feature = %$feature; #make a shallow copy of the Feature + $new_feature{'start'}= $results_ordered[0]->start(); + $new_feature{'end'} = $results_ordered[-1]->end(); #get last element of the array, the end of the slice + + return bless \%new_feature, ref($feature); + +} + + +#getter for the mapper between the Slice and the different StrainSlice objects +sub mapper{ + my $self = shift; + + if (!defined $self->{'mapper'}){ + #get the alleleFeatures in all the strains + if (!defined $self->{'indels'}){ + #when the list of indels is not defined, get them + $self->{'indels'} = $self->_get_indels(); + } + my $indels = $self->{'indels'}; #gaps in reference slice + my $mapper = Bio::EnsEMBL::Mapper->new('Slice', 'AlignStrainSlice'); + my $start_slice = 1; + my $end_slice; + my $start_align = 1; + my $end_align; + my $length_indel = 0; + my $length_acum_indel = 0; + foreach my $indel (@{$indels}){ + $end_slice = $indel->[0] - 1; + $end_align = $indel->[0] - 1 + $length_acum_indel; #we must consider length previous indels + + $length_indel = $indel->[1] - $indel->[0] + 1; + + + $mapper->add_map_coordinates('Slice',$start_slice,$end_slice,1,'AlignStrainSlice',$start_align,$end_align); + + $mapper->add_indel_coordinates('Slice',$end_slice + 1,$end_slice,1,'AlignStrainSlice',$end_align + 1,$end_align + $length_indel); + $start_slice = $end_slice + 1; + $start_align = $indel->[1] + 1 + $length_acum_indel; #we must consider legnth previous indels + + $length_acum_indel += $length_indel; + } + if ($start_slice <= $self->length){ + $mapper->add_map_coordinates('Slice',$start_slice,$self->length,1,'AlignStrainSlice',$start_align,$start_align + $self->length - $start_slice) + } + $self->{'mapper'} = $mapper; + + } + return $self->{'mapper'}; +} + +#returns the length of the AlignSlice: length of the Slice plus the gaps +sub length{ + my $self = shift; + my $length; + if (!defined $self->{'indels'}){ + #when the list of indels is not defined, get them + $self->{'indels'} = $self->_get_indels(); + } + $length = $self->{'slice'}->length; + map {$length += ($_->[1] - $_->[0] + 1)} @{$self->{'indels'}}; + return $length; +} + +=head2 strains + + Args : None + Description: Returns list with all strains used to + define this AlignStrainSlice object + Returntype : listref of Bio::EnsEMBL::StrainSlice objects + Exceptions : none + Caller : general + +=cut + +sub strains{ + my $self = shift; + + return $self->{'strains'}; +} + +=head2 Slice + + Args : None + Description: Returns slice where the AlignStrainSlice + is defined + Returntype : Bio::EnsEMBL::Slice object + Exceptions : none + Caller : general + +=cut + +sub Slice{ + my $self = shift; + return $self->{'slice'}; +} +#method to retrieve, in order, a list with all the indels in the different strains +sub _get_indels{ + my $self = shift; + + #go throuh all the strains getting ONLY the indels (length_diff <> 0) + my @indels; + foreach my $strainSlice (@{$self->strains}){ + my $differences = $strainSlice->get_all_AlleleFeatures_Slice(); #need to check there are differences.... + foreach my $af (@{$differences}){ + #if length is 0, but is a -, it is still a gap in the strain + if (($af->length_diff != 0) || ($af->length_diff == 0 && $af->allele_string =~ /-/)){ + push @indels, $af; + } + } + } + #need to overlap the gaps using the RangeRegistry module + my $range_registry = Bio::EnsEMBL::Mapper::RangeRegistry->new(); + foreach my $indel (@indels){ + #in the reference and the strain there is a gap + $range_registry->check_and_register(1,$indel->start,$indel->start) if ($indel->length_diff == 0); + #deletion in reference slice + $range_registry->check_and_register(1,$indel->start, $indel->end ) if ($indel->length_diff < 0); + #insertion in reference slice + $range_registry->check_and_register(1,$indel->start,$indel->start + $indel->length_diff - 1) if ($indel->length_diff > 0); + } + #and return all the gap coordinates.... + return $range_registry->get_ranges(1); +} + +=head2 get_all_Slices + + Args : none + Description: This Slice is made of several Bio::EnsEMBL::StrainSlices + sequence. This method returns these StrainSlices (or part of + them) with the original coordinates + Returntype : listref of Bio::EnsEMBL::StrainSlice objects + Exceptions : end should be at least as big as start + Caller : general + +=cut + +sub get_all_Slices { + my $self = shift; + + my @strains; + #add the reference strain + my $dbVar = $self->Slice->adaptor->db->get_db_adaptor('variation'); + unless($dbVar) { + warning("Variation database must be attached to core database to " . + "retrieve variation information" ); + return ''; + } + my $indAdaptor = $dbVar->get_IndividualAdaptor(); + my $ref_name = $indAdaptor->get_reference_strain_name; + my $ref_strain = Bio::EnsEMBL::StrainSlice->new( + -START => $self->Slice->{'start'}, + -END => $self->Slice->{'end'}, + -STRAND => $self->Slice->{'strand'}, + -ADAPTOR => $self->Slice->adaptor(), + -SEQ => $self->Slice->{'seq'}, + -SEQ_REGION_NAME => $self->Slice->{'seq_region_name'}, + -SEQ_REGION_LENGTH => $self->Slice->{'seq_region_length'}, + -COORD_SYSTEM => $self->Slice->{'coord_system'}, + -STRAIN_NAME => $ref_name, + ); + #this is a fake reference alisce, should not contain any alleleFeature + undef $ref_strain->{'alleleFeatures'}; + + push @strains, @{$self->strains}; + my $new_feature; + my $indel; + my $aligned_features; + my $indels = (); #reference to a hash containing indels in the different strains + #we need to realign all Features in the different Slices and add '-' in the reference Slice + foreach my $strain (@{$self->strains}){ + foreach my $af (@{$strain->get_all_AlleleFeatures_Slice()}){ + $new_feature = $self->alignFeature($af); #align feature in AlignSlice coordinates + push @{$aligned_features},$new_feature if($new_feature->seq_region_start <= $strain->end); #some features might map outside slice + if ($af->start != $af->end){ #an indel, need to add to the reference, and realign in the strain + #make a shallow copy of the indel - clear it first! + $indel = undef; + %{$indel} = %{$new_feature}; + bless $indel, ref($new_feature); + $indel->allele_string('-'); + push @{$indels},$indel; #and include in the list of potential indels + } + } + next if (!defined $aligned_features); + undef $strain->{'alleleFeatures'}; #remove all features before adding new aligned features + push @{$strain->{'alleleFeatures'}}, @{$aligned_features}; + undef $aligned_features; + } + push @strains, $ref_strain; + #need to add indels in the different strains, if not present + if (defined $indels){ + foreach my $strain (@strains){ + #inlcude the indels in the StrainSlice object + push @{$strain->{'alignIndels'}},@{$indels}; + } + } + return \@strains; +} + +1;