Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/AlignStrainSlice.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 =head1 LICENSE | |
| 2 | |
| 3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
| 4 Genome Research Limited. All rights reserved. | |
| 5 | |
| 6 This software is distributed under a modified Apache license. | |
| 7 For license details, please see | |
| 8 | |
| 9 http://www.ensembl.org/info/about/code_licence.html | |
| 10 | |
| 11 =head1 CONTACT | |
| 12 | |
| 13 Please email comments or questions to the public Ensembl | |
| 14 developers list at <dev@ensembl.org>. | |
| 15 | |
| 16 Questions may also be sent to the Ensembl help desk at | |
| 17 <helpdesk@ensembl.org>. | |
| 18 | |
| 19 =cut | |
| 20 | |
| 21 =head1 NAME | |
| 22 | |
| 23 Bio::EnsEMBL::AlignStrainSlice - Represents the slice of the genome aligned with certain strains (applying the variations/indels) | |
| 24 | |
| 25 =head1 SYNOPSIS | |
| 26 | |
| 27 $sa = $db->get_SliceAdaptor; | |
| 28 | |
| 29 $slice = | |
| 30 $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 ); | |
| 31 | |
| 32 $strainSlice1 = $slice->get_by_Strain($strain_name1); | |
| 33 $strainSlice2 = $slice->get_by_Strain($strain_name2); | |
| 34 | |
| 35 my @strainSlices; | |
| 36 push @strainSlices, $strainSlice1; | |
| 37 push @strainSlices, $strainSlice2; | |
| 38 | |
| 39 $alignSlice = Bio::EnsEMBL::AlignStrainSlice->new( | |
| 40 -SLICE => $slice, | |
| 41 -STRAINS => \@strainSlices | |
| 42 ); | |
| 43 | |
| 44 # Get coordinates of variation in alignSlice | |
| 45 my $alleleFeatures = $strainSlice1->get_all_AlleleFeatures_Slice(); | |
| 46 | |
| 47 foreach my $af ( @{$alleleFeatures} ) { | |
| 48 my $new_feature = $alignSlice->alignFeature( $af, $strainSlice1 ); | |
| 49 print( "Coordinates of the feature in AlignSlice are: ", | |
| 50 $new_feature->start, "-", $new_feature->end, "\n" ); | |
| 51 } | |
| 52 | |
| 53 =head1 DESCRIPTION | |
| 54 | |
| 55 An AlignStrainSlice object represents a region of a genome aligned for | |
| 56 certain strains. It can be used to align certain strains to a reference | |
| 57 slice. | |
| 58 | |
| 59 =head1 METHODS | |
| 60 | |
| 61 =cut | |
| 62 | |
| 63 package Bio::EnsEMBL::AlignStrainSlice; | |
| 64 use strict; | |
| 65 | |
| 66 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
| 67 use Bio::EnsEMBL::Mapper; | |
| 68 use Bio::EnsEMBL::Mapper::RangeRegistry; | |
| 69 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning); | |
| 70 | |
| 71 =head2 new | |
| 72 | |
| 73 Arg[1] : Bio::EnsEMBL::Slice $Slice | |
| 74 Arg[2] : listref of Bio::EnsEMBL::StrainSlice $strainSlice | |
| 75 Example : push @strainSlices, $strainSlice1; | |
| 76 push @strainSlices, $strainSlice2; | |
| 77 ..... | |
| 78 push @strainSlices, $strainSliceN; | |
| 79 $alignStrainSlice = Bio::EnsEMBL::AlignStrainSlice->new(-SLICE => $slice, | |
| 80 -STRAINS => \@strainSlices); | |
| 81 Description : Creates a new Bio::EnsEMBL::AlignStrainSlice object that will contain a mapper between | |
| 82 the Slice object, plus all the indels from the different Strains | |
| 83 ReturnType : Bio::EnsEMBL::AlignStrainSlice | |
| 84 Exceptions : none | |
| 85 Caller : general | |
| 86 | |
| 87 =cut | |
| 88 | |
# Constructor. Takes the named arguments -SLICE (the reference slice) and
# -STRAINS (a listref of strain slices).
#
# Every strain slice must cover exactly the same region as the reference
# slice (same start, end and seq_region_name). If one does not, a warning
# is issued and an empty, unblessed array reference is returned instead of
# an AlignStrainSlice object.
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    my ($ref_slice, $strain_slices) = rearrange([qw(SLICE STRAINS)], @args);

    for my $candidate (@{$strain_slices}) {
        my $same_region =
               $candidate->start == $ref_slice->start
            && $candidate->end == $ref_slice->end
            && $candidate->seq_region_name eq $ref_slice->seq_region_name;
        next if $same_region;

        warning("Not possible to create Align object from different Slices");
        return [];
    }

    my $self = {
        'slice'   => $ref_slice,
        'strains' => $strain_slices,
    };
    return bless $self, $class;
}
| 106 | |
| 107 =head2 alignFeature | |
| 108 | |
| 109 Arg[1] : Bio::EnsEMBL::Feature $feature | |
| 110 Arg[2] : Bio::EnsEMBL::StrainSlice $strainSlice | |
| 111 Example : $new_feature = $alignSlice->alignFeature($feature, $strainSlice); | |
| 112 Description : Creates a new Bio::EnsEMBL::Feature object that is aligned to | |
| 113 the AlignStrainSlice object. | |
| 114 ReturnType : Bio::EnsEMBL::Feature | |
| 115 Exceptions : none | |
| 116 Caller : general | |
| 117 | |
| 118 =cut | |
| 119 | |
# Maps a feature from reference-slice coordinates into the coordinates of
# this AlignStrainSlice and returns a shallow copy of the feature carrying
# the new start/end.
#
# Arguments:
#   $feature - a Bio::EnsEMBL::Feature (or subclass) instance. Any further
#              arguments are accepted for backward compatibility but are
#              not used by this method.
# Returns:
#   a new object of the same class as $feature; all fields are copied from
#   the original (shallow copy) and 'start'/'end' are replaced by the
#   aligned coordinates. The original feature is not modified.
# Throws:
#   if $feature is not a blessed Bio::EnsEMBL::Feature, or if the mapper
#   yields no result for the feature's coordinates.
sub alignFeature {
    my ($self, $feature) = @_;

    # blessed() keeps an unblessed reference (e.g. a plain hashref) from
    # dying inside ->isa with an unrelated Perl error.
    require Scalar::Util;
    if (!Scalar::Util::blessed($feature)
        || !$feature->isa('Bio::EnsEMBL::Feature')) {
        throw("Bio::EnsEMBL::Feature object expected");
    }

    my $mapper_strain = $self->mapper();

    # A feature whose start lies after its end is treated as an indel and
    # has to go through the dedicated indel mapping.
    my $is_indel = $feature->start > $feature->end;

    my @results = $is_indel
        ? $mapper_strain->map_indel('Slice', $feature->start, $feature->end,
                                    $feature->strand, 'Slice')
        : $mapper_strain->map_coordinates('Slice', $feature->start, $feature->end,
                                          $feature->strand, 'Slice');

    # Fail with a meaningful message rather than a method call on undef.
    if (!@results) {
        throw("Unable to map feature at " . $feature->start . "-" . $feature->end .
              " onto the AlignStrainSlice");
    }

    if ($is_indel) {
        # stretch the mapped position to span the whole length of the indel
        $results[0]->end($results[0]->start + $feature->length_diff - 1);
    }

    # The aligned feature spans from the lowest mapped start to the end of
    # the last mapped piece.
    my @results_ordered = sort { $a->start <=> $b->start } @results;

    my %new_feature = %{$feature};    # shallow copy of the feature
    $new_feature{'start'} = $results_ordered[0]->start();
    $new_feature{'end'}   = $results_ordered[-1]->end();

    return bless \%new_feature, ref($feature);
}
| 152 | |
| 153 | |
# Getter for the mapper between the reference Slice and the aligned
# coordinate space ('Slice' <-> 'AlignStrainSlice').
#
# The mapper is built on first use and cached in $self->{'mapper'}. It is
# assembled from the list of gap ranges (cached in $self->{'indels'}):
# each ungapped stretch of the slice is mapped one-to-one, and each gap is
# registered as an indel that occupies positions only on the aligned side.
sub mapper {
    my ($self) = @_;

    return $self->{'mapper'} if defined $self->{'mapper'};

    # fetch the gap ranges if nobody has asked for them yet
    if (!defined $self->{'indels'}) {
        $self->{'indels'} = $self->_get_indels();
    }
    my $gaps = $self->{'indels'};

    my $coord_map = Bio::EnsEMBL::Mapper->new('Slice', 'AlignStrainSlice');

    my ($slice_from, $align_from) = (1, 1);
    my $gap_total = 0;    # summed length of the gaps processed so far

    for my $gap (@{$gaps}) {
        my ($gap_start, $gap_end) = @{$gap}[0, 1];
        my $gap_length = $gap_end - $gap_start + 1;

        # ungapped stretch that ends right before this gap
        my $slice_to = $gap_start - 1;
        my $align_to = $slice_to + $gap_total;
        $coord_map->add_map_coordinates(
            'Slice',            $slice_from, $slice_to, 1,
            'AlignStrainSlice', $align_from, $align_to
        );

        # the gap itself: zero-width on the slice, $gap_length wide when aligned
        $coord_map->add_indel_coordinates(
            'Slice',            $slice_to + 1, $slice_to, 1,
            'AlignStrainSlice', $align_to + 1, $align_to + $gap_length
        );

        $slice_from = $slice_to + 1;
        $align_from = $gap_end + 1 + $gap_total;
        $gap_total += $gap_length;
    }

    # remaining stretch after the last gap
    # NOTE(review): the Slice-side end used here is the aligned length
    # ($self->length, i.e. slice length plus gaps), not the plain slice
    # length - confirm this over-extension is intended.
    my $align_length = $self->length;
    if ($slice_from <= $align_length) {
        $coord_map->add_map_coordinates(
            'Slice',            $slice_from, $align_length, 1,
            'AlignStrainSlice', $align_from, $align_from + $align_length - $slice_from
        );
    }

    $self->{'mapper'} = $coord_map;
    return $self->{'mapper'};
}
| 195 | |
# Returns the length of the AlignStrainSlice: the length of the reference
# slice plus the size of every gap range. The gap ranges are fetched via
# _get_indels() on first use and cached in $self->{'indels'}.
sub length {
    my ($self) = @_;

    if (!defined $self->{'indels'}) {
        $self->{'indels'} = $self->_get_indels();
    }

    my $total = $self->{'slice'}->length;
    for my $gap (@{ $self->{'indels'} }) {
        # each range is [start, end], both inclusive
        $total += $gap->[1] - $gap->[0] + 1;
    }

    return $total;
}
| 208 | |
| 209 =head2 strains | |
| 210 | |
| 211 Args : None | |
| 212 Description: Returns list with all strains used to | |
| 213 define this AlignStrainSlice object | |
| 214 Returntype : listref of Bio::EnsEMBL::StrainSlice objects | |
| 215 Exceptions : none | |
| 216 Caller : general | |
| 217 | |
| 218 =cut | |
| 219 | |
# Accessor: returns the listref of strain slices stored on this object
# under the 'strains' key (the same reference, not a copy).
sub strains {
    my ($self) = @_;
    return $self->{'strains'};
}
| 225 | |
| 226 =head2 Slice | |
| 227 | |
| 228 Args : None | |
| 229 Description: Returns slice where the AlignStrainSlice | |
| 230 is defined | |
| 231 Returntype : Bio::EnsEMBL::Slice object | |
| 232 Exceptions : none | |
| 233 Caller : general | |
| 234 | |
| 235 =cut | |
| 236 | |
# Accessor: returns the reference slice stored on this object under the
# 'slice' key.
sub Slice {
    my ($self) = @_;
    return $self->{'slice'};
}
# Retrieves the gap ranges implied by the allele features of all strains.
#
# Every allele feature of every strain is inspected; those that change the
# length (length_diff != 0), or that keep the length but contain a '-' in
# their allele string, are registered in a RangeRegistry so that
# overlapping gaps from different strains are merged.
#
# Returns:
#   listref of [start, end] ranges as reported by the registry
#   (presumably sorted by position - confirm against RangeRegistry).
#   An empty listref is returned when no gap was registered, so callers
#   that cache the result behind a "defined" test do not recompute it.
sub _get_indels {
    my ($self) = @_;

    my $range_registry = Bio::EnsEMBL::Mapper::RangeRegistry->new();

    for my $strain_slice (@{ $self->strains }) {
        my $differences = $strain_slice->get_all_AlleleFeatures_Slice();

        for my $af (@{$differences}) {
            my $length_diff = $af->length_diff;

            if ($length_diff > 0) {
                # positive length difference: the gap spans length_diff
                # positions starting at the feature start
                $range_registry->check_and_register(1, $af->start,
                                                    $af->start + $length_diff - 1);
            }
            elsif ($length_diff < 0) {
                # negative length difference: the gap covers the feature
                $range_registry->check_and_register(1, $af->start, $af->end);
            }
            elsif ($af->allele_string =~ /-/) {
                # same length but a '-' allele: still a one-position gap
                $range_registry->check_and_register(1, $af->start, $af->start);
            }
        }
    }

    # get_ranges() may yield undef when nothing was registered for the id;
    # normalise to an empty list so the result is always dereferenceable.
    return $range_registry->get_ranges(1) || [];
}
| 269 | |
| 270 =head2 get_all_Slices | |
| 271 | |
| 272 Args : none | |
| 273 Description: This Slice is made of several Bio::EnsEMBL::StrainSlices | |
| 274 sequence. This method returns these StrainSlices (or part of | |
| 275 them) with the original coordinates | |
| 276 Returntype : listref of Bio::EnsEMBL::StrainSlice objects | |
| 277 Exceptions : end should be at least as big as start | |
| 278 Caller : general | |
| 279 | |
| 280 =cut | |
| 281 | |
# Returns a listref with every strain slice of this alignment followed by a
# StrainSlice built for the reference strain, all expressed in aligned
# coordinates.
#
# Side effects visible in this method:
#   * the 'alleleFeatures' entry of each strain slice that has at least one
#     feature inside the slice is replaced by the aligned copies;
#   * the gap features found in any strain are appended to the
#     'alignIndels' entry of every returned slice.
#
# If no variation database adaptor is available, a warning is issued and
# the empty string is returned.
sub get_all_Slices {
    my ($self) = @_;

    my $variation_db = $self->Slice->adaptor->db->get_db_adaptor('variation');
    if (!$variation_db) {
        warning("Variation database must be attached to core database to " .
                "retrieve variation information" );
        return '';
    }

    # build a StrainSlice over the same region for the reference strain
    my $individual_adaptor = $variation_db->get_IndividualAdaptor();
    my $reference_name     = $individual_adaptor->get_reference_strain_name;
    my $ref_slice          = $self->Slice;
    my $reference_strain   = Bio::EnsEMBL::StrainSlice->new(
        -START             => $ref_slice->{'start'},
        -END               => $ref_slice->{'end'},
        -STRAND            => $ref_slice->{'strand'},
        -ADAPTOR           => $ref_slice->adaptor(),
        -SEQ               => $ref_slice->{'seq'},
        -SEQ_REGION_NAME   => $ref_slice->{'seq_region_name'},
        -SEQ_REGION_LENGTH => $ref_slice->{'seq_region_length'},
        -COORD_SYSTEM      => $ref_slice->{'coord_system'},
        -STRAIN_NAME       => $reference_name,
    );
    # the reference pseudo-strain must not carry any allele feature
    $reference_strain->{'alleleFeatures'} = undef;

    my @all_slices = @{ $self->strains };
    my @gap_features;    # '-' copies of every indel seen in any strain

    for my $strain (@{ $self->strains }) {
        my @aligned;

        for my $af (@{ $strain->get_all_AlleleFeatures_Slice() }) {
            # feature expressed in aligned coordinates
            my $aligned_af = $self->alignFeature($af);

            # features mapping beyond the end of the slice are dropped
            if ($aligned_af->seq_region_start <= $strain->end) {
                push @aligned, $aligned_af;
            }

            # only indels (start differs from end) yield a gap feature
            next if $af->start == $af->end;

            my $gap = bless { %{$aligned_af} }, ref($aligned_af);
            $gap->allele_string('-');
            push @gap_features, $gap;
        }

        # keep the strain's features as they are if nothing was aligned
        next unless @aligned;
        $strain->{'alleleFeatures'} = [@aligned];
    }

    push @all_slices, $reference_strain;

    # make every slice aware of the gaps introduced by any strain
    if (@gap_features) {
        for my $slice (@all_slices) {
            push @{ $slice->{'alignIndels'} }, @gap_features;
        }
    }

    return \@all_slices;
}
| 343 | |
| 344 1; |
