Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/IndividualSlice.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 =head1 LICENSE | |
| 2 | |
| 3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
| 4 Genome Research Limited. All rights reserved. | |
| 5 | |
| 6 This software is distributed under a modified Apache license. | |
| 7 For license details, please see | |
| 8 | |
| 9 http://www.ensembl.org/info/about/code_licence.html | |
| 10 | |
| 11 =head1 CONTACT | |
| 12 | |
| 13 Please email comments or questions to the public Ensembl | |
| 14 developers list at <dev@ensembl.org>. | |
| 15 | |
| 16 Questions may also be sent to the Ensembl help desk at | |
| 17 <helpdesk@ensembl.org>. | |
| 18 | |
| 19 =cut | |
| 20 | |
| 21 =head1 NAME | |
| 22 | |
| 23 Bio::EnsEMBL::IndividualSlice - SubClass of the Slice. Represents the | |
| 24 slice of the genome for a certain individual (applying the alleles for | |
| 25 this individual) | |
| 26 | |
| 27 =head1 SYNOPSIS | |
| 28 | |
| 29 $sa = $db->get_SliceAdaptor; | |
| 30 | |
| 31 $slice = | |
| 32 $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 ); | |
| 33 | |
| 34 $individualSlice = $slice->get_by_Individual($individual_name); | |
| 35 | |
| 36 # Get the sequence from the Individual Slice: will contain IUPAC codes | |
| 37 # for SNPs and Ensembl ambiguity codes for indels | |
| 38 my $seq = $individualSlice->seq(); | |
| 39 print $seq; | |
| 40 | |
| 41 # Get a subSlice of the Strain | |
| 42 my $subSlice_individual = | |
| 43 $individualSlice->sub_Slice( 5_000, 8_000, 1 ) | |
| 44 | |
| 45 # Compare two different individuals in the same Slice | |
| 46 my $sliceIndividual2 = $slice->get_by_Individual($individual_name2); | |
| 47 my $differences = | |
| 48 $individualSlice->get_all_differences_IndividualSlice( | |
| 49 $sliceIndividual2); | |
| 50 | |
| 51 foreach my $af ( @{$differences} ) { | |
| 52 print | |
| 53 "There is a difference between $individual_name " | |
| 54 . "and $individual_name2 at ", | |
| 55 $af->start, "-", $af->end, | |
| 56 " with allele ", $af->allele_string(), "\n"; | |
| 57 } | |
| 58 | |
| 59 =head1 DESCRIPTION | |
| 60 | |
| 61 A IndividualSlice object represents a region of a genome for a certain | |
| 62 individual. It can be used to retrieve sequence or features from a | |
| 63 individual. | |
| 64 | |
| 65 =head1 METHODS | |
| 66 | |
| 67 =cut | |
| 68 | |
| 69 package Bio::EnsEMBL::IndividualSlice; | |
| 70 use vars qw(@ISA); | |
| 71 use strict; | |
| 72 | |
| 73 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
| 74 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp); | |
| 75 use Bio::EnsEMBL::Slice; | |
| 76 use Bio::EnsEMBL::Mapper; | |
| 77 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning); | |
| 78 | |
| 79 @ISA = qw(Bio::EnsEMBL::Slice); | |
| 80 | |
| 81 | |
| 82 =head2 new | |
| 83 | |
| 84 Arg [1..N] : List of named arguments | |
| 85 Bio::EnsEMBL::CoordSystem COORD_SYSTEM | |
| 86 string SEQ_REGION_NAME, | |
| 87 int START, | |
| 88 int END, | |
| 89 string VERSION (optional, defaults to '') | |
| 90 int STRAND, (optional, defaults to 1) | |
| 91 Bio::EnsEMBL::DBSQL::SliceAdaptor ADAPTOR (optional) | |
| 92 Arg[N+1] : string $individual_name | |
| 93 Example : $individualSlice = Bio::EnsEMBL::IndividualSlice->new(-coord_system => $cs, | |
| 94 -start => 1, | |
| 95 -end => 10000, | |
| 96 -strand => 1, | |
| 97 -seq_region_name => 'X', | |
| 98 -seq_region_length => 12e6, | |
| 99 -individual_name => $individual_name); | |
| 100 Description : Creates a new Bio::EnsEMBL::IndividualSlice object that will contain a shallow copy of the | |
| 101 Slice object, plus additional information such as the individual this Slice refers to | |
| 102 and listref of Bio::EnsEMBL::Variation::AlleleFeatures of differences with the | |
| 103 reference sequence | |
| 104 ReturnType : Bio::EnsEMBL::IndividualSlice | |
| 105 Exceptions : none | |
| 106 Caller : general | |
| 107 | |
| 108 =cut | |
| 109 | |
| 110 sub new{ | |
| 111 my $caller = shift; | |
| 112 my $class = ref($caller) || $caller; | |
| 113 | |
| 114 #create the IndividualSlice object as the Slice, plus the individual attribute | |
| 115 my ($individual_name, $sample_id) = rearrange(['INDIVIDUAL', 'SAMPLE_ID'],@_); | |
| 116 | |
| 117 my $self = $class->SUPER::new(@_); | |
| 118 | |
| 119 $self->{'individual_name'} = $individual_name; | |
| 120 $self->{'sample_id'} = $sample_id; | |
| 121 | |
| 122 return $self; | |
| 123 | |
| 124 } | |
| 125 | |
| 126 =head2 individual_name | |
| 127 | |
| 128 Arg [1] : (optional) string $individual_name | |
| 129 Example : my $individual_name = $individualSlice->individual_name(); | |
| 130 Description : Getter/Setter for the name of the individual in the slice | |
| 131 ReturnType : string | |
| 132 Exceptions : none | |
| 133 Caller : general | |
| 134 | |
| 135 =cut | |
| 136 | |
| 137 sub individual_name{ | |
| 138 my $self = shift; | |
| 139 if (@_){ | |
| 140 $self->{'individual_name'} = shift @_; | |
| 141 } | |
| 142 return $self->{'individual_name'}; | |
| 143 } | |
| 144 | |
| 145 =head2 seq | |
| 146 | |
| 147 Arg [1] : none | |
| 148 Example : print "SEQUENCE = ", $strainSlice->seq(); | |
| 149 Description: Returns the sequence of the region represented by this | |
| 150 StrainSlice formatted as a string. | |
| 151 Returntype : string | |
| 152 Exceptions : none | |
| 153 Caller : general | |
| 154 | |
| 155 =cut | |
| 156 | |
| 157 sub seq { | |
| 158 my $self = shift; | |
| 159 | |
| 160 # special case for in-between (insert) coordinates | |
| 161 return '' if($self->start() == $self->end() + 1); | |
| 162 | |
| 163 return $self->{'seq'} if($self->{'seq'}); | |
| 164 | |
| 165 if($self->adaptor()) { | |
| 166 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor(); | |
| 167 my $reference_sequence = $seqAdaptor->fetch_by_Slice_start_end_strand($self,1,undef,1); #get the reference sequence for that slice | |
| 168 #apply all differences to the reference sequence | |
| 169 | |
| 170 # sort edits in reverse order to remove complication of | |
| 171 # adjusting downstream edits | |
| 172 my @allele_features_ordered = sort {$b->start() <=> $a->start() || $b->end() <=> $a->end()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'}); | |
| 173 | |
| 174 foreach my $af (@allele_features_ordered){ | |
| 175 $af->apply_edit($reference_sequence); #change, in the reference sequence, the af | |
| 176 } | |
| 177 # return substr(${$reference_sequence},0,1) if ($self->length == 1); | |
| 178 return ${$reference_sequence}; #returns the reference sequence, applying the alleleFeatures | |
| 179 } | |
| 180 | |
| 181 # no attached sequence, and no db, so just return Ns | |
| 182 return 'N' x $self->length(); | |
| 183 } | |
| 184 | |
| 185 =head2 get_all_differences_Slice | |
| 186 | |
| 187 Args : none | |
| 188 Example : my $differences = $individualSlice->get_all_differences_Slice() | |
| 189 Description : Gets all differences between the IndividualSlice object and the Slice is defined | |
| 190 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature | |
| 191 Exceptions : none | |
| 192 Caller : general | |
| 193 | |
| 194 =cut | |
| 195 | |
| 196 sub get_all_differences_Slice{ | |
| 197 my $self = shift; | |
| 198 my $differences; #reference to the array with the differences between Slice and StrainSlice | |
| 199 my $ref_allele; | |
| 200 foreach my $difference (@{$self->{'alleleFeatures'}}){ | |
| 201 if ($difference->length_diff == 0){ | |
| 202 #the difference is a SNP, check if it is the same as the reference allele | |
| 203 $ref_allele = $self->SUPER::subseq($difference->start,$difference->end,$difference->strand); | |
| 204 $ref_allele = '-' if ($ref_allele eq ''); | |
| 205 if ($ref_allele ne $difference->allele_string){ | |
| 206 #when the alleleFeature is different from the reference allele, add to the differences list | |
| 207 push @{$differences},$difference; | |
| 208 } | |
| 209 } | |
| 210 else{ | |
| 211 push @{$differences},$difference; | |
| 212 } | |
| 213 } | |
| 214 | |
| 215 return $differences; | |
| 216 | |
| 217 } | |
| 218 | |
| 219 =head2 get_all_differences_IndividualSlice | |
| 220 | |
| 221 Arg[1] : Bio::EnsEMBL::IndividualSlice $is | |
| 222 Example : my $differences = $individualSlice->get_all_differences_IndividualSlice($individualslice) | |
| 223 Description : Gets differences between 2 IndividualSlice objects | |
| 224 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature | |
| 225 Exceptions : thrown on bad argument | |
| 226 Caller : general | |
| 227 | |
| 228 =cut | |
| 229 | |
| 230 sub get_all_differences_IndividualSlice{ | |
| 231 my $self = shift; | |
| 232 my $individualSlice = shift; | |
| 233 | |
| 234 if (!ref($individualSlice) || !$individualSlice->isa('Bio::EnsEMBL::IndividualSlice')){ | |
| 235 throw('Bio::EnsEMBL::IndividualSlice arg expected'); | |
| 236 } | |
| 237 if ( @{$self->{'alleleFeatures'}} == 0 && @{$individualSlice->{'alleleFeatures'}} == 0){ | |
| 238 return undef; #there are no differences in any of the Individuals | |
| 239 | |
| 240 } | |
| 241 my $differences; #differences between individuals | |
| 242 if (@{$individualSlice->{'alleleFeatures'}} == 0){ | |
| 243 #need to create a copy of alleleFeature for the first Individual | |
| 244 foreach my $difference (@{$self->{'alleleFeatures'}}){ | |
| 245 my %vf = %$difference; | |
| 246 push @{$differences},bless \%vf,ref($difference); | |
| 247 } | |
| 248 } | |
| 249 elsif (@{$self->{'alleleFeatures'}} == 0){ | |
| 250 #need to create a copy of AlleleFeature, but changing the allele by the allele in the reference sequence | |
| 251 foreach my $difference (@{$individualSlice->{'alleleFeatures'}}){ | |
| 252 push @{$differences}, $individualSlice->_convert_difference($difference); | |
| 253 } | |
| 254 } | |
| 255 else{ | |
| 256 #both individuals have differences | |
| 257 #create a hash with the differences in the first slice | |
| 258 my %allele_features_self = map {$_->start.'-'.$_->end => $_} @{$self->{'alleleFeatures'}}; | |
| 259 foreach my $difference (@{$individualSlice->{'alleleFeatures'}}){ | |
| 260 #there is no difference in the other individual slice, convert the allele | |
| 261 if (!defined $allele_features_self{$difference->start.'-'.$difference->end}){ | |
| 262 push @{$differences},$individualSlice->_convert_difference($difference); | |
| 263 } | |
| 264 else{ | |
| 265 #if it is defined and have the same allele, delete from the hash since it is not a difference | |
| 266 #between the individuals | |
| 267 if ($allele_features_self{$difference->start.'-'.$difference->end}->allele_string eq $difference->allele_string){ | |
| 268 delete $allele_features_self{$difference->start.'-'.$difference->end}; | |
| 269 } | |
| 270 } | |
| 271 } | |
| 272 #and finally, make a shallow copy of the differences in the first individual | |
| 273 foreach my $difference (values %allele_features_self){ | |
| 274 my %vf = %$difference; | |
| 275 push @{$differences},bless \%vf,ref($difference); | |
| 276 } | |
| 277 | |
| 278 } | |
| 279 #need to map differences to the first individual, self, since the coordinates are in the Slice coordinate system | |
| 280 my $mapper = $self->mapper(); #now that we have the differences, map them in the IndividualSlice | |
| 281 my @results; | |
| 282 foreach my $difference (@{$differences}){ | |
| 283 @results = $mapper->map_coordinates('Slice',$difference->start,$difference->end,$difference->strand,'Slice'); | |
| 284 #we can have 3 possibilities: | |
| 285 #the difference is an insertion and when mapping returns the boundaries of the insertion in the IndividualSlice | |
| 286 if (@results == 2){ | |
| 287 #the first position in the result is the beginning of the insertion | |
| 288 if($results[0]->start < $results[1]->start){ | |
| 289 $difference->start($results[0]->end+1); | |
| 290 $difference->end($results[1]->start-1); | |
| 291 } | |
| 292 else{ | |
| 293 #it is the second position the beginning of the insertion | |
| 294 $difference->start($results[1]->end+1); | |
| 295 $difference->end($results[0]->start-1); | |
| 296 } | |
| 297 $difference->strand($results[0]->strand); | |
| 298 } | |
| 299 else{ | |
| 300 #it can be either a SNP or a deletion, and we have the coordinates in the result, etither a Bio::EnsEMBL::Mapper::Coordinate | |
| 301 # or a Bio::EnsEMBL::Mapper::IndelCoordinate | |
| 302 $difference->start($results[0]->start); | |
| 303 $difference->end($results[0]->end); | |
| 304 $difference->strand($results[0]->strand); | |
| 305 } | |
| 306 } | |
| 307 | |
| 308 return $differences; | |
| 309 } | |
| 310 | |
| 311 #for a given AlleleFeature, converts the allele into the reference allele and returns | |
| 312 #the converted AlleleFeature | |
| 313 | |
| 314 sub _convert_difference{ | |
| 315 my $self = shift; | |
| 316 my $difference = shift; | |
| 317 my %new_af = %$difference; #make a copy of the alleleFeature | |
| 318 #and change the allele with the one from the reference Slice | |
| 319 $new_af{'allele_string'} = $self->SUPER::subseq($difference->start,$difference->end,$difference->strand); | |
| 320 return bless \%new_af,ref($difference); | |
| 321 } | |
| 322 | |
| 323 =head2 mapper | |
| 324 | |
| 325 Args : none | |
| 326 Description: Getter for the mapper between the between the IndividualSlice and the Slice it refers to. | |
| 327 It is done automatically when necessary to create subSlice or to get the differences between individuals | |
| 328 Returntype : Bio::EnsEMBL::Mapper | |
| 329 Exceptions : none | |
| 330 Caller : Internal function | |
| 331 | |
| 332 =cut | |
| 333 | |
| 334 sub mapper{ | |
| 335 my $self = shift; | |
| 336 | |
| 337 if (@_) { | |
| 338 #allow to create again the mapper | |
| 339 delete $self->{'mapper'}; | |
| 340 } | |
| 341 if(!defined $self->{'mapper'}){ | |
| 342 #create the mapper between the Slice and StrainSlice | |
| 343 my $mapper = Bio::EnsEMBL::Mapper->new('Slice','IndividualSlice'); | |
| 344 #align with Slice | |
| 345 #get all the VariationFeatures in the Individual Slice, from start to end in the Slice | |
| 346 my @allele_features_ordered = sort {$a->start() <=> $b->start() || $b->end() <=> $a->end()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'}); | |
| 347 | |
| 348 my $start_slice = 1; | |
| 349 my $end_slice; | |
| 350 my $start_individual = 1; | |
| 351 my $end_individual; | |
| 352 my $length_allele; | |
| 353 my $total_length_diff = 0; | |
| 354 #we will walk from left to right in the slice object, updating the start and end individual every time | |
| 355 #there is a new alleleFeature in the Individual | |
| 356 foreach my $allele_feature (@allele_features_ordered){ | |
| 357 #we have a insertion/deletion: marks the beginning of new slice move coordinates | |
| 358 if ($allele_feature->length_diff != 0){ | |
| 359 $total_length_diff += $allele_feature->length_diff; | |
| 360 $length_allele = $allele_feature->length + $allele_feature->length_diff(); #length of the allele in the Individual | |
| 361 $end_slice = $allele_feature->start() - 1; #set the end of the slice before the alleleFeature | |
| 362 if ($end_slice >= $start_slice){ | |
| 363 #normal cases (not with gaps) | |
| 364 $end_individual = $end_slice - $start_slice + $start_individual; #set the end of the individual from the beginning plus the offset | |
| 365 #add the sequence that maps | |
| 366 $mapper->add_map_coordinates('Slice',$start_slice,$end_slice,1,'IndividualSlice',$start_individual,$end_individual); | |
| 367 #and add the indel | |
| 368 $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $allele_feature->length,1,'IndividualSlice',$end_individual+1,$end_individual + $length_allele); | |
| 369 $start_individual = $end_individual + $length_allele + 1; #set the beginning of the individual after the allele | |
| 370 } | |
| 371 else{ | |
| 372 #add the indel | |
| 373 $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $allele_feature->length,1,'IndividualSlice',$end_individual+1,$end_individual + $length_allele); | |
| 374 $start_individual += $length_allele; | |
| 375 } | |
| 376 $start_slice = $end_slice + $allele_feature->length+ 1; #set the beginning of the slice after the variation feature | |
| 377 } | |
| 378 } | |
| 379 if ($start_slice <= $self->length){ | |
| 380 #if we haven't reached the end of the IndividualSlice, add the final map coordinates between the individual and the slice | |
| 381 $mapper->add_map_coordinates('Slice',$start_slice,$self->length,1,'IndividualSlice',$start_individual,$start_individual + $self->length - $start_slice); | |
| 382 } | |
| 383 | |
| 384 $mapper->add_map_coordinates('Slice', -$self->start+1, 0,1, 'IndividualSlice', -$self->start +1,0) if ($self->start > 0); #before individualSlice | |
| 385 $mapper->add_map_coordinates('Slice', $self->length + 1,$self->seq_region_length - ($self->length +1),1, 'IndividualSlice', $self->length + 1 + $total_length_diff,$self->seq_region_length + $total_length_diff - ($self->length +1) ) if ($self->length <= $self->seq_region_length); #after strainSlice | |
| 386 $self->{'mapper'} = $mapper; | |
| 387 } | |
| 388 return $self->{'mapper'}; | |
| 389 } | |
| 390 | |
| 391 =head2 sub_Slice | |
| 392 | |
| 393 Arg 1 : int $start | |
| 394 Arg 2 : int $end | |
| 395 Arge [3] : int $strand | |
| 396 Example : none | |
| 397 Description: Makes another IndividualSlice that covers only part of this IndividualSlice | |
| 398 with the appropriate differences to the reference Slice | |
| 399 If a slice is requested which lies outside of the boundaries | |
| 400 of this function will return undef. This means that | |
| 401 behaviour will be consistant whether or not the slice is | |
| 402 attached to the database (i.e. if there is attached sequence | |
| 403 to the slice). Alternatively the expand() method or the | |
| 404 SliceAdaptor::fetch_by_region method can be used instead. | |
| 405 Returntype : Bio::EnsEMBL::IndividualSlice or undef if arguments are wrong | |
| 406 Exceptions : thrown when trying to get the subSlice in the middle of a | |
| 407 insertion | |
| 408 Caller : general | |
| 409 | |
| 410 =cut | |
| 411 | |
| 412 sub sub_Slice { | |
| 413 my ( $self, $start, $end, $strand ) = @_; | |
| 414 my $mapper = $self->mapper(); | |
| 415 #map from the Individual to the Slice to get the sub_Slice, and then, apply the differences in the subSlice | |
| 416 my @results = $mapper->map_coordinates('IndividualSlice',$start,$end,$strand,'IndividualSlice'); | |
| 417 my $new_start; | |
| 418 my $new_end; | |
| 419 my $new_strand; | |
| 420 my $new_seq; | |
| 421 #Get need start and end for the subSlice of the IndividualSlice | |
| 422 my @results_ordered = sort {$a->start <=> $b->start} grep {ref($_) eq 'Bio::EnsEMBL::Mapper::Coordinate'} @results; | |
| 423 $new_start = $results_ordered[0]->start(); | |
| 424 $new_strand = $results_ordered[0]->strand() if (ref($results_ordered[0]) eq 'Bio::EnsEMBL::Mapper::Coordinate'); | |
| 425 # $new_strand = $results_ordered[-1]->strand() if (ref($results_ordered[-1]) eq 'Bio::EnsEMBL::Mapper::Coordinate'); | |
| 426 $new_end = $results_ordered[-1]->end(); #get last element of the array, the end of the slice | |
| 427 | |
| 428 my $subSlice = $self->SUPER::sub_Slice($new_start,$new_end,$new_strand); | |
| 429 $subSlice->{'individual_name'} = $self->{'individual_name'}; | |
| 430 | |
| 431 my $new_alleles; #reference to an array that will contain the variationFeatures in the new subSlice | |
| 432 #update the VariationFeatures in the sub_Slice of the Individual | |
| 433 my %af; | |
| 434 my $new_allele_feature; | |
| 435 foreach my $alleleFeature (@{$self->{'alleleFeatures'}}){ | |
| 436 $new_allele_feature = $alleleFeature->transfer($subSlice); | |
| 437 #only transfer the coordinates to the SubSlice that are within the boundaries | |
| 438 if ($new_allele_feature->start >= 1 && $new_allele_feature->end <= $subSlice->length){ | |
| 439 push @{$new_alleles}, $new_allele_feature; | |
| 440 } | |
| 441 } | |
| 442 $subSlice->{'alleleFeatures'} = $new_alleles; | |
| 443 return $subSlice; | |
| 444 | |
| 445 } | |
| 446 | |
| 447 =head2 subseq | |
| 448 | |
| 449 Arg [1] : int $startBasePair | |
| 450 relative to start of slice, which is 1. | |
| 451 Arg [2] : int $endBasePair | |
| 452 relative to start of slice. | |
| 453 Arg [3] : (optional) int $strand | |
| 454 The strand of the individual slice to obtain sequence from. Default | |
| 455 value is 1. | |
| 456 Description: returns string of dna sequence | |
| 457 Returntype : txt | |
| 458 Exceptions : end should be at least as big as start | |
| 459 strand must be set | |
| 460 Caller : general | |
| 461 | |
| 462 =cut | |
| 463 | |
| 464 sub subseq { | |
| 465 my ( $self, $start, $end, $strand ) = @_; | |
| 466 | |
| 467 if ( $end+1 < $start ) { | |
| 468 throw("End coord + 1 is less than start coord"); | |
| 469 } | |
| 470 | |
| 471 # handle 'between' case for insertions | |
| 472 return '' if( $start == $end + 1); | |
| 473 | |
| 474 $strand = 1 unless(defined $strand); | |
| 475 | |
| 476 if ( $strand != -1 && $strand != 1 ) { | |
| 477 throw("Invalid strand [$strand] in call to Slice::subseq."); | |
| 478 } | |
| 479 | |
| 480 my $subseq; | |
| 481 my $seq; | |
| 482 if($self->adaptor){ | |
| 483 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor(); | |
| 484 $subseq = ${$seqAdaptor->fetch_by_Slice_start_end_strand($self,$start,$end,$strand)}; #get the reference sequence for that slice | |
| 485 #apply all differences to the reference sequence | |
| 486 # sort edits in reverse order to remove complication of | |
| 487 # adjusting downstream edits | |
| 488 my @allele_features_ordered = sort {$b->start() <=> $a->start() || $b->end() <=> $a->end()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'}); | |
| 489 my $af_start; | |
| 490 my $af_end; | |
| 491 foreach my $af (@allele_features_ordered){ | |
| 492 if (($af->start - $start +1 > 0) && ($end - $af->end > 0)){ | |
| 493 #save the current start and end of the alleleFeature before changing for apply_edit | |
| 494 $af_start = $af->start; | |
| 495 $af_end = $af->end; | |
| 496 #apply the difference if the feature is in the new slice | |
| 497 $af->start($af->start - $start +1); | |
| 498 $af->end($af->end - $start +1); | |
| 499 $af->apply_edit(\$subseq); #change, in the reference sequence, the af | |
| 500 #restore the initial values of alleleFeature start and end | |
| 501 $af->start($af_start); | |
| 502 $af->end($af_end); | |
| 503 | |
| 504 } | |
| 505 } | |
| 506 } | |
| 507 else { | |
| 508 ## check for gap at the beginning and pad it with Ns | |
| 509 if ($start < 1) { | |
| 510 $subseq = "N" x (1 - $start); | |
| 511 $start = 1; | |
| 512 } | |
| 513 $subseq .= substr ($self->seq(), $start-1, $end - $start + 1); | |
| 514 ## check for gap at the end and pad it with Ns | |
| 515 if ($end > $self->length()) { | |
| 516 $subseq .= "N" x ($end - $self->length()); | |
| 517 } | |
| 518 reverse_comp(\$subseq) if($strand == -1); | |
| 519 } | |
| 520 return $subseq; | |
| 521 | |
| 522 } | |
| 523 | |
| 524 =head2 get_all_Transcripts | |
| 525 | |
| 526 Args : None | |
| 527 Example : @transcripts = @{$individualslice->get_all_Transcripts)}; | |
| 528 Description: Gets all transcripts which overlap this Individual Slice. If you want to | |
| 529 specify a particular analysis or type, then you are better off | |
| 530 using get_all_Genes or get_all_Genes_by_type and iterating | |
| 531 through the transcripts of each gene. | |
| 532 Returntype : reference to a list of Bio::EnsEMBL::Transcripts | |
| 533 Exceptions : none | |
| 534 Caller : general | |
| 535 | |
| 536 =cut | |
| 537 | |
| 538 sub get_all_Transcripts { | |
| 539 my $self = shift; | |
| 540 | |
| 541 my $transcripts = $self->SUPER::get_all_Transcripts(1); | |
| 542 $self->map_to_Individual($transcripts); | |
| 543 | |
| 544 return $transcripts; | |
| 545 } | |
| 546 | |
| 547 | |
| 548 =head2 get_all_Exons | |
| 549 | |
| 550 Arg [1] : (optional) string $dbtype | |
| 551 The dbtype of exons to obtain. This assumes that the db has | |
| 552 been added to the DBAdaptor under this name (using the | |
| 553 DBConnection::add_db_adaptor method). | |
| 554 Example : @exons = @{$individualSlice->get_all_Exons}; | |
| 555 Description: Gets all exons which overlap this IndividualSlice. Note that these exons | |
| 556 will not be associated with any transcripts, so this may not | |
| 557 be terribly useful. | |
| 558 Returntype : reference to a list of Bio::EnsEMBL::Exons | |
| 559 Exceptions : none | |
| 560 Caller : general | |
| 561 | |
| 562 =cut | |
| 563 | |
| 564 sub get_all_Exons { | |
| 565 my $self = shift; | |
| 566 my $dbtype = shift; | |
| 567 | |
| 568 my $exons = $self->SUPER::get_all_Exons($dbtype); | |
| 569 $self->map_to_Individual($exons); #map the exons to the Individual | |
| 570 | |
| 571 return $exons; | |
| 572 } | |
| 573 | |
| 574 =head2 get_all_Genes | |
| 575 | |
| 576 Arg [1] : (optional) string $logic_name | |
| 577 The name of the analysis used to generate the genes to retrieve | |
| 578 Arg [2] : (optional) string $dbtype | |
| 579 The dbtype of genes to obtain. This assumes that the db has | |
| 580 been added to the DBAdaptor under this name (using the | |
| 581 DBConnection::add_db_adaptor method). | |
| 582 Example : @genes = @{$individualSlice->get_all_Genes}; | |
| 583 Description: Retrieves all genes that overlap this slice. | |
| 584 Returntype : listref of Bio::EnsEMBL::Genes | |
| 585 Exceptions : none | |
| 586 Caller : none | |
| 587 | |
| 588 =cut | |
| 589 | |
| 590 sub get_all_Genes{ | |
| 591 my ($self, $logic_name, $dbtype) = @_; | |
| 592 | |
| 593 my $genes = $self->SUPER::get_all_Genes($logic_name, $dbtype, 1); | |
| 594 | |
| 595 $self->map_to_Individual($genes); | |
| 596 | |
| 597 foreach my $gene (@{$genes}){ | |
| 598 $self->map_to_Individual($gene->get_all_Exons); #map the Exons to the Individual | |
| 599 $self->map_to_Individual($gene->get_all_Transcripts); #map the Transcripts to the Individual | |
| 600 } | |
| 601 | |
| 602 return $genes; | |
| 603 } | |
| 604 | |
| 605 =head2 map_to_Individual | |
| 606 | |
| 607 Arg[1] : ref $features | |
| 608 Example : $individualSlice->map_to_Individual($exons); | |
| 609 Description : Gets the features from the Slice and maps it in the IndividualSlice, using the mapper | |
| 610 between Slice and IndividualSlice | |
| 611 ReturnType : None | |
| 612 Exceptions : None | |
| 613 Caller : general | |
| 614 | |
| 615 =cut | |
| 616 | |
| 617 sub map_to_Individual{ | |
| 618 my $self = shift; | |
| 619 my $features = shift; | |
| 620 | |
| 621 my $mapper = $self->mapper(); | |
| 622 my (@results, @results_ordered, $new_start, $new_end, $new_strand); | |
| 623 #foreach of the transcripts, map them to the IndividualSlice and replace the Slice with the IndividualSlice | |
| 624 foreach my $feature (@{$features}){ | |
| 625 $feature->slice($self); #replace the IndividualSlice as the Slice for this feature (the Slice plus the AlleleFeatures) | |
| 626 #map from the Slice to the Individual Slice | |
| 627 my @results = $mapper->map_coordinates('Slice',$feature->start,$feature->end,$feature->strand,'Slice'); | |
| 628 #from the results, order them but filter out those that are not coordinates | |
| 629 @results_ordered = sort {$a->start <=> $b->start} grep {ref($_) eq 'Bio::EnsEMBL::Mapper::Coordinate'} @results; | |
| 630 $new_start = $results_ordered[0]->start(); | |
| 631 $new_strand = $results_ordered[0]->strand(); | |
| 632 $new_end = $results_ordered[-1]->end(); #get last element of the array, the end of the slice | |
| 633 $feature->start($new_start); #update new coordinates | |
| 634 $feature->end($new_end); | |
| 635 $feature->strand($new_strand); | |
| 636 } | |
| 637 } | |
| 638 | |
| 639 sub alleleFeatures{ | |
| 640 my $self = shift; | |
| 641 return $self->{'alleleFeatures'}; | |
| 642 } | |
| 643 | |
| 644 sub add_AlleleFeature{ | |
| 645 my $self = shift; | |
| 646 | |
| 647 if (@_){ | |
| 648 if(!ref($_[0]) || !$_[0]->isa('Bio::EnsEMBL::Variation::AlleleFeature')) { | |
| 649 throw("Bio::EnsEMBL::Variation::AlleleFeature argument expected"); | |
| 650 } | |
| 651 #add the alleleFeature to the individualSlice | |
| 652 push @{$self->{'alleleFeatures'}},shift; | |
| 653 } | |
| 654 } | |
| 655 1; |
