Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/StrainSlice.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::StrainSlice - SubClass of the Slice. Represents the slice | |
24 of the genome for a certain strain (applying the variations) | |
25 | |
26 =head1 SYNOPSIS | |
27 | |
28 $sa = $db->get_SliceAdaptor; | |
29 | |
30 $slice = | |
31 $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 ); | |
32 | |
33 $strainSlice = $slice->get_by_strain($strain_name); | |
34 | |
35 # get the sequence from the Strain Slice | |
36 my $seq = $strainSlice->seq(); | |
37 print $seq; | |
38 | |
39 # get allele features between this StrainSlice and the reference | |
40 my $afs = $strainSlice->get_all_AlleleFeatures_Slice(); | |
41 foreach my $af ( @{$afs} ) { | |
42 print "AlleleFeature in position ", $af->start, "-", $af->end, | |
43 " in strain with allele ", $af->allele_string, "\n"; | |
44 } | |
45 | |
46 # compare a strain against another strain | |
47 my $strainSlice_2 = $slice->get_by_strain($strain_name_2); | |
48 my $differences = | |
49 $strainSlice->get_all_differences_StrainSlice($strainSlice_2); | |
50 | |
51 foreach my $difference ( @{$differences} ) { | |
52 print "Difference in position ", $difference->start, "-", | |
53 $difference->end(), " in strain with allele ", | |
54 $difference->allele_string(), "\n"; | |
55 } | |
56 | |
57 =head1 DESCRIPTION | |
58 | |
59 A StrainSlice object represents a region of a genome for a certain | |
60 strain. It can be used to retrieve sequence or features from a strain. | |
61 | |
62 =head1 METHODS | |
63 | |
64 =cut | |
65 | |
66 package Bio::EnsEMBL::StrainSlice; | |
67 use vars qw(@ISA); | |
68 use strict; | |
69 | |
70 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
71 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp); | |
72 use Bio::EnsEMBL::Slice; | |
73 use Bio::EnsEMBL::Mapper; | |
74 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning); | |
75 | |
76 @ISA = qw(Bio::EnsEMBL::Slice); | |
77 | |
78 | |
79 =head2 new | |
80 | |
81 Arg[1] : Bio::EnsEMBL::Slice $slice | |
82 Arg[2] : string $strain_name | |
83 Example : $strainSlice = Bio::EnsEMBL::StrainSlice->new(-.... => , | |
84 -strain_name => $strain_name); | |
85 Description : Creates a new Bio::EnsEMBL::StrainSlice object that will contain a shallow copy of the | |
86 Slice object, plus additional information such as the Strain this Slice refers to | |
87 and listref of Bio::EnsEMBL::Variation::AlleleFeatures of differences with the | |
88 reference sequence | |
89 ReturnType : Bio::EnsEMBL::StrainSlice | |
90 Exceptions : none | |
91 Caller : general | |
92 | |
93 =cut | |
94 | |
95 sub new{ | |
96 my $caller = shift; | |
97 my $class = ref($caller) || $caller; | |
98 | |
99 my ($strain_name) = rearrange(['STRAIN_NAME'],@_); | |
100 | |
101 my $self = $class->SUPER::new(@_); | |
102 | |
103 $self->{'strain_name'} = $strain_name; | |
104 | |
105 if(!$self->adaptor()) { | |
106 warning('Cannot get new StrainSlice features without attached adaptor'); | |
107 return ''; | |
108 } | |
109 my $variation_db = $self->adaptor->db->get_db_adaptor('variation'); | |
110 | |
111 unless($variation_db) { | |
112 warning("Variation database must be attached to core database to " . | |
113 "retrieve variation information" ); | |
114 return ''; | |
115 } | |
116 | |
117 my $af_adaptor = $variation_db->get_AlleleFeatureAdaptor; | |
118 | |
119 if( $af_adaptor ) { | |
120 #get the Individual for the given strain | |
121 my $ind_adaptor = $variation_db->get_IndividualAdaptor; | |
122 | |
123 if ($ind_adaptor){ | |
124 my $individual = shift @{$ind_adaptor->fetch_all_by_name($self->{'strain_name'})}; #the name should be unique for a strain | |
125 #check that the individua returned isin the database | |
126 | |
127 if (defined $individual){ | |
128 my $allele_features = $af_adaptor->fetch_all_by_Slice($self,$individual); | |
129 #warning("No strain genotype data available for Slice ".$self->name." and Strain ".$individual->name) if ! defined $allele_features->[0]; | |
130 my $vf_ids = {}; #hash containing the relation vf_id->af | |
131 $self->{'_strain'} = $individual; | |
132 map {defined $_->{'_variation_feature_id'} ? $vf_ids->{$_->{'_variation_feature_id'}} = $_ : '' | |
133 } @{$allele_features}; | |
134 # my $new_allele_features = $self->_filter_af_by_coverage($allele_features); | |
135 # $self->{'alleleFeatures'} = $new_allele_features; | |
136 $self->{'alleleFeatures'} = $allele_features || []; | |
137 $self->{'_vf_ids'} = $vf_ids; | |
138 return $self; | |
139 } | |
140 else{ | |
141 warning("Strain ($self->{strain_name}) not in the database"); | |
142 return $self; | |
143 } | |
144 } | |
145 else{ | |
146 warning("Not possible to retrieve IndividualAdaptor from the variation database"); | |
147 return ''; | |
148 } | |
149 } else { | |
150 warning("Not possible to retrieve VariationFeatureAdaptor from variation database"); | |
151 return ''; | |
152 } | |
153 } | |
154 | |
155 =head2 _filter_af_by_coverage | |
156 | |
157 Arg [1] : listref to Bio::EnsEMBL::Variation::AlleleFeatures $allele_features | |
158 Example : my $new_list_allele_features = $strainSlice->_filter_af_by_coverage($allele_features); | |
159 Description : For a list of allele features, gets a new list where they are filter depending on coverage | |
160 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature | |
161 Exceptions : none | |
162 Caller : internal function | |
163 | |
164 =cut | |
165 | |
166 sub _filter_af_by_coverage{ | |
167 my $self = shift; | |
168 my $allele_features = shift; | |
169 | |
170 my $variation_db = $self->adaptor->db->get_db_adaptor('variation'); | |
171 | |
172 unless($variation_db) { | |
173 warning("Variation database must be attached to core database to " . | |
174 "retrieve variation information" ); | |
175 return ''; | |
176 } | |
177 | |
178 my $rc_adaptor = $variation_db->get_ReadCoverageAdaptor(); | |
179 #this is ugly, but ReadCoverage is always defined in the positive strand | |
180 | |
181 ### EK : - it looks like the arguments to fetch_all_by_Slice_Sample_depth have changed | |
182 ### passing 1 will only get you the coverage of level 1 | |
183 ### by omitting the parameter we take into account all coverage regions | |
184 # my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'},1); | |
185 my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'}); | |
186 my $new_af; | |
187 foreach my $af (@{$allele_features}){ | |
188 foreach my $rc (@{$rcs}){ | |
189 if ($af->start <= $rc->end and $af->start >= $rc->start){ | |
190 push @{$new_af}, $af; | |
191 last; | |
192 } | |
193 } | |
194 } | |
195 | |
196 return $new_af; | |
197 } | |
198 | |
199 | |
200 =head2 strain_name | |
201 | |
202 Arg [1] : (optional) string $strain_name | |
203 Example : my $strain_name = $strainSlice->strain_name(); | |
204 Description : Getter/Setter for the name of the strain | |
205 ReturnType : string | |
206 Exceptions : none | |
207 Caller : general | |
208 | |
209 =cut | |
210 | |
211 sub strain_name{ | |
212 my $self = shift; | |
213 if (@_){ | |
214 $self->{'strain_name'} = shift @_; | |
215 } | |
216 return $self->{'strain_name'}; | |
217 } | |
218 | |
219 | |
220 =head2 display_Slice_name | |
221 | |
222 Args : none | |
223 Example : my $strain_name = $strainSlice->display_Slice_name(); | |
224 Description : Getter for the name of the strain | |
225 ReturnType : string | |
226 Exceptions : none | |
227 Caller : webteam | |
228 | |
229 =cut | |
230 | |
231 sub display_Slice_name{ | |
232 my $self = shift; | |
233 | |
234 return $self->strain_name; | |
235 } | |
236 | |
237 =head2 seq | |
238 | |
239 Arg [1] : int $with_coverage (optional) | |
240 Example : print "SEQUENCE = ", $strainSlice->seq(); | |
241 Description: Returns the sequence of the region represented by this | |
242 slice formatted as a string in the strain. If flag with_coverage | |
243 is set to 1, returns sequence if there is coverage in the region | |
244 Returntype : string | |
245 Exceptions : none | |
246 Caller : general | |
247 | |
248 =cut | |
249 | |
250 sub seq { | |
251 my $self = shift; | |
252 my $with_coverage = shift; | |
253 | |
254 $with_coverage ||= 0; | |
255 | |
256 # special case for in-between (insert) coordinates | |
257 return '' if($self->start() == $self->end() + 1); | |
258 | |
259 return $self->{'seq'} if($self->{'seq'}); | |
260 | |
261 if($self->adaptor()) { | |
262 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor(); | |
263 my $reference_sequence = $seqAdaptor->fetch_by_Slice_start_end_strand($self,1,undef,1); #get the reference sequence for that slice | |
264 | |
265 #apply all differences to the reference sequence | |
266 #first, in case there are any indels, create the new sequence (containing the '-' bases) | |
267 # sort edits in reverse order to remove complication of | |
268 # adjusting downstream edits | |
269 my @indels_ordered = sort {$b->start() <=> $a->start()} @{$self->{'alignIndels'}} if (defined $self->{'alignIndels'}); | |
270 | |
271 foreach my $vf (@indels_ordered){ | |
272 $vf->apply_edit($reference_sequence); #change, in the reference sequence, the vf | |
273 } | |
274 | |
275 #need to find coverage information if diffe | |
276 # sort edits in reverse order to remove complication of | |
277 # adjusting downstream edits | |
278 my @variation_features_ordered = sort {$b->start() <=> $a->start()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'}); | |
279 | |
280 foreach my $vf (@variation_features_ordered){ | |
281 $vf->apply_edit($reference_sequence); #change, in the reference sequence, the vf | |
282 } | |
283 | |
284 #need to find coverage information if different from reference | |
285 my $indAdaptor = $self->adaptor->db->get_db_adaptor('variation')->get_IndividualAdaptor; | |
286 my $ref_strain = $indAdaptor->get_reference_strain_name; | |
287 $self->_add_coverage_information($reference_sequence) if ($with_coverage == 1 && $self->strain_name ne $ref_strain); | |
288 return substr(${$reference_sequence},0,1) if ($self->length == 1); | |
289 return substr(${$reference_sequence},0,$self->expanded_length); #returns the reference sequence, applying the variationFeatures. Remove additional bases added due to indels | |
290 } | |
291 | |
292 # no attached sequence, and no db, so just return Ns | |
293 return 'N' x $self->length(); | |
294 } | |
295 | |
296 sub expanded_length() { | |
297 my $self = shift; | |
298 | |
299 my $length = $self->SUPER::length(); | |
300 | |
301 foreach my $af(@{$self->{'alleleFeatures'}}) { | |
302 $length += $af->length_diff() if $af->length_diff > 0; | |
303 } | |
304 | |
305 return $length; | |
306 } | |
307 | |
308 | |
309 | |
310 sub _add_coverage_information{ | |
311 my $self = shift; | |
312 my $reference_sequence = shift; | |
313 | |
314 my $variation_db = $self->adaptor->db->get_db_adaptor('variation'); | |
315 | |
316 unless($variation_db) { | |
317 warning("Variation database must be attached to core database to " . | |
318 "retrieve variation information" ); | |
319 return ''; | |
320 } | |
321 | |
322 my $rc_adaptor = $variation_db->get_ReadCoverageAdaptor(); | |
323 ### EK : - it looks like the arguments to fetch_all_by_Slice_Sample_depth have changed | |
324 ### passing 1 will only get you the coverage of level 1 | |
325 ### by omitting the parameter we take into account all coverage regions | |
326 # my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'},1); | |
327 my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'}); | |
328 my $rcs_sorted; | |
329 @{$rcs_sorted} = sort {$a->start <=> $b->start} @{$rcs} if ($self->strand == -1); | |
330 $rcs = $rcs_sorted if ($self->strand == -1); | |
331 my $start = 1; | |
332 | |
333 | |
334 # wm2 - new code to mask sequence, instead starts with masked string | |
335 # and unmasks seq where there is read coverage | |
336 | |
337 # get all length-changing vars | |
338 my @indels_ordered = sort {$a->start() <=> $b->start()} @{$self->{'alignIndels'}} if (defined $self->{'alignIndels'}); | |
339 | |
340 my $masked_seq = '~' x length($$reference_sequence); | |
341 | |
342 foreach my $rc(@{$rcs}) { | |
343 my ($start, $end) = ($rc->start, $rc->end); | |
344 | |
345 # adjust region for indels | |
346 foreach my $indel(@indels_ordered) { | |
347 next if $rc->start > $end; | |
348 | |
349 # if within RC region, only need adjust the end | |
350 $start += $indel->length_diff unless $indel->start > $start; | |
351 $end += $indel->length_diff; | |
352 } | |
353 | |
354 # adjust coords for seq boundaries | |
355 $start = 1 if $start < 1; | |
356 $end = CORE::length($masked_seq) if $end > CORE::length($masked_seq); | |
357 | |
358 # now unmask the sequence using $$reference_sequence | |
359 substr($masked_seq, $start - 1, $end - $start + 1) = substr($$reference_sequence, $start - 1, $end - $start + 1); | |
360 } | |
361 | |
362 # wm2 - old code, starts with sequence and masks regions between read coverage - BUGGY | |
363 # foreach my $rc (@{$rcs}){ | |
364 # $rc->start(1) if ($rc->start < 0); #if the region lies outside the boundaries of the slice | |
365 # $rc->end($self->end - $self->start + 1) if ($rc->end + $self->start > $self->end); | |
366 # | |
367 # warn "Adjusted: ", $rc->start, "-", $rc->end; | |
368 # | |
369 # warn "Covering from ", $start, " over ", ($rc->start - $start - 1), " bases"; | |
370 # | |
371 # substr($$reference_sequence, $start-1,($rc->start - $start - 1),'~' x ($rc->start - $start - 1)) if ($rc->start - 1 > $start); | |
372 # $start = $rc->end; | |
373 # | |
374 # } | |
375 # substr($$reference_sequence, $start, ($self->length - $start) ,'~' x ($self->length - $start)) if ($self->length -1 > $start); | |
376 | |
377 # copy the masked sequence to the reference sequence | |
378 $$reference_sequence = $masked_seq; | |
379 } | |
380 | |
381 | |
382 =head2 get_AlleleFeature | |
383 | |
384 Arg[1] : Bio::EnsEMBL::Variation::VariationFeature $vf | |
385 Example : my $af = $strainSlice->get_AlleleFeature($vf); | |
386 Description : Returns the AlleleFeature object associated with the VariationFeature (if any) | |
387 ReturnType : Bio::EnsEMBL::Variation::AlleleFeature | |
388 Exceptions : none | |
389 Caller : general | |
390 | |
391 =cut | |
392 | |
393 sub get_AlleleFeature{ | |
394 my $self = shift; | |
395 my $vf = shift; | |
396 | |
397 my $af; | |
398 #look at the hash containing the relation vf_id->alleleFeature, if present, return object, otherwise, undef | |
399 $af = $self->{'_vf_ids'}->{$vf->dbID} if (defined $self->{'_vf_ids'}->{$vf->dbID}); | |
400 return $af; | |
401 } | |
402 | |
403 | |
404 =head2 get_all_AlleleFeatures_Slice | |
405 | |
406 Arg[1] : int $with_coverage (optional) | |
407 Example : my $af = $strainSlice->get_all_AlleleFeatures_Slice() | |
408 Description : Gets all AlleleFeatures between the StrainSlice object and the Slice is defined. | |
409 If argument $with_coverage set to 1, returns only AF if they have coverage information | |
410 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature | |
411 Exceptions : none | |
412 Caller : general | |
413 | |
414 =cut | |
415 | |
416 sub get_all_AlleleFeatures_Slice{ | |
417 my $self = shift; | |
418 my $with_coverage = shift; | |
419 | |
420 my $variation_db = $self->adaptor->db->get_db_adaptor('variation'); | |
421 | |
422 unless($variation_db) { | |
423 warning("Variation database must be attached to core database to " . | |
424 "retrieve variation information" ); | |
425 return ''; | |
426 } | |
427 my $indAdaptor = $variation_db->get_IndividualAdaptor(); | |
428 my $ref_name = $indAdaptor->get_reference_strain_name; | |
429 return [] if ($self->strain_name eq $ref_name); | |
430 $with_coverage ||= 0; #by default, get all AlleleFeatures | |
431 if ($with_coverage == 1){ | |
432 my $new_allele_features = $self->_filter_af_by_coverage($self->{'alleleFeatures'}); | |
433 return $new_allele_features || []; | |
434 } | |
435 | |
436 return $self->{'alleleFeatures'} || []; | |
437 } | |
438 | |
439 =head2 get_all_differences_StrainSlice | |
440 | |
441 Arg[1] : Bio::EnsEMBL::StrainSlice $ss | |
442 Example : my $differences = $strainSlice->get_all_differences_StrainSlice($ss) | |
443 Description : Gets differences between 2 StrainSlice objects | |
444 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature | |
445 Exceptions : thrown on bad argument | |
446 Caller : general | |
447 | |
448 =cut | |
449 | |
450 sub get_all_differences_StrainSlice{ | |
451 my $self = shift; | |
452 my $strainSlice = shift; | |
453 | |
454 if (!ref($strainSlice) || !$strainSlice->isa('Bio::EnsEMBL::StrainSlice')){ | |
455 throw('Bio::EnsEMBL::StrainSlice arg expected'); | |
456 } | |
457 if ( @{$self->{'alleleFeatures'}} == 0 && @{$strainSlice->{'alleleFeatures'}} == 0){ | |
458 return undef; #there are no differences in any of the Strains | |
459 | |
460 } | |
461 my $differences; #differences between strains | |
462 if (@{$strainSlice->{'alleleFeatures'}} == 0){ | |
463 #need to create a copy of VariationFeature | |
464 foreach my $difference (@{$self->{'alleleFeatures'}}){ | |
465 my %vf = %$difference; | |
466 push @{$differences},bless \%vf,ref($difference); | |
467 } | |
468 } | |
469 elsif (@{$self->{'alleleFeatures'}} == 0){ | |
470 #need to create a copy of VariationFeature, but changing the allele by the allele in the reference | |
471 foreach my $difference (@{$strainSlice->{'alleleFeatures'}}){ | |
472 push @{$differences}, $strainSlice->_convert_difference($difference); | |
473 } | |
474 } | |
475 else{ | |
476 #both strains have differences | |
477 #create a hash with the differences in the self strain slice | |
478 my %variation_features_self = map {$_->start => $_} @{$self->{'alleleFeatures'}}; | |
479 foreach my $difference (@{$strainSlice->{'alleleFeatures'}}){ | |
480 #there is no difference in the other strain slice, convert the allele | |
481 if (!defined $variation_features_self{$difference->start}){ | |
482 push @{$differences},$strainSlice->_convert_difference($difference); | |
483 } | |
484 else{ | |
485 #if it is defined and have the same allele, delete from the hash | |
486 if ($variation_features_self{$difference->start}->allele_string eq $difference->allele_string){ | |
487 delete $variation_features_self{$difference->start}; | |
488 } | |
489 } | |
490 } | |
491 #and copy the differences that in the self | |
492 foreach my $difference (values %variation_features_self){ | |
493 my %vf = %$difference; | |
494 push @{$differences},bless \%vf,ref($difference); | |
495 } | |
496 | |
497 } | |
498 #need to map differences to the self | |
499 my $mapper = $self->mapper(); #now that we have the differences, map them in the StrainSlice | |
500 # print Dumper($mapper); | |
501 my @results; | |
502 foreach my $difference (@{$differences}){ | |
503 @results = $mapper->map_coordinates('Slice',$difference->start,$difference->end,$difference->strand,'Slice'); | |
504 #we can have 3 possibilities: | |
505 #the difference is an insertion and when mapping returns the boundaries of the insertion in the StrainSlice | |
506 if (@results == 2){ | |
507 #the first position in the result is the beginning of the insertion | |
508 if($results[0]->start < $results[1]->start){ | |
509 $difference->start($results[0]->end+1); | |
510 $difference->end($results[1]->start-1); | |
511 } | |
512 else{ | |
513 $difference->start($results[1]->end+1); | |
514 $difference->end($results[0]->start-1); | |
515 } | |
516 $difference->strand($results[0]->strand); | |
517 } | |
518 else{ | |
519 #it can be either a SNP or a deletion, and we have the coordinates in the result, etither a Bio::EnsEMBL::Mapper::Coordinate | |
520 # or a Bio::EnsEMBL::Mapper::IndelCoordinate | |
521 # print "Difference: ",$difference->start, "-", $difference->end,"strand ",$difference->strand,"\n"; | |
522 $difference->start($results[0]->start); | |
523 $difference->end($results[0]->end); | |
524 $difference->strand($results[0]->strand); | |
525 } | |
526 } | |
527 | |
528 return $differences; | |
529 } | |
530 | |
531 #for a given VariationFeatures, converts the allele into the reference allele and returns a new list with | |
532 #the converted VariationFeatures | |
533 sub _convert_difference{ | |
534 my $self = shift; | |
535 my $difference = shift; | |
536 my %new_vf = %$difference; #make a copy of the variationFeature | |
537 #and change the allele with the one from the reference Slice | |
538 $new_vf{'allele_string'} = $self->SUPER::subseq($difference->start,$difference->end,$difference->strand); | |
539 return bless \%new_vf,ref($difference); | |
540 } | |
541 | |
542 =head2 sub_Slice | |
543 | |
544 Arg 1 : int $start | |
545 Arg 2 : int $end | |
546 Arge [3] : int $strand | |
547 Example : none | |
548 Description: Makes another StrainSlice that covers only part of this slice | |
549 with the appropriate differences to the reference Slice | |
550 If a slice is requested which lies outside of the boundaries | |
551 of this function will return undef. This means that | |
552 behaviour will be consistant whether or not the slice is | |
553 attached to the database (i.e. if there is attached sequence | |
554 to the slice). Alternatively the expand() method or the | |
555 SliceAdaptor::fetch_by_region method can be used instead. | |
556 Returntype : Bio::EnsEMBL::StrainSlice or undef if arguments are wrong | |
557 Exceptions : thrown when trying to get the subSlice in the middle of a | |
558 insertion | |
559 Caller : general | |
560 | |
561 =cut | |
562 | |
563 sub sub_Slice { | |
564 my ( $self, $start, $end, $strand ) = @_; | |
565 my $mapper = $self->mapper(); | |
566 #finally map from the Slice to the Strain | |
567 my @results = $mapper->map_coordinates('StrainSlice',$start,$end,$strand,'StrainSlice'); | |
568 my $new_start; | |
569 my $new_end; | |
570 my $new_strand; | |
571 my $new_seq; | |
572 | |
573 #Get need start and end for the subSlice of the StrainSlice | |
574 my @results_ordered = sort {$a->start <=> $b->start} @results; | |
575 $new_start = $results_ordered[0]->start(); | |
576 $new_strand = $results_ordered[0]->strand() if (ref($results_ordered[0]) eq 'Bio::EnsEMBL::Mapper::Coordinate'); | |
577 $new_strand = $results_ordered[-1]->strand() if (ref($results_ordered[-1]) eq 'Bio::EnsEMBL::Mapper::Coordinate'); | |
578 $new_end = $results_ordered[-1]->end(); #get last element of the array, the end of the slice | |
579 | |
580 my $subSlice = $self->SUPER::sub_Slice($new_start,$new_end,$new_strand); | |
581 $subSlice->{'strain_name'} = $self->{'strain_name'}; | |
582 | |
583 my $new_variations; #reference to an array that will contain the variationFeatures in the new subSlice | |
584 #update the VariationFeatures in the sub_Slice of the Strain | |
585 my $vf_start; | |
586 my $vf_end; | |
587 my $offset = $subSlice->start - $self->start; | |
588 | |
589 foreach my $variationFeature (@{$self->{'alleleFeatures'}}){ | |
590 #calculate the new position of the variation_feature in the subSlice | |
591 $vf_start = $variationFeature->start - $offset; | |
592 $vf_end = $variationFeature->end - $offset; | |
593 if ($vf_start >= 1 and $vf_end <= $subSlice->length){ | |
594 #copy the variationFeature | |
595 my %new_vf; | |
596 %new_vf = %$variationFeature; | |
597 #and shift to the new coordinates | |
598 $new_vf{'start'} = $vf_start; | |
599 $new_vf{'end'} = $vf_end; | |
600 my $test = bless \%new_vf, ref($variationFeature); | |
601 push @{$new_variations}, $test; | |
602 } | |
603 } | |
604 $subSlice->{'alleleFeatures'} = $new_variations; | |
605 return $subSlice; | |
606 | |
607 } | |
608 | |
609 =head2 ref_subseq | |
610 | |
611 Arg [1] : int $startBasePair | |
612 relative to start of slice, which is 1. | |
613 Arg [2] : int $endBasePair | |
614 relative to start of slice. | |
615 Arg [3] : (optional) int $strand | |
616 The strand of the slice to obtain sequence from. Default | |
617 value is 1. | |
618 Description: returns string of dna from reference sequence | |
619 Returntype : txt | |
620 Exceptions : end should be at least as big as start | |
621 strand must be set | |
622 Caller : general | |
623 | |
624 =cut | |
625 | |
626 sub ref_subseq{ | |
627 my $self = shift; | |
628 my $start = shift; | |
629 my $end = shift; | |
630 my $strand = shift; | |
631 # special case for in-between (insert) coordinates | |
632 return '' if($start == $end + 1); | |
633 | |
634 my $subseq; | |
635 if($self->adaptor){ | |
636 my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor(); | |
637 $subseq = ${$seqAdaptor->fetch_by_Slice_start_end_strand | |
638 ( $self, $start, | |
639 $end, $strand )}; | |
640 } else { | |
641 ## check for gap at the beginning and pad it with Ns | |
642 if ($start < 1) { | |
643 $subseq = "N" x (1 - $start); | |
644 $start = 1; | |
645 } | |
646 $subseq .= substr ($self->seq(), $start-1, $end - $start + 1); | |
647 ## check for gap at the end and pad it with Ns | |
648 if ($end > $self->length()) { | |
649 $subseq .= "N" x ($end - $self->length()); | |
650 } | |
651 reverse_comp(\$subseq) if($strand == -1); | |
652 } | |
653 return $subseq; | |
654 } | |
655 | |
656 =head2 subseq | |
657 | |
658 Arg [1] : int $startBasePair | |
659 relative to start of slice, which is 1. | |
660 Arg [2] : int $endBasePair | |
661 relative to start of slice. | |
662 Arg [3] : (optional) int $strand | |
663 The strand of the slice to obtain sequence from. Default | |
664 value is 1. | |
665 Description: returns string of dna sequence | |
666 Returntype : txt | |
667 Exceptions : end should be at least as big as start | |
668 strand must be set | |
669 Caller : general | |
670 | |
671 =cut | |
672 | |
673 sub subseq { | |
674 my ( $self, $start, $end, $strand ) = @_; | |
675 | |
676 if ( $end+1 < $start ) { | |
677 throw("End coord + 1 is less than start coord"); | |
678 } | |
679 | |
680 # handle 'between' case for insertions | |
681 return '' if( $start == $end + 1); | |
682 | |
683 $strand = 1 unless(defined $strand); | |
684 | |
685 if ( $strand != -1 && $strand != 1 ) { | |
686 throw("Invalid strand [$strand] in call to Slice::subseq."); | |
687 } | |
688 | |
689 my $subseq; | |
690 my $seq; | |
691 if($self->adaptor){ | |
692 | |
693 | |
694 $seq = $self->seq; | |
695 reverse_comp(\$seq) if ($strand == -1); | |
696 $subseq = substr($seq,$start-1,$end - $start + 1); | |
697 } | |
698 else { | |
699 ## check for gap at the beginning and pad it with Ns | |
700 if ($start < 1) { | |
701 $subseq = "N" x (1 - $start); | |
702 $start = 1; | |
703 } | |
704 $subseq .= substr ($self->seq(), $start-1, $end - $start + 1); | |
705 ## check for gap at the end and pad it with Ns | |
706 if ($end > $self->length()) { | |
707 $subseq .= "N" x ($end - $self->length()); | |
708 } | |
709 reverse_comp(\$subseq) if($strand == -1); | |
710 } | |
711 return $subseq; | |
712 | |
713 } | |
714 | |
715 | |
716 sub mapper{ | |
717 my $self = shift; | |
718 | |
719 if (@_) { | |
720 delete $self->{'mapper'}; | |
721 } | |
722 if(!defined $self->{'mapper'}){ | |
723 #create the mapper between the Slice and StrainSlice | |
724 my $mapper = Bio::EnsEMBL::Mapper->new('Slice','StrainSlice'); | |
725 #align with Slice | |
726 #get all the VariationFeatures in the strain Slice, from start to end in the Slice | |
727 my @variation_features_ordered = sort {$a->start() <=> $b->start()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'}); | |
728 | |
729 my $start_slice = 1; | |
730 my $end_slice; | |
731 my $start_strain = 1; | |
732 my $end_strain; | |
733 my $length_allele; | |
734 foreach my $variation_feature (@variation_features_ordered){ | |
735 #we have a insertion/deletion: marks the beginning of new slice move coordinates | |
736 if ($variation_feature->length_diff != 0){ | |
737 $length_allele = $variation_feature->length + $variation_feature->length_diff(); | |
738 $end_slice = $variation_feature->start() - 1; | |
739 | |
740 if ($end_slice >= $start_slice){ | |
741 $end_strain = $end_slice - $start_slice + $start_strain; | |
742 #add the sequence that maps | |
743 $mapper->add_map_coordinates('Slice',$start_slice,$end_slice,1,'StrainSlice',$start_strain,$end_strain); | |
744 #add the indel | |
745 $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $variation_feature->length,1,'StrainSlice',$end_strain+1,$end_strain + $length_allele); | |
746 $start_strain = $end_strain + $length_allele + 1; | |
747 } | |
748 else{ | |
749 #add the indel | |
750 $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $variation_feature->length,1,'StrainSlice',$end_strain+1,$end_strain + $length_allele); | |
751 $start_strain += $length_allele; | |
752 } | |
753 $start_slice = $end_slice + $variation_feature->length+ 1; | |
754 } | |
755 } | |
756 if ($start_slice <= $self->length){ | |
757 $mapper->add_map_coordinates('Slice',$start_slice,$self->length,1,'StrainSlice',$start_strain,$start_strain + $self->length - $start_slice); | |
758 } | |
759 $self->{'mapper'} = $mapper; | |
760 } | |
761 return $self->{'mapper'}; | |
762 } | |
763 | |
764 =head2 get_all_differences_Slice | |
765 | |
766 Description : DEPRECATED use get_all_AlleleFeatures instead | |
767 | |
768 =cut | |
769 | |
770 sub get_all_differences_Slice{ | |
771 my $self = shift; | |
772 | |
773 deprecate('Use get_all_differences_Slice instead'); | |
774 return $self->get_all_AlleleFeatures_Slice(@_); | |
775 } | |
776 | |
777 =head2 get_all_VariationFeatures | |
778 | |
779 Arg[1] : int $with_coverage (optional) | |
780 Description :returns all alleleFeatures features on this slice. | |
781 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature | |
782 Exceptions : none | |
783 Caller : contigview, snpview | |
784 | |
785 =cut | |
786 | |
787 sub get_all_VariationFeatures { | |
788 my $self = shift; | |
789 my $with_coverage = shift; | |
790 $with_coverage ||= 0; | |
791 return $self->get_all_AlleleFeatures_Slice($with_coverage); | |
792 } | |
793 | |
794 =head2 get_original_seq_region_position | |
795 | |
796 Arg [1] : int $position | |
797 relative to start of slice, which is 1. | |
798 Description: Placeholder method - this method has no explicit use beyond | |
799 providiing compatibility with AlignSlice. To map positions | |
800 between the StrainSlice and the reference slice, use the | |
801 mapper and its methods. | |
802 Returntype : ($strainSlice, $seq_region_position), an array where the first | |
803 element is a Bio::EnsEMBL::StrainSlice and the second one is the | |
804 requested seq_region_position. | |
805 Exceptions : none | |
806 Caller : general | |
807 | |
808 =cut | |
809 | |
810 sub get_original_seq_region_position { | |
811 my $self = shift; | |
812 my $position = shift; | |
813 #coordinates in the AlignSlice and Slice are the same, so far will return the same Slice | |
814 #and coordinate | |
815 return ($self,$position); | |
816 } | |
817 | |
818 | |
819 =head2 remove_indels | |
820 | |
821 Args : none | |
822 Example : $strainSlice->remove_indels(); | |
823 Description : Removes insertions and deletions from the allele features | |
824 of this object | |
825 ReturnType : none | |
826 Exceptions : none | |
827 Caller : webteam | |
828 | |
829 =cut | |
830 | |
831 sub remove_indels { | |
832 my $self = shift; | |
833 | |
834 my @new_afs = grep { $_->variation->var_class ne 'in-del' } @{$self->{'alleleFeatures'}}; | |
835 | |
836 $self->{'alleleFeatures'} = \@new_afs; | |
837 } | |
838 | |
839 1; |