comparison variant_effect_predictor/Bio/EnsEMBL/StrainSlice.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::StrainSlice - SubClass of the Slice. Represents the slice
24 of the genome for a certain strain (applying the variations)
25
26 =head1 SYNOPSIS
27
28 $sa = $db->get_SliceAdaptor;
29
30 $slice =
31 $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
32
33 $strainSlice = $slice->get_by_strain($strain_name);
34
35 # get the sequence from the Strain Slice
36 my $seq = $strainSlice->seq();
37 print $seq;
38
39 # get allele features between this StrainSlice and the reference
40 my $afs = $strainSlice->get_all_AlleleFeatures_Slice();
41 foreach my $af ( @{$afs} ) {
42 print "AlleleFeature in position ", $af->start, "-", $af->end,
43 " in strain with allele ", $af->allele_string, "\n";
44 }
45
46 # compare a strain against another strain
47 my $strainSlice_2 = $slice->get_by_strain($strain_name_2);
48 my $differences =
49 $strainSlice->get_all_differences_StrainSlice($strainSlice_2);
50
51 foreach my $difference ( @{$differences} ) {
52 print "Difference in position ", $difference->start, "-",
53 $difference->end(), " in strain with allele ",
54 $difference->allele_string(), "\n";
55 }
56
57 =head1 DESCRIPTION
58
59 A StrainSlice object represents a region of a genome for a certain
60 strain. It can be used to retrieve sequence or features from a strain.
61
62 =head1 METHODS
63
64 =cut
65
66 package Bio::EnsEMBL::StrainSlice;
67 use vars qw(@ISA);
68 use strict;
69
70 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
71 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
72 use Bio::EnsEMBL::Slice;
73 use Bio::EnsEMBL::Mapper;
74 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
75
76 @ISA = qw(Bio::EnsEMBL::Slice);
77
78
79 =head2 new
80
81 Arg[1] : Bio::EnsEMBL::Slice $slice
82 Arg[2] : string $strain_name
83 Example : $strainSlice = Bio::EnsEMBL::StrainSlice->new(-.... => ,
84 -strain_name => $strain_name);
85 Description : Creates a new Bio::EnsEMBL::StrainSlice object that will contain a shallow copy of the
86 Slice object, plus additional information such as the Strain this Slice refers to
87 and listref of Bio::EnsEMBL::Variation::AlleleFeatures of differences with the
88 reference sequence
89 ReturnType : Bio::EnsEMBL::StrainSlice
90 Exceptions : none
91 Caller : general
92
93 =cut
94
95 sub new{
96 my $caller = shift;
97 my $class = ref($caller) || $caller;
98
99 my ($strain_name) = rearrange(['STRAIN_NAME'],@_);
100
101 my $self = $class->SUPER::new(@_);
102
103 $self->{'strain_name'} = $strain_name;
104
105 if(!$self->adaptor()) {
106 warning('Cannot get new StrainSlice features without attached adaptor');
107 return '';
108 }
109 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
110
111 unless($variation_db) {
112 warning("Variation database must be attached to core database to " .
113 "retrieve variation information" );
114 return '';
115 }
116
117 my $af_adaptor = $variation_db->get_AlleleFeatureAdaptor;
118
119 if( $af_adaptor ) {
120 #get the Individual for the given strain
121 my $ind_adaptor = $variation_db->get_IndividualAdaptor;
122
123 if ($ind_adaptor){
124 my $individual = shift @{$ind_adaptor->fetch_all_by_name($self->{'strain_name'})}; #the name should be unique for a strain
125 #check that the individua returned isin the database
126
127 if (defined $individual){
128 my $allele_features = $af_adaptor->fetch_all_by_Slice($self,$individual);
129 #warning("No strain genotype data available for Slice ".$self->name." and Strain ".$individual->name) if ! defined $allele_features->[0];
130 my $vf_ids = {}; #hash containing the relation vf_id->af
131 $self->{'_strain'} = $individual;
132 map {defined $_->{'_variation_feature_id'} ? $vf_ids->{$_->{'_variation_feature_id'}} = $_ : ''
133 } @{$allele_features};
134 # my $new_allele_features = $self->_filter_af_by_coverage($allele_features);
135 # $self->{'alleleFeatures'} = $new_allele_features;
136 $self->{'alleleFeatures'} = $allele_features || [];
137 $self->{'_vf_ids'} = $vf_ids;
138 return $self;
139 }
140 else{
141 warning("Strain ($self->{strain_name}) not in the database");
142 return $self;
143 }
144 }
145 else{
146 warning("Not possible to retrieve IndividualAdaptor from the variation database");
147 return '';
148 }
149 } else {
150 warning("Not possible to retrieve VariationFeatureAdaptor from variation database");
151 return '';
152 }
153 }
154
155 =head2 _filter_af_by_coverage
156
157 Arg [1] : listref to Bio::EnsEMBL::Variation::AlleleFeatures $allele_features
158 Example : my $new_list_allele_features = $strainSlice->_filter_af_by_coverage($allele_features);
159 Description : For a list of allele features, gets a new list where they are filter depending on coverage
160 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
161 Exceptions : none
162 Caller : internal function
163
164 =cut
165
166 sub _filter_af_by_coverage{
167 my $self = shift;
168 my $allele_features = shift;
169
170 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
171
172 unless($variation_db) {
173 warning("Variation database must be attached to core database to " .
174 "retrieve variation information" );
175 return '';
176 }
177
178 my $rc_adaptor = $variation_db->get_ReadCoverageAdaptor();
179 #this is ugly, but ReadCoverage is always defined in the positive strand
180
181 ### EK : - it looks like the arguments to fetch_all_by_Slice_Sample_depth have changed
182 ### passing 1 will only get you the coverage of level 1
183 ### by omitting the parameter we take into account all coverage regions
184 # my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'},1);
185 my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'});
186 my $new_af;
187 foreach my $af (@{$allele_features}){
188 foreach my $rc (@{$rcs}){
189 if ($af->start <= $rc->end and $af->start >= $rc->start){
190 push @{$new_af}, $af;
191 last;
192 }
193 }
194 }
195
196 return $new_af;
197 }
198
199
200 =head2 strain_name
201
202 Arg [1] : (optional) string $strain_name
203 Example : my $strain_name = $strainSlice->strain_name();
204 Description : Getter/Setter for the name of the strain
205 ReturnType : string
206 Exceptions : none
207 Caller : general
208
209 =cut
210
211 sub strain_name{
212 my $self = shift;
213 if (@_){
214 $self->{'strain_name'} = shift @_;
215 }
216 return $self->{'strain_name'};
217 }
218
219
220 =head2 display_Slice_name
221
222 Args : none
223 Example : my $strain_name = $strainSlice->display_Slice_name();
224 Description : Getter for the name of the strain
225 ReturnType : string
226 Exceptions : none
227 Caller : webteam
228
229 =cut
230
231 sub display_Slice_name{
232 my $self = shift;
233
234 return $self->strain_name;
235 }
236
237 =head2 seq
238
239 Arg [1] : int $with_coverage (optional)
240 Example : print "SEQUENCE = ", $strainSlice->seq();
241 Description: Returns the sequence of the region represented by this
242 slice formatted as a string in the strain. If flag with_coverage
243 is set to 1, returns sequence if there is coverage in the region
244 Returntype : string
245 Exceptions : none
246 Caller : general
247
248 =cut
249
250 sub seq {
251 my $self = shift;
252 my $with_coverage = shift;
253
254 $with_coverage ||= 0;
255
256 # special case for in-between (insert) coordinates
257 return '' if($self->start() == $self->end() + 1);
258
259 return $self->{'seq'} if($self->{'seq'});
260
261 if($self->adaptor()) {
262 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
263 my $reference_sequence = $seqAdaptor->fetch_by_Slice_start_end_strand($self,1,undef,1); #get the reference sequence for that slice
264
265 #apply all differences to the reference sequence
266 #first, in case there are any indels, create the new sequence (containing the '-' bases)
267 # sort edits in reverse order to remove complication of
268 # adjusting downstream edits
269 my @indels_ordered = sort {$b->start() <=> $a->start()} @{$self->{'alignIndels'}} if (defined $self->{'alignIndels'});
270
271 foreach my $vf (@indels_ordered){
272 $vf->apply_edit($reference_sequence); #change, in the reference sequence, the vf
273 }
274
275 #need to find coverage information if diffe
276 # sort edits in reverse order to remove complication of
277 # adjusting downstream edits
278 my @variation_features_ordered = sort {$b->start() <=> $a->start()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'});
279
280 foreach my $vf (@variation_features_ordered){
281 $vf->apply_edit($reference_sequence); #change, in the reference sequence, the vf
282 }
283
284 #need to find coverage information if different from reference
285 my $indAdaptor = $self->adaptor->db->get_db_adaptor('variation')->get_IndividualAdaptor;
286 my $ref_strain = $indAdaptor->get_reference_strain_name;
287 $self->_add_coverage_information($reference_sequence) if ($with_coverage == 1 && $self->strain_name ne $ref_strain);
288 return substr(${$reference_sequence},0,1) if ($self->length == 1);
289 return substr(${$reference_sequence},0,$self->expanded_length); #returns the reference sequence, applying the variationFeatures. Remove additional bases added due to indels
290 }
291
292 # no attached sequence, and no db, so just return Ns
293 return 'N' x $self->length();
294 }
295
296 sub expanded_length() {
297 my $self = shift;
298
299 my $length = $self->SUPER::length();
300
301 foreach my $af(@{$self->{'alleleFeatures'}}) {
302 $length += $af->length_diff() if $af->length_diff > 0;
303 }
304
305 return $length;
306 }
307
308
309
310 sub _add_coverage_information{
311 my $self = shift;
312 my $reference_sequence = shift;
313
314 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
315
316 unless($variation_db) {
317 warning("Variation database must be attached to core database to " .
318 "retrieve variation information" );
319 return '';
320 }
321
322 my $rc_adaptor = $variation_db->get_ReadCoverageAdaptor();
323 ### EK : - it looks like the arguments to fetch_all_by_Slice_Sample_depth have changed
324 ### passing 1 will only get you the coverage of level 1
325 ### by omitting the parameter we take into account all coverage regions
326 # my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'},1);
327 my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'});
328 my $rcs_sorted;
329 @{$rcs_sorted} = sort {$a->start <=> $b->start} @{$rcs} if ($self->strand == -1);
330 $rcs = $rcs_sorted if ($self->strand == -1);
331 my $start = 1;
332
333
334 # wm2 - new code to mask sequence, instead starts with masked string
335 # and unmasks seq where there is read coverage
336
337 # get all length-changing vars
338 my @indels_ordered = sort {$a->start() <=> $b->start()} @{$self->{'alignIndels'}} if (defined $self->{'alignIndels'});
339
340 my $masked_seq = '~' x length($$reference_sequence);
341
342 foreach my $rc(@{$rcs}) {
343 my ($start, $end) = ($rc->start, $rc->end);
344
345 # adjust region for indels
346 foreach my $indel(@indels_ordered) {
347 next if $rc->start > $end;
348
349 # if within RC region, only need adjust the end
350 $start += $indel->length_diff unless $indel->start > $start;
351 $end += $indel->length_diff;
352 }
353
354 # adjust coords for seq boundaries
355 $start = 1 if $start < 1;
356 $end = CORE::length($masked_seq) if $end > CORE::length($masked_seq);
357
358 # now unmask the sequence using $$reference_sequence
359 substr($masked_seq, $start - 1, $end - $start + 1) = substr($$reference_sequence, $start - 1, $end - $start + 1);
360 }
361
362 # wm2 - old code, starts with sequence and masks regions between read coverage - BUGGY
363 # foreach my $rc (@{$rcs}){
364 # $rc->start(1) if ($rc->start < 0); #if the region lies outside the boundaries of the slice
365 # $rc->end($self->end - $self->start + 1) if ($rc->end + $self->start > $self->end);
366 #
367 # warn "Adjusted: ", $rc->start, "-", $rc->end;
368 #
369 # warn "Covering from ", $start, " over ", ($rc->start - $start - 1), " bases";
370 #
371 # substr($$reference_sequence, $start-1,($rc->start - $start - 1),'~' x ($rc->start - $start - 1)) if ($rc->start - 1 > $start);
372 # $start = $rc->end;
373 #
374 # }
375 # substr($$reference_sequence, $start, ($self->length - $start) ,'~' x ($self->length - $start)) if ($self->length -1 > $start);
376
377 # copy the masked sequence to the reference sequence
378 $$reference_sequence = $masked_seq;
379 }
380
381
382 =head2 get_AlleleFeature
383
384 Arg[1] : Bio::EnsEMBL::Variation::VariationFeature $vf
385 Example : my $af = $strainSlice->get_AlleleFeature($vf);
386 Description : Returns the AlleleFeature object associated with the VariationFeature (if any)
387 ReturnType : Bio::EnsEMBL::Variation::AlleleFeature
388 Exceptions : none
389 Caller : general
390
391 =cut
392
393 sub get_AlleleFeature{
394 my $self = shift;
395 my $vf = shift;
396
397 my $af;
398 #look at the hash containing the relation vf_id->alleleFeature, if present, return object, otherwise, undef
399 $af = $self->{'_vf_ids'}->{$vf->dbID} if (defined $self->{'_vf_ids'}->{$vf->dbID});
400 return $af;
401 }
402
403
404 =head2 get_all_AlleleFeatures_Slice
405
406 Arg[1] : int $with_coverage (optional)
407 Example : my $af = $strainSlice->get_all_AlleleFeatures_Slice()
408 Description : Gets all AlleleFeatures between the StrainSlice object and the Slice is defined.
409 If argument $with_coverage set to 1, returns only AF if they have coverage information
410 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
411 Exceptions : none
412 Caller : general
413
414 =cut
415
416 sub get_all_AlleleFeatures_Slice{
417 my $self = shift;
418 my $with_coverage = shift;
419
420 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
421
422 unless($variation_db) {
423 warning("Variation database must be attached to core database to " .
424 "retrieve variation information" );
425 return '';
426 }
427 my $indAdaptor = $variation_db->get_IndividualAdaptor();
428 my $ref_name = $indAdaptor->get_reference_strain_name;
429 return [] if ($self->strain_name eq $ref_name);
430 $with_coverage ||= 0; #by default, get all AlleleFeatures
431 if ($with_coverage == 1){
432 my $new_allele_features = $self->_filter_af_by_coverage($self->{'alleleFeatures'});
433 return $new_allele_features || [];
434 }
435
436 return $self->{'alleleFeatures'} || [];
437 }
438
439 =head2 get_all_differences_StrainSlice
440
441 Arg[1] : Bio::EnsEMBL::StrainSlice $ss
442 Example : my $differences = $strainSlice->get_all_differences_StrainSlice($ss)
443 Description : Gets differences between 2 StrainSlice objects
444 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
445 Exceptions : thrown on bad argument
446 Caller : general
447
448 =cut
449
450 sub get_all_differences_StrainSlice{
451 my $self = shift;
452 my $strainSlice = shift;
453
454 if (!ref($strainSlice) || !$strainSlice->isa('Bio::EnsEMBL::StrainSlice')){
455 throw('Bio::EnsEMBL::StrainSlice arg expected');
456 }
457 if ( @{$self->{'alleleFeatures'}} == 0 && @{$strainSlice->{'alleleFeatures'}} == 0){
458 return undef; #there are no differences in any of the Strains
459
460 }
461 my $differences; #differences between strains
462 if (@{$strainSlice->{'alleleFeatures'}} == 0){
463 #need to create a copy of VariationFeature
464 foreach my $difference (@{$self->{'alleleFeatures'}}){
465 my %vf = %$difference;
466 push @{$differences},bless \%vf,ref($difference);
467 }
468 }
469 elsif (@{$self->{'alleleFeatures'}} == 0){
470 #need to create a copy of VariationFeature, but changing the allele by the allele in the reference
471 foreach my $difference (@{$strainSlice->{'alleleFeatures'}}){
472 push @{$differences}, $strainSlice->_convert_difference($difference);
473 }
474 }
475 else{
476 #both strains have differences
477 #create a hash with the differences in the self strain slice
478 my %variation_features_self = map {$_->start => $_} @{$self->{'alleleFeatures'}};
479 foreach my $difference (@{$strainSlice->{'alleleFeatures'}}){
480 #there is no difference in the other strain slice, convert the allele
481 if (!defined $variation_features_self{$difference->start}){
482 push @{$differences},$strainSlice->_convert_difference($difference);
483 }
484 else{
485 #if it is defined and have the same allele, delete from the hash
486 if ($variation_features_self{$difference->start}->allele_string eq $difference->allele_string){
487 delete $variation_features_self{$difference->start};
488 }
489 }
490 }
491 #and copy the differences that in the self
492 foreach my $difference (values %variation_features_self){
493 my %vf = %$difference;
494 push @{$differences},bless \%vf,ref($difference);
495 }
496
497 }
498 #need to map differences to the self
499 my $mapper = $self->mapper(); #now that we have the differences, map them in the StrainSlice
500 # print Dumper($mapper);
501 my @results;
502 foreach my $difference (@{$differences}){
503 @results = $mapper->map_coordinates('Slice',$difference->start,$difference->end,$difference->strand,'Slice');
504 #we can have 3 possibilities:
505 #the difference is an insertion and when mapping returns the boundaries of the insertion in the StrainSlice
506 if (@results == 2){
507 #the first position in the result is the beginning of the insertion
508 if($results[0]->start < $results[1]->start){
509 $difference->start($results[0]->end+1);
510 $difference->end($results[1]->start-1);
511 }
512 else{
513 $difference->start($results[1]->end+1);
514 $difference->end($results[0]->start-1);
515 }
516 $difference->strand($results[0]->strand);
517 }
518 else{
519 #it can be either a SNP or a deletion, and we have the coordinates in the result, etither a Bio::EnsEMBL::Mapper::Coordinate
520 # or a Bio::EnsEMBL::Mapper::IndelCoordinate
521 # print "Difference: ",$difference->start, "-", $difference->end,"strand ",$difference->strand,"\n";
522 $difference->start($results[0]->start);
523 $difference->end($results[0]->end);
524 $difference->strand($results[0]->strand);
525 }
526 }
527
528 return $differences;
529 }
530
531 #for a given VariationFeatures, converts the allele into the reference allele and returns a new list with
532 #the converted VariationFeatures
533 sub _convert_difference{
534 my $self = shift;
535 my $difference = shift;
536 my %new_vf = %$difference; #make a copy of the variationFeature
537 #and change the allele with the one from the reference Slice
538 $new_vf{'allele_string'} = $self->SUPER::subseq($difference->start,$difference->end,$difference->strand);
539 return bless \%new_vf,ref($difference);
540 }
541
542 =head2 sub_Slice
543
544 Arg 1 : int $start
545 Arg 2 : int $end
546 Arge [3] : int $strand
547 Example : none
548 Description: Makes another StrainSlice that covers only part of this slice
549 with the appropriate differences to the reference Slice
550 If a slice is requested which lies outside of the boundaries
551 of this function will return undef. This means that
552 behaviour will be consistant whether or not the slice is
553 attached to the database (i.e. if there is attached sequence
554 to the slice). Alternatively the expand() method or the
555 SliceAdaptor::fetch_by_region method can be used instead.
556 Returntype : Bio::EnsEMBL::StrainSlice or undef if arguments are wrong
557 Exceptions : thrown when trying to get the subSlice in the middle of a
558 insertion
559 Caller : general
560
561 =cut
562
563 sub sub_Slice {
564 my ( $self, $start, $end, $strand ) = @_;
565 my $mapper = $self->mapper();
566 #finally map from the Slice to the Strain
567 my @results = $mapper->map_coordinates('StrainSlice',$start,$end,$strand,'StrainSlice');
568 my $new_start;
569 my $new_end;
570 my $new_strand;
571 my $new_seq;
572
573 #Get need start and end for the subSlice of the StrainSlice
574 my @results_ordered = sort {$a->start <=> $b->start} @results;
575 $new_start = $results_ordered[0]->start();
576 $new_strand = $results_ordered[0]->strand() if (ref($results_ordered[0]) eq 'Bio::EnsEMBL::Mapper::Coordinate');
577 $new_strand = $results_ordered[-1]->strand() if (ref($results_ordered[-1]) eq 'Bio::EnsEMBL::Mapper::Coordinate');
578 $new_end = $results_ordered[-1]->end(); #get last element of the array, the end of the slice
579
580 my $subSlice = $self->SUPER::sub_Slice($new_start,$new_end,$new_strand);
581 $subSlice->{'strain_name'} = $self->{'strain_name'};
582
583 my $new_variations; #reference to an array that will contain the variationFeatures in the new subSlice
584 #update the VariationFeatures in the sub_Slice of the Strain
585 my $vf_start;
586 my $vf_end;
587 my $offset = $subSlice->start - $self->start;
588
589 foreach my $variationFeature (@{$self->{'alleleFeatures'}}){
590 #calculate the new position of the variation_feature in the subSlice
591 $vf_start = $variationFeature->start - $offset;
592 $vf_end = $variationFeature->end - $offset;
593 if ($vf_start >= 1 and $vf_end <= $subSlice->length){
594 #copy the variationFeature
595 my %new_vf;
596 %new_vf = %$variationFeature;
597 #and shift to the new coordinates
598 $new_vf{'start'} = $vf_start;
599 $new_vf{'end'} = $vf_end;
600 my $test = bless \%new_vf, ref($variationFeature);
601 push @{$new_variations}, $test;
602 }
603 }
604 $subSlice->{'alleleFeatures'} = $new_variations;
605 return $subSlice;
606
607 }
608
609 =head2 ref_subseq
610
611 Arg [1] : int $startBasePair
612 relative to start of slice, which is 1.
613 Arg [2] : int $endBasePair
614 relative to start of slice.
615 Arg [3] : (optional) int $strand
616 The strand of the slice to obtain sequence from. Default
617 value is 1.
618 Description: returns string of dna from reference sequence
619 Returntype : txt
620 Exceptions : end should be at least as big as start
621 strand must be set
622 Caller : general
623
624 =cut
625
626 sub ref_subseq{
627 my $self = shift;
628 my $start = shift;
629 my $end = shift;
630 my $strand = shift;
631 # special case for in-between (insert) coordinates
632 return '' if($start == $end + 1);
633
634 my $subseq;
635 if($self->adaptor){
636 my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
637 $subseq = ${$seqAdaptor->fetch_by_Slice_start_end_strand
638 ( $self, $start,
639 $end, $strand )};
640 } else {
641 ## check for gap at the beginning and pad it with Ns
642 if ($start < 1) {
643 $subseq = "N" x (1 - $start);
644 $start = 1;
645 }
646 $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
647 ## check for gap at the end and pad it with Ns
648 if ($end > $self->length()) {
649 $subseq .= "N" x ($end - $self->length());
650 }
651 reverse_comp(\$subseq) if($strand == -1);
652 }
653 return $subseq;
654 }
655
656 =head2 subseq
657
658 Arg [1] : int $startBasePair
659 relative to start of slice, which is 1.
660 Arg [2] : int $endBasePair
661 relative to start of slice.
662 Arg [3] : (optional) int $strand
663 The strand of the slice to obtain sequence from. Default
664 value is 1.
665 Description: returns string of dna sequence
666 Returntype : txt
667 Exceptions : end should be at least as big as start
668 strand must be set
669 Caller : general
670
671 =cut
672
673 sub subseq {
674 my ( $self, $start, $end, $strand ) = @_;
675
676 if ( $end+1 < $start ) {
677 throw("End coord + 1 is less than start coord");
678 }
679
680 # handle 'between' case for insertions
681 return '' if( $start == $end + 1);
682
683 $strand = 1 unless(defined $strand);
684
685 if ( $strand != -1 && $strand != 1 ) {
686 throw("Invalid strand [$strand] in call to Slice::subseq.");
687 }
688
689 my $subseq;
690 my $seq;
691 if($self->adaptor){
692
693
694 $seq = $self->seq;
695 reverse_comp(\$seq) if ($strand == -1);
696 $subseq = substr($seq,$start-1,$end - $start + 1);
697 }
698 else {
699 ## check for gap at the beginning and pad it with Ns
700 if ($start < 1) {
701 $subseq = "N" x (1 - $start);
702 $start = 1;
703 }
704 $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
705 ## check for gap at the end and pad it with Ns
706 if ($end > $self->length()) {
707 $subseq .= "N" x ($end - $self->length());
708 }
709 reverse_comp(\$subseq) if($strand == -1);
710 }
711 return $subseq;
712
713 }
714
715
716 sub mapper{
717 my $self = shift;
718
719 if (@_) {
720 delete $self->{'mapper'};
721 }
722 if(!defined $self->{'mapper'}){
723 #create the mapper between the Slice and StrainSlice
724 my $mapper = Bio::EnsEMBL::Mapper->new('Slice','StrainSlice');
725 #align with Slice
726 #get all the VariationFeatures in the strain Slice, from start to end in the Slice
727 my @variation_features_ordered = sort {$a->start() <=> $b->start()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'});
728
729 my $start_slice = 1;
730 my $end_slice;
731 my $start_strain = 1;
732 my $end_strain;
733 my $length_allele;
734 foreach my $variation_feature (@variation_features_ordered){
735 #we have a insertion/deletion: marks the beginning of new slice move coordinates
736 if ($variation_feature->length_diff != 0){
737 $length_allele = $variation_feature->length + $variation_feature->length_diff();
738 $end_slice = $variation_feature->start() - 1;
739
740 if ($end_slice >= $start_slice){
741 $end_strain = $end_slice - $start_slice + $start_strain;
742 #add the sequence that maps
743 $mapper->add_map_coordinates('Slice',$start_slice,$end_slice,1,'StrainSlice',$start_strain,$end_strain);
744 #add the indel
745 $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $variation_feature->length,1,'StrainSlice',$end_strain+1,$end_strain + $length_allele);
746 $start_strain = $end_strain + $length_allele + 1;
747 }
748 else{
749 #add the indel
750 $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $variation_feature->length,1,'StrainSlice',$end_strain+1,$end_strain + $length_allele);
751 $start_strain += $length_allele;
752 }
753 $start_slice = $end_slice + $variation_feature->length+ 1;
754 }
755 }
756 if ($start_slice <= $self->length){
757 $mapper->add_map_coordinates('Slice',$start_slice,$self->length,1,'StrainSlice',$start_strain,$start_strain + $self->length - $start_slice);
758 }
759 $self->{'mapper'} = $mapper;
760 }
761 return $self->{'mapper'};
762 }
763
764 =head2 get_all_differences_Slice
765
766 Description : DEPRECATED use get_all_AlleleFeatures instead
767
768 =cut
769
770 sub get_all_differences_Slice{
771 my $self = shift;
772
773 deprecate('Use get_all_differences_Slice instead');
774 return $self->get_all_AlleleFeatures_Slice(@_);
775 }
776
777 =head2 get_all_VariationFeatures
778
779 Arg[1] : int $with_coverage (optional)
780 Description :returns all alleleFeatures features on this slice.
781 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
782 Exceptions : none
783 Caller : contigview, snpview
784
785 =cut
786
787 sub get_all_VariationFeatures {
788 my $self = shift;
789 my $with_coverage = shift;
790 $with_coverage ||= 0;
791 return $self->get_all_AlleleFeatures_Slice($with_coverage);
792 }
793
794 =head2 get_original_seq_region_position
795
796 Arg [1] : int $position
797 relative to start of slice, which is 1.
798 Description: Placeholder method - this method has no explicit use beyond
799 providiing compatibility with AlignSlice. To map positions
800 between the StrainSlice and the reference slice, use the
801 mapper and its methods.
802 Returntype : ($strainSlice, $seq_region_position), an array where the first
803 element is a Bio::EnsEMBL::StrainSlice and the second one is the
804 requested seq_region_position.
805 Exceptions : none
806 Caller : general
807
808 =cut
809
810 sub get_original_seq_region_position {
811 my $self = shift;
812 my $position = shift;
813 #coordinates in the AlignSlice and Slice are the same, so far will return the same Slice
814 #and coordinate
815 return ($self,$position);
816 }
817
818
819 =head2 remove_indels
820
821 Args : none
822 Example : $strainSlice->remove_indels();
823 Description : Removes insertions and deletions from the allele features
824 of this object
825 ReturnType : none
826 Exceptions : none
827 Caller : webteam
828
829 =cut
830
831 sub remove_indels {
832 my $self = shift;
833
834 my @new_afs = grep { $_->variation->var_class ne 'in-del' } @{$self->{'alleleFeatures'}};
835
836 $self->{'alleleFeatures'} = \@new_afs;
837 }
838
839 1;