0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::StrainSlice - SubClass of the Slice. Represents the slice
|
|
24 of the genome for a certain strain (applying the variations)
|
|
25
|
|
26 =head1 SYNOPSIS
|
|
27
|
|
28 $sa = $db->get_SliceAdaptor;
|
|
29
|
|
30 $slice =
|
|
31 $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
|
|
32
|
|
33 $strainSlice = $slice->get_by_strain($strain_name);
|
|
34
|
|
35 # get the sequence from the Strain Slice
|
|
36 my $seq = $strainSlice->seq();
|
|
37 print $seq;
|
|
38
|
|
39 # get allele features between this StrainSlice and the reference
|
|
40 my $afs = $strainSlice->get_all_AlleleFeatures_Slice();
|
|
41 foreach my $af ( @{$afs} ) {
|
|
42 print "AlleleFeature in position ", $af->start, "-", $af->end,
|
|
43 " in strain with allele ", $af->allele_string, "\n";
|
|
44 }
|
|
45
|
|
46 # compare a strain against another strain
|
|
47 my $strainSlice_2 = $slice->get_by_strain($strain_name_2);
|
|
48 my $differences =
|
|
49 $strainSlice->get_all_differences_StrainSlice($strainSlice_2);
|
|
50
|
|
51 foreach my $difference ( @{$differences} ) {
|
|
52 print "Difference in position ", $difference->start, "-",
|
|
53 $difference->end(), " in strain with allele ",
|
|
54 $difference->allele_string(), "\n";
|
|
55 }
|
|
56
|
|
57 =head1 DESCRIPTION
|
|
58
|
|
59 A StrainSlice object represents a region of a genome for a certain
|
|
60 strain. It can be used to retrieve sequence or features from a strain.
|
|
61
|
|
62 =head1 METHODS
|
|
63
|
|
64 =cut
|
|
65
|
|
66 package Bio::EnsEMBL::StrainSlice;
|
|
67 use vars qw(@ISA);
|
|
68 use strict;
|
|
69
|
|
70 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
71 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
|
|
72 use Bio::EnsEMBL::Slice;
|
|
73 use Bio::EnsEMBL::Mapper;
|
|
74 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
|
|
75
|
|
76 @ISA = qw(Bio::EnsEMBL::Slice);
|
|
77
|
|
78
|
|
79 =head2 new
|
|
80
|
|
81 Arg[1] : Bio::EnsEMBL::Slice $slice
|
|
82 Arg[2] : string $strain_name
|
|
83 Example : $strainSlice = Bio::EnsEMBL::StrainSlice->new(-.... => ,
|
|
84 -strain_name => $strain_name);
|
|
85 Description : Creates a new Bio::EnsEMBL::StrainSlice object that will contain a shallow copy of the
|
|
86 Slice object, plus additional information such as the Strain this Slice refers to
|
|
87 and listref of Bio::EnsEMBL::Variation::AlleleFeatures of differences with the
|
|
88 reference sequence
|
|
89 ReturnType : Bio::EnsEMBL::StrainSlice
|
|
90 Exceptions : none
|
|
91 Caller : general
|
|
92
|
|
93 =cut
|
|
94
|
|
95 sub new{
|
|
96 my $caller = shift;
|
|
97 my $class = ref($caller) || $caller;
|
|
98
|
|
99 my ($strain_name) = rearrange(['STRAIN_NAME'],@_);
|
|
100
|
|
101 my $self = $class->SUPER::new(@_);
|
|
102
|
|
103 $self->{'strain_name'} = $strain_name;
|
|
104
|
|
105 if(!$self->adaptor()) {
|
|
106 warning('Cannot get new StrainSlice features without attached adaptor');
|
|
107 return '';
|
|
108 }
|
|
109 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
|
|
110
|
|
111 unless($variation_db) {
|
|
112 warning("Variation database must be attached to core database to " .
|
|
113 "retrieve variation information" );
|
|
114 return '';
|
|
115 }
|
|
116
|
|
117 my $af_adaptor = $variation_db->get_AlleleFeatureAdaptor;
|
|
118
|
|
119 if( $af_adaptor ) {
|
|
120 #get the Individual for the given strain
|
|
121 my $ind_adaptor = $variation_db->get_IndividualAdaptor;
|
|
122
|
|
123 if ($ind_adaptor){
|
|
124 my $individual = shift @{$ind_adaptor->fetch_all_by_name($self->{'strain_name'})}; #the name should be unique for a strain
|
|
125 #check that the individua returned isin the database
|
|
126
|
|
127 if (defined $individual){
|
|
128 my $allele_features = $af_adaptor->fetch_all_by_Slice($self,$individual);
|
|
129 #warning("No strain genotype data available for Slice ".$self->name." and Strain ".$individual->name) if ! defined $allele_features->[0];
|
|
130 my $vf_ids = {}; #hash containing the relation vf_id->af
|
|
131 $self->{'_strain'} = $individual;
|
|
132 map {defined $_->{'_variation_feature_id'} ? $vf_ids->{$_->{'_variation_feature_id'}} = $_ : ''
|
|
133 } @{$allele_features};
|
|
134 # my $new_allele_features = $self->_filter_af_by_coverage($allele_features);
|
|
135 # $self->{'alleleFeatures'} = $new_allele_features;
|
|
136 $self->{'alleleFeatures'} = $allele_features || [];
|
|
137 $self->{'_vf_ids'} = $vf_ids;
|
|
138 return $self;
|
|
139 }
|
|
140 else{
|
|
141 warning("Strain ($self->{strain_name}) not in the database");
|
|
142 return $self;
|
|
143 }
|
|
144 }
|
|
145 else{
|
|
146 warning("Not possible to retrieve IndividualAdaptor from the variation database");
|
|
147 return '';
|
|
148 }
|
|
149 } else {
|
|
150 warning("Not possible to retrieve VariationFeatureAdaptor from variation database");
|
|
151 return '';
|
|
152 }
|
|
153 }
|
|
154
|
|
155 =head2 _filter_af_by_coverage
|
|
156
|
|
157 Arg [1] : listref to Bio::EnsEMBL::Variation::AlleleFeatures $allele_features
|
|
158 Example : my $new_list_allele_features = $strainSlice->_filter_af_by_coverage($allele_features);
|
|
159 Description : For a list of allele features, gets a new list where they are filter depending on coverage
|
|
160 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
|
|
161 Exceptions : none
|
|
162 Caller : internal function
|
|
163
|
|
164 =cut
|
|
165
|
|
166 sub _filter_af_by_coverage{
|
|
167 my $self = shift;
|
|
168 my $allele_features = shift;
|
|
169
|
|
170 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
|
|
171
|
|
172 unless($variation_db) {
|
|
173 warning("Variation database must be attached to core database to " .
|
|
174 "retrieve variation information" );
|
|
175 return '';
|
|
176 }
|
|
177
|
|
178 my $rc_adaptor = $variation_db->get_ReadCoverageAdaptor();
|
|
179 #this is ugly, but ReadCoverage is always defined in the positive strand
|
|
180
|
|
181 ### EK : - it looks like the arguments to fetch_all_by_Slice_Sample_depth have changed
|
|
182 ### passing 1 will only get you the coverage of level 1
|
|
183 ### by omitting the parameter we take into account all coverage regions
|
|
184 # my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'},1);
|
|
185 my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'});
|
|
186 my $new_af;
|
|
187 foreach my $af (@{$allele_features}){
|
|
188 foreach my $rc (@{$rcs}){
|
|
189 if ($af->start <= $rc->end and $af->start >= $rc->start){
|
|
190 push @{$new_af}, $af;
|
|
191 last;
|
|
192 }
|
|
193 }
|
|
194 }
|
|
195
|
|
196 return $new_af;
|
|
197 }
|
|
198
|
|
199
|
|
200 =head2 strain_name
|
|
201
|
|
202 Arg [1] : (optional) string $strain_name
|
|
203 Example : my $strain_name = $strainSlice->strain_name();
|
|
204 Description : Getter/Setter for the name of the strain
|
|
205 ReturnType : string
|
|
206 Exceptions : none
|
|
207 Caller : general
|
|
208
|
|
209 =cut
|
|
210
|
|
211 sub strain_name{
|
|
212 my $self = shift;
|
|
213 if (@_){
|
|
214 $self->{'strain_name'} = shift @_;
|
|
215 }
|
|
216 return $self->{'strain_name'};
|
|
217 }
|
|
218
|
|
219
|
|
220 =head2 display_Slice_name
|
|
221
|
|
222 Args : none
|
|
223 Example : my $strain_name = $strainSlice->display_Slice_name();
|
|
224 Description : Getter for the name of the strain
|
|
225 ReturnType : string
|
|
226 Exceptions : none
|
|
227 Caller : webteam
|
|
228
|
|
229 =cut
|
|
230
|
|
231 sub display_Slice_name{
|
|
232 my $self = shift;
|
|
233
|
|
234 return $self->strain_name;
|
|
235 }
|
|
236
|
|
237 =head2 seq
|
|
238
|
|
239 Arg [1] : int $with_coverage (optional)
|
|
240 Example : print "SEQUENCE = ", $strainSlice->seq();
|
|
241 Description: Returns the sequence of the region represented by this
|
|
242 slice formatted as a string in the strain. If flag with_coverage
|
|
243 is set to 1, returns sequence if there is coverage in the region
|
|
244 Returntype : string
|
|
245 Exceptions : none
|
|
246 Caller : general
|
|
247
|
|
248 =cut
|
|
249
|
|
250 sub seq {
|
|
251 my $self = shift;
|
|
252 my $with_coverage = shift;
|
|
253
|
|
254 $with_coverage ||= 0;
|
|
255
|
|
256 # special case for in-between (insert) coordinates
|
|
257 return '' if($self->start() == $self->end() + 1);
|
|
258
|
|
259 return $self->{'seq'} if($self->{'seq'});
|
|
260
|
|
261 if($self->adaptor()) {
|
|
262 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
|
|
263 my $reference_sequence = $seqAdaptor->fetch_by_Slice_start_end_strand($self,1,undef,1); #get the reference sequence for that slice
|
|
264
|
|
265 #apply all differences to the reference sequence
|
|
266 #first, in case there are any indels, create the new sequence (containing the '-' bases)
|
|
267 # sort edits in reverse order to remove complication of
|
|
268 # adjusting downstream edits
|
|
269 my @indels_ordered = sort {$b->start() <=> $a->start()} @{$self->{'alignIndels'}} if (defined $self->{'alignIndels'});
|
|
270
|
|
271 foreach my $vf (@indels_ordered){
|
|
272 $vf->apply_edit($reference_sequence); #change, in the reference sequence, the vf
|
|
273 }
|
|
274
|
|
275 #need to find coverage information if diffe
|
|
276 # sort edits in reverse order to remove complication of
|
|
277 # adjusting downstream edits
|
|
278 my @variation_features_ordered = sort {$b->start() <=> $a->start()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'});
|
|
279
|
|
280 foreach my $vf (@variation_features_ordered){
|
|
281 $vf->apply_edit($reference_sequence); #change, in the reference sequence, the vf
|
|
282 }
|
|
283
|
|
284 #need to find coverage information if different from reference
|
|
285 my $indAdaptor = $self->adaptor->db->get_db_adaptor('variation')->get_IndividualAdaptor;
|
|
286 my $ref_strain = $indAdaptor->get_reference_strain_name;
|
|
287 $self->_add_coverage_information($reference_sequence) if ($with_coverage == 1 && $self->strain_name ne $ref_strain);
|
|
288 return substr(${$reference_sequence},0,1) if ($self->length == 1);
|
|
289 return substr(${$reference_sequence},0,$self->expanded_length); #returns the reference sequence, applying the variationFeatures. Remove additional bases added due to indels
|
|
290 }
|
|
291
|
|
292 # no attached sequence, and no db, so just return Ns
|
|
293 return 'N' x $self->length();
|
|
294 }
|
|
295
|
|
296 sub expanded_length() {
|
|
297 my $self = shift;
|
|
298
|
|
299 my $length = $self->SUPER::length();
|
|
300
|
|
301 foreach my $af(@{$self->{'alleleFeatures'}}) {
|
|
302 $length += $af->length_diff() if $af->length_diff > 0;
|
|
303 }
|
|
304
|
|
305 return $length;
|
|
306 }
|
|
307
|
|
308
|
|
309
|
|
310 sub _add_coverage_information{
|
|
311 my $self = shift;
|
|
312 my $reference_sequence = shift;
|
|
313
|
|
314 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
|
|
315
|
|
316 unless($variation_db) {
|
|
317 warning("Variation database must be attached to core database to " .
|
|
318 "retrieve variation information" );
|
|
319 return '';
|
|
320 }
|
|
321
|
|
322 my $rc_adaptor = $variation_db->get_ReadCoverageAdaptor();
|
|
323 ### EK : - it looks like the arguments to fetch_all_by_Slice_Sample_depth have changed
|
|
324 ### passing 1 will only get you the coverage of level 1
|
|
325 ### by omitting the parameter we take into account all coverage regions
|
|
326 # my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'},1);
|
|
327 my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'});
|
|
328 my $rcs_sorted;
|
|
329 @{$rcs_sorted} = sort {$a->start <=> $b->start} @{$rcs} if ($self->strand == -1);
|
|
330 $rcs = $rcs_sorted if ($self->strand == -1);
|
|
331 my $start = 1;
|
|
332
|
|
333
|
|
334 # wm2 - new code to mask sequence, instead starts with masked string
|
|
335 # and unmasks seq where there is read coverage
|
|
336
|
|
337 # get all length-changing vars
|
|
338 my @indels_ordered = sort {$a->start() <=> $b->start()} @{$self->{'alignIndels'}} if (defined $self->{'alignIndels'});
|
|
339
|
|
340 my $masked_seq = '~' x length($$reference_sequence);
|
|
341
|
|
342 foreach my $rc(@{$rcs}) {
|
|
343 my ($start, $end) = ($rc->start, $rc->end);
|
|
344
|
|
345 # adjust region for indels
|
|
346 foreach my $indel(@indels_ordered) {
|
|
347 next if $rc->start > $end;
|
|
348
|
|
349 # if within RC region, only need adjust the end
|
|
350 $start += $indel->length_diff unless $indel->start > $start;
|
|
351 $end += $indel->length_diff;
|
|
352 }
|
|
353
|
|
354 # adjust coords for seq boundaries
|
|
355 $start = 1 if $start < 1;
|
|
356 $end = CORE::length($masked_seq) if $end > CORE::length($masked_seq);
|
|
357
|
|
358 # now unmask the sequence using $$reference_sequence
|
|
359 substr($masked_seq, $start - 1, $end - $start + 1) = substr($$reference_sequence, $start - 1, $end - $start + 1);
|
|
360 }
|
|
361
|
|
362 # wm2 - old code, starts with sequence and masks regions between read coverage - BUGGY
|
|
363 # foreach my $rc (@{$rcs}){
|
|
364 # $rc->start(1) if ($rc->start < 0); #if the region lies outside the boundaries of the slice
|
|
365 # $rc->end($self->end - $self->start + 1) if ($rc->end + $self->start > $self->end);
|
|
366 #
|
|
367 # warn "Adjusted: ", $rc->start, "-", $rc->end;
|
|
368 #
|
|
369 # warn "Covering from ", $start, " over ", ($rc->start - $start - 1), " bases";
|
|
370 #
|
|
371 # substr($$reference_sequence, $start-1,($rc->start - $start - 1),'~' x ($rc->start - $start - 1)) if ($rc->start - 1 > $start);
|
|
372 # $start = $rc->end;
|
|
373 #
|
|
374 # }
|
|
375 # substr($$reference_sequence, $start, ($self->length - $start) ,'~' x ($self->length - $start)) if ($self->length -1 > $start);
|
|
376
|
|
377 # copy the masked sequence to the reference sequence
|
|
378 $$reference_sequence = $masked_seq;
|
|
379 }
|
|
380
|
|
381
|
|
382 =head2 get_AlleleFeature
|
|
383
|
|
384 Arg[1] : Bio::EnsEMBL::Variation::VariationFeature $vf
|
|
385 Example : my $af = $strainSlice->get_AlleleFeature($vf);
|
|
386 Description : Returns the AlleleFeature object associated with the VariationFeature (if any)
|
|
387 ReturnType : Bio::EnsEMBL::Variation::AlleleFeature
|
|
388 Exceptions : none
|
|
389 Caller : general
|
|
390
|
|
391 =cut
|
|
392
|
|
393 sub get_AlleleFeature{
|
|
394 my $self = shift;
|
|
395 my $vf = shift;
|
|
396
|
|
397 my $af;
|
|
398 #look at the hash containing the relation vf_id->alleleFeature, if present, return object, otherwise, undef
|
|
399 $af = $self->{'_vf_ids'}->{$vf->dbID} if (defined $self->{'_vf_ids'}->{$vf->dbID});
|
|
400 return $af;
|
|
401 }
|
|
402
|
|
403
|
|
404 =head2 get_all_AlleleFeatures_Slice
|
|
405
|
|
406 Arg[1] : int $with_coverage (optional)
|
|
407 Example : my $af = $strainSlice->get_all_AlleleFeatures_Slice()
|
|
408 Description : Gets all AlleleFeatures between the StrainSlice object and the Slice is defined.
|
|
409 If argument $with_coverage set to 1, returns only AF if they have coverage information
|
|
410 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
|
|
411 Exceptions : none
|
|
412 Caller : general
|
|
413
|
|
414 =cut
|
|
415
|
|
416 sub get_all_AlleleFeatures_Slice{
|
|
417 my $self = shift;
|
|
418 my $with_coverage = shift;
|
|
419
|
|
420 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
|
|
421
|
|
422 unless($variation_db) {
|
|
423 warning("Variation database must be attached to core database to " .
|
|
424 "retrieve variation information" );
|
|
425 return '';
|
|
426 }
|
|
427 my $indAdaptor = $variation_db->get_IndividualAdaptor();
|
|
428 my $ref_name = $indAdaptor->get_reference_strain_name;
|
|
429 return [] if ($self->strain_name eq $ref_name);
|
|
430 $with_coverage ||= 0; #by default, get all AlleleFeatures
|
|
431 if ($with_coverage == 1){
|
|
432 my $new_allele_features = $self->_filter_af_by_coverage($self->{'alleleFeatures'});
|
|
433 return $new_allele_features || [];
|
|
434 }
|
|
435
|
|
436 return $self->{'alleleFeatures'} || [];
|
|
437 }
|
|
438
|
|
439 =head2 get_all_differences_StrainSlice
|
|
440
|
|
441 Arg[1] : Bio::EnsEMBL::StrainSlice $ss
|
|
442 Example : my $differences = $strainSlice->get_all_differences_StrainSlice($ss)
|
|
443 Description : Gets differences between 2 StrainSlice objects
|
|
444 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
|
|
445 Exceptions : thrown on bad argument
|
|
446 Caller : general
|
|
447
|
|
448 =cut
|
|
449
|
|
450 sub get_all_differences_StrainSlice{
|
|
451 my $self = shift;
|
|
452 my $strainSlice = shift;
|
|
453
|
|
454 if (!ref($strainSlice) || !$strainSlice->isa('Bio::EnsEMBL::StrainSlice')){
|
|
455 throw('Bio::EnsEMBL::StrainSlice arg expected');
|
|
456 }
|
|
457 if ( @{$self->{'alleleFeatures'}} == 0 && @{$strainSlice->{'alleleFeatures'}} == 0){
|
|
458 return undef; #there are no differences in any of the Strains
|
|
459
|
|
460 }
|
|
461 my $differences; #differences between strains
|
|
462 if (@{$strainSlice->{'alleleFeatures'}} == 0){
|
|
463 #need to create a copy of VariationFeature
|
|
464 foreach my $difference (@{$self->{'alleleFeatures'}}){
|
|
465 my %vf = %$difference;
|
|
466 push @{$differences},bless \%vf,ref($difference);
|
|
467 }
|
|
468 }
|
|
469 elsif (@{$self->{'alleleFeatures'}} == 0){
|
|
470 #need to create a copy of VariationFeature, but changing the allele by the allele in the reference
|
|
471 foreach my $difference (@{$strainSlice->{'alleleFeatures'}}){
|
|
472 push @{$differences}, $strainSlice->_convert_difference($difference);
|
|
473 }
|
|
474 }
|
|
475 else{
|
|
476 #both strains have differences
|
|
477 #create a hash with the differences in the self strain slice
|
|
478 my %variation_features_self = map {$_->start => $_} @{$self->{'alleleFeatures'}};
|
|
479 foreach my $difference (@{$strainSlice->{'alleleFeatures'}}){
|
|
480 #there is no difference in the other strain slice, convert the allele
|
|
481 if (!defined $variation_features_self{$difference->start}){
|
|
482 push @{$differences},$strainSlice->_convert_difference($difference);
|
|
483 }
|
|
484 else{
|
|
485 #if it is defined and have the same allele, delete from the hash
|
|
486 if ($variation_features_self{$difference->start}->allele_string eq $difference->allele_string){
|
|
487 delete $variation_features_self{$difference->start};
|
|
488 }
|
|
489 }
|
|
490 }
|
|
491 #and copy the differences that in the self
|
|
492 foreach my $difference (values %variation_features_self){
|
|
493 my %vf = %$difference;
|
|
494 push @{$differences},bless \%vf,ref($difference);
|
|
495 }
|
|
496
|
|
497 }
|
|
498 #need to map differences to the self
|
|
499 my $mapper = $self->mapper(); #now that we have the differences, map them in the StrainSlice
|
|
500 # print Dumper($mapper);
|
|
501 my @results;
|
|
502 foreach my $difference (@{$differences}){
|
|
503 @results = $mapper->map_coordinates('Slice',$difference->start,$difference->end,$difference->strand,'Slice');
|
|
504 #we can have 3 possibilities:
|
|
505 #the difference is an insertion and when mapping returns the boundaries of the insertion in the StrainSlice
|
|
506 if (@results == 2){
|
|
507 #the first position in the result is the beginning of the insertion
|
|
508 if($results[0]->start < $results[1]->start){
|
|
509 $difference->start($results[0]->end+1);
|
|
510 $difference->end($results[1]->start-1);
|
|
511 }
|
|
512 else{
|
|
513 $difference->start($results[1]->end+1);
|
|
514 $difference->end($results[0]->start-1);
|
|
515 }
|
|
516 $difference->strand($results[0]->strand);
|
|
517 }
|
|
518 else{
|
|
519 #it can be either a SNP or a deletion, and we have the coordinates in the result, etither a Bio::EnsEMBL::Mapper::Coordinate
|
|
520 # or a Bio::EnsEMBL::Mapper::IndelCoordinate
|
|
521 # print "Difference: ",$difference->start, "-", $difference->end,"strand ",$difference->strand,"\n";
|
|
522 $difference->start($results[0]->start);
|
|
523 $difference->end($results[0]->end);
|
|
524 $difference->strand($results[0]->strand);
|
|
525 }
|
|
526 }
|
|
527
|
|
528 return $differences;
|
|
529 }
|
|
530
|
|
531 #for a given VariationFeatures, converts the allele into the reference allele and returns a new list with
|
|
532 #the converted VariationFeatures
|
|
533 sub _convert_difference{
|
|
534 my $self = shift;
|
|
535 my $difference = shift;
|
|
536 my %new_vf = %$difference; #make a copy of the variationFeature
|
|
537 #and change the allele with the one from the reference Slice
|
|
538 $new_vf{'allele_string'} = $self->SUPER::subseq($difference->start,$difference->end,$difference->strand);
|
|
539 return bless \%new_vf,ref($difference);
|
|
540 }
|
|
541
|
|
542 =head2 sub_Slice
|
|
543
|
|
544 Arg 1 : int $start
|
|
545 Arg 2 : int $end
|
|
546 Arge [3] : int $strand
|
|
547 Example : none
|
|
548 Description: Makes another StrainSlice that covers only part of this slice
|
|
549 with the appropriate differences to the reference Slice
|
|
550 If a slice is requested which lies outside of the boundaries
|
|
551 of this function will return undef. This means that
|
|
552 behaviour will be consistant whether or not the slice is
|
|
553 attached to the database (i.e. if there is attached sequence
|
|
554 to the slice). Alternatively the expand() method or the
|
|
555 SliceAdaptor::fetch_by_region method can be used instead.
|
|
556 Returntype : Bio::EnsEMBL::StrainSlice or undef if arguments are wrong
|
|
557 Exceptions : thrown when trying to get the subSlice in the middle of a
|
|
558 insertion
|
|
559 Caller : general
|
|
560
|
|
561 =cut
|
|
562
|
|
563 sub sub_Slice {
|
|
564 my ( $self, $start, $end, $strand ) = @_;
|
|
565 my $mapper = $self->mapper();
|
|
566 #finally map from the Slice to the Strain
|
|
567 my @results = $mapper->map_coordinates('StrainSlice',$start,$end,$strand,'StrainSlice');
|
|
568 my $new_start;
|
|
569 my $new_end;
|
|
570 my $new_strand;
|
|
571 my $new_seq;
|
|
572
|
|
573 #Get need start and end for the subSlice of the StrainSlice
|
|
574 my @results_ordered = sort {$a->start <=> $b->start} @results;
|
|
575 $new_start = $results_ordered[0]->start();
|
|
576 $new_strand = $results_ordered[0]->strand() if (ref($results_ordered[0]) eq 'Bio::EnsEMBL::Mapper::Coordinate');
|
|
577 $new_strand = $results_ordered[-1]->strand() if (ref($results_ordered[-1]) eq 'Bio::EnsEMBL::Mapper::Coordinate');
|
|
578 $new_end = $results_ordered[-1]->end(); #get last element of the array, the end of the slice
|
|
579
|
|
580 my $subSlice = $self->SUPER::sub_Slice($new_start,$new_end,$new_strand);
|
|
581 $subSlice->{'strain_name'} = $self->{'strain_name'};
|
|
582
|
|
583 my $new_variations; #reference to an array that will contain the variationFeatures in the new subSlice
|
|
584 #update the VariationFeatures in the sub_Slice of the Strain
|
|
585 my $vf_start;
|
|
586 my $vf_end;
|
|
587 my $offset = $subSlice->start - $self->start;
|
|
588
|
|
589 foreach my $variationFeature (@{$self->{'alleleFeatures'}}){
|
|
590 #calculate the new position of the variation_feature in the subSlice
|
|
591 $vf_start = $variationFeature->start - $offset;
|
|
592 $vf_end = $variationFeature->end - $offset;
|
|
593 if ($vf_start >= 1 and $vf_end <= $subSlice->length){
|
|
594 #copy the variationFeature
|
|
595 my %new_vf;
|
|
596 %new_vf = %$variationFeature;
|
|
597 #and shift to the new coordinates
|
|
598 $new_vf{'start'} = $vf_start;
|
|
599 $new_vf{'end'} = $vf_end;
|
|
600 my $test = bless \%new_vf, ref($variationFeature);
|
|
601 push @{$new_variations}, $test;
|
|
602 }
|
|
603 }
|
|
604 $subSlice->{'alleleFeatures'} = $new_variations;
|
|
605 return $subSlice;
|
|
606
|
|
607 }
|
|
608
|
|
609 =head2 ref_subseq
|
|
610
|
|
611 Arg [1] : int $startBasePair
|
|
612 relative to start of slice, which is 1.
|
|
613 Arg [2] : int $endBasePair
|
|
614 relative to start of slice.
|
|
615 Arg [3] : (optional) int $strand
|
|
616 The strand of the slice to obtain sequence from. Default
|
|
617 value is 1.
|
|
618 Description: returns string of dna from reference sequence
|
|
619 Returntype : txt
|
|
620 Exceptions : end should be at least as big as start
|
|
621 strand must be set
|
|
622 Caller : general
|
|
623
|
|
624 =cut
|
|
625
|
|
626 sub ref_subseq{
|
|
627 my $self = shift;
|
|
628 my $start = shift;
|
|
629 my $end = shift;
|
|
630 my $strand = shift;
|
|
631 # special case for in-between (insert) coordinates
|
|
632 return '' if($start == $end + 1);
|
|
633
|
|
634 my $subseq;
|
|
635 if($self->adaptor){
|
|
636 my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
|
|
637 $subseq = ${$seqAdaptor->fetch_by_Slice_start_end_strand
|
|
638 ( $self, $start,
|
|
639 $end, $strand )};
|
|
640 } else {
|
|
641 ## check for gap at the beginning and pad it with Ns
|
|
642 if ($start < 1) {
|
|
643 $subseq = "N" x (1 - $start);
|
|
644 $start = 1;
|
|
645 }
|
|
646 $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
|
|
647 ## check for gap at the end and pad it with Ns
|
|
648 if ($end > $self->length()) {
|
|
649 $subseq .= "N" x ($end - $self->length());
|
|
650 }
|
|
651 reverse_comp(\$subseq) if($strand == -1);
|
|
652 }
|
|
653 return $subseq;
|
|
654 }
|
|
655
|
|
656 =head2 subseq
|
|
657
|
|
658 Arg [1] : int $startBasePair
|
|
659 relative to start of slice, which is 1.
|
|
660 Arg [2] : int $endBasePair
|
|
661 relative to start of slice.
|
|
662 Arg [3] : (optional) int $strand
|
|
663 The strand of the slice to obtain sequence from. Default
|
|
664 value is 1.
|
|
665 Description: returns string of dna sequence
|
|
666 Returntype : txt
|
|
667 Exceptions : end should be at least as big as start
|
|
668 strand must be set
|
|
669 Caller : general
|
|
670
|
|
671 =cut
|
|
672
|
|
673 sub subseq {
|
|
674 my ( $self, $start, $end, $strand ) = @_;
|
|
675
|
|
676 if ( $end+1 < $start ) {
|
|
677 throw("End coord + 1 is less than start coord");
|
|
678 }
|
|
679
|
|
680 # handle 'between' case for insertions
|
|
681 return '' if( $start == $end + 1);
|
|
682
|
|
683 $strand = 1 unless(defined $strand);
|
|
684
|
|
685 if ( $strand != -1 && $strand != 1 ) {
|
|
686 throw("Invalid strand [$strand] in call to Slice::subseq.");
|
|
687 }
|
|
688
|
|
689 my $subseq;
|
|
690 my $seq;
|
|
691 if($self->adaptor){
|
|
692
|
|
693
|
|
694 $seq = $self->seq;
|
|
695 reverse_comp(\$seq) if ($strand == -1);
|
|
696 $subseq = substr($seq,$start-1,$end - $start + 1);
|
|
697 }
|
|
698 else {
|
|
699 ## check for gap at the beginning and pad it with Ns
|
|
700 if ($start < 1) {
|
|
701 $subseq = "N" x (1 - $start);
|
|
702 $start = 1;
|
|
703 }
|
|
704 $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
|
|
705 ## check for gap at the end and pad it with Ns
|
|
706 if ($end > $self->length()) {
|
|
707 $subseq .= "N" x ($end - $self->length());
|
|
708 }
|
|
709 reverse_comp(\$subseq) if($strand == -1);
|
|
710 }
|
|
711 return $subseq;
|
|
712
|
|
713 }
|
|
714
|
|
715
|
|
716 sub mapper{
|
|
717 my $self = shift;
|
|
718
|
|
719 if (@_) {
|
|
720 delete $self->{'mapper'};
|
|
721 }
|
|
722 if(!defined $self->{'mapper'}){
|
|
723 #create the mapper between the Slice and StrainSlice
|
|
724 my $mapper = Bio::EnsEMBL::Mapper->new('Slice','StrainSlice');
|
|
725 #align with Slice
|
|
726 #get all the VariationFeatures in the strain Slice, from start to end in the Slice
|
|
727 my @variation_features_ordered = sort {$a->start() <=> $b->start()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'});
|
|
728
|
|
729 my $start_slice = 1;
|
|
730 my $end_slice;
|
|
731 my $start_strain = 1;
|
|
732 my $end_strain;
|
|
733 my $length_allele;
|
|
734 foreach my $variation_feature (@variation_features_ordered){
|
|
735 #we have a insertion/deletion: marks the beginning of new slice move coordinates
|
|
736 if ($variation_feature->length_diff != 0){
|
|
737 $length_allele = $variation_feature->length + $variation_feature->length_diff();
|
|
738 $end_slice = $variation_feature->start() - 1;
|
|
739
|
|
740 if ($end_slice >= $start_slice){
|
|
741 $end_strain = $end_slice - $start_slice + $start_strain;
|
|
742 #add the sequence that maps
|
|
743 $mapper->add_map_coordinates('Slice',$start_slice,$end_slice,1,'StrainSlice',$start_strain,$end_strain);
|
|
744 #add the indel
|
|
745 $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $variation_feature->length,1,'StrainSlice',$end_strain+1,$end_strain + $length_allele);
|
|
746 $start_strain = $end_strain + $length_allele + 1;
|
|
747 }
|
|
748 else{
|
|
749 #add the indel
|
|
750 $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $variation_feature->length,1,'StrainSlice',$end_strain+1,$end_strain + $length_allele);
|
|
751 $start_strain += $length_allele;
|
|
752 }
|
|
753 $start_slice = $end_slice + $variation_feature->length+ 1;
|
|
754 }
|
|
755 }
|
|
756 if ($start_slice <= $self->length){
|
|
757 $mapper->add_map_coordinates('Slice',$start_slice,$self->length,1,'StrainSlice',$start_strain,$start_strain + $self->length - $start_slice);
|
|
758 }
|
|
759 $self->{'mapper'} = $mapper;
|
|
760 }
|
|
761 return $self->{'mapper'};
|
|
762 }
|
|
763
|
|
764 =head2 get_all_differences_Slice
|
|
765
|
|
766 Description : DEPRECATED use get_all_AlleleFeatures instead
|
|
767
|
|
768 =cut
|
|
769
|
|
770 sub get_all_differences_Slice{
|
|
771 my $self = shift;
|
|
772
|
|
773 deprecate('Use get_all_differences_Slice instead');
|
|
774 return $self->get_all_AlleleFeatures_Slice(@_);
|
|
775 }
|
|
776
|
|
777 =head2 get_all_VariationFeatures
|
|
778
|
|
779 Arg[1] : int $with_coverage (optional)
|
|
780 Description :returns all alleleFeatures features on this slice.
|
|
781 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
|
|
782 Exceptions : none
|
|
783 Caller : contigview, snpview
|
|
784
|
|
785 =cut
|
|
786
|
|
787 sub get_all_VariationFeatures {
|
|
788 my $self = shift;
|
|
789 my $with_coverage = shift;
|
|
790 $with_coverage ||= 0;
|
|
791 return $self->get_all_AlleleFeatures_Slice($with_coverage);
|
|
792 }
|
|
793
|
|
794 =head2 get_original_seq_region_position
|
|
795
|
|
796 Arg [1] : int $position
|
|
797 relative to start of slice, which is 1.
|
|
798 Description: Placeholder method - this method has no explicit use beyond
|
|
799 providiing compatibility with AlignSlice. To map positions
|
|
800 between the StrainSlice and the reference slice, use the
|
|
801 mapper and its methods.
|
|
802 Returntype : ($strainSlice, $seq_region_position), an array where the first
|
|
803 element is a Bio::EnsEMBL::StrainSlice and the second one is the
|
|
804 requested seq_region_position.
|
|
805 Exceptions : none
|
|
806 Caller : general
|
|
807
|
|
808 =cut
|
|
809
|
|
810 sub get_original_seq_region_position {
|
|
811 my $self = shift;
|
|
812 my $position = shift;
|
|
813 #coordinates in the AlignSlice and Slice are the same, so far will return the same Slice
|
|
814 #and coordinate
|
|
815 return ($self,$position);
|
|
816 }
|
|
817
|
|
818
|
|
819 =head2 remove_indels
|
|
820
|
|
821 Args : none
|
|
822 Example : $strainSlice->remove_indels();
|
|
823 Description : Removes insertions and deletions from the allele features
|
|
824 of this object
|
|
825 ReturnType : none
|
|
826 Exceptions : none
|
|
827 Caller : webteam
|
|
828
|
|
829 =cut
|
|
830
|
|
831 sub remove_indels {
|
|
832 my $self = shift;
|
|
833
|
|
834 my @new_afs = grep { $_->variation->var_class ne 'in-del' } @{$self->{'alleleFeatures'}};
|
|
835
|
|
836 $self->{'alleleFeatures'} = \@new_afs;
|
|
837 }
|
|
838
|
|
839 1;
|