0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::IndividualSlice - SubClass of the Slice. Represents the
|
|
24 slice of the genome for a certain individual (applying the alleles for
|
|
25 this individual)
|
|
26
|
|
27 =head1 SYNOPSIS
|
|
28
|
|
29 $sa = $db->get_SliceAdaptor;
|
|
30
|
|
31 $slice =
|
|
32 $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
|
|
33
|
|
34 $individualSlice = $slice->get_by_Individual($individual_name);
|
|
35
|
|
36 # Get the sequence from the Individual Slice: will contain IUPAC codes
|
|
37 # for SNPs and Ensembl ambiguity codes for indels
|
|
38 my $seq = $individualSlice->seq();
|
|
39 print $seq;
|
|
40
|
|
41 # Get a subSlice of the Strain
|
|
42 my $subSlice_individual =
|
|
43 $individualSlice->sub_Slice( 5_000, 8_000, 1 )
|
|
44
|
|
45 # Compare two different individuals in the same Slice
|
|
46 my $sliceIndividual2 = $slice->get_by_Individual($individual_name2);
|
|
47 my $differences =
|
|
48 $individualSlice->get_all_differences_IndividualSlice(
|
|
49 $sliceIndividual2);
|
|
50
|
|
51 foreach my $af ( @{$differences} ) {
|
|
52 print
|
|
53 "There is a difference between $individual_name "
|
|
54 . "and $individual_name2 at ",
|
|
55 $af->start, "-", $af->end,
|
|
56 " with allele ", $af->allele_string(), "\n";
|
|
57 }
|
|
58
|
|
59 =head1 DESCRIPTION
|
|
60
|
|
61 A IndividualSlice object represents a region of a genome for a certain
|
|
62 individual. It can be used to retrieve sequence or features from a
|
|
63 individual.
|
|
64
|
|
65 =head1 METHODS
|
|
66
|
|
67 =cut
|
|
68
|
|
69 package Bio::EnsEMBL::IndividualSlice;
|
|
70 use vars qw(@ISA);
|
|
71 use strict;
|
|
72
|
|
73 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
74 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
|
|
75 use Bio::EnsEMBL::Slice;
|
|
76 use Bio::EnsEMBL::Mapper;
|
|
77 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
|
|
78
|
|
79 @ISA = qw(Bio::EnsEMBL::Slice);
|
|
80
|
|
81
|
|
82 =head2 new
|
|
83
|
|
84 Arg [1..N] : List of named arguments
|
|
85 Bio::EnsEMBL::CoordSystem COORD_SYSTEM
|
|
86 string SEQ_REGION_NAME,
|
|
87 int START,
|
|
88 int END,
|
|
89 string VERSION (optional, defaults to '')
|
|
90 int STRAND, (optional, defaults to 1)
|
|
91 Bio::EnsEMBL::DBSQL::SliceAdaptor ADAPTOR (optional)
|
|
92 Arg[N+1] : string $individual_name
|
|
93 Example : $individualSlice = Bio::EnsEMBL::IndividualSlice->new(-coord_system => $cs,
|
|
94 -start => 1,
|
|
95 -end => 10000,
|
|
96 -strand => 1,
|
|
97 -seq_region_name => 'X',
|
|
98 -seq_region_length => 12e6,
|
|
99 -individual_name => $individual_name);
|
|
100 Description : Creates a new Bio::EnsEMBL::IndividualSlice object that will contain a shallow copy of the
|
|
101 Slice object, plus additional information such as the individual this Slice refers to
|
|
102 and listref of Bio::EnsEMBL::Variation::AlleleFeatures of differences with the
|
|
103 reference sequence
|
|
104 ReturnType : Bio::EnsEMBL::IndividualSlice
|
|
105 Exceptions : none
|
|
106 Caller : general
|
|
107
|
|
108 =cut
|
|
109
|
|
110 sub new{
|
|
111 my $caller = shift;
|
|
112 my $class = ref($caller) || $caller;
|
|
113
|
|
114 #create the IndividualSlice object as the Slice, plus the individual attribute
|
|
115 my ($individual_name, $sample_id) = rearrange(['INDIVIDUAL', 'SAMPLE_ID'],@_);
|
|
116
|
|
117 my $self = $class->SUPER::new(@_);
|
|
118
|
|
119 $self->{'individual_name'} = $individual_name;
|
|
120 $self->{'sample_id'} = $sample_id;
|
|
121
|
|
122 return $self;
|
|
123
|
|
124 }
|
|
125
|
|
126 =head2 individual_name
|
|
127
|
|
128 Arg [1] : (optional) string $individual_name
|
|
129 Example : my $individual_name = $individualSlice->individual_name();
|
|
130 Description : Getter/Setter for the name of the individual in the slice
|
|
131 ReturnType : string
|
|
132 Exceptions : none
|
|
133 Caller : general
|
|
134
|
|
135 =cut
|
|
136
|
|
137 sub individual_name{
|
|
138 my $self = shift;
|
|
139 if (@_){
|
|
140 $self->{'individual_name'} = shift @_;
|
|
141 }
|
|
142 return $self->{'individual_name'};
|
|
143 }
|
|
144
|
|
145 =head2 seq
|
|
146
|
|
147 Arg [1] : none
|
|
148 Example : print "SEQUENCE = ", $strainSlice->seq();
|
|
149 Description: Returns the sequence of the region represented by this
|
|
150 StrainSlice formatted as a string.
|
|
151 Returntype : string
|
|
152 Exceptions : none
|
|
153 Caller : general
|
|
154
|
|
155 =cut
|
|
156
|
|
157 sub seq {
|
|
158 my $self = shift;
|
|
159
|
|
160 # special case for in-between (insert) coordinates
|
|
161 return '' if($self->start() == $self->end() + 1);
|
|
162
|
|
163 return $self->{'seq'} if($self->{'seq'});
|
|
164
|
|
165 if($self->adaptor()) {
|
|
166 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
|
|
167 my $reference_sequence = $seqAdaptor->fetch_by_Slice_start_end_strand($self,1,undef,1); #get the reference sequence for that slice
|
|
168 #apply all differences to the reference sequence
|
|
169
|
|
170 # sort edits in reverse order to remove complication of
|
|
171 # adjusting downstream edits
|
|
172 my @allele_features_ordered = sort {$b->start() <=> $a->start() || $b->end() <=> $a->end()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'});
|
|
173
|
|
174 foreach my $af (@allele_features_ordered){
|
|
175 $af->apply_edit($reference_sequence); #change, in the reference sequence, the af
|
|
176 }
|
|
177 # return substr(${$reference_sequence},0,1) if ($self->length == 1);
|
|
178 return ${$reference_sequence}; #returns the reference sequence, applying the alleleFeatures
|
|
179 }
|
|
180
|
|
181 # no attached sequence, and no db, so just return Ns
|
|
182 return 'N' x $self->length();
|
|
183 }
|
|
184
|
|
185 =head2 get_all_differences_Slice
|
|
186
|
|
187 Args : none
|
|
188 Example : my $differences = $individualSlice->get_all_differences_Slice()
|
|
189 Description : Gets all differences between the IndividualSlice object and the Slice is defined
|
|
190 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
|
|
191 Exceptions : none
|
|
192 Caller : general
|
|
193
|
|
194 =cut
|
|
195
|
|
196 sub get_all_differences_Slice{
|
|
197 my $self = shift;
|
|
198 my $differences; #reference to the array with the differences between Slice and StrainSlice
|
|
199 my $ref_allele;
|
|
200 foreach my $difference (@{$self->{'alleleFeatures'}}){
|
|
201 if ($difference->length_diff == 0){
|
|
202 #the difference is a SNP, check if it is the same as the reference allele
|
|
203 $ref_allele = $self->SUPER::subseq($difference->start,$difference->end,$difference->strand);
|
|
204 $ref_allele = '-' if ($ref_allele eq '');
|
|
205 if ($ref_allele ne $difference->allele_string){
|
|
206 #when the alleleFeature is different from the reference allele, add to the differences list
|
|
207 push @{$differences},$difference;
|
|
208 }
|
|
209 }
|
|
210 else{
|
|
211 push @{$differences},$difference;
|
|
212 }
|
|
213 }
|
|
214
|
|
215 return $differences;
|
|
216
|
|
217 }
|
|
218
|
|
219 =head2 get_all_differences_IndividualSlice
|
|
220
|
|
221 Arg[1] : Bio::EnsEMBL::IndividualSlice $is
|
|
222 Example : my $differences = $individualSlice->get_all_differences_IndividualSlice($individualslice)
|
|
223 Description : Gets differences between 2 IndividualSlice objects
|
|
224 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
|
|
225 Exceptions : thrown on bad argument
|
|
226 Caller : general
|
|
227
|
|
228 =cut
|
|
229
|
|
230 sub get_all_differences_IndividualSlice{
|
|
231 my $self = shift;
|
|
232 my $individualSlice = shift;
|
|
233
|
|
234 if (!ref($individualSlice) || !$individualSlice->isa('Bio::EnsEMBL::IndividualSlice')){
|
|
235 throw('Bio::EnsEMBL::IndividualSlice arg expected');
|
|
236 }
|
|
237 if ( @{$self->{'alleleFeatures'}} == 0 && @{$individualSlice->{'alleleFeatures'}} == 0){
|
|
238 return undef; #there are no differences in any of the Individuals
|
|
239
|
|
240 }
|
|
241 my $differences; #differences between individuals
|
|
242 if (@{$individualSlice->{'alleleFeatures'}} == 0){
|
|
243 #need to create a copy of alleleFeature for the first Individual
|
|
244 foreach my $difference (@{$self->{'alleleFeatures'}}){
|
|
245 my %vf = %$difference;
|
|
246 push @{$differences},bless \%vf,ref($difference);
|
|
247 }
|
|
248 }
|
|
249 elsif (@{$self->{'alleleFeatures'}} == 0){
|
|
250 #need to create a copy of AlleleFeature, but changing the allele by the allele in the reference sequence
|
|
251 foreach my $difference (@{$individualSlice->{'alleleFeatures'}}){
|
|
252 push @{$differences}, $individualSlice->_convert_difference($difference);
|
|
253 }
|
|
254 }
|
|
255 else{
|
|
256 #both individuals have differences
|
|
257 #create a hash with the differences in the first slice
|
|
258 my %allele_features_self = map {$_->start.'-'.$_->end => $_} @{$self->{'alleleFeatures'}};
|
|
259 foreach my $difference (@{$individualSlice->{'alleleFeatures'}}){
|
|
260 #there is no difference in the other individual slice, convert the allele
|
|
261 if (!defined $allele_features_self{$difference->start.'-'.$difference->end}){
|
|
262 push @{$differences},$individualSlice->_convert_difference($difference);
|
|
263 }
|
|
264 else{
|
|
265 #if it is defined and have the same allele, delete from the hash since it is not a difference
|
|
266 #between the individuals
|
|
267 if ($allele_features_self{$difference->start.'-'.$difference->end}->allele_string eq $difference->allele_string){
|
|
268 delete $allele_features_self{$difference->start.'-'.$difference->end};
|
|
269 }
|
|
270 }
|
|
271 }
|
|
272 #and finally, make a shallow copy of the differences in the first individual
|
|
273 foreach my $difference (values %allele_features_self){
|
|
274 my %vf = %$difference;
|
|
275 push @{$differences},bless \%vf,ref($difference);
|
|
276 }
|
|
277
|
|
278 }
|
|
279 #need to map differences to the first individual, self, since the coordinates are in the Slice coordinate system
|
|
280 my $mapper = $self->mapper(); #now that we have the differences, map them in the IndividualSlice
|
|
281 my @results;
|
|
282 foreach my $difference (@{$differences}){
|
|
283 @results = $mapper->map_coordinates('Slice',$difference->start,$difference->end,$difference->strand,'Slice');
|
|
284 #we can have 3 possibilities:
|
|
285 #the difference is an insertion and when mapping returns the boundaries of the insertion in the IndividualSlice
|
|
286 if (@results == 2){
|
|
287 #the first position in the result is the beginning of the insertion
|
|
288 if($results[0]->start < $results[1]->start){
|
|
289 $difference->start($results[0]->end+1);
|
|
290 $difference->end($results[1]->start-1);
|
|
291 }
|
|
292 else{
|
|
293 #it is the second position the beginning of the insertion
|
|
294 $difference->start($results[1]->end+1);
|
|
295 $difference->end($results[0]->start-1);
|
|
296 }
|
|
297 $difference->strand($results[0]->strand);
|
|
298 }
|
|
299 else{
|
|
300 #it can be either a SNP or a deletion, and we have the coordinates in the result, etither a Bio::EnsEMBL::Mapper::Coordinate
|
|
301 # or a Bio::EnsEMBL::Mapper::IndelCoordinate
|
|
302 $difference->start($results[0]->start);
|
|
303 $difference->end($results[0]->end);
|
|
304 $difference->strand($results[0]->strand);
|
|
305 }
|
|
306 }
|
|
307
|
|
308 return $differences;
|
|
309 }
|
|
310
|
|
311 #for a given AlleleFeature, converts the allele into the reference allele and returns
|
|
312 #the converted AlleleFeature
|
|
313
|
|
314 sub _convert_difference{
|
|
315 my $self = shift;
|
|
316 my $difference = shift;
|
|
317 my %new_af = %$difference; #make a copy of the alleleFeature
|
|
318 #and change the allele with the one from the reference Slice
|
|
319 $new_af{'allele_string'} = $self->SUPER::subseq($difference->start,$difference->end,$difference->strand);
|
|
320 return bless \%new_af,ref($difference);
|
|
321 }
|
|
322
|
|
323 =head2 mapper
|
|
324
|
|
325 Args : none
|
|
326 Description: Getter for the mapper between the between the IndividualSlice and the Slice it refers to.
|
|
327 It is done automatically when necessary to create subSlice or to get the differences between individuals
|
|
328 Returntype : Bio::EnsEMBL::Mapper
|
|
329 Exceptions : none
|
|
330 Caller : Internal function
|
|
331
|
|
332 =cut
|
|
333
|
|
334 sub mapper{
|
|
335 my $self = shift;
|
|
336
|
|
337 if (@_) {
|
|
338 #allow to create again the mapper
|
|
339 delete $self->{'mapper'};
|
|
340 }
|
|
341 if(!defined $self->{'mapper'}){
|
|
342 #create the mapper between the Slice and StrainSlice
|
|
343 my $mapper = Bio::EnsEMBL::Mapper->new('Slice','IndividualSlice');
|
|
344 #align with Slice
|
|
345 #get all the VariationFeatures in the Individual Slice, from start to end in the Slice
|
|
346 my @allele_features_ordered = sort {$a->start() <=> $b->start() || $b->end() <=> $a->end()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'});
|
|
347
|
|
348 my $start_slice = 1;
|
|
349 my $end_slice;
|
|
350 my $start_individual = 1;
|
|
351 my $end_individual;
|
|
352 my $length_allele;
|
|
353 my $total_length_diff = 0;
|
|
354 #we will walk from left to right in the slice object, updating the start and end individual every time
|
|
355 #there is a new alleleFeature in the Individual
|
|
356 foreach my $allele_feature (@allele_features_ordered){
|
|
357 #we have a insertion/deletion: marks the beginning of new slice move coordinates
|
|
358 if ($allele_feature->length_diff != 0){
|
|
359 $total_length_diff += $allele_feature->length_diff;
|
|
360 $length_allele = $allele_feature->length + $allele_feature->length_diff(); #length of the allele in the Individual
|
|
361 $end_slice = $allele_feature->start() - 1; #set the end of the slice before the alleleFeature
|
|
362 if ($end_slice >= $start_slice){
|
|
363 #normal cases (not with gaps)
|
|
364 $end_individual = $end_slice - $start_slice + $start_individual; #set the end of the individual from the beginning plus the offset
|
|
365 #add the sequence that maps
|
|
366 $mapper->add_map_coordinates('Slice',$start_slice,$end_slice,1,'IndividualSlice',$start_individual,$end_individual);
|
|
367 #and add the indel
|
|
368 $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $allele_feature->length,1,'IndividualSlice',$end_individual+1,$end_individual + $length_allele);
|
|
369 $start_individual = $end_individual + $length_allele + 1; #set the beginning of the individual after the allele
|
|
370 }
|
|
371 else{
|
|
372 #add the indel
|
|
373 $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $allele_feature->length,1,'IndividualSlice',$end_individual+1,$end_individual + $length_allele);
|
|
374 $start_individual += $length_allele;
|
|
375 }
|
|
376 $start_slice = $end_slice + $allele_feature->length+ 1; #set the beginning of the slice after the variation feature
|
|
377 }
|
|
378 }
|
|
379 if ($start_slice <= $self->length){
|
|
380 #if we haven't reached the end of the IndividualSlice, add the final map coordinates between the individual and the slice
|
|
381 $mapper->add_map_coordinates('Slice',$start_slice,$self->length,1,'IndividualSlice',$start_individual,$start_individual + $self->length - $start_slice);
|
|
382 }
|
|
383
|
|
384 $mapper->add_map_coordinates('Slice', -$self->start+1, 0,1, 'IndividualSlice', -$self->start +1,0) if ($self->start > 0); #before individualSlice
|
|
385 $mapper->add_map_coordinates('Slice', $self->length + 1,$self->seq_region_length - ($self->length +1),1, 'IndividualSlice', $self->length + 1 + $total_length_diff,$self->seq_region_length + $total_length_diff - ($self->length +1) ) if ($self->length <= $self->seq_region_length); #after strainSlice
|
|
386 $self->{'mapper'} = $mapper;
|
|
387 }
|
|
388 return $self->{'mapper'};
|
|
389 }
|
|
390
|
|
391 =head2 sub_Slice
|
|
392
|
|
393 Arg 1 : int $start
|
|
394 Arg 2 : int $end
|
|
395 Arge [3] : int $strand
|
|
396 Example : none
|
|
397 Description: Makes another IndividualSlice that covers only part of this IndividualSlice
|
|
398 with the appropriate differences to the reference Slice
|
|
399 If a slice is requested which lies outside of the boundaries
|
|
400 of this function will return undef. This means that
|
|
401 behaviour will be consistant whether or not the slice is
|
|
402 attached to the database (i.e. if there is attached sequence
|
|
403 to the slice). Alternatively the expand() method or the
|
|
404 SliceAdaptor::fetch_by_region method can be used instead.
|
|
405 Returntype : Bio::EnsEMBL::IndividualSlice or undef if arguments are wrong
|
|
406 Exceptions : thrown when trying to get the subSlice in the middle of a
|
|
407 insertion
|
|
408 Caller : general
|
|
409
|
|
410 =cut
|
|
411
|
|
412 sub sub_Slice {
|
|
413 my ( $self, $start, $end, $strand ) = @_;
|
|
414 my $mapper = $self->mapper();
|
|
415 #map from the Individual to the Slice to get the sub_Slice, and then, apply the differences in the subSlice
|
|
416 my @results = $mapper->map_coordinates('IndividualSlice',$start,$end,$strand,'IndividualSlice');
|
|
417 my $new_start;
|
|
418 my $new_end;
|
|
419 my $new_strand;
|
|
420 my $new_seq;
|
|
421 #Get need start and end for the subSlice of the IndividualSlice
|
|
422 my @results_ordered = sort {$a->start <=> $b->start} grep {ref($_) eq 'Bio::EnsEMBL::Mapper::Coordinate'} @results;
|
|
423 $new_start = $results_ordered[0]->start();
|
|
424 $new_strand = $results_ordered[0]->strand() if (ref($results_ordered[0]) eq 'Bio::EnsEMBL::Mapper::Coordinate');
|
|
425 # $new_strand = $results_ordered[-1]->strand() if (ref($results_ordered[-1]) eq 'Bio::EnsEMBL::Mapper::Coordinate');
|
|
426 $new_end = $results_ordered[-1]->end(); #get last element of the array, the end of the slice
|
|
427
|
|
428 my $subSlice = $self->SUPER::sub_Slice($new_start,$new_end,$new_strand);
|
|
429 $subSlice->{'individual_name'} = $self->{'individual_name'};
|
|
430
|
|
431 my $new_alleles; #reference to an array that will contain the variationFeatures in the new subSlice
|
|
432 #update the VariationFeatures in the sub_Slice of the Individual
|
|
433 my %af;
|
|
434 my $new_allele_feature;
|
|
435 foreach my $alleleFeature (@{$self->{'alleleFeatures'}}){
|
|
436 $new_allele_feature = $alleleFeature->transfer($subSlice);
|
|
437 #only transfer the coordinates to the SubSlice that are within the boundaries
|
|
438 if ($new_allele_feature->start >= 1 && $new_allele_feature->end <= $subSlice->length){
|
|
439 push @{$new_alleles}, $new_allele_feature;
|
|
440 }
|
|
441 }
|
|
442 $subSlice->{'alleleFeatures'} = $new_alleles;
|
|
443 return $subSlice;
|
|
444
|
|
445 }
|
|
446
|
|
447 =head2 subseq
|
|
448
|
|
449 Arg [1] : int $startBasePair
|
|
450 relative to start of slice, which is 1.
|
|
451 Arg [2] : int $endBasePair
|
|
452 relative to start of slice.
|
|
453 Arg [3] : (optional) int $strand
|
|
454 The strand of the individual slice to obtain sequence from. Default
|
|
455 value is 1.
|
|
456 Description: returns string of dna sequence
|
|
457 Returntype : txt
|
|
458 Exceptions : end should be at least as big as start
|
|
459 strand must be set
|
|
460 Caller : general
|
|
461
|
|
462 =cut
|
|
463
|
|
464 sub subseq {
|
|
465 my ( $self, $start, $end, $strand ) = @_;
|
|
466
|
|
467 if ( $end+1 < $start ) {
|
|
468 throw("End coord + 1 is less than start coord");
|
|
469 }
|
|
470
|
|
471 # handle 'between' case for insertions
|
|
472 return '' if( $start == $end + 1);
|
|
473
|
|
474 $strand = 1 unless(defined $strand);
|
|
475
|
|
476 if ( $strand != -1 && $strand != 1 ) {
|
|
477 throw("Invalid strand [$strand] in call to Slice::subseq.");
|
|
478 }
|
|
479
|
|
480 my $subseq;
|
|
481 my $seq;
|
|
482 if($self->adaptor){
|
|
483 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
|
|
484 $subseq = ${$seqAdaptor->fetch_by_Slice_start_end_strand($self,$start,$end,$strand)}; #get the reference sequence for that slice
|
|
485 #apply all differences to the reference sequence
|
|
486 # sort edits in reverse order to remove complication of
|
|
487 # adjusting downstream edits
|
|
488 my @allele_features_ordered = sort {$b->start() <=> $a->start() || $b->end() <=> $a->end()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'});
|
|
489 my $af_start;
|
|
490 my $af_end;
|
|
491 foreach my $af (@allele_features_ordered){
|
|
492 if (($af->start - $start +1 > 0) && ($end - $af->end > 0)){
|
|
493 #save the current start and end of the alleleFeature before changing for apply_edit
|
|
494 $af_start = $af->start;
|
|
495 $af_end = $af->end;
|
|
496 #apply the difference if the feature is in the new slice
|
|
497 $af->start($af->start - $start +1);
|
|
498 $af->end($af->end - $start +1);
|
|
499 $af->apply_edit(\$subseq); #change, in the reference sequence, the af
|
|
500 #restore the initial values of alleleFeature start and end
|
|
501 $af->start($af_start);
|
|
502 $af->end($af_end);
|
|
503
|
|
504 }
|
|
505 }
|
|
506 }
|
|
507 else {
|
|
508 ## check for gap at the beginning and pad it with Ns
|
|
509 if ($start < 1) {
|
|
510 $subseq = "N" x (1 - $start);
|
|
511 $start = 1;
|
|
512 }
|
|
513 $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
|
|
514 ## check for gap at the end and pad it with Ns
|
|
515 if ($end > $self->length()) {
|
|
516 $subseq .= "N" x ($end - $self->length());
|
|
517 }
|
|
518 reverse_comp(\$subseq) if($strand == -1);
|
|
519 }
|
|
520 return $subseq;
|
|
521
|
|
522 }
|
|
523
|
|
524 =head2 get_all_Transcripts
|
|
525
|
|
526 Args : None
|
|
527 Example : @transcripts = @{$individualslice->get_all_Transcripts)};
|
|
528 Description: Gets all transcripts which overlap this Individual Slice. If you want to
|
|
529 specify a particular analysis or type, then you are better off
|
|
530 using get_all_Genes or get_all_Genes_by_type and iterating
|
|
531 through the transcripts of each gene.
|
|
532 Returntype : reference to a list of Bio::EnsEMBL::Transcripts
|
|
533 Exceptions : none
|
|
534 Caller : general
|
|
535
|
|
536 =cut
|
|
537
|
|
538 sub get_all_Transcripts {
|
|
539 my $self = shift;
|
|
540
|
|
541 my $transcripts = $self->SUPER::get_all_Transcripts(1);
|
|
542 $self->map_to_Individual($transcripts);
|
|
543
|
|
544 return $transcripts;
|
|
545 }
|
|
546
|
|
547
|
|
548 =head2 get_all_Exons
|
|
549
|
|
550 Arg [1] : (optional) string $dbtype
|
|
551 The dbtype of exons to obtain. This assumes that the db has
|
|
552 been added to the DBAdaptor under this name (using the
|
|
553 DBConnection::add_db_adaptor method).
|
|
554 Example : @exons = @{$individualSlice->get_all_Exons};
|
|
555 Description: Gets all exons which overlap this IndividualSlice. Note that these exons
|
|
556 will not be associated with any transcripts, so this may not
|
|
557 be terribly useful.
|
|
558 Returntype : reference to a list of Bio::EnsEMBL::Exons
|
|
559 Exceptions : none
|
|
560 Caller : general
|
|
561
|
|
562 =cut
|
|
563
|
|
564 sub get_all_Exons {
|
|
565 my $self = shift;
|
|
566 my $dbtype = shift;
|
|
567
|
|
568 my $exons = $self->SUPER::get_all_Exons($dbtype);
|
|
569 $self->map_to_Individual($exons); #map the exons to the Individual
|
|
570
|
|
571 return $exons;
|
|
572 }
|
|
573
|
|
574 =head2 get_all_Genes
|
|
575
|
|
576 Arg [1] : (optional) string $logic_name
|
|
577 The name of the analysis used to generate the genes to retrieve
|
|
578 Arg [2] : (optional) string $dbtype
|
|
579 The dbtype of genes to obtain. This assumes that the db has
|
|
580 been added to the DBAdaptor under this name (using the
|
|
581 DBConnection::add_db_adaptor method).
|
|
582 Example : @genes = @{$individualSlice->get_all_Genes};
|
|
583 Description: Retrieves all genes that overlap this slice.
|
|
584 Returntype : listref of Bio::EnsEMBL::Genes
|
|
585 Exceptions : none
|
|
586 Caller : none
|
|
587
|
|
588 =cut
|
|
589
|
|
590 sub get_all_Genes{
|
|
591 my ($self, $logic_name, $dbtype) = @_;
|
|
592
|
|
593 my $genes = $self->SUPER::get_all_Genes($logic_name, $dbtype, 1);
|
|
594
|
|
595 $self->map_to_Individual($genes);
|
|
596
|
|
597 foreach my $gene (@{$genes}){
|
|
598 $self->map_to_Individual($gene->get_all_Exons); #map the Exons to the Individual
|
|
599 $self->map_to_Individual($gene->get_all_Transcripts); #map the Transcripts to the Individual
|
|
600 }
|
|
601
|
|
602 return $genes;
|
|
603 }
|
|
604
|
|
605 =head2 map_to_Individual
|
|
606
|
|
607 Arg[1] : ref $features
|
|
608 Example : $individualSlice->map_to_Individual($exons);
|
|
609 Description : Gets the features from the Slice and maps it in the IndividualSlice, using the mapper
|
|
610 between Slice and IndividualSlice
|
|
611 ReturnType : None
|
|
612 Exceptions : None
|
|
613 Caller : general
|
|
614
|
|
615 =cut
|
|
616
|
|
617 sub map_to_Individual{
|
|
618 my $self = shift;
|
|
619 my $features = shift;
|
|
620
|
|
621 my $mapper = $self->mapper();
|
|
622 my (@results, @results_ordered, $new_start, $new_end, $new_strand);
|
|
623 #foreach of the transcripts, map them to the IndividualSlice and replace the Slice with the IndividualSlice
|
|
624 foreach my $feature (@{$features}){
|
|
625 $feature->slice($self); #replace the IndividualSlice as the Slice for this feature (the Slice plus the AlleleFeatures)
|
|
626 #map from the Slice to the Individual Slice
|
|
627 my @results = $mapper->map_coordinates('Slice',$feature->start,$feature->end,$feature->strand,'Slice');
|
|
628 #from the results, order them but filter out those that are not coordinates
|
|
629 @results_ordered = sort {$a->start <=> $b->start} grep {ref($_) eq 'Bio::EnsEMBL::Mapper::Coordinate'} @results;
|
|
630 $new_start = $results_ordered[0]->start();
|
|
631 $new_strand = $results_ordered[0]->strand();
|
|
632 $new_end = $results_ordered[-1]->end(); #get last element of the array, the end of the slice
|
|
633 $feature->start($new_start); #update new coordinates
|
|
634 $feature->end($new_end);
|
|
635 $feature->strand($new_strand);
|
|
636 }
|
|
637 }
|
|
638
|
|
639 sub alleleFeatures{
|
|
640 my $self = shift;
|
|
641 return $self->{'alleleFeatures'};
|
|
642 }
|
|
643
|
|
644 sub add_AlleleFeature{
|
|
645 my $self = shift;
|
|
646
|
|
647 if (@_){
|
|
648 if(!ref($_[0]) || !$_[0]->isa('Bio::EnsEMBL::Variation::AlleleFeature')) {
|
|
649 throw("Bio::EnsEMBL::Variation::AlleleFeature argument expected");
|
|
650 }
|
|
651 #add the alleleFeature to the individualSlice
|
|
652 push @{$self->{'alleleFeatures'}},shift;
|
|
653 }
|
|
654 }
|
|
655 1;
|