Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Variation/AlleleFeature.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 # Ensembl module for Bio::EnsEMBL::Variation::AlleleFeature | |
22 # | |
23 # Copyright (c) 2005 Ensembl | |
24 # | |
25 | |
26 | |
27 =head1 NAME | |
28 | |
29 Bio::EnsEMBL::Variation::AlleleFeature - A genomic position for an allele in a sample. | |
30 | |
31 =head1 SYNOPSIS | |
32 | |
33 # Allele feature representing a single nucleotide polymorphism | |
34 $af = Bio::EnsEMBL::Variation::AlleleFeature->new | |
35 (-start => 100, | |
36 -end => 100, | |
37 -strand => 1, | |
38 -slice => $slice, | |
39 -allele_string => 'A', | |
40 -variation_name => 'rs635421', | |
41 -variation => $v); | |
42 ... | |
43 | |
44 # an allele feature is like any other ensembl feature, can be | |
45 # transformed etc. | |
46 $af = $af->transform('supercontig'); | |
47 | |
48 print $af->start(), "-", $af->end(), '(', $af->strand(), ')', "\n"; | |
49 | |
50 print $af->name(), ":", $af->allele_string(); | |
51 | |
52 # Get the Variation object which this feature represents the genomic | |
53 # position of. If not already retrieved from the DB, this will be | |
54 # transparently lazy-loaded | |
55 my $v = $af->variation(); | |
56 | |
57 =head1 DESCRIPTION | |
58 | |
59 This is a class representing the genomic position of an allele in a sample | |
60 from the ensembl-variation database. The actual variation information is | |
61 represented by an associated Bio::EnsEMBL::Variation::Variation object. Some | |
62 of the information has been denormalized and is available on the feature for | |
63 speed purposes. An AlleleFeature behaves as any other Ensembl feature. | |
64 See B<Bio::EnsEMBL::Feature> and B<Bio::EnsEMBL::Variation::Variation>. | |
65 | |
66 =head1 METHODS | |
67 | |
68 =cut | |
69 | |
70 use strict; | |
71 use warnings; | |
72 | |
73 package Bio::EnsEMBL::Variation::AlleleFeature; | |
74 | |
75 use Bio::EnsEMBL::Feature; | |
76 use Bio::EnsEMBL::Utils::Exception qw(throw warning); | |
77 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
78 use Bio::EnsEMBL::Variation::Utils::Sequence qw(unambiguity_code); | |
79 use Bio::EnsEMBL::Variation::ConsequenceType; | |
80 | |
81 our @ISA = ('Bio::EnsEMBL::Feature'); | |
82 | |
83 =head2 new | |
84 | |
85 Arg [-ADAPTOR] : | |
86 see superclass constructor | |
87 | |
88 Arg [-START] : | |
89 see superclass constructor | |
90 Arg [-END] : | |
91 see superclass constructor | |
92 | |
93 Arg [-STRAND] : | |
94 see superclass constructor | |
95 | |
96 Arg [-SLICE] : | |
97 see superclass constructor | |
98 | |
99 Arg [-VARIATION_NAME] : | |
100 string - the name of the variation this feature is for (denormalisation | |
101 from Variation object). | |
102 | |
103 Arg [-SOURCE] : | |
104 string - the name of the source where the SNP comes from | |
105 | |
106 Arg [-VARIATION] : | |
107 Bio::EnsEMBL::Variation::Variation - the variation object associated with this feature. | |
108 | |
109 Arg [-VARIATION_ID] : | |
110 int - the internal id of the variation object associated with this | |
111 identifier. This may be provided instead of a variation object so that | |
112 the variation may be lazy-loaded from the database on demand. | |
113 | |
114 Arg [-SAMPLE_ID] : | |
115 int - the internal id of the sample object associated with this | |
116 identifier. This may be provided instead of the object so that | |
117 the population/individual may be lazy-loaded from the database on demand. | |
118 | |
119 Arg [-ALLELE_STRING] : | |
120 string - the allele for this AlleleFeature object. | |
121 | |
122 Arg [-OVERLAP_CONSEQUENCES] : | |
123 listref of Bio::EnsEMBL::Variation::OverlapConsequence objects. | |
124 | |
125 Example : | |
126 $af = Bio::EnsEMBL::Variation::AlleleFeature->new | |
127 (-start => 100, | |
128 -end => 100, | |
129 -strand => 1, | |
130 -slice => $slice, | |
131 -allele_string => 'A', | |
132 -consequence_type => 'NON_SYNONYMOUS_CODING', | |
133 -variation_name => 'rs635421', | |
134 -source => 'Celera', | |
135 -sample_id => $sample_id, | |
136 -variation => $v); | |
137 | |
138 Description: Constructor. Instantiates a new AlleleFeature object. | |
139 Returntype : Bio::EnsEMBL::Variation::AlleleFeature | |
140 Exceptions : none | |
141 Caller : general | |
142 Status : At Risk | |
143 | |
144 =cut | |
145 | |
sub new {
  # Constructor. The full argument list is first handed to the superclass
  # constructor, then the AlleleFeature-specific named arguments are pulled
  # out with rearrange() and stored on the object.
  #
  # Named arguments read here: -ALLELE_STRING, -OVERLAP_CONSEQUENCES,
  # -VARIATION_NAME, -VARIATION, -VARIATION_ID, -SAMPLE_ID, -SOURCE.
  # Returns the new object.
  my $caller = shift;
  my $class = ref($caller) || $caller;

  my $self = $class->SUPER::new(@_);

  # The variable list must line up one-to-one with the key list given to
  # rearrange(): an extra variable in the middle shifts every later value by
  # one slot, which would store SOURCE as the sample id and leave the source
  # undefined.
  my ($allele, $overlap_consequences, $var_name, $variation, $variation_id,
      $sample_id, $source) =
    rearrange([qw(ALLELE_STRING OVERLAP_CONSEQUENCES VARIATION_NAME
                  VARIATION VARIATION_ID SAMPLE_ID SOURCE)], @_);

  $self->{'allele_string'}        = $allele;
  $self->{'overlap_consequences'} = $overlap_consequences;
  $self->{'variation_name'}       = $var_name;
  $self->{'variation'}            = $variation;
  # The two ids are kept under private keys; variation() and individual()
  # use them to lazy-load the corresponding objects.
  $self->{'_variation_id'}        = $variation_id;
  $self->{'_sample_id'}           = $sample_id;
  $self->{'source'}               = $source;

  return $self;
}
165 | |
166 | |
167 | |
sub new_fast {
  # Lightweight constructor: blesses the caller-supplied hash reference
  # straight into the class. No argument processing and no copying is done,
  # so the returned object IS the hash that was passed in.
  my ($class, $hashref) = @_;
  return bless($hashref, $class);
}
173 | |
174 | |
175 =head2 allele_string | |
176 | |
177 Arg [1] : string $newval (optional) | |
178 The new value to set the allele attribute to | |
179 Example : $allele = $obj->allele_string() | |
180 Description: Getter/Setter for the allele attribute. | |
181 Returntype : string | |
182 Exceptions : none | |
183 Caller : general | |
184 Status : At Risk | |
185 | |
186 =cut | |
187 | |
sub allele_string {
  # Getter/setter for the allele.
  #   With an argument: stores it and returns the stored value.
  #   Without: returns the stored string untouched for half genotypes,
  #   otherwise the characters returned by unambiguity_code() joined by '|'.
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'allele_string'} = $new_value[0];
    return $self->{'allele_string'};
  }

  my $stored = $self->{'allele_string'};

  # for half genotypes
  return $stored if $self->{'_half_genotype'};

  # for heterozygous alleles; unambiguity_code() comes from
  # Bio::EnsEMBL::Variation::Utils::Sequence (presumably it expands an
  # ambiguity code into its component bases - confirm against that module)
  my @bases = split(//, unambiguity_code($stored));
  return join('|', @bases);
}
195 | |
196 | |
197 | |
198 =head2 consequence_type | |
199 | |
200 Arg [1] : (optional) String $term_type | |
201 Description: Get a list of all the unique consequence terms of this | |
202 AlleleFeature. By default returns Ensembl display terms | |
203 (e.g. 'NON_SYNONYMOUS_CODING'). $term_type can also be 'label' | |
204 (e.g. 'Non-synonymous coding'), 'SO' (Sequence Ontology, e.g. | |
205 'non_synonymous_codon') or 'NCBI' (e.g. 'missense') | |
206 Returntype : listref of strings | |
207 Exceptions : none | |
208 Status : At Risk | |
209 | |
210 =cut | |
211 | |
sub consequence_type {
  # Returns a listref of consequence terms, one per OverlapConsequence of
  # this AlleleFeature (or ['SARA'] when _is_sara() is true).
  #
  # Arg [1] (optional): $term_type. The accessor called on each
  #   OverlapConsequence is 'label' for 'label', otherwise "${term_type}_term".
  #   When no term type is given 'SO' is used, and when the most severe
  #   consequence cannot do the requested method 'SO_term' is used instead.
  #
  # Only the default (no $term_type) answer is cached in
  # $self->{consequence_type}. A call with an explicit term type is always
  # computed fresh and never replaces that cache, so a later default call
  # cannot return terms from a different vocabulary.
  my $self = shift;
  my $term_type = shift;

  return ['SARA'] if $self->_is_sara;

  my $use_cache = !defined($term_type);
  if ($use_cache && $self->{consequence_type}) {
    return $self->{consequence_type};
  }

  my $type = $term_type || 'SO';
  my $method_name = $type . ($type eq 'label' ? '' : '_term');

  # There may be no consequences at all, in which case there is nothing to
  # probe with can(); fall back to the default accessor name.
  my $most_severe = $self->most_severe_OverlapConsequence;
  $method_name = 'SO_term'
    unless $most_severe && $most_severe->can($method_name);

  # work out the terms from the OverlapConsequence objects
  my @terms = map { $_->$method_name } @{ $self->get_all_OverlapConsequences || [] };

  return \@terms unless $use_cache;

  $self->{consequence_type} = \@terms;
  return $self->{consequence_type};
}
237 | |
238 | |
239 =head2 get_all_OverlapConsequences | |
240 | |
241 Description: Get a list of all the unique OverlapConsequences of this AlleleFeature | |
242 Returntype : listref of Bio::EnsEMBL::Variation::OverlapConsequence objects | |
243 Exceptions : none | |
244 Status : At Risk | |
245 | |
246 =cut | |
247 | |
sub get_all_OverlapConsequences {
  # Plain read accessor: hands back whatever is stored under the
  # 'overlap_consequences' key (set by the constructor), without copying.
  my ($self) = @_;
  my $consequences = $self->{overlap_consequences};
  return $consequences;
}
252 | |
253 | |
254 =head2 most_severe_OverlapConsequence | |
255 | |
256 Description: Get the OverlapConsequence considered (by Ensembl) to be the most severe | |
257 consequence of all the alleles of this AlleleFeature | |
258 Returntype : Bio::EnsEMBL::Variation::OverlapConsequence | |
259 Exceptions : none | |
260 Status : At Risk | |
261 | |
262 =cut | |
263 | |
sub most_severe_OverlapConsequence {
  # Returns the OverlapConsequence with the numerically lowest rank() among
  # get_all_OverlapConsequences(); on ties the earliest one in the list wins.
  # The answer is memoised in $self->{_most_severe_consequence}.
  my ($self) = @_;

  # Serve the memoised answer when there is one.
  return $self->{_most_severe_consequence}
    if $self->{_most_severe_consequence};

  my $best;
  foreach my $candidate (@{ $self->get_all_OverlapConsequences }) {
    # Seed with the first element, then keep whichever ranks lower.
    $best = $candidate unless $best;
    $best = $candidate if $candidate->rank < $best->rank;
  }

  $self->{_most_severe_consequence} = $best;
  return $self->{_most_severe_consequence};
}
283 | |
284 =head2 display_consequence | |
285 | |
286 Arg [1] : (optional) String $term_type | |
287 Description: Get the term for the most severe consequence of this | |
288 AlleleFeature. By default returns Ensembl display terms | |
289 (e.g. 'NON_SYNONYMOUS_CODING'). $term_type can also be 'label' | |
290 (e.g. 'Non-synonymous coding'), 'SO' (Sequence Ontology, e.g. | |
291 'non_synonymous_codon') or 'NCBI' (e.g. 'missense') | |
292 Returntype : string | |
293 Exceptions : none | |
294 Caller : webteam | |
295 Status : At Risk | |
296 | |
297 =cut | |
298 | |
sub display_consequence {
  # Returns a single term describing the most severe consequence, or the
  # string 'SARA' when _is_sara() is true.
  #
  # Arg [1] (optional): $term_type, defaulting to 'SO'. 'label' calls the
  #   label() accessor; anything else calls "${term_type}_term", falling back
  #   to SO_term() when the consequence object cannot do that method.
  my ($self, $term_type) = @_;

  return 'SARA' if $self->_is_sara;

  $term_type ||= 'SO';
  my $suffix = ($term_type eq 'label') ? '' : '_term';
  my $method_name = $term_type . $suffix;

  if (!$self->most_severe_OverlapConsequence->can($method_name)) {
    $method_name = 'SO_term';
  }

  return $self->most_severe_OverlapConsequence->$method_name;
}
314 | |
315 | |
316 =head2 get_all_TranscriptVariations | |
317 | |
318 Arg [1] : (optional) listref of Bio::EnsEMBL::Transcript objects | |
319 Example : $af->get_all_TranscriptVariations; | |
320 Description : Get all the TranscriptVariations associated with this AlleleFeature. | |
321 If the optional list of Transcripts is supplied, get only TranscriptVariations | |
322 associated with those Transcripts. | |
323 Returntype : listref of Bio::EnsEMBL::Variation::TranscriptVariation objects | |
324 Exceptions : Thrown on wrong argument type | |
325 Caller : general | |
326 Status : At Risk | |
327 | |
328 =cut | |
329 | |
sub get_all_TranscriptVariations {
  # Fetches the TranscriptVariations from the associated variation feature
  # (optionally restricted to the transcripts in $trs), adjusts each one to
  # this genotype in place, and returns the same listref.
  my ($self, $trs) = @_;

  my $tvs = $self->variation_feature->get_all_TranscriptVariations($trs);

  # convert the TV to a SARA one if this is a SARA genotype
  if ($self->_is_sara) {
    for my $tv (@$tvs) {
      $tv->_convert_to_sara;
    }
    return $tvs;
  }

  # otherwise we need to rearrange the TranscriptVariationAlleles based
  # on the alleles of this genotype; the allele string may be separated
  # by '|', '/' or '\'
  my @genotype_alleles = split /\||\/|\\/, $self->allele_string;
  my %alleles;
  @alleles{@genotype_alleles} = (1) x @genotype_alleles;

  for my $tv (@$tvs) {
    $tv->_rearrange_alleles(\%alleles);
  }

  return $tvs;
}
352 | |
353 =head2 variation_name | |
354 | |
355 Arg [1] : string $newval (optional) | |
356 The new value to set the variation_name attribute to | |
357 Example : $variation_name = $obj->variation_name() | |
358 Description: Getter/Setter for the variation_name attribute. This is the | |
359 name of the variation associated with this feature. | |
360 Returntype : string | |
361 Exceptions : none | |
362 Caller : general | |
363 Status : At Risk | |
364 | |
365 =cut | |
366 | |
sub variation_name {
  # Getter/setter for the name of the variation this feature belongs to.
  # Any argument (including undef) replaces the stored value; the current
  # value is returned either way.
  my ($self, @new_value) = @_;

  $self->{'variation_name'} = $new_value[0] if @new_value;

  return $self->{'variation_name'};
}
372 | |
373 =head2 variation | |
374 | |
375 Arg [1] : (optional) Bio::EnsEMBL::Variation::Variation $variation | |
376 Example : $v = $af->variation(); | |
377 Description: Getter/Setter for the variation associated with this feature. | |
378 If not set, and this AlleleFeature has an associated adaptor | |
379 an attempt will be made to lazy-load the variation from the | |
380 database. | |
381 Returntype : Bio::EnsEMBL::Variation::Variation | |
382 Exceptions : throw on incorrect argument | |
383 Caller : general | |
384 Status : At Risk | |
385 | |
386 =cut | |
387 | |
sub variation {
  # Getter/setter for the Variation object of this feature.
  #   Setter: the argument must be an object that isa
  #     Bio::EnsEMBL::Variation::Variation, otherwise throw() is called.
  #   Getter: when nothing is stored yet but both an adaptor and a
  #     '_variation_id' are present, the object is fetched through the
  #     VariationAdaptor and remembered.
  # Returns the stored variation (possibly undef).
  my $self = shift;

  if (@_) {
    my $candidate = shift;

    # isa() is called inside eval so that an unblessed reference (e.g. a
    # plain hashref) is reported through throw() like any other bad argument
    # rather than dying with Perl's "unblessed reference" error.
    my $is_variation = do {
      local $@;
      ref($candidate)
        && eval { $candidate->isa('Bio::EnsEMBL::Variation::Variation') };
    };
    if (!$is_variation) {
      throw("Bio::EnsEMBL::Variation::Variation argument expected");
    }
    $self->{'variation'} = $candidate;
  }
  elsif (!defined($self->{'variation'}) && $self->{'adaptor'} &&
         defined($self->{'_variation_id'})) {
    # lazy-load from database on demand
    my $va = $self->{'adaptor'}->db()->get_VariationAdaptor();
    $self->{'variation'} = $va->fetch_by_dbID($self->{'_variation_id'});
  }

  return $self->{'variation'};
}
406 | |
407 =head2 variation_feature | |
408 | |
409 Arg [1] : (optional) Bio::EnsEMBL::Variation::VariationFeature $vf | |
410 Example : $vf = $af->variation_feature(); | |
411 Description: Getter/Setter for the variation feature associated with this feature. | |
412 If not set, and this AlleleFeature has an associated adaptor | |
413 an attempt will be made to lazy-load the variation from the | |
414 database. | |
415 Returntype : Bio::EnsEMBL::Variation::VariationFeature | |
416 Exceptions : throw on incorrect argument | |
417 Caller : general | |
418 Status : At Risk | |
419 | |
420 =cut | |
421 | |
sub variation_feature {
  # Getter/setter for the VariationFeature of this feature.
  #   Setter: the argument must be an object that isa
  #     Bio::EnsEMBL::Variation::VariationFeature, otherwise throw() is called.
  #   Getter: when nothing is stored yet but both an adaptor and a
  #     '_variation_feature_id' are present, the object is fetched through
  #     the VariationFeatureAdaptor and remembered.
  # Returns the stored variation feature (possibly undef).
  my $self = shift;

  if (@_) {
    my $candidate = shift;

    # isa() is called inside eval so that an unblessed reference (e.g. a
    # plain hashref) is reported through throw() like any other bad argument
    # rather than dying with Perl's "unblessed reference" error.
    my $is_variation_feature = do {
      local $@;
      ref($candidate)
        && eval { $candidate->isa('Bio::EnsEMBL::Variation::VariationFeature') };
    };
    if (!$is_variation_feature) {
      throw("Bio::EnsEMBL::Variation::VariationFeature argument expected");
    }
    $self->{'variation_feature'} = $candidate;
  }
  elsif (!defined($self->{'variation_feature'}) && $self->{'adaptor'} &&
         defined($self->{'_variation_feature_id'})) {
    # lazy-load from database on demand
    my $va = $self->{'adaptor'}->db()->get_VariationFeatureAdaptor();
    $self->{'variation_feature'} = $va->fetch_by_dbID($self->{'_variation_feature_id'});
  }

  return $self->{'variation_feature'};
}
440 | |
441 =head2 individual | |
442 | |
443 Arg [1] : (optional) Bio::EnsEMBL::Variation::Individual $individual | |
444 Example : $p = $af->individual(); | |
445 Description: Getter/Setter for the individual associated with this feature. | |
446 If not set, and this AlleleFeature has an associated adaptor | |
447 an attempt will be made to lazy-load the individual from the | |
448 database. | |
449 Returntype : Bio::EnsEMBL::Variation::Individual | |
450 Exceptions : throw on incorrect argument | |
451 Caller : general | |
452 Status : At Risk | |
453 | |
454 =cut | |
455 | |
sub individual {
  # Getter/setter for the Individual of this feature.
  #   Setter: the argument must be an object that isa
  #     Bio::EnsEMBL::Variation::Individual, otherwise throw() is called.
  #   Getter: when nothing is stored yet but both an adaptor and a
  #     '_sample_id' are present, the object is fetched through the
  #     IndividualAdaptor and remembered; a warning is issued when that
  #     fetch returns nothing.
  # Returns the stored individual (possibly undef).
  my $self = shift;

  if (@_) {
    my $candidate = shift;

    # isa() is called inside eval so that an unblessed reference (e.g. a
    # plain hashref) is reported through throw() like any other bad argument
    # rather than dying with Perl's "unblessed reference" error.
    my $is_individual = do {
      local $@;
      ref($candidate)
        && eval { $candidate->isa('Bio::EnsEMBL::Variation::Individual') };
    };
    if (!$is_individual) {
      throw("Bio::EnsEMBL::Variation::Individual argument expected");
    }
    $self->{'individual'} = $candidate;
  }
  elsif (!defined($self->{'individual'}) && $self->{'adaptor'} &&
         defined($self->{'_sample_id'})) {
    # lazy-load from database on demand
    my $ia = $self->{'adaptor'}->db()->get_IndividualAdaptor();
    $self->{'individual'} = $ia->fetch_by_dbID($self->{'_sample_id'});
    if (!defined $self->{'individual'}) {
      warning("AlleleFeature attached to Strain, not Individual");
    }
  }

  return $self->{'individual'};
}
477 | |
478 | |
479 =head2 apply_edit | |
480 | |
481 Arg [1] : reference to string $seqref | |
482 Arg [2] : int $start of the seq_ref | |
483 Example : $sequence = 'ACTGAATATTTAAGGCA'; | |
484 $af->apply_edit(\$sequence,$start); | |
485 print $sequence, "\n"; | |
486 Description: Applies this AlleleFeature directly to a sequence which is | |
487 passed by reference. | |
488 If either the start or end of this AlleleFeature are not defined | |
489 this function will not do anything to the passed sequence. | |
490 Returntype : reference to the same sequence that was passed in | |
491 Exceptions : none | |
492 Caller : Slice | |
493 Status : At Risk | |
494 | |
495 =cut | |
496 | |
sub apply_edit {
  # Overwrites, in the string referenced by $seqref, the region covered by
  # this feature with its allele (all '-' characters removed first), and
  # returns the same reference.
  #
  # The region starts at offset start-1 of the string and is $self->length
  # characters long. Only the sequence reference is read from the argument
  # list; no other argument is consulted by this code.
  # Throws unless $seqref is a reference to a plain scalar. Leaves the
  # sequence untouched when start or end is undefined.
  my ($self, $seqref) = @_;

  throw("Reference to scalar argument expected")
    if ref($seqref) ne 'SCALAR';

  my $has_coords = defined($self->{'start'}) && defined($self->{'end'});
  return $seqref unless $has_coords;

  my $span = $self->length;

  # '-' stands for "no base": a pure deletion becomes the empty string
  my $replacement = $self->{'allele_string'};
  $replacement =~ s/\-//g;

  my $offset = $self->{'start'} - 1;
  substr(${$seqref}, $offset, $span) = $replacement;

  return $seqref;
}
520 | |
521 =head2 length_diff | |
522 | |
523 Arg [1] : none | |
524 Example : my $diff = $af->length_diff(); | |
525 Description: Returns the difference in length caused by applying this | |
526 AlleleFeature to a sequence. This may be negative (deletion), | |
527 positive (insertion) or 0 (replacement). | |
528 If either start or end are not defined 0 is returned. | |
529 Returntype : int | |
530 Exceptions : none | |
531 Caller : general | |
532 Status : At Risk | |
533 | |
534 =cut | |
535 | |
sub length_diff {
  # Returns how much a sequence grows (positive) or shrinks (negative) when
  # this feature is applied: allele length minus the covered span, where an
  # allele of exactly '-' counts as length zero. Returns 0 when start or end
  # is undefined.
  my ($self) = @_;

  my ($start, $end) = @{$self}{qw(start end)};
  return 0 unless defined($end) && defined($start);

  my $span   = $end - $start + 1;
  my $allele = $self->{'allele_string'};

  # CORE:: makes explicit that the string-length builtin is meant, not the
  # length() method this class also defines.
  return ($allele ne '-')
    ? CORE::length($allele) - $span
    : 0 - $span;
}
546 | |
547 | |
sub length {
  # Number of positions covered by the feature, inclusive: end - start + 1.
  my ($self) = @_;
  my ($first, $last) = @{$self}{ 'start', 'end' };
  return $last - $first + 1;
}
552 | |
553 =head2 source | |
554 | |
555 Arg [1] : string $source (optional) | |
556 The new value to set the source attribute to | |
557 Example : $source = $vf->source() | |
558 Description: Getter/Setter for the source attribute | |
559 Returntype : string | |
560 Exceptions : none | |
561 Caller : general | |
562 Status : At Risk | |
563 | |
564 =cut | |
565 | |
sub source {
  # Getter/setter for the name of the source of this feature. Any argument
  # (including undef) replaces the stored value; the current value is
  # returned either way.
  my ($self, @new_value) = @_;

  $self->{'source'} = $new_value[0] if @new_value;

  return $self->{'source'};
}
571 | |
572 =head2 ref_allele_string | |
573 | |
574 Args : None | |
575 Example : $allele = $obj->ref_allele_string() | |
576 Description: Getter for the reference allele. | |
577 Returntype : string | |
578 Exceptions : none | |
579 Caller : general | |
580 Status : At Risk | |
581 | |
582 =cut | |
583 | |
sub ref_allele_string {
  # Returns the reference allele for the region of this feature.
  #   - Slice of class exactly Bio::EnsEMBL::Slice: the feature's own seq().
  #   - Any other slice class (a Strain or IndividualSlice, per the original
  #     comment): the slice's ref_subseq() for start/end/strand, or '-' when
  #     that returns a false value.
  my ($self) = @_;

  my $on_reference_slice = ref($self->slice) eq 'Bio::EnsEMBL::Slice';

  my $reference_allele;
  if (!$on_reference_slice) {
    # not the reference slice: ask the slice for the underlying
    # reference sequence
    my @region = ($self->start, $self->end, $self->strand);
    $reference_allele = $self->slice->ref_subseq(@region) || '-';
  }
  else {
    # we already have the reference slice, so just get the sequence
    $reference_allele = $self->seq;
  }

  return $reference_allele;
}
599 | |
600 =head2 get_all_sources | |
601 | |
602 Args : none | |
603 Example : my @sources = @{$af->get_all_sources()}; | |
604 Description : returns a list of all the sources for this | |
605 AlleleFeature | |
606 ReturnType : reference to list of strings | |
607 Exceptions : none | |
608 Caller : general | |
609 Status : At Risk | |
610 : Variation database is under development. | |
611 =cut | |
612 | |
sub get_all_sources {
  # Returns a listref of distinct source names: the synonym sources reported
  # by the adaptor plus this feature's own source. Without an adaptor an
  # empty listref is returned. The order of the names is not defined.
  my ($self) = @_;

  my @sources;
  my $adaptor = $self->{'adaptor'};
  return \@sources unless $adaptor;

  # hash keys de-duplicate the names
  my %seen;
  my @synonym_sources = @{ $adaptor->get_all_synonym_sources($self) };
  $seen{$_}++ for @synonym_sources;
  $seen{ $self->source }++;

  @sources = keys %seen;
  return \@sources;
}
626 | |
sub _is_sara {
  # Private. Returns 1 when every allele of this genotype occurs (case-
  # insensitively) in the reference allele string, 0 otherwise. The answer
  # is computed once and cached in $self->{_is_sara}.
  #
  # The allele string is split on '/', '|' and '\'.
  my $self = shift;

  if (!defined($self->{_is_sara})) {
    my $allele_string = $self->allele_string;
    my $ref = lc $self->ref_allele_string;

    my $is_sara = 1;

    foreach my $allele (split /\/|\||\\/, $allele_string) {
      # Literal substring test via index() instead of interpolating the
      # allele into a regex: allele text containing regex metacharacters
      # ('.', '+', '*', '(' ...) would otherwise match the wrong thing or
      # abort pattern compilation, and an empty allele would invoke Perl's
      # "reuse the last successful pattern" rule.
      # NOTE(review): this keeps the existing substring semantics (allele
      # 'A' is found in reference 'AT'); confirm whether full equality was
      # intended.
      $is_sara = 0 if index($ref, lc $allele) < 0;
    }

    $self->{_is_sara} = $is_sara;
  }

  return $self->{_is_sara};
}
645 | |
646 1; |