comparison variant_effect_predictor/Bio/EnsEMBL/Variation/AlleleFeature.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 # Ensembl module for Bio::EnsEMBL::Variation::AlleleFeature
22 #
23 # Copyright (c) 2005 Ensembl
24 #
25
26
27 =head1 NAME
28
29 Bio::EnsEMBL::Variation::AlleleFeature - A genomic position for an allele in a sample.
30
31 =head1 SYNOPSIS
32
33 # Allele feature representing a single nucleotide polymorphism
34 $af = Bio::EnsEMBL::Variation::AlleleFeature->new
35 (-start => 100,
36 -end => 100,
37 -strand => 1,
38 -slice => $slice,
39 -allele_string => 'A',
40 -variation_name => 'rs635421',
41 -variation => $v);
42 ...
43
44 # an allele feature is like any other Ensembl feature and can be
45 # transformed etc.
46 $af = $af->transform('supercontig');
47
48 print $af->start(), "-", $af->end(), '(', $af->strand(), ')', "\n";
49
50 print $af->variation_name(), ":", $af->allele_string();
51
52 # Get the Variation object which this feature represents the genomic
53 # position of. If not already retrieved from the DB, this will be
54 # transparently lazy-loaded
55 my $v = $af->variation();
56
57 =head1 DESCRIPTION
58
59 This is a class representing the genomic position of an allele in a sample
60 from the ensembl-variation database. The actual variation information is
61 represented by an associated Bio::EnsEMBL::Variation::Variation object. Some
62 of the information has been denormalized and is available on the feature for
63 speed purposes. An AlleleFeature behaves as any other Ensembl feature.
64 See B<Bio::EnsEMBL::Feature> and B<Bio::EnsEMBL::Variation::Variation>.
65
66 =head1 METHODS
67
68 =cut
69
70 use strict;
71 use warnings;
72
73 package Bio::EnsEMBL::Variation::AlleleFeature;
74
75 use Bio::EnsEMBL::Feature;
76 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
77 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
78 use Bio::EnsEMBL::Variation::Utils::Sequence qw(unambiguity_code);
79 use Bio::EnsEMBL::Variation::ConsequenceType;
80
81 our @ISA = ('Bio::EnsEMBL::Feature');
82
83 =head2 new
84
85 Arg [-ADAPTOR] :
86 see superclass constructor
87
88 Arg [-START] :
89 see superclass constructor
90 Arg [-END] :
91 see superclass constructor
92
93 Arg [-STRAND] :
94 see superclass constructor
95
96 Arg [-SLICE] :
97 see superclass constructor
98
99 Arg [-VARIATION_NAME] :
100 string - the name of the variation this feature is for (denormalisation
101 from Variation object).
102
103 Arg [-SOURCE] :
104 string - the name of the source where the SNP comes from
105
106 Arg [-VARIATION] :
107 Bio::EnsEMBL::Variation::Variation - the variation object associated with this feature.
108
109 Arg [-VARIATION_ID] :
110 int - the internal id of the variation object associated with this
111 identifier. This may be provided instead of a variation object so that
112 the variation may be lazy-loaded from the database on demand.
113
114 Arg [-SAMPLE_ID] :
115 int - the internal id of the sample object associated with this
116 identifier. This may be provided instead of the object so that
117 the population/individual may be lazy-loaded from the database on demand.
118
119 Arg [-ALLELE_STRING] :
120 string - the allele for this AlleleFeature object.
121
122 Arg [-OVERLAP_CONSEQUENCES] :
123 listref of Bio::EnsEMBL::Variation::OverlapConsequence objects.
124
125 Example :
126 $af = Bio::EnsEMBL::Variation::AlleleFeature->new
127 (-start => 100,
128 -end => 100,
129 -strand => 1,
130 -slice => $slice,
131 -allele_string => 'A',
132 -consequence_type => 'NON_SYNONYMOUS_CODING',
133 -variation_name => 'rs635421',
134 -source => 'Celera',
135 -sample_id => $sample_id,
136 -variation => $v);
137
138 Description: Constructor. Instantiates a new AlleleFeature object.
139 Returntype : Bio::EnsEMBL::Variation::AlleleFeature
140 Exceptions : none
141 Caller : general
142 Status : At Risk
143
144 =cut
145
sub new {
    my $caller = shift;
    my $class  = ref($caller) || $caller;

    # The generic feature arguments (-START, -END, -STRAND, -SLICE, -ADAPTOR)
    # are documented as being handled by the superclass constructor.
    my $self = $class->SUPER::new(@_);

    # rearrange() hands the values back in the order of the keys listed, so
    # the variable list has to mirror the key list one-to-one.  A stray extra
    # variable here previously shifted -SAMPLE_ID and -SOURCE by one slot,
    # leaving 'source' permanently undefined and '_sample_id' holding the
    # source name.
    my (
        $allele,       $overlap_consequences, $var_name, $variation,
        $variation_id, $sample_id,            $source
      )
      = rearrange(
        [
            qw(ALLELE_STRING OVERLAP_CONSEQUENCES VARIATION_NAME
              VARIATION VARIATION_ID SAMPLE_ID SOURCE)
        ],
        @_
      );

    $self->{'allele_string'}        = $allele;
    $self->{'overlap_consequences'} = $overlap_consequences;
    $self->{'variation_name'}       = $var_name;
    $self->{'variation'}            = $variation;

    # Internal ids are kept so that the related objects can be lazy-loaded.
    $self->{'_variation_id'} = $variation_id;
    $self->{'_sample_id'}    = $sample_id;
    $self->{'source'}        = $source;

    return $self;
}
165
166
167
# Lightweight constructor: blesses the supplied hash reference into the given
# class as-is (no argument parsing, no copying) and returns that reference.
sub new_fast {
    my ($class, $hashref) = @_;
    return bless($hashref, $class);
}
173
174
175 =head2 allele_string
176
177 Arg [1] : string $newval (optional)
178 The new value to set the allele attribute to
179 Example : $allele = $obj->allele_string()
180 Description: Getter/Setter for the allele attribute.
181 Returntype : string
182 Exceptions : none
183 Caller : general
184 Status : At Risk
185
186 =cut
187
sub allele_string {
    my $self = shift;

    # Setter: store the new value and hand it straight back.
    if (@_) {
        $self->{'allele_string'} = shift;
        return $self->{'allele_string'};
    }

    # Half genotypes are returned exactly as stored.
    if ($self->{'_half_genotype'}) {
        return $self->{'allele_string'};
    }

    # Otherwise (heterozygous alleles) the stored code is passed through
    # unambiguity_code() and the resulting characters are joined with '|'.
    my $expanded = unambiguity_code($self->{'allele_string'});
    my @bases    = split(//, $expanded);
    return join('|', @bases);
}
195
196
197
198 =head2 consequence_type
199
200 Arg [1] : (optional) String $term_type
201 Description: Get a list of all the unique consequence terms of this
202 AlleleFeature. By default returns Sequence Ontology (SO) terms
203 (e.g. 'non_synonymous_codon'). $term_type can also be 'display'
204 (Ensembl display terms, e.g. 'NON_SYNONYMOUS_CODING'), 'label'
205 (e.g. 'Non-synonymous coding') or 'NCBI' (e.g. 'missense'); unsupported term types fall back to SO terms.
206 Returntype : listref of strings
207 Exceptions : none
208 Status : At Risk
209
210 =cut
211
# Returns a listref of consequence terms for this AlleleFeature.
#
#   $term_type (optional) - 'SO' (the default), 'label', or any other prefix
#                           for which the consequence objects provide a
#                           "<prefix>_term" method; anything they cannot
#                           handle falls back to 'SO_term'.
#
# Returns ['SARA'] when _is_sara() is true, and an empty listref when there
# are no overlap consequences.
sub consequence_type {
    my $self      = shift;
    my $term_type = shift;

    return ['SARA'] if $self->_is_sara;

    # Only the default (no argument) result is cached.  The single
    # 'consequence_type' slot is not keyed by term type, so caching the result
    # of an explicit request would make a later default call return terms of
    # the wrong type.
    my $use_cache = !defined $term_type;
    return $self->{consequence_type}
      if $use_cache && $self->{consequence_type};

    $term_type ||= 'SO';
    my $method_name = $term_type . ($term_type eq 'label' ? '' : '_term');

    # There may be no consequence at all; never call ->can on undef.
    my $most_severe = $self->most_severe_OverlapConsequence;
    $method_name = 'SO_term'
      unless $most_severe && $most_severe->can($method_name);

    # work out the terms from the OverlapConsequence objects
    my @terms =
      map { $_->$method_name } @{ $self->get_all_OverlapConsequences || [] };

    $self->{consequence_type} = \@terms if $use_cache;

    return \@terms;
}
237
238
239 =head2 get_all_OverlapConsequences
240
241 Description: Get a list of all the unique OverlapConsequences of this AlleleFeature
242 Returntype : listref of Bio::EnsEMBL::Variation::OverlapConsequence objects
243 Exceptions : none
244 Status : At Risk
245
246 =cut
247
# Plain accessor: returns whatever is stored under 'overlap_consequences'.
sub get_all_OverlapConsequences {
    my ($self) = @_;
    my $consequences = $self->{overlap_consequences};
    return $consequences;
}
252
253
254 =head2 most_severe_OverlapConsequence
255
256 Description: Get the OverlapConsequence considered (by Ensembl) to be the most severe
257 consequence of all the alleles of this AlleleFeature
258 Returntype : Bio::EnsEMBL::Variation::OverlapConsequence
259 Exceptions : none
260 Status : At Risk
261
262 =cut
263
# Returns the consequence with the numerically lowest rank() among
# get_all_OverlapConsequences(), caching it in '_most_severe_consequence'.
# On equal ranks the earliest element in the list is kept.
sub most_severe_OverlapConsequence {
    my ($self) = @_;

    # Serve the cached answer when there is one.
    return $self->{_most_severe_consequence}
      if $self->{_most_severe_consequence};

    my $best;
    foreach my $candidate (@{ $self->get_all_OverlapConsequences }) {
        $best = $candidate unless $best;
        next unless $candidate->rank < $best->rank;
        $best = $candidate;
    }

    $self->{_most_severe_consequence} = $best;

    return $self->{_most_severe_consequence};
}
283
284 =head2 display_consequence
285
286 Arg [1] : (optional) String $term_type
287 Description: Get the term for the most severe consequence of this
288 AlleleFeature. By default returns Sequence Ontology (SO) terms
289 (e.g. 'non_synonymous_codon'). $term_type can also be 'display'
290 (Ensembl display terms, e.g. 'NON_SYNONYMOUS_CODING'), 'label'
291 (e.g. 'Non-synonymous coding') or 'NCBI' (e.g. 'missense'); unsupported term types fall back to SO terms.
292 Returntype : string
293 Exceptions : none
294 Caller : webteam
295 Status : At Risk
296
297 =cut
298
# Returns the term describing the most severe consequence of this feature.
#
#   $term_type (optional) - 'SO' (the default), 'label', or any other prefix
#                           for which the consequence object provides a
#                           "<prefix>_term" method; anything it cannot
#                           handle falls back to 'SO_term'.
#
# Returns 'SARA' when _is_sara() is true, and undef when the feature has no
# overlap consequence at all.
sub display_consequence {
    my $self      = shift;
    my $term_type = shift;

    return 'SARA' if $self->_is_sara;

    # Without any consequence there is nothing to describe.  Previously this
    # case died on an undefined-value method call.  An explicit undef (rather
    # than a bare return) keeps this getter single-valued in list context,
    # like the hash-slot getters in this class.
    my $most_severe = $self->most_severe_OverlapConsequence;
    return undef unless $most_severe;

    $term_type ||= 'SO';
    my $method_name = $term_type . ($term_type eq 'label' ? '' : '_term');
    $method_name = 'SO_term' unless $most_severe->can($method_name);

    return $most_severe->$method_name;
}
314
315
316 =head2 get_all_TranscriptVariations
317
318 Arg [1] : (optional) listref of Bio::EnsEMBL::Transcript objects
319 Example : $af->get_all_TranscriptVariations;
320 Description : Get all the TranscriptVariations associated with this AlleleFeature.
321 If the optional list of Transcripts is supplied, get only TranscriptVariations
322 associated with those Transcripts.
323 Returntype : listref of Bio::EnsEMBL::Variation::TranscriptVariation objects
324 Exceptions : Thrown on wrong argument type
325 Caller : general
326 Status : At Risk
327
328 =cut
329
# Fetches the TranscriptVariations from the associated variation feature
# (optionally restricted to the transcripts in $trs) and adapts each of them
# to this genotype before returning the listref.
sub get_all_TranscriptVariations {
    my ($self, $trs) = @_;

    my $tvs = $self->variation_feature->get_all_TranscriptVariations($trs);

    if ($self->_is_sara) {

        # SARA genotype: convert every TranscriptVariation to a SARA one
        for my $tv (@$tvs) {
            $tv->_convert_to_sara;
        }
    }
    else {

        # otherwise rearrange the TranscriptVariationAlleles according to
        # the alleles of this genotype ('|', '/' and '\' all separate alleles)
        my %alleles = map { $_ => 1 } split /\||\/|\\/, $self->allele_string;

        for my $tv (@$tvs) {
            $tv->_rearrange_alleles(\%alleles);
        }
    }

    return $tvs;
}
352
353 =head2 variation_name
354
355 Arg [1] : string $newval (optional)
356 The new value to set the variation_name attribute to
357 Example : $variation_name = $obj->variation_name()
358 Description: Getter/Setter for the variation_name attribute. This is the
359 name of the variation associated with this feature.
360 Returntype : string
361 Exceptions : none
362 Caller : general
363 Status : At Risk
364
365 =cut
366
sub variation_name {
    my $self = shift;

    # Setter form: store the new name and return it.
    if (@_) {
        $self->{'variation_name'} = shift;
    }

    return $self->{'variation_name'};
}
372
373 =head2 variation
374
375 Arg [1] : (optional) Bio::EnsEMBL::Variation::Variation $variation
376 Example : $v = $af->variation();
377 Description: Getter/Setter for the variation associated with this feature.
378 If not set, and this AlleleFeature has an associated adaptor
379 an attempt will be made to lazy-load the variation from the
380 database.
381 Returntype : Bio::EnsEMBL::Variation::Variation
382 Exceptions : throw on incorrect argument
383 Caller : general
384 Status : At Risk
385
386 =cut
387
sub variation {
    my $self = shift;

    # Setter: only Bio::EnsEMBL::Variation::Variation objects are accepted.
    if (@_) {
        my $is_variation =
          ref($_[0]) && $_[0]->isa('Bio::EnsEMBL::Variation::Variation');
        throw("Bio::EnsEMBL::Variation::Variation argument expected")
          unless $is_variation;

        $self->{'variation'} = shift;
        return $self->{'variation'};
    }

    # Getter: lazy-load from the database on demand when nothing is cached
    # but both an adaptor and the internal variation id are available.
    if (   !defined $self->{'variation'}
        && $self->{'adaptor'}
        && defined $self->{'_variation_id'})
    {
        my $variation_adaptor =
          $self->{'adaptor'}->db()->get_VariationAdaptor();
        $self->{'variation'} =
          $variation_adaptor->fetch_by_dbID($self->{'_variation_id'});
    }

    return $self->{'variation'};
}
406
407 =head2 variation_feature
408
409 Arg [1] : (optional) Bio::EnsEMBL::Variation::VariationFeature $vf
410 Example : $vf = $af->variation_feature();
411 Description: Getter/Setter for the variation feature associated with this feature.
412 If not set, and this AlleleFeature has an associated adaptor
413 an attempt will be made to lazy-load the variation from the
414 database.
415 Returntype : Bio::EnsEMBL::Variation::VariationFeature
416 Exceptions : throw on incorrect argument
417 Caller : general
418 Status : At Risk
419
420 =cut
421
sub variation_feature {
    my $self = shift;

    # Setter: only Bio::EnsEMBL::Variation::VariationFeature objects are
    # accepted.
    if (@_) {
        my $is_vf = ref($_[0])
          && $_[0]->isa('Bio::EnsEMBL::Variation::VariationFeature');
        throw("Bio::EnsEMBL::Variation::VariationFeature argument expected")
          unless $is_vf;

        $self->{'variation_feature'} = shift;
        return $self->{'variation_feature'};
    }

    # Getter: lazy-load from the database on demand when nothing is cached
    # but both an adaptor and the internal variation feature id are available.
    if (   !defined $self->{'variation_feature'}
        && $self->{'adaptor'}
        && defined $self->{'_variation_feature_id'})
    {
        my $vf_adaptor =
          $self->{'adaptor'}->db()->get_VariationFeatureAdaptor();
        $self->{'variation_feature'} =
          $vf_adaptor->fetch_by_dbID($self->{'_variation_feature_id'});
    }

    return $self->{'variation_feature'};
}
440
441 =head2 individual
442
443 Arg [1] : (optional) Bio::EnsEMBL::Variation::Individual $individual
444 Example : $p = $af->individual();
445 Description: Getter/Setter for the individual associated with this feature.
446 If not set, and this AlleleFeature has an associated adaptor
447 an attempt will be made to lazy-load the individual from the
448 database.
449 Returntype : Bio::EnsEMBL::Variation::Individual
450 Exceptions : throw on incorrect argument
451 Caller : general
452 Status : At Risk
453
454 =cut
455
sub individual {
    my $self = shift;

    # Setter: only Bio::EnsEMBL::Variation::Individual objects are accepted.
    if (@_) {
        my $is_individual =
          ref($_[0]) && $_[0]->isa('Bio::EnsEMBL::Variation::Individual');
        throw("Bio::EnsEMBL::Variation::Individual argument expected")
          unless $is_individual;

        $self->{'individual'} = shift;
        return $self->{'individual'};
    }

    # Getter: lazy-load from the database on demand when nothing is cached
    # but both an adaptor and the internal sample id are available.
    if (   !defined $self->{'individual'}
        && $self->{'adaptor'}
        && defined $self->{'_sample_id'})
    {
        my $individual_adaptor =
          $self->{'adaptor'}->db()->get_IndividualAdaptor();
        $self->{'individual'} =
          $individual_adaptor->fetch_by_dbID($self->{'_sample_id'});

        # The sample id did not resolve to an Individual; warn, and the
        # method still returns undef.
        warning("AlleleFeature attached to Strain, not Individual")
          unless defined $self->{'individual'};
    }

    return $self->{'individual'};
}
477
478
479 =head2 apply_edit
480
481 Arg [1] : reference to string $seqref
482 Arg [2] : int $start of the seq_ref
483 Example : $sequence = 'ACTGAATATTTAAGGCA';
484 $af->apply_edit(\$sequence,$start);
485 print $sequence, "\n";
486 Description: Applies this AlleleFeature directly to a sequence which is
487 passed by reference.
488 If either the start or end of this AlleleFeature are not defined
489 this function will not do anything to the passed sequence.
490 Returntype : reference to the same sequence that was passed in
491 Exceptions : none
492 Caller : Slice
493 Status : At Risk
494
495 =cut
496
# Splices this feature's allele into the sequence behind $seqref, in place,
# and returns $seqref.
# NOTE(review): the POD documents a second "$start" argument, but the code
# only reads $seqref and always positions the edit at $self->{'start'} - 1.
sub apply_edit {
    my ($self, $seqref) = @_;

    throw("Reference to scalar argument expected")
      unless ref($seqref) eq 'SCALAR';

    # Without both coordinates there is nothing to apply.
    return $seqref
      unless defined $self->{'start'} && defined $self->{'end'};

    # '-' marks a deletion, so it contributes no bases to the replacement.
    my $replacement = $self->{'allele_string'};
    $replacement =~ s/\-//g;

    my $span = $self->length;

    # 'start' is 1-based while substr() offsets are 0-based.
    substr(${$seqref}, $self->{'start'} - 1, $span) = $replacement;

    return $seqref;
}
520
521 =head2 length_diff
522
523 Arg [1] : none
524 Example : my $diff = $af->length_diff();
525 Description: Returns the difference in length caused by applying this
526 AlleleFeature to a sequence. This may be negative (deletion),
527 positive (insertion) or 0 (replacement).
528 If either start or end are not defined 0 is returned.
529 Returntype : int
530 Exceptions : none
531 Caller : general
532 Status : At Risk
533
534 =cut
535
sub length_diff {
    my ($self) = @_;

    my ($start, $end) = @{$self}{qw(start end)};

    # Without both coordinates the edit cannot change any length.
    return 0 unless defined $end && defined $start;

    # Number of reference bases covered by this feature (inclusive range).
    my $covered = $end - $start + 1;
    my $allele  = $self->{'allele_string'};

    # A lone '-' is a pure deletion: every covered base disappears.
    return 0 - $covered if $allele eq '-';

    return length($allele) - $covered;
}
546
547
# Number of bases spanned by this feature; start and end are both inclusive.
sub length {
    my ($self) = @_;
    my ($start, $end) = @{$self}{qw(start end)};
    return $end - $start + 1;
}
552
553 =head2 source
554
555 Arg [1] : string $source (optional)
556 The new value to set the source attribute to
557 Example : $source = $vf->source()
558 Description: Getter/Setter for the source attribute
559 Returntype : string
560 Exceptions : none
561 Caller : general
562 Status : At Risk
563
564 =cut
565
sub source {
    my $self = shift;

    # Setter form: store the new source and return it.
    if (@_) {
        $self->{'source'} = shift;
    }

    return $self->{'source'};
}
571
572 =head2 ref_allele_string
573
574 Args : None
575 Example : $allele = $obj->ref_allele_string()
576 Description: Getter for the reference allele.
577 Returntype : string
578 Exceptions : none
579 Caller : general
580 Status : At Risk
581
582 =cut
583
sub ref_allele_string {
    my ($self) = @_;

    # Only a slice whose class is exactly Bio::EnsEMBL::Slice is treated as
    # the reference slice.
    my $on_reference_slice = ref($self->slice) eq 'Bio::EnsEMBL::Slice';

    # On the reference slice the feature's own sequence is the answer; for
    # any other slice (the original comment names Strain or IndividualSlice)
    # the slice's ref_subseq() is asked instead, with '-' standing in for a
    # false (e.g. empty) result.
    my $reference_allele =
      $on_reference_slice
      ? $self->seq
      : ($self->slice->ref_subseq($self->start, $self->end, $self->strand)
          || '-');

    return $reference_allele;
}
599
600 =head2 get_all_sources
601
602 Args : none
603 Example : my @sources = @{$af->get_all_sources()};
604 Description : returns a list of all the sources for this
605 AlleleFeature
606 ReturnType : reference to list of strings
607 Exceptions : none
608 Caller : general
609 Status : At Risk
610 : Variation database is under development.
611 =cut
612
# Returns a listref of the distinct source names known for this feature:
# the synonym sources reported by the adaptor plus the feature's own source.
# The order of the names is unspecified.  Without an adaptor the list is
# empty.
sub get_all_sources {
    my ($self) = @_;

    my $adaptor = $self->{'adaptor'};
    return [] unless $adaptor;

    # A hash de-duplicates the names.
    my %seen;
    $seen{$_}++ for @{ $adaptor->get_all_synonym_sources($self) };

    # Only count the feature's own source when it has one; an undefined
    # source used to show up as an empty-string entry.
    my $own_source = $self->source;
    $seen{$own_source}++ if defined $own_source;

    return [ keys %seen ];
}
626
# Returns 1 when every allele of this feature occurs (case-insensitively)
# within the reference allele string, 0 otherwise.  The answer is cached in
# '_is_sara'.
sub _is_sara {
    my ($self) = @_;

    return $self->{_is_sara} if defined $self->{_is_sara};

    my $allele_string = $self->allele_string;
    my $ref           = $self->ref_allele_string;

    # Alleles are compared as literal text.  Interpolating them into a regex
    # broke on alleles containing metacharacters: '*' or '+' raised a regex
    # error, '.' matched any base, and an empty allele reused the last
    # successful pattern.  Containment (not equality) is kept as the test,
    # since that is what the unanchored match did.
    my $ref_lc  = lc $ref;
    my $is_sara = 1;

    for my $allele (split /\/|\||\\/, $allele_string) {
        if (index($ref_lc, lc $allele) < 0) {
            $is_sara = 0;
            last;
        }
    }

    $self->{_is_sara} = $is_sara;

    return $self->{_is_sara};
}
645
646 1;