comparison variant_effect_predictor/Bio/EnsEMBL/Variation/VariationFeature.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:21066c0abaf5
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 # Ensembl module for Bio::EnsEMBL::Variation::VariationFeature
22 #
23 # Copyright (c) 2004 Ensembl
24 #
25
26
27 =head1 NAME
28
29 Bio::EnsEMBL::Variation::VariationFeature - A genomic position for a nucleotide variation.
30
31 =head1 SYNOPSIS
32
33 # Variation feature representing a single nucleotide polymorphism
34 $vf = Bio::EnsEMBL::Variation::VariationFeature->new
35 (-start => 100,
36 -end => 100,
37 -strand => 1,
38 -slice => $slice,
39 -allele_string => 'A/T',
40 -variation_name => 'rs635421',
41 -map_weight => 1,
42 -variation => $v);
43
44 # Variation feature representing a 2bp insertion
45 $vf = Bio::EnsEMBL::Variation::VariationFeature->new
46 (-start => 1522,
47 -end => 1521, # end = start-1 for insert
48 -strand => -1,
49 -slice => $slice,
50 -allele_string => '-/AA',
51 -variation_name => 'rs12111',
52 -map_weight => 1,
53 -variation => $v2);
54
55 ...
56
57 # a variation feature is like any other ensembl feature, can be
58 # transformed etc.
59 $vf = $vf->transform('supercontig');
60
61 print $vf->start(), "-", $vf->end(), '(', $vf->strand(), ')', "\n";
62
63 print $vf->name(), ":", $vf->allele_string();
64
65 # Get the Variation object which this feature represents the genomic
66 # position of. If not already retrieved from the DB, this will be
67 # transparently lazy-loaded
68 my $v = $vf->variation();
69
70 =head1 DESCRIPTION
71
72 This is a class representing the genomic position of a nucleotide variation
73 from the ensembl-variation database. The actual variation information is
74 represented by an associated Bio::EnsEMBL::Variation::Variation object. Some
75 of the information has been denormalized and is available on the feature for
76 speed purposes. A VariationFeature behaves as any other Ensembl feature.
77 See B<Bio::EnsEMBL::Feature> and B<Bio::EnsEMBL::Variation::Variation>.
78
79 =head1 METHODS
80
81 =cut
82
83 use strict;
84 use warnings;
85
86 package Bio::EnsEMBL::Variation::VariationFeature;
87
88 use Scalar::Util qw(weaken isweak);
89
90 use Bio::EnsEMBL::Variation::BaseVariationFeature;
91 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
92 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
93 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
94 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
95 use Bio::EnsEMBL::Variation::Utils::Sequence qw(ambiguity_code hgvs_variant_notation SO_variation_class format_hgvs_string);
96 use Bio::EnsEMBL::Variation::Utils::Sequence;
97 use Bio::EnsEMBL::Variation::Variation;
98 use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(MAX_DISTANCE_FROM_TRANSCRIPT);
99 use Bio::EnsEMBL::Variation::Utils::Constants qw($DEFAULT_OVERLAP_CONSEQUENCE %VARIATION_CLASSES);
100 use Bio::EnsEMBL::Variation::RegulatoryFeatureVariation;
101 use Bio::EnsEMBL::Variation::MotifFeatureVariation;
102 use Bio::EnsEMBL::Variation::ExternalFeatureVariation;
103 use Bio::EnsEMBL::Variation::IntergenicVariation;
104 use Bio::EnsEMBL::Slice;
105 use Bio::EnsEMBL::Variation::DBSQL::TranscriptVariationAdaptor;
106 use Bio::PrimarySeq;
107 use Bio::SeqUtils;
108
109 our @ISA = ('Bio::EnsEMBL::Variation::BaseVariationFeature');
110
111 our $DEBUG = 0;
112 =head2 new
113
114 Arg [-dbID] :
115 see superclass constructor
116
117 Arg [-ADAPTOR] :
118 see superclass constructor
119
120 Arg [-START] :
121 see superclass constructor
122 Arg [-END] :
123 see superclass constructor
124
125 Arg [-STRAND] :
126 see superclass constructor
127
128 Arg [-SLICE] :
129 see superclass constructor
130
131 Arg [-VARIATION_NAME] :
132 string - the name of the variation this feature is for (denormalisation
133 from Variation object).
134
135 Arg [-MAP_WEIGHT] :
136 int - the number of times that the variation associated with this feature
137 has hit the genome. If this was the only feature associated with this
138 variation_feature the map_weight would be 1.
139
140 Arg [-VARIATION] :
141 int - the variation object associated with this feature.
142
143 Arg [-SOURCE] :
144 string - the name of the source where the SNP comes from
145
146 Arg [-SOURCE_VERSION] :
147 number - the version of the source where the SNP comes from
148
149 Arg [-VALIDATION_CODE] :
150 reference to list of strings
151
152 Arg [-OVERLAP_CONSEQUENCES] :
153 listref of Bio::EnsEMBL::Variation::OverlapConsequences - all the consequences of this VariationFeature
154
155 Arg [-VARIATION_ID] :
156 int - the internal id of the variation object associated with this
157 identifier. This may be provided instead of a variation object so that
158 the variation may be lazy-loaded from the database on demand.
159
160 Example :
161 $vf = Bio::EnsEMBL::Variation::VariationFeature->new(
162 -start => 100,
163 -end => 100,
164 -strand => 1,
165 -slice => $slice,
166 -allele_string => 'A/T',
167 -variation_name => 'rs635421',
168 -map_weight => 1,
169 -source => 'dbSNP',
170 -validation_code => ['cluster','doublehit'],
171 -variation => $v
172 );
173
174 Description: Constructor. Instantiates a new VariationFeature object.
175 Returntype : Bio::EnsEMBL::Variation::Variation
176 Exceptions : none
177 Caller : general
178 Status : At Risk
179
180 =cut
181
182 sub new {
183 my $caller = shift;
184 my $class = ref($caller) || $caller;
185
186 my $self = $class->SUPER::new(@_);
187
188 my (
189 $allele_str,
190 $var_name,
191 $map_weight,
192 $variation,
193 $variation_id,
194 $source,
195 $source_version,
196 $is_somatic,
197 $validation_code,
198 $overlap_consequences,
199 $class_so_term,
200 $minor_allele,
201 $minor_allele_freq,
202 $minor_allele_count
203 ) = rearrange([qw(
204 ALLELE_STRING
205 VARIATION_NAME
206 MAP_WEIGHT
207 VARIATION
208 _VARIATION_ID
209 SOURCE
210 SOURCE_VERSION
211 IS_SOMATIC
212 VALIDATION_CODE
213 OVERLAP_CONSEQUENCES
214 CLASS_SO_TERM
215 MINOR_ALLELE
216 MINOR_ALLELE_FREQUENCY
217 MINOR_ALLELE_COUNT
218 )], @_);
219
220 $self->{'allele_string'} = $allele_str;
221 $self->{'variation_name'} = $var_name;
222 $self->{'map_weight'} = $map_weight;
223 $self->{'variation'} = $variation;
224 $self->{'_variation_id'} = $variation_id;
225 $self->{'source'} = $source;
226 $self->{'source_version'} = $source_version;
227 $self->{'is_somatic'} = $is_somatic;
228 $self->{'validation_code'} = $validation_code;
229 $self->{'overlap_consequences'} = $overlap_consequences;
230 $self->{'class_SO_term'} = $class_so_term;
231 $self->{'minor_allele'} = $minor_allele;
232 $self->{'minor_allele_frequency'} = $minor_allele_freq;
233 $self->{'minor_allele_count'} = $minor_allele_count;
234
235 return $self;
236 }
237
238
239
240 sub new_fast {
241
242 my $class = shift;
243 my $hashref = shift;
244 my $self = bless $hashref, $class;
245 weaken($self->{'adaptor'}) if ( ! isweak($self->{'adaptor'}) );
246 return $self;
247
248 }
249
250
251 =head2 allele_string
252
253 Arg [1] : string $newval (optional)
254 The new value to set the allele_string attribute to
255 Example : $allele_string = $obj->allele_string()
256 Description: Getter/Setter for the allele_string attribute.
257 The allele_string is a '/' demimited string representing the
258 alleles associated with this features variation.
259 Returntype : string
260 Exceptions : none
261 Caller : general
262 Status : Stable
263
264 =cut
265
266 sub allele_string{
267 my $self = shift;
268 return $self->{'allele_string'} = shift if(@_);
269 return $self->{'allele_string'};
270 }
271
272 =head2 display_id
273
274 Arg [1] : none
275 Example : print $vf->display_id(), "\n";
276 Description: Returns the 'display' identifier for this feature. For
277 VariationFeatures this is simply the name of the variation
278 it is associated with.
279 Returntype : string
280 Exceptions : none
281 Caller : webcode
282 Status : At Risk
283
284 =cut
285
286 sub display_id {
287 my $self = shift;
288 return $self->{'variation_name'} || '';
289 }
290
291
292
293 =head2 variation_name
294
295 Arg [1] : string $newval (optional)
296 The new value to set the variation_name attribute to
297 Example : $variation_name = $obj->variation_name()
298 Description: Getter/Setter for the variation_name attribute. This is the
299 name of the variation associated with this feature.
300 Returntype : string
301 Exceptions : none
302 Caller : general
303 Status : Stable
304
305 =cut
306
307 sub variation_name{
308 my $self = shift;
309 return $self->{'variation_name'} = shift if(@_);
310 return $self->{'variation_name'};
311 }
312
313
314
315 =head2 map_weight
316
317 Arg [1] : int $newval (optional)
318 The new value to set the map_weight attribute to
319 Example : $map_weight = $obj->map_weight()
320 Description: Getter/Setter for the map_weight attribute. The map_weight
321 is the number of times this features variation was mapped to
322 the genome.
323 Returntype : int
324 Exceptions : none
325 Caller : general
326 Status : At Risk
327
328 =cut
329
330 sub map_weight{
331 my $self = shift;
332 return $self->{'map_weight'} = shift if(@_);
333 return $self->{'map_weight'};
334 }
335
336 =head2 minor_allele
337
338 Arg [1] : string $minor_allele (optional)
339 The new minor allele string
340 Example : $ma = $obj->minor_allele()
341 Description: Get/set the minor allele of this variation, as reported by dbSNP
342 Returntype : string
343 Exceptions : none
344 Caller : general
345 Status : Stable
346
347 =cut
348
349 sub minor_allele {
350 my ($self, $minor_allele) = @_;
351 $self->{minor_allele} = $minor_allele if defined $minor_allele;
352 return $self->{minor_allele}
353 }
354
355 =head2 minor_allele_frequency
356
357 Arg [1] : float $minor_allele_frequency (optional)
358 The new minor allele frequency
359 Example : $maf = $obj->minor_allele_frequency()
360 Description: Get/set the frequency of the minor allele of this variation, as reported by dbSNP
361 Returntype : float
362 Exceptions : none
363 Caller : general
364 Status : Stable
365
366 =cut
367
368 sub minor_allele_frequency {
369 my ($self, $minor_allele_frequency) = @_;
370 $self->{minor_allele_frequency} = $minor_allele_frequency if defined $minor_allele_frequency;
371 return $self->{minor_allele_frequency}
372 }
373
374 =head2 minor_allele_count
375
376 Arg [1] : int $minor_allele_count (optional)
377 The new minor allele count
378 Example : $maf_count = $obj->minor_allele_count()
379 Description: Get/set the sample count of the minor allele of this variation, as reported by dbSNP
380 Returntype : int
381 Exceptions : none
382 Caller : general
383 Status : Stable
384
385 =cut
386
387 sub minor_allele_count {
388 my ($self, $minor_allele_count) = @_;
389 $self->{minor_allele_count} = $minor_allele_count if defined $minor_allele_count;
390 return $self->{minor_allele_count}
391 }
392
393
394
395 =head2 get_all_TranscriptVariations
396
397 Arg [1] : (optional) listref of Bio::EnsEMBL::Transcript objects
398 Example : $vf->get_all_TranscriptVariations;
399 Description : Get all the TranscriptVariations associated with this VariationFeature.
400 If the optional list of Transcripts is supplied, get only TranscriptVariations
401 associated with those Transcripts.
402 Returntype : listref of Bio::EnsEMBL::Variation::TranscriptVariation objects
403 Exceptions : Thrown on wrong argument type
404 Caller : general
405 Status : At Risk
406
407 =cut
408
409 sub get_all_TranscriptVariations {
410
411 my ($self, $transcripts) = @_;
412
413 if ($transcripts) {
414 assert_ref($transcripts, 'ARRAY');
415 map { assert_ref($_, 'Bio::EnsEMBL::Transcript') } @$transcripts;
416 }
417
418 #die unless $self->{transcript_variations};
419
420 if ($self->dbID && not defined $self->{transcript_variations}) {
421 # this VariationFeature is from the database, so we can just fetch the
422 # TranscriptVariations from the database as well
423
424 if (my $db = $self->adaptor->db) {
425 my $tva = $db->get_TranscriptVariationAdaptor;
426
427 # just fetch TVs for all Transcripts because that's more efficient,
428 # we'll only return those asked for later on
429
430 my $tvs = $tva->fetch_all_by_VariationFeatures([$self]);
431
432 map { $self->add_TranscriptVariation($_) } @$tvs;
433 }
434 }
435 elsif (not defined $self->{transcript_variations}) {
436 # this VariationFeature is not in the database so we have to build the
437 # TranscriptVariations ourselves
438
439 unless ($transcripts) {
440 # if the caller didn't supply some transcripts fetch those around this VariationFeature
441
442 # get a slice around this transcript including the maximum distance up and down-stream
443 # that we still call consequences for
444
445 my $slice = $self->feature_Slice->expand(
446 MAX_DISTANCE_FROM_TRANSCRIPT,
447 MAX_DISTANCE_FROM_TRANSCRIPT
448 );
449
450 # fetch all transcripts on this slice
451
452 $transcripts = $slice->get_all_Transcripts(1);
453 }
454
455 my @unfetched_transcripts = grep {
456 not exists $self->{transcript_variations}->{$_->stable_id}
457 } @$transcripts;
458
459 for my $transcript (@unfetched_transcripts) {
460 $self->add_TranscriptVariation(
461 Bio::EnsEMBL::Variation::TranscriptVariation->new(
462 -variation_feature => $self,
463 -transcript => $transcript,
464 -adaptor => ($self->adaptor->db ? $self->adaptor->db->get_TranscriptVariationAdaptor : undef),
465 )
466 );
467 }
468 }
469
470 if ($transcripts) {
471 # just return TranscriptVariations for the requested Transcripts
472 return [ map { $self->{transcript_variations}->{$_->stable_id} } @$transcripts ];
473 }
474 else {
475 # return all TranscriptVariations
476 return [ values %{ $self->{transcript_variations} } ];
477 }
478 }
479
480 =head2 get_all_RegulatoryFeatureVariations
481
482 Description : Get all the RegulatoryFeatureVariations associated with this VariationFeature.
483 Returntype : listref of Bio::EnsEMBL::Variation::RegulatoryFeatureVariation objects
484 Exceptions : none
485 Status : At Risk
486
487 =cut
488
489 sub get_all_RegulatoryFeatureVariations {
490 my $self = shift;
491 return $self->_get_all_RegulationVariations('RegulatoryFeature', @_);
492 }
493
494 =head2 get_all_MotifFeatureVariations
495
496 Description : Get all the MotifFeatureVariations associated with this VariationFeature.
497 Returntype : listref of Bio::EnsEMBL::Variation::MotifFeatureVariation objects
498 Exceptions : none
499 Status : At Risk
500
501 =cut
502
503 sub get_all_MotifFeatureVariations {
504 my $self = shift;
505 return $self->_get_all_RegulationVariations('MotifFeature', @_);
506 }
507
508 =head2 get_all_ExternalFeatureVariations
509
510 Description : Get all the ExternalFeatureVariations associated with this VariationFeature.
511 Returntype : listref of Bio::EnsEMBL::Variation::ExternalFeatureVariation objects
512 Exceptions : none
513 Status : At Risk
514
515 =cut
516
517 sub get_all_ExternalFeatureVariations {
518 my $self = shift;
519 return $self->_get_all_RegulationVariations('ExternalFeature', @_);
520 }
521
522 sub _get_all_RegulationVariations {
523 my ($self, $type) = @_;
524
525 unless ($type && ($type eq 'RegulatoryFeature' || $type eq 'MotifFeature' || $type eq 'ExternalFeature')) {
526 throw("Invalid Ensembl Regulation type '$type'");
527 }
528
529 unless ($self->{regulation_variations}->{$type}) {
530
531 my $fg_adaptor;
532
533 if (my $adap = $self->adaptor) {
534 if(my $db = $adap->db) {
535 $fg_adaptor = Bio::EnsEMBL::DBSQL::MergedAdaptor->new(
536 -species => $adap->db->species,
537 -type => $type,
538 );
539 }
540
541 unless ($fg_adaptor) {
542 warning("Failed to get adaptor for $type");
543 return [];
544 }
545 }
546 else {
547 warning('Cannot get variation features without attached adaptor');
548 return [];
549 }
550
551 my $slice = $self->feature_Slice;
552
553 my $constructor = 'Bio::EnsEMBL::Variation::'.$type.'Variation';
554
555 eval {
556 $self->{regulation_variations}->{$type} = [
557 map {
558 $constructor->new(
559 -variation_feature => $self,
560 -feature => $_,
561 );
562 } map { $_->transfer($self->slice) } @{ $fg_adaptor->fetch_all_by_Slice($slice) }
563 ];
564 };
565
566 $self->{regulation_variations}->{$type} ||= [];
567 }
568
569 return $self->{regulation_variations}->{$type};
570 }
571
572 sub get_IntergenicVariation {
573 my $self = shift;
574 my $no_ref_check = shift;
575
576 unless (exists $self->{intergenic_variation}) {
577 if (scalar(@{ $self->get_all_TranscriptVariations }) == 0) {
578 $self->{intergenic_variation} = Bio::EnsEMBL::Variation::IntergenicVariation->new(
579 -variation_feature => $self,
580 -no_ref_check => $no_ref_check,
581 );
582 }
583 else {
584 $self->{intergenic_variation} = undef;
585 }
586 }
587
588 return $self->{intergenic_variation};
589 }
590
591 =head2 get_all_VariationFeatureOverlaps
592
593 Description : Get all the VariationFeatureOverlaps associated with this VariationFeature, this
594 includes TranscriptVariations and regulatory feature overlap object.
595 Returntype : listref of Bio::EnsEMBL::Variation::VariationFeatureOverlap objects
596 Exceptions : none
597 Status : At Risk
598
599 =cut
600
601 sub get_all_VariationFeatureOverlaps {
602 my $self = shift;
603
604 my $vfos = [
605 @{ $self->get_all_TranscriptVariations },
606 @{ $self->get_all_RegulatoryFeatureVariations },
607 @{ $self->get_all_MotifFeatureVariations },
608 @{ $self->get_all_ExternalFeatureVariations },
609 ];
610
611 if (my $iv = $self->get_IntergenicVariation) {
612 push @$vfos, $iv;
613 }
614
615 return $vfos;
616 }
617
618 =head2 add_TranscriptVariation
619
620 Arg [1] : Bio::EnsEMBL::Variation::TranscriptVariation
621 Example : $vf->add_TranscriptVariation($tv);
622 Description : Adds a TranscriptVariation to the variation feature object.
623 Exceptions : thrown on bad argument
624 Caller : Bio::EnsEMBL::Variation::TranscriptVariationAdaptor
625 Status : At Risk
626
627 =cut
628
629 sub add_TranscriptVariation {
630 my ($self, $tv) = @_;
631 assert_ref($tv, 'Bio::EnsEMBL::Variation::TranscriptVariation');
632 # we need to weaken the reference back to us to avoid a circular reference
633 weaken($tv->{base_variation_feature});
634 $self->{transcript_variations}->{$tv->transcript_stable_id} = $tv;
635 }
636
637 =head2 variation
638
639 Arg [1] : (optional) Bio::EnsEMBL::Variation::Variation $variation
640 Example : $v = $vf->variation();
641 Description: Getter/Setter for the variation associated with this feature.
642 If not set, and this VariationFeature has an associated adaptor
643 an attempt will be made to lazy-load the variation from the
644 database.
645 Returntype : Bio::EnsEMBL::Variation::Variation
646 Exceptions : throw on incorrect argument
647 Caller : general
648 Status : Stable
649
650 =cut
651
652 sub variation {
653 my $self = shift;
654
655 if(@_) {
656 if(!ref($_[0]) || !$_[0]->isa('Bio::EnsEMBL::Variation::Variation')) {
657 throw("Bio::EnsEMBL::Variation::Variation argument expected");
658 }
659 $self->{'variation'} = shift;
660 }
661 elsif(!defined($self->{'variation'}) && $self->adaptor() &&
662 defined($self->{'_variation_id'})) {
663 # lazy-load from database on demand
664 my $va = $self->adaptor->db()->get_VariationAdaptor();
665 $self->{'variation'} = $va->fetch_by_dbID($self->{'_variation_id'});
666 }
667
668 return $self->{'variation'};
669 }
670
671 =head2 consequence_type
672
673 Arg [1] : (optional) String $term_type
674 Description: Get a list of all the unique consequence terms of this
675 VariationFeature. By default returns Ensembl display terms
676 (e.g. 'NON_SYNONYMOUS_CODING'). $term_type can also be 'label'
677 (e.g. 'Non-synonymous coding'), 'SO' (Sequence Ontology, e.g.
678 'non_synonymous_codon') or 'NCBI' (e.g. 'missense')
679 Returntype : listref of strings
680 Exceptions : none
681 Status : At Risk
682
683 =cut
684
685 sub consequence_type {
686
687 my $self = shift;
688 my $term_type = shift;
689
690 my $method_name;
691
692 # delete cached term
693 if(defined($term_type)) {
694 delete $self->{consequence_types};
695 $method_name = $term_type.($term_type eq 'label' ? '' : '_term');
696 $method_name = 'SO_term' unless @{$self->get_all_OverlapConsequences} && $self->get_all_OverlapConsequences->[0]->can($method_name);
697 }
698
699 $method_name ||= 'SO_term';
700
701 if (exists($self->{current_consequence_method}) && $self->{current_consequence_method} ne $method_name) {
702 delete $self->{consequence_type};
703 }
704
705 unless ($self->{consequence_types}) {
706
707 # work out the terms from the OverlapConsequence objects
708
709 $self->{consequence_types} =
710 [ map { $_->$method_name } @{ $self->get_all_OverlapConsequences } ];
711 }
712
713 $self->{current_consequence_method} = $method_name;
714
715 return $self->{consequence_types};
716 }
717
718 =head2 get_all_OverlapConsequences
719
720 Description: Get a list of all the unique OverlapConsequences of this VariationFeature,
721 calculating them on the fly from the TranscriptVariations if necessary
722 Returntype : listref of Bio::EnsEMBL::Variation::OverlapConsequence objects
723 Exceptions : none
724 Status : At Risk
725
726 =cut
727
728 sub get_all_OverlapConsequences {
729 my $self = shift;
730
731 unless ($self->{overlap_consequences}) {
732
733 # work them out and store them in a hash keyed by SO_term as we don't
734 # want duplicates from different VFOs
735
736 my %overlap_cons;
737
738 for my $vfo (@{ $self->get_all_TranscriptVariations }) {
739 for my $allele (@{ $vfo->get_all_alternate_VariationFeatureOverlapAlleles }) {
740 for my $cons (@{ $allele->get_all_OverlapConsequences }) {
741 $overlap_cons{$cons->SO_term} = $cons;
742 }
743 }
744 }
745
746 # if we don't have any consequences we use a default from Constants.pm
747 # (currently set to the intergenic consequence)
748
749 $self->{overlap_consequences} = [
750 %overlap_cons ? values %overlap_cons : $DEFAULT_OVERLAP_CONSEQUENCE
751 ];
752 }
753
754 return $self->{overlap_consequences};
755 }
756
757 =head2 add_OverlapConsequence
758
759 Arg [1] : Bio::EnsEMBL::Variation::OverlapConsequence instance
760 Description: Add an OverlapConsequence to this VariationFeature's list
761 Returntype : none
762 Exceptions : throws if the argument is the wrong type
763 Status : At Risk
764
765 =cut
766
767 sub add_OverlapConsequence {
768 my ($self, $oc) = @_;
769 assert_ref($oc, 'Bio::EnsEMBL::Variation::OverlapConsequence');
770 push @{ $self->{overlap_consequences} ||= [] }, $oc;
771 }
772
773 =head2 most_severe_OverlapConsequence
774
775 Description: Get the OverlapConsequence considered (by Ensembl) to be the most severe
776 consequence of all the alleles of this VariationFeature
777 Returntype : Bio::EnsEMBL::Variation::OverlapConsequence
778 Exceptions : none
779 Status : At Risk
780
781 =cut
782
783 sub most_severe_OverlapConsequence {
784 my $self = shift;
785
786 unless ($self->{_most_severe_consequence}) {
787
788 my $highest;
789
790 for my $cons (@{ $self->get_all_OverlapConsequences }) {
791 $highest ||= $cons;
792 if ($cons->rank < $highest->rank) {
793 $highest = $cons;
794 }
795 }
796
797 $self->{_most_severe_consequence} = $highest;
798 }
799
800 return $self->{_most_severe_consequence};
801 }
802
803 =head2 display_consequence
804
805 Arg [1] : (optional) String $term_type
806 Description: Get the term for the most severe consequence of this
807 VariationFeature. By default returns Ensembl display terms
808 (e.g. 'NON_SYNONYMOUS_CODING'). $term_type can also be 'label'
809 (e.g. 'Non-synonymous coding'), 'SO' (Sequence Ontology, e.g.
810 'non_synonymous_codon') or 'NCBI' (e.g. 'missense')
811 Returntype : string
812 Exceptions : none
813 Status : At Risk
814
815 =cut
816
817 sub display_consequence {
818 my $self = shift;
819 my $term_type = shift;
820
821 my $method_name;
822
823 # delete cached term
824 if(defined($term_type)) {
825 $method_name = $term_type.($term_type eq 'label' ? '' : '_term');
826 $method_name = 'SO_term' unless @{$self->get_all_OverlapConsequences} && $self->get_all_OverlapConsequences->[0]->can($method_name);
827 }
828
829 $method_name ||= 'SO_term';
830
831 return $self->most_severe_OverlapConsequence->$method_name;
832 }
833
834 =head2 add_consequence_type
835
836 Status : Deprecated, use add_OverlapConsequence instead
837
838 =cut
839
840 sub add_consequence_type{
841 my $self = shift;
842 warning('Deprecated method, use add_OverlapConsequence instead');
843 return $self->add_OverlapConsequence(@_);
844 }
845
846 =head2 get_consequence_type
847
848 Status : Deprecated, use consequence_type instead
849
850 =cut
851
852 sub get_consequence_type {
853 my $self = shift;
854 warning('Deprecated method, use consequence_type instead');
855 return $self->consequence_type;
856 }
857
858 =head2 ambig_code
859
860 Args : None
861 Example : my $ambiguity_code = $vf->ambig_code()
862 Description : Returns the ambigutiy code for the alleles in the VariationFeature
863 ReturnType : String $ambiguity_code
864 Exceptions : none
865 Caller : General
866 Status : At Risk
867
868 =cut
869
870 sub ambig_code{
871 my $self = shift;
872
873 return &ambiguity_code($self->allele_string());
874 }
875
876 =head2 var_class
877
878 Args[1] : (optional) no_db - don't use the term from the database, always calculate it from the allele string
879 (used by the ensembl variation pipeline)
880 Example : my $variation_class = $vf->var_class
881 Description : returns the Ensembl term for the class of this variation
882 ReturnType : string
883 Exceptions : throws if we can't find a corresponding display term for an SO term
884 Caller : General
885 Status : At Risk
886
887 =cut
888
889 sub var_class {
890
891 my $self = shift;
892 my $no_db = shift;
893
894 unless ($self->{class_display_term}) {
895
896 my $so_term = $self->class_SO_term(undef, $no_db);
897
898 # convert the SO term to the ensembl display term
899
900 $self->{class_display_term} = $self->is_somatic ?
901 $VARIATION_CLASSES{$so_term}->{somatic_display_term} :
902 $VARIATION_CLASSES{$so_term}->{display_term};
903 }
904
905 return $self->{class_display_term};
906 }
907
908 =head2 class_SO_term
909
910 Args[1] : (optional) class_SO_term - the SO term for the class of this variation feature
911 Args[2] : (optional) no_db - don't use the term from the database, always calculate it from the allele string
912 (used by the ensembl variation pipeline)
913 Example : my $SO_variation_class = $vf->class_SO_term()
914 Description : Get/set the SO term for the class of this variation
915 ReturnType : string
916 Exceptions : none
917 Caller : General
918 Status : At Risk
919
920 =cut
921
922 sub class_SO_term {
923 my ($self, $class_SO_term, $no_db) = @_;
924
925 $self->{class_SO_term} = $class_SO_term if $class_SO_term;
926
927 if ($no_db || !$self->{class_SO_term}) {
928 $self->{class_SO_term} = SO_variation_class($self->allele_string);
929 }
930
931 return $self->{class_SO_term};
932 }
933
934 =head2 get_all_validation_states
935
936 Arg [1] : none
937 Example : my @vstates = @{$vf->get_all_validation_states()};
938 Description: Retrieves all validation states for this variationFeature. Current
939 possible validation statuses are 'cluster','freq','submitter',
940 'doublehit', 'hapmap'
941 Returntype : reference to list of strings
942 Exceptions : none
943 Caller : general
944 Status : At Risk
945
946 =cut
947
948 sub get_all_validation_states {
949 my $self = shift;
950 return Bio::EnsEMBL::Variation::Utils::Sequence::get_all_validation_states($self->{'validation_code'});
951 }
952
953
954 =head2 add_validation_state
955
956 Arg [1] : string $state
957 Example : $vf->add_validation_state('cluster');
958 Description: Adds a validation state to this variation.
959 Returntype : none
960 Exceptions : warning if validation state is not a recognised type
961 Caller : general
962 Status : At Risk
963
964 =cut
965
966 sub add_validation_state {
967 Bio::EnsEMBL::Variation::Utils::Sequence::add_validation_state(@_);
968 }
969
970 =head2 source
971
972 Arg [1] : string $source_name (optional) - the new value to set the source attribute to
973 Example : $source = $vf->source;
974 Description: Getter/Setter for the source attribute
975 Returntype : the source name as a string,
976 Exceptions : none
977 Caller : general
978 Status : At Risk
979
980 =cut
981
982 sub source {
983 my ($self, $source) = @_;
984 $self->{source} = $source if $source;
985 return $self->{source};
986 }
987
988 =head2 source_version
989
990 Arg [1] : number $source_version (optional) - the new value to set the source version attribute to
991 Example : $source_version = $vf->source_version;
992 Description: Getter/Setter for the source version attribute
993 Returntype : the source version as a number
994 Exceptions : none
995 Caller : general
996 Status : At Risk
997
998 =cut
999
1000 sub source_version {
1001 my ($self, $source_version) = @_;
1002 $self->{source_version} = $source_version if $source_version;
1003 return $self->{source_version};
1004 }
1005
1006 =head2 is_somatic
1007
1008 Arg [1] : boolean $is_somatic (optional)
1009 The new value to set the is_somatic flag to
1010 Example : $is_somatic = $vf->is_somatic
1011 Description: Getter/Setter for the is_somatic flag, which identifies this variation feature as either somatic or germline
1012 Returntype : boolean
1013 Exceptions : none
1014 Caller : general
1015 Status : Stable
1016
1017 =cut
1018
1019 sub is_somatic {
1020 my ($self, $is_somatic) = @_;
1021 $self->{'is_somatic'} = $is_somatic if defined $is_somatic;
1022 return $self->{'is_somatic'};
1023 }
1024
1025 =head2 is_tagged
1026
1027 Args : None
1028 Example : my $populations = $vf->is_tagged();
1029 Description : If the variation is tagged in any population, returns an array with the populations where the variation_feature
1030 is tagged (using a criteria of r2 > 0.99). Otherwise, returns null
1031 ReturnType : list of Bio::EnsEMBL::Variation::Population
1032 Exceptions : none
1033 Caller : general
1034 Status : At Risk
1035
1036 =cut
1037
1038 sub is_tagged{
1039 my $self = shift;
1040
1041 if ($self->adaptor()){
1042 my $population_adaptor = $self->adaptor()->db()->get_PopulationAdaptor();
1043 return $population_adaptor->fetch_tagged_Population($self);
1044 }
1045 }
1046
1047 =head2 is_tag
1048
1049 Args : None
1050 Example : my $populations = $vf->is_tag();
1051 Description : Returns an array of populations in which this variation feature
1052 is a tag SNP.
1053 ReturnType : list of Bio::EnsEMBL::Variation::Population
1054 Exceptions : none
1055 Caller : general
1056 Status : At Risk
1057
1058 =cut
1059
1060 sub is_tag{
1061 my $self = shift;
1062
1063 if ($self->adaptor()){
1064 my $population_adaptor = $self->adaptor()->db()->get_PopulationAdaptor();
1065 return $population_adaptor->fetch_tag_Population($self);
1066 }
1067 }
1068
1069 =head2 get_all_tagged_VariationFeatures
1070
1071 Args : Bio::EnsEMBL::Variation::Population $pop (optional)
1072 Example : my $vfs = $vf->get_all_tagged_VariationFeatures();
1073 Description : Returns an arrayref of variation features that are tagged by
1074 this variation feature, in the population $pop if specified.
1075 ReturnType : list of Bio::EnsEMBL::Variation::VariationFeature
1076 Exceptions : none
1077 Caller : general
1078 Status : At Risk
1079
1080 =cut
1081
1082 sub get_all_tagged_VariationFeatures {
1083 return $_[0]->adaptor->fetch_all_tagged_by_VariationFeature(@_);
1084 }
1085
1086 =head2 get_all_tag_VariationFeatures
1087
1088 Args : Bio::EnsEMBL::Variation::Population $pop (optional)
1089 Example : my $vfs = $vf->get_all_tag_VariationFeatures();
1090 Description : Returns an arrayref of variation features that tag this
1091 variation feature, in the population $pop if specified.
1092 ReturnType : list of Bio::EnsEMBL::Variation::VariationFeature
1093 Exceptions : none
1094 Caller : general
1095 Status : At Risk
1096
1097 =cut
1098
1099 sub get_all_tag_VariationFeatures {
1100 return $_[0]->adaptor->fetch_all_tags_by_VariationFeature(@_);
1101 }
1102
1103 =head2 get_all_tag_and_tagged_VariationFeatures
1104
1105 Args : Bio::EnsEMBL::Variation::Population $pop (optional)
1106 Example : my $vfs = $vf->get_all_tag_and_tagged_VariationFeatures();
1107 Description : Returns an arrayref of variation features that either tag or are
1108 tagged by this variation feature, in the population $pop if
1109 specified.
1110 ReturnType : list of Bio::EnsEMBL::Variation::VariationFeature
1111 Exceptions : none
1112 Caller : general
1113 Status : At Risk
1114
1115 =cut
1116
1117 sub get_all_tag_and_tagged_VariationFeatures {
1118 return $_[0]->adaptor->fetch_all_tags_and_tagged_by_VariationFeature(@_);
1119 }
1120
1121
1122
1123 =head2 is_reference
1124 Arg : none
1125 Example : my $reference = $vf->is_reference()
1126 Description: Returns 1 if VF's slice is a reference slice else 0
1127 Returntype : int
1128 Caller : general
1129 Status : At Risk
1130
1131 =cut
1132
1133 sub is_reference {
1134 my ($self) = @_;
1135 my $slice = $self->slice;
1136
1137 if ( !defined( $self->{'is_reference'} ) ) {
1138 $self->{'is_reference'} = $slice->is_reference();
1139 }
1140
1141 return $self->{'is_reference'};
1142 }
1143
1144 =head2 convert_to_SNP
1145
1146 Args : None
1147 Example : my $snp = $vf->convert_to_SNP()
1148 Description : Creates a Bio::EnsEMBL::SNP object from Bio::EnsEMBL::VariationFeature. Mainly used for
1149 backwards compatibility
1150 ReturnType : Bio::EnsEMBL::SNP
1151 Exceptions : None
1152 Caller : general
1153 Status : At Risk
1154
1155 =cut
1156
1157 sub convert_to_SNP{
1158 my $self = shift;
1159
1160 require Bio::EnsEMBL::SNP; #for backwards compatibility. It will only be loaded if the function is called
1161
1162 my $snp = Bio::EnsEMBL::SNP->new_fast({
1163 'dbID' => $self->variation()->dbID(),
1164 '_gsf_start' => $self->start,
1165 '_gsf_end' => $self->end,
1166 '_snp_strand' => $self->strand,
1167 '_gsf_score' => 1,
1168 '_type' => $self->var_class,
1169 '_validated' => $self->get_all_validation_states(),
1170 'alleles' => $self->allele_string,
1171 '_ambiguity_code' => $self->ambig_code,
1172 '_mapweight' => $self->map_weight,
1173 '_source' => $self->source
1174 });
1175 return $snp;
1176 }
1177
1178 =head2 get_all_LD_values
1179
1180 Args : none
1181 Description : returns all LD values for this variation feature. This function will only work correctly if the variation
1182 database has been attached to the core database.
1183 ReturnType : Bio::EnsEMBL::Variation::LDFeatureContainer
1184 Exceptions : none
1185 Caller : snpview
1186 Status : At Risk
1187 : Variation database is under development.
1188
1189 =cut
1190
1191 sub get_all_LD_values{
1192 my $self = shift;
1193
1194 if ($self->adaptor()){
1195 my $ld_adaptor = $self->adaptor()->db()->get_LDFeatureContainerAdaptor();
1196 return $ld_adaptor->fetch_by_VariationFeature($self);
1197 }
1198 return {};
1199 }
1200
1201 =head2 get_all_LD_Populations
1202
1203 Args : none
1204 Description : returns a list of populations that could produces LD values
1205 for this VariationFeature
1206 ReturnType : listref of Bio::EnsEMBL::Variation::Population objects
1207 Exceptions : none
1208 Caller : snpview
1209 Status : At Risk
1210
1211 =cut
1212
1213 sub get_all_LD_Populations{
1214 my $self = shift;
1215
1216 my $pa = $self->adaptor->db->get_PopulationAdaptor;
1217 return [] unless $pa;
1218
1219 my $ld_pops = $pa->fetch_all_LD_Populations;
1220 return [] unless $ld_pops;
1221
1222 my $sth = $self->adaptor->db->prepare(qq{
1223 SELECT ip.population_sample_id, c.seq_region_start, c.genotypes
1224 FROM compressed_genotype_region c, individual_population ip
1225 WHERE c.sample_id = ip.individual_sample_id
1226 AND c.seq_region_id = ?
1227 AND c.seq_region_start < ?
1228 AND c.seq_region_end > ?
1229 });
1230
1231 my $this_vf_start = $self->seq_region_start;
1232
1233 $sth->bind_param(1, $self->feature_Slice->get_seq_region_id);
1234 $sth->bind_param(2, $self->seq_region_end);
1235 $sth->bind_param(3, $this_vf_start);
1236
1237 $sth->execute;
1238
1239 my ($sample_id, $seq_region_start, $genotypes);
1240 $sth->bind_columns(\$sample_id, \$seq_region_start, \$genotypes);
1241
1242 my %have_genotypes = ();
1243
1244 while($sth->fetch()) {
1245
1246 next if $have_genotypes{$sample_id};
1247
1248 if($seq_region_start == $this_vf_start) {
1249 $have_genotypes{$sample_id} = 1;
1250 next;
1251 }
1252
1253 my @genotypes = unpack '(www)*', $genotypes;
1254 my $gt_start = $seq_region_start;
1255
1256 while(my( $var_id, $gt_code, $gap ) = splice @genotypes, 0, 3 ) {
1257 if($gt_start == $this_vf_start) {
1258 $have_genotypes{$sample_id} = 1;
1259 last;
1260 }
1261 $gt_start += $gap + 1 if defined $gap;
1262 }
1263 }
1264
1265 my @final_list = grep {$have_genotypes{$_->dbID}} @$ld_pops;
1266
1267 return \@final_list;
1268 }
1269
1270 =head2 get_all_sources
1271
1272 Args : none
1273 Example : my @sources = @{$vf->get_all_sources()};
1274 Description : returns a list of all the sources for this
1275 VariationFeature
1276 ReturnType : reference to list of strings
1277 Exceptions : none
1278 Caller : general
1279 Status : At Risk
1280 : Variation database is under development.
1281 =cut
1282
1283 sub get_all_sources{
1284 my $self = shift;
1285
1286 my @sources;
1287 my %sources;
1288 if ($self->adaptor()){
1289 map {$sources{$_}++} @{$self->adaptor()->get_all_synonym_sources($self)};
1290 $sources{$self->source}++;
1291 @sources = keys %sources;
1292 return \@sources;
1293 }
1294 return \@sources;
1295 }
1296
1297 =head2 ref_allele_string
1298
1299 Args : none
1300 Example : $reference_allele_string = $self->ref_allele_string()
1301 Description: Getter for the reference allele_string, always the first.
1302 Returntype : string
1303 Exceptions : none
1304 Caller : general
1305 Status : Stable
1306
1307 =cut
1308
1309 sub ref_allele_string{
1310 my $self = shift;
1311
1312 my @alleles = split /[\|\\\/]/,$self->allele_string;
1313 return $alleles[0];
1314 }
1315
1316
1317 =head2 get_all_VariationSets
1318
1319 Args : none
1320 Example : my @vs = @{$vf->get_all_VariationSets()};
1321 Description : returns a reference to a list of all the VariationSets this
1322 VariationFeature is a member of
1323 ReturnType : reference to list of Bio::EnsEMBL::Variation::VariationSets
1324 Exceptions : if no adaptor is attached to this object
1325 Caller : general
1326 Status : At Risk
1327 =cut
1328
1329 sub get_all_VariationSets {
1330 my $self = shift;
1331
1332 if (!$self->adaptor()) {
1333 throw('An adaptor must be attached in order to get all variation sets');
1334 }
1335 my $vs_adaptor = $self->adaptor()->db()->get_VariationSetAdaptor();
1336 my $variation_sets = $vs_adaptor->fetch_all_by_Variation($self->variation());
1337
1338 return $variation_sets;
1339 }
1340
1341
1342 =head2 get_all_Alleles
1343
1344 Args : none
1345 Example : @alleles = @{$vf->get_all_Alleles}
1346 Description: Gets all Allele objects from the underlying variation object,
1347 with reference alleles first.
1348 Returntype : listref of Bio::EnsEMBL::Variation::Allele objects
1349 Exceptions : none
1350 Caller : general
1351 Status : Stable
1352
1353 =cut
1354
1355 sub get_all_Alleles{
1356 my $self = shift;
1357
1358 my @alleles = @{$self->variation->get_all_Alleles};
1359
1360 # put all alleles in a hash
1361 my %order = ();
1362 foreach my $allele(@alleles) {
1363 $order{$allele->allele} = 1;
1364 }
1365
1366 $order{$self->ref_allele_string} = 2;
1367
1368 # now sort them by population, submitter, allele
1369 my @new_alleles = sort {
1370 ($a->population ? $a->population->name : "") cmp ($b->population ? $b->population->name : "") ||
1371 ($a->subsnp ? $a->subsnp : "") cmp ($b->subsnp ? $b->subsnp : "") ||
1372 $order{$b->allele} <=> $order{$a->allele}
1373 } @alleles;
1374
1375 return \@new_alleles;
1376 }
1377
1378
1379 =head2 get_all_PopulationGenotypes
1380
1381 Args : none
1382 Example : @pop_gens = @{$vf->get_all_PopulationGenotypes}
1383 Description: Gets all PopulationGenotype objects from the underlying variation
1384 object, with reference genotypes first.
1385 Returntype : listref of Bio::EnsEMBL::Variation::PopulationGenotype objects
1386 Exceptions : none
1387 Caller : general
1388 Status : Stable
1389
1390 =cut
1391
1392 sub get_all_PopulationGenotypes{
1393 my $self = shift;
1394
1395 my @gens = @{$self->variation->get_all_PopulationGenotypes};
1396
1397 # put all alleles in a hash
1398 my %order = ();
1399 foreach my $gen(@gens) {
1400 # homs low priority, hets higher
1401 $order{$gen->allele1.$gen->allele2} = ($gen->allele1 eq $gen->allele2 ? 1 : 2);
1402 }
1403
1404 # ref hom highest priority
1405 $order{$self->ref_allele_string x 2} = 3;
1406
1407 # now sort them by population, submitter, genotype
1408 my @new_gens = sort {
1409 ($a->population ? $a->population->name : "") cmp ($b->population ? $b->population->name : "") ||
1410 ($a->subsnp ? $a->subsnp : "") cmp ($b->subsnp ? $b->subsnp : "") ||
1411 $order{$b->allele1.$b->allele2} <=> $order{$a->allele1.$a->allele2}
1412 } @gens;
1413
1414 return \@new_gens;
1415 }
1416
1417 =head2 get_all_hgvs_notations
1418
1419 Arg [1] : Bio::EnsEMBL::Feature $ref_feature (optional)
1420 Get the HGVS notation of this VariationFeature relative to the slice it is on. If an optional reference feature is supplied, returns the coordinates
1421 relative to this feature.
1422 Arg [2] : string (Optional)
1423 Indicate whether the HGVS notation should be reported in genomic coordinates or cDNA coordinates.
1424 'g' -> Genomic position numbering
1425 'c' -> cDNA position numbering
1426 'p' -> protein position numbering
1427 Arg [3] : string (Optional)
1428 A name to use for the reference can be supplied. By default the name returned by the display_id() method of the reference feature will be used.
1429 Arg [4] : string (Optional)
1430 Return just the HGVS notation corresponding to this allele
1431
1432 Example : my $vf = $variation_feature_adaptor->fetch_by_dbID(565770);
1433 my $tr = $transcript_adaptor->fetch_by_stable_id('ENST00000335295');
1434 my $hgvs = $vf->get_all_hgvs_notations($tr,'p');
1435 while (my ($allele,$hgvs_str) = each(%{$hgvs})) {
1436 print "Allele $allele :\t$hgvs_str\n"; # Will print 'Allele - : ENSP00000333994.3:p.Val34_Tyr36delinsAsp'
1437 }
1438
1439 Description: Returns a reference to a hash with the allele as key and a string with the HGVS notation of this VariationFeature as value. By default uses the
1440 slice it is plcaed on as reference but a different reference feature can be supplied.
1441 Returntype : Hash reference
1442 Exceptions : Throws exception if VariationFeature can not be described relative to the feature_Slice of the supplied reference feature
1443 Caller : general
1444 Status : Experimental
1445
1446 =cut
1447 sub get_all_hgvs_notations {
1448
1449 my $self = shift;
1450 my $ref_feature = shift;
1451 my $numbering = shift; ## HGVS system g=genomic, c=coding, p=protein
1452 my $reference_name = shift; ## If the ref_feature is a slice, this is over-written
1453 my $use_allele = shift; ## optional single allele to check
1454 my $transcript_variation = shift; ## optional transcript variation - looked up for c|p if not supplied
1455
1456 my %hgvs;
1457
1458 ##### don't get them for HGMD mutations or CNV probes
1459 return {} if ($self->allele_string =~ /INS|DEL|HGMD|CNV/ig || $self->var_class() =~ /microsat/i);
1460 ##### By default, use genomic position numbering
1461 $numbering ||= 'g';
1462
1463 # If no reference feature is supplied, set it to the slice underlying this VariationFeature
1464 $ref_feature ||= $self->slice();
1465
1466 # Special parsing for LRG
1467 if (defined $reference_name && $reference_name =~ /^LRG_/) {
1468 # Remove version
1469 if ($reference_name =~ /(.+)\.\d+$/) {
1470 $reference_name = $1;
1471 }
1472 }
1473
1474 ### Check/get transcript variation available for protein & coding
1475 if ($ref_feature->isa('Bio::EnsEMBL::Transcript')) {
1476
1477 # Get a TranscriptVariation object for this VariationFeature and the supplied Transcript if it wasn't passed in the call
1478 $transcript_variation = $self->get_all_TranscriptVariations([$ref_feature])->[0] if (!defined($transcript_variation));
1479
1480 ##### call new TranscriptVariationAllele method for each allele
1481 }
1482
1483
1484 if ($numbering eq 'p') {
1485
1486 #### If there is no transcript variation supplied and the variant
1487 #### is not in the translated region there is no protein change
1488 return {} if (!defined($transcript_variation) ||
1489 !defined($transcript_variation->translation_start()) ||
1490 !defined($transcript_variation->translation_end()));
1491
1492 ##### call TranscriptVariationAllele method for each allele
1493 foreach my $transcriptVariationAllele (@{$transcript_variation->get_all_alternate_TranscriptVariationAlleles()} ){
1494
1495 my $allele_string = $transcriptVariationAllele->feature_seq();
1496 my $hgvs_full_string = $transcriptVariationAllele->hgvs_protein();
1497
1498 if($allele_string ne (split/\//,$self->allele_string())[1] ){
1499 reverse_comp(\$allele_string); ### hash returned relative to input variation feature strand regardless of transcript strand
1500 }
1501 $hgvs{$allele_string} = $hgvs_full_string ;
1502 }
1503 return \%hgvs;
1504 }
1505
1506 elsif ( $numbering =~ m/c|n/) { ### coding or non- coding transcript
1507
1508 return {} if (!defined $transcript_variation);
1509
1510 foreach my $transcriptVariationAllele (@{$transcript_variation->get_all_alternate_TranscriptVariationAlleles()} ){
1511
1512 my $allele_string = $transcriptVariationAllele->feature_seq();
1513 my $hgvs_full_string = $transcriptVariationAllele->hgvs_transcript();
1514
1515 if($allele_string ne (split/\//,$self->allele_string())[1] ){
1516 reverse_comp(\$allele_string); ### hash returned relative to input variation feature strand regardless of transcript strand
1517 }
1518 $hgvs{$allele_string} = $hgvs_full_string ;
1519 }
1520 return \%hgvs;
1521 }
1522
1523 elsif( $numbering =~ m/g/ ) {
1524 #### handling both alleles together locally for genomic class
1525 my $hgvs = $self->hgvs_genomic($ref_feature, $reference_name, $use_allele );
1526 return $hgvs;
1527 }
1528 else{
1529 warn("HGVS notation is not available for coordinate system: $numbering");
1530 return {};
1531 }
1532 }
1533
1534 ### HGVS short flanking sequence required to check if HGVS variant class should be dup rather than ins
1535 sub _get_flank_seq{
1536
1537 my $self = shift;
1538
1539 # Get the underlying slice and sequence
1540 my $ref_slice = $self->slice();
1541
1542 my @allele = split(/\//,$self->allele_string());
1543 #### add flank at least as long as longest allele to allow checking
1544 my $add_length = 0;
1545
1546 foreach my $al(@allele){ ## may have >2 alleles
1547 if(length($al) > $add_length){
1548 $add_length = length $al ;
1549 }
1550 }
1551 $add_length++;
1552
1553 my $ref_start = $add_length ;
1554 my $ref_end = $add_length + ($self->end() - $self->start());
1555 my $seq_start = ($self->start() - $ref_start);
1556
1557 # Should we be at the beginning of the sequence, adjust the coordinates to not cause an exception
1558 if ($seq_start < 0) {
1559 $ref_start += $seq_start;
1560 $ref_end += $seq_start;
1561 $seq_start = 0;
1562 }
1563
1564 my $flank_seq = $ref_slice->subseq($seq_start +1 , $seq_start + $ref_end, 1);
1565
1566
1567 return ($flank_seq, $ref_start, $ref_end );
1568 }
1569
1570 #### format HGVS hash for genomic reference sequence
1571 ### Input: Variation feature
1572 ### Output: $hgvs_notation hash
1573
1574
1575
1576 =head2 hgvs_genomic
1577
1578 Arg [1] : Bio::EnsEMBL::Feature $ref_feature (optional)
1579 Get the HGVS notation of this VariationFeature relative to the slice it is on. If an optional reference feature is supplied, returns the coordinates
1580 relative to this feature.
1581 Arg [2] : string (Optional)
1582 A name to use for the reference can be supplied. By default the name returned by the display_id() method of the reference feature will be used.
1583 Arg [4] : string (Optional)
1584 Return just the HGVS notation corresponding to this allele
1585
1586
1587
1588 Description: Returns a reference to a hash with the allele as key and a string with the genomic HGVS notation of this VariationFeature as value. By default uses the
1589 slice it is placed on as reference but a different reference feature can be supplied.
1590 Returntype : Hash reference
1591 Exceptions : Throws exception if VariationFeature can not be described relative to the feature_Slice of the supplied reference feature
1592 Caller : general
1593 Status : Experimental
1594
1595 =cut
1596 sub hgvs_genomic{
1597
1598 my $self = shift;
1599 my $ref_feature = shift; ## can be a transcript
1600 my $reference_name = shift; ## If the ref_feature is a slice, this is over-written
1601 my $use_allele = shift; ## optional single allele to check
1602
1603 my %hgvs;
1604 ########set up sequence reference
1605 my $ref_slice; #### need this for genomic notation
1606
1607 if ($ref_feature->isa('Bio::EnsEMBL::Slice')) {
1608 $ref_slice = $ref_feature;
1609 }
1610 else {
1611 # get slice if not supplied
1612 $ref_slice = $ref_feature->feature_Slice;
1613 }
1614
1615 # Create new VariationFeature on the slice of the reference feature (unless the reference feature is the slice the VF is on)
1616 my $tr_vf;
1617 if ( $self->slice->coord_system->name() eq "chromosome") {
1618 $tr_vf = $self;
1619 }
1620 else {
1621 $tr_vf = $self->transfer($ref_slice);
1622 }
1623 # Return undef if this VariationFeature could not be transferred
1624 return {} if (!defined($tr_vf));
1625
1626
1627 # Return undef if this VariationFeature does not fall within the supplied feature.
1628 return {} if ($tr_vf->start < 1 ||
1629 $tr_vf->end < 1 ||
1630 $tr_vf->start > ($ref_feature->end - $ref_feature->start + 1) ||
1631 $tr_vf->end > ($ref_feature->end - $ref_feature->start + 1));
1632
1633 ######### define reference sequence name ###################################
1634
1635 # If the reference is a slice, use the seq_region_name as identifier
1636 $reference_name ||= $ref_feature->seq_region_name if ($ref_feature->isa('Bio::EnsEMBL::Slice'));
1637
1638 # Use the feature's display id as reference name unless specified otherwise.
1639 # If the feature is a transcript or translation, append the version number as well
1640 $reference_name ||= $ref_feature->display_id() . ($ref_feature->isa('Bio::EnsEMBL::Transcript') &&
1641 $ref_feature->display_id !~ /\.\d+$/ ? '.' . $ref_feature->version() : '');
1642
1643
1644 ##### get short flank sequence for duplication checking & adjusted variation coordinates
1645 my ($ref_seq, $ref_start, $ref_end) = _get_flank_seq($tr_vf);;
1646
1647 foreach my $allele ( split(/\//,$tr_vf->allele_string()) ) {
1648
1649 ## If a particular allele was requested, ignore others
1650 next if (defined($use_allele) && $allele ne $use_allele);
1651
1652 # Skip if the allele contains weird characters
1653 next if $allele =~ m/[^ACGT\-]/ig;
1654
1655 ##### vf strand is relative to slice - if transcript feature slice, may need complimenting
1656 my $check_allele = $allele;
1657 if( $self->strand() <0 && $ref_slice->strand >0 ||
1658 $self->strand() >0 && $ref_slice->strand < 0
1659 ){
1660 reverse_comp(\$check_allele);
1661 if($DEBUG ==1){print "***************Flipping alt allele $allele => $check_allele to match coding strand\n";}
1662 }
1663
1664 ## work out chrom coord for hgvs string if transcript slice supplied
1665 my ($chr_start,$chr_end);
1666 if ( $tr_vf->slice->is_toplevel() == 1) {
1667 $chr_start = $tr_vf->seq_region_start();
1668 $chr_end = $tr_vf->seq_region_end();
1669 }
1670 else{
1671 ## add feature start to start of var-in-feature
1672 if( $tr_vf->seq_region_start() < $tr_vf->seq_region_end()){
1673 $chr_start = $tr_vf->start() + $tr_vf->seq_region_start() ;
1674 $chr_end = $tr_vf->end() + $tr_vf->seq_region_start() ;
1675 }
1676 else{
1677 $chr_start = $tr_vf->seq_region_start() - $tr_vf->start() ;
1678 $chr_end = $tr_vf->seq_region_start() - $tr_vf->end() ;
1679 }
1680 }
1681
1682
1683 my $hgvs_notation = hgvs_variant_notation( $check_allele, ## alt allele in refseq strand orientation
1684 $ref_seq, ## substring of slice for ref allele extraction
1685 $ref_start, ## start on substring of slice for ref allele extraction
1686 $ref_end,
1687 $chr_start, ## start wrt seq region slice is on (eg. chrom)
1688 $chr_end,
1689 "");
1690
1691
1692 # Skip if e.g. allele is identical to the reference slice
1693 next if (!defined($hgvs_notation));
1694
1695 # Add the name of the reference
1696 $hgvs_notation->{'ref_name'} = $reference_name;
1697 # Add the position_numbering scheme
1698 $hgvs_notation->{'numbering'} = 'g';
1699
1700 # Construct the HGVS notation from the data in the hash
1701 $hgvs_notation->{'hgvs'} = format_hgvs_string( $hgvs_notation);
1702
1703 $hgvs{$allele} = $hgvs_notation->{'hgvs'};
1704 }
1705 return \%hgvs;
1706
1707 }
1708
1709
1710
1711 sub length {
1712 my $self = shift;
1713 return $self->{'end'} - $self->{'start'} + 1;
1714 }
1715
1716 =head2 summary_as_hash
1717
1718 Example : $feature_summary = $feature->summary_as_hash();
1719 Description : Extends Feature::summary_as_hash
1720 Retrieves a summary of this VariationFeature object.
1721
1722 Returns : hashref of descriptive strings
1723
1724 =cut
1725
1726 sub summary_as_hash {
1727 my $self = shift;
1728 my $summary_ref = $self->SUPER::summary_as_hash;
1729 $summary_ref->{'consequence_type'} = $self->display_consequence;
1730 my @allele_list = split(/\//,$self->allele_string);
1731 $summary_ref->{'alt_alleles'} = \@allele_list;
1732 return $summary_ref;
1733 }
1734
1735 1;