Mercurial > repos > willmclaren > ensembl_vep
comparison variant_effect_predictor/Bio/EnsEMBL/Variation/VariationFeature.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:21066c0abaf5 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 # Ensembl module for Bio::EnsEMBL::Variation::VariationFeature | |
22 # | |
23 # Copyright (c) 2004 Ensembl | |
24 # | |
25 | |
26 | |
27 =head1 NAME | |
28 | |
29 Bio::EnsEMBL::Variation::VariationFeature - A genomic position for a nucleotide variation. | |
30 | |
31 =head1 SYNOPSIS | |
32 | |
33 # Variation feature representing a single nucleotide polymorphism | |
34 $vf = Bio::EnsEMBL::Variation::VariationFeature->new | |
35 (-start => 100, | |
36 -end => 100, | |
37 -strand => 1, | |
38 -slice => $slice, | |
39 -allele_string => 'A/T', | |
40 -variation_name => 'rs635421', | |
41 -map_weight => 1, | |
42 -variation => $v); | |
43 | |
44 # Variation feature representing a 2bp insertion | |
45 $vf = Bio::EnsEMBL::Variation::VariationFeature->new | |
46 (-start => 1522, | |
47 -end => 1521, # end = start-1 for insert | |
48 -strand => -1, | |
49 -slice => $slice, | |
50 -allele_string => '-/AA', | |
51 -variation_name => 'rs12111', | |
52 -map_weight => 1, | |
53 -variation => $v2); | |
54 | |
55 ... | |
56 | |
57 # a variation feature is like any other ensembl feature, can be | |
58 # transformed etc. | |
59 $vf = $vf->transform('supercontig'); | |
60 | |
61 print $vf->start(), "-", $vf->end(), '(', $vf->strand(), ')', "\n"; | |
62 | |
63 print $vf->name(), ":", $vf->allele_string(); | |
64 | |
65 # Get the Variation object which this feature represents the genomic | |
66 # position of. If not already retrieved from the DB, this will be | |
67 # transparently lazy-loaded | |
68 my $v = $vf->variation(); | |
69 | |
70 =head1 DESCRIPTION | |
71 | |
72 This is a class representing the genomic position of a nucleotide variation | |
73 from the ensembl-variation database. The actual variation information is | |
74 represented by an associated Bio::EnsEMBL::Variation::Variation object. Some | |
75 of the information has been denormalized and is available on the feature for | |
76 speed purposes. A VariationFeature behaves as any other Ensembl feature. | |
77 See B<Bio::EnsEMBL::Feature> and B<Bio::EnsEMBL::Variation::Variation>. | |
78 | |
79 =head1 METHODS | |
80 | |
81 =cut | |
82 | |
83 use strict; | |
84 use warnings; | |
85 | |
86 package Bio::EnsEMBL::Variation::VariationFeature; | |
87 | |
88 use Scalar::Util qw(weaken isweak); | |
89 | |
90 use Bio::EnsEMBL::Variation::BaseVariationFeature; | |
91 use Bio::EnsEMBL::Utils::Exception qw(throw warning); | |
92 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref); | |
93 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
94 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp); | |
95 use Bio::EnsEMBL::Variation::Utils::Sequence qw(ambiguity_code hgvs_variant_notation SO_variation_class format_hgvs_string); | |
96 use Bio::EnsEMBL::Variation::Utils::Sequence; | |
97 use Bio::EnsEMBL::Variation::Variation; | |
98 use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(MAX_DISTANCE_FROM_TRANSCRIPT); | |
99 use Bio::EnsEMBL::Variation::Utils::Constants qw($DEFAULT_OVERLAP_CONSEQUENCE %VARIATION_CLASSES); | |
100 use Bio::EnsEMBL::Variation::RegulatoryFeatureVariation; | |
101 use Bio::EnsEMBL::Variation::MotifFeatureVariation; | |
102 use Bio::EnsEMBL::Variation::ExternalFeatureVariation; | |
103 use Bio::EnsEMBL::Variation::IntergenicVariation; | |
104 use Bio::EnsEMBL::Slice; | |
105 use Bio::EnsEMBL::Variation::DBSQL::TranscriptVariationAdaptor; | |
106 use Bio::PrimarySeq; | |
107 use Bio::SeqUtils; | |
108 | |
109 our @ISA = ('Bio::EnsEMBL::Variation::BaseVariationFeature'); | |
110 | |
111 our $DEBUG = 0; | |
112 =head2 new | |
113 | |
114 Arg [-dbID] : | |
115 see superclass constructor | |
116 | |
117 Arg [-ADAPTOR] : | |
118 see superclass constructor | |
119 | |
120 Arg [-START] : | |
121 see superclass constructor | |
122 Arg [-END] : | |
123 see superclass constructor | |
124 | |
125 Arg [-STRAND] : | |
126 see superclass constructor | |
127 | |
128 Arg [-SLICE] : | |
129 see superclass constructor | |
130 | |
131 Arg [-VARIATION_NAME] : | |
132 string - the name of the variation this feature is for (denormalisation | |
133 from Variation object). | |
134 | |
135 Arg [-MAP_WEIGHT] : | |
136 int - the number of times that the variation associated with this feature | |
137 has hit the genome. If this was the only feature associated with this | |
138 variation_feature the map_weight would be 1. | |
139 | |
140 Arg [-VARIATION] : | |
141 int - the variation object associated with this feature. | |
142 | |
143 Arg [-SOURCE] : | |
144 string - the name of the source where the SNP comes from | |
145 | |
146 Arg [-SOURCE_VERSION] : | |
147 number - the version of the source where the SNP comes from | |
148 | |
149 Arg [-VALIDATION_CODE] : | |
150 reference to list of strings | |
151 | |
152 Arg [-OVERLAP_CONSEQUENCES] : | |
153 listref of Bio::EnsEMBL::Variation::OverlapConsequences - all the consequences of this VariationFeature | |
154 | |
155 Arg [-VARIATION_ID] : | |
156 int - the internal id of the variation object associated with this | |
157 identifier. This may be provided instead of a variation object so that | |
158 the variation may be lazy-loaded from the database on demand. | |
159 | |
160 Example : | |
161 $vf = Bio::EnsEMBL::Variation::VariationFeature->new( | |
162 -start => 100, | |
163 -end => 100, | |
164 -strand => 1, | |
165 -slice => $slice, | |
166 -allele_string => 'A/T', | |
167 -variation_name => 'rs635421', | |
168 -map_weight => 1, | |
169 -source => 'dbSNP', | |
170 -validation_code => ['cluster','doublehit'], | |
171 -variation => $v | |
172 ); | |
173 | |
174 Description: Constructor. Instantiates a new VariationFeature object. | |
175 Returntype : Bio::EnsEMBL::Variation::Variation | |
176 Exceptions : none | |
177 Caller : general | |
178 Status : At Risk | |
179 | |
180 =cut | |
181 | |
182 sub new { | |
183 my $caller = shift; | |
184 my $class = ref($caller) || $caller; | |
185 | |
186 my $self = $class->SUPER::new(@_); | |
187 | |
188 my ( | |
189 $allele_str, | |
190 $var_name, | |
191 $map_weight, | |
192 $variation, | |
193 $variation_id, | |
194 $source, | |
195 $source_version, | |
196 $is_somatic, | |
197 $validation_code, | |
198 $overlap_consequences, | |
199 $class_so_term, | |
200 $minor_allele, | |
201 $minor_allele_freq, | |
202 $minor_allele_count | |
203 ) = rearrange([qw( | |
204 ALLELE_STRING | |
205 VARIATION_NAME | |
206 MAP_WEIGHT | |
207 VARIATION | |
208 _VARIATION_ID | |
209 SOURCE | |
210 SOURCE_VERSION | |
211 IS_SOMATIC | |
212 VALIDATION_CODE | |
213 OVERLAP_CONSEQUENCES | |
214 CLASS_SO_TERM | |
215 MINOR_ALLELE | |
216 MINOR_ALLELE_FREQUENCY | |
217 MINOR_ALLELE_COUNT | |
218 )], @_); | |
219 | |
220 $self->{'allele_string'} = $allele_str; | |
221 $self->{'variation_name'} = $var_name; | |
222 $self->{'map_weight'} = $map_weight; | |
223 $self->{'variation'} = $variation; | |
224 $self->{'_variation_id'} = $variation_id; | |
225 $self->{'source'} = $source; | |
226 $self->{'source_version'} = $source_version; | |
227 $self->{'is_somatic'} = $is_somatic; | |
228 $self->{'validation_code'} = $validation_code; | |
229 $self->{'overlap_consequences'} = $overlap_consequences; | |
230 $self->{'class_SO_term'} = $class_so_term; | |
231 $self->{'minor_allele'} = $minor_allele; | |
232 $self->{'minor_allele_frequency'} = $minor_allele_freq; | |
233 $self->{'minor_allele_count'} = $minor_allele_count; | |
234 | |
235 return $self; | |
236 } | |
237 | |
238 | |
239 | |
240 sub new_fast { | |
241 | |
242 my $class = shift; | |
243 my $hashref = shift; | |
244 my $self = bless $hashref, $class; | |
245 weaken($self->{'adaptor'}) if ( ! isweak($self->{'adaptor'}) ); | |
246 return $self; | |
247 | |
248 } | |
249 | |
250 | |
251 =head2 allele_string | |
252 | |
253 Arg [1] : string $newval (optional) | |
254 The new value to set the allele_string attribute to | |
255 Example : $allele_string = $obj->allele_string() | |
256 Description: Getter/Setter for the allele_string attribute. | |
257 The allele_string is a '/' demimited string representing the | |
258 alleles associated with this features variation. | |
259 Returntype : string | |
260 Exceptions : none | |
261 Caller : general | |
262 Status : Stable | |
263 | |
264 =cut | |
265 | |
266 sub allele_string{ | |
267 my $self = shift; | |
268 return $self->{'allele_string'} = shift if(@_); | |
269 return $self->{'allele_string'}; | |
270 } | |
271 | |
272 =head2 display_id | |
273 | |
274 Arg [1] : none | |
275 Example : print $vf->display_id(), "\n"; | |
276 Description: Returns the 'display' identifier for this feature. For | |
277 VariationFeatures this is simply the name of the variation | |
278 it is associated with. | |
279 Returntype : string | |
280 Exceptions : none | |
281 Caller : webcode | |
282 Status : At Risk | |
283 | |
284 =cut | |
285 | |
286 sub display_id { | |
287 my $self = shift; | |
288 return $self->{'variation_name'} || ''; | |
289 } | |
290 | |
291 | |
292 | |
293 =head2 variation_name | |
294 | |
295 Arg [1] : string $newval (optional) | |
296 The new value to set the variation_name attribute to | |
297 Example : $variation_name = $obj->variation_name() | |
298 Description: Getter/Setter for the variation_name attribute. This is the | |
299 name of the variation associated with this feature. | |
300 Returntype : string | |
301 Exceptions : none | |
302 Caller : general | |
303 Status : Stable | |
304 | |
305 =cut | |
306 | |
307 sub variation_name{ | |
308 my $self = shift; | |
309 return $self->{'variation_name'} = shift if(@_); | |
310 return $self->{'variation_name'}; | |
311 } | |
312 | |
313 | |
314 | |
315 =head2 map_weight | |
316 | |
317 Arg [1] : int $newval (optional) | |
318 The new value to set the map_weight attribute to | |
319 Example : $map_weight = $obj->map_weight() | |
320 Description: Getter/Setter for the map_weight attribute. The map_weight | |
321 is the number of times this features variation was mapped to | |
322 the genome. | |
323 Returntype : int | |
324 Exceptions : none | |
325 Caller : general | |
326 Status : At Risk | |
327 | |
328 =cut | |
329 | |
330 sub map_weight{ | |
331 my $self = shift; | |
332 return $self->{'map_weight'} = shift if(@_); | |
333 return $self->{'map_weight'}; | |
334 } | |
335 | |
336 =head2 minor_allele | |
337 | |
338 Arg [1] : string $minor_allele (optional) | |
339 The new minor allele string | |
340 Example : $ma = $obj->minor_allele() | |
341 Description: Get/set the minor allele of this variation, as reported by dbSNP | |
342 Returntype : string | |
343 Exceptions : none | |
344 Caller : general | |
345 Status : Stable | |
346 | |
347 =cut | |
348 | |
349 sub minor_allele { | |
350 my ($self, $minor_allele) = @_; | |
351 $self->{minor_allele} = $minor_allele if defined $minor_allele; | |
352 return $self->{minor_allele} | |
353 } | |
354 | |
355 =head2 minor_allele_frequency | |
356 | |
357 Arg [1] : float $minor_allele_frequency (optional) | |
358 The new minor allele frequency | |
359 Example : $maf = $obj->minor_allele_frequency() | |
360 Description: Get/set the frequency of the minor allele of this variation, as reported by dbSNP | |
361 Returntype : float | |
362 Exceptions : none | |
363 Caller : general | |
364 Status : Stable | |
365 | |
366 =cut | |
367 | |
368 sub minor_allele_frequency { | |
369 my ($self, $minor_allele_frequency) = @_; | |
370 $self->{minor_allele_frequency} = $minor_allele_frequency if defined $minor_allele_frequency; | |
371 return $self->{minor_allele_frequency} | |
372 } | |
373 | |
374 =head2 minor_allele_count | |
375 | |
376 Arg [1] : int $minor_allele_count (optional) | |
377 The new minor allele count | |
378 Example : $maf_count = $obj->minor_allele_count() | |
379 Description: Get/set the sample count of the minor allele of this variation, as reported by dbSNP | |
380 Returntype : int | |
381 Exceptions : none | |
382 Caller : general | |
383 Status : Stable | |
384 | |
385 =cut | |
386 | |
387 sub minor_allele_count { | |
388 my ($self, $minor_allele_count) = @_; | |
389 $self->{minor_allele_count} = $minor_allele_count if defined $minor_allele_count; | |
390 return $self->{minor_allele_count} | |
391 } | |
392 | |
393 | |
394 | |
395 =head2 get_all_TranscriptVariations | |
396 | |
397 Arg [1] : (optional) listref of Bio::EnsEMBL::Transcript objects | |
398 Example : $vf->get_all_TranscriptVariations; | |
399 Description : Get all the TranscriptVariations associated with this VariationFeature. | |
400 If the optional list of Transcripts is supplied, get only TranscriptVariations | |
401 associated with those Transcripts. | |
402 Returntype : listref of Bio::EnsEMBL::Variation::TranscriptVariation objects | |
403 Exceptions : Thrown on wrong argument type | |
404 Caller : general | |
405 Status : At Risk | |
406 | |
407 =cut | |
408 | |
409 sub get_all_TranscriptVariations { | |
410 | |
411 my ($self, $transcripts) = @_; | |
412 | |
413 if ($transcripts) { | |
414 assert_ref($transcripts, 'ARRAY'); | |
415 map { assert_ref($_, 'Bio::EnsEMBL::Transcript') } @$transcripts; | |
416 } | |
417 | |
418 #die unless $self->{transcript_variations}; | |
419 | |
420 if ($self->dbID && not defined $self->{transcript_variations}) { | |
421 # this VariationFeature is from the database, so we can just fetch the | |
422 # TranscriptVariations from the database as well | |
423 | |
424 if (my $db = $self->adaptor->db) { | |
425 my $tva = $db->get_TranscriptVariationAdaptor; | |
426 | |
427 # just fetch TVs for all Transcripts because that's more efficient, | |
428 # we'll only return those asked for later on | |
429 | |
430 my $tvs = $tva->fetch_all_by_VariationFeatures([$self]); | |
431 | |
432 map { $self->add_TranscriptVariation($_) } @$tvs; | |
433 } | |
434 } | |
435 elsif (not defined $self->{transcript_variations}) { | |
436 # this VariationFeature is not in the database so we have to build the | |
437 # TranscriptVariations ourselves | |
438 | |
439 unless ($transcripts) { | |
440 # if the caller didn't supply some transcripts fetch those around this VariationFeature | |
441 | |
442 # get a slice around this transcript including the maximum distance up and down-stream | |
443 # that we still call consequences for | |
444 | |
445 my $slice = $self->feature_Slice->expand( | |
446 MAX_DISTANCE_FROM_TRANSCRIPT, | |
447 MAX_DISTANCE_FROM_TRANSCRIPT | |
448 ); | |
449 | |
450 # fetch all transcripts on this slice | |
451 | |
452 $transcripts = $slice->get_all_Transcripts(1); | |
453 } | |
454 | |
455 my @unfetched_transcripts = grep { | |
456 not exists $self->{transcript_variations}->{$_->stable_id} | |
457 } @$transcripts; | |
458 | |
459 for my $transcript (@unfetched_transcripts) { | |
460 $self->add_TranscriptVariation( | |
461 Bio::EnsEMBL::Variation::TranscriptVariation->new( | |
462 -variation_feature => $self, | |
463 -transcript => $transcript, | |
464 -adaptor => ($self->adaptor->db ? $self->adaptor->db->get_TranscriptVariationAdaptor : undef), | |
465 ) | |
466 ); | |
467 } | |
468 } | |
469 | |
470 if ($transcripts) { | |
471 # just return TranscriptVariations for the requested Transcripts | |
472 return [ map { $self->{transcript_variations}->{$_->stable_id} } @$transcripts ]; | |
473 } | |
474 else { | |
475 # return all TranscriptVariations | |
476 return [ values %{ $self->{transcript_variations} } ]; | |
477 } | |
478 } | |
479 | |
480 =head2 get_all_RegulatoryFeatureVariations | |
481 | |
482 Description : Get all the RegulatoryFeatureVariations associated with this VariationFeature. | |
483 Returntype : listref of Bio::EnsEMBL::Variation::RegulatoryFeatureVariation objects | |
484 Exceptions : none | |
485 Status : At Risk | |
486 | |
487 =cut | |
488 | |
489 sub get_all_RegulatoryFeatureVariations { | |
490 my $self = shift; | |
491 return $self->_get_all_RegulationVariations('RegulatoryFeature', @_); | |
492 } | |
493 | |
494 =head2 get_all_MotifFeatureVariations | |
495 | |
496 Description : Get all the MotifFeatureVariations associated with this VariationFeature. | |
497 Returntype : listref of Bio::EnsEMBL::Variation::MotifFeatureVariation objects | |
498 Exceptions : none | |
499 Status : At Risk | |
500 | |
501 =cut | |
502 | |
503 sub get_all_MotifFeatureVariations { | |
504 my $self = shift; | |
505 return $self->_get_all_RegulationVariations('MotifFeature', @_); | |
506 } | |
507 | |
508 =head2 get_all_ExternalFeatureVariations | |
509 | |
510 Description : Get all the ExternalFeatureVariations associated with this VariationFeature. | |
511 Returntype : listref of Bio::EnsEMBL::Variation::ExternalFeatureVariation objects | |
512 Exceptions : none | |
513 Status : At Risk | |
514 | |
515 =cut | |
516 | |
517 sub get_all_ExternalFeatureVariations { | |
518 my $self = shift; | |
519 return $self->_get_all_RegulationVariations('ExternalFeature', @_); | |
520 } | |
521 | |
522 sub _get_all_RegulationVariations { | |
523 my ($self, $type) = @_; | |
524 | |
525 unless ($type && ($type eq 'RegulatoryFeature' || $type eq 'MotifFeature' || $type eq 'ExternalFeature')) { | |
526 throw("Invalid Ensembl Regulation type '$type'"); | |
527 } | |
528 | |
529 unless ($self->{regulation_variations}->{$type}) { | |
530 | |
531 my $fg_adaptor; | |
532 | |
533 if (my $adap = $self->adaptor) { | |
534 if(my $db = $adap->db) { | |
535 $fg_adaptor = Bio::EnsEMBL::DBSQL::MergedAdaptor->new( | |
536 -species => $adap->db->species, | |
537 -type => $type, | |
538 ); | |
539 } | |
540 | |
541 unless ($fg_adaptor) { | |
542 warning("Failed to get adaptor for $type"); | |
543 return []; | |
544 } | |
545 } | |
546 else { | |
547 warning('Cannot get variation features without attached adaptor'); | |
548 return []; | |
549 } | |
550 | |
551 my $slice = $self->feature_Slice; | |
552 | |
553 my $constructor = 'Bio::EnsEMBL::Variation::'.$type.'Variation'; | |
554 | |
555 eval { | |
556 $self->{regulation_variations}->{$type} = [ | |
557 map { | |
558 $constructor->new( | |
559 -variation_feature => $self, | |
560 -feature => $_, | |
561 ); | |
562 } map { $_->transfer($self->slice) } @{ $fg_adaptor->fetch_all_by_Slice($slice) } | |
563 ]; | |
564 }; | |
565 | |
566 $self->{regulation_variations}->{$type} ||= []; | |
567 } | |
568 | |
569 return $self->{regulation_variations}->{$type}; | |
570 } | |
571 | |
572 sub get_IntergenicVariation { | |
573 my $self = shift; | |
574 my $no_ref_check = shift; | |
575 | |
576 unless (exists $self->{intergenic_variation}) { | |
577 if (scalar(@{ $self->get_all_TranscriptVariations }) == 0) { | |
578 $self->{intergenic_variation} = Bio::EnsEMBL::Variation::IntergenicVariation->new( | |
579 -variation_feature => $self, | |
580 -no_ref_check => $no_ref_check, | |
581 ); | |
582 } | |
583 else { | |
584 $self->{intergenic_variation} = undef; | |
585 } | |
586 } | |
587 | |
588 return $self->{intergenic_variation}; | |
589 } | |
590 | |
591 =head2 get_all_VariationFeatureOverlaps | |
592 | |
593 Description : Get all the VariationFeatureOverlaps associated with this VariationFeature, this | |
594 includes TranscriptVariations and regulatory feature overlap object. | |
595 Returntype : listref of Bio::EnsEMBL::Variation::VariationFeatureOverlap objects | |
596 Exceptions : none | |
597 Status : At Risk | |
598 | |
599 =cut | |
600 | |
601 sub get_all_VariationFeatureOverlaps { | |
602 my $self = shift; | |
603 | |
604 my $vfos = [ | |
605 @{ $self->get_all_TranscriptVariations }, | |
606 @{ $self->get_all_RegulatoryFeatureVariations }, | |
607 @{ $self->get_all_MotifFeatureVariations }, | |
608 @{ $self->get_all_ExternalFeatureVariations }, | |
609 ]; | |
610 | |
611 if (my $iv = $self->get_IntergenicVariation) { | |
612 push @$vfos, $iv; | |
613 } | |
614 | |
615 return $vfos; | |
616 } | |
617 | |
618 =head2 add_TranscriptVariation | |
619 | |
620 Arg [1] : Bio::EnsEMBL::Variation::TranscriptVariation | |
621 Example : $vf->add_TranscriptVariation($tv); | |
622 Description : Adds a TranscriptVariation to the variation feature object. | |
623 Exceptions : thrown on bad argument | |
624 Caller : Bio::EnsEMBL::Variation::TranscriptVariationAdaptor | |
625 Status : At Risk | |
626 | |
627 =cut | |
628 | |
629 sub add_TranscriptVariation { | |
630 my ($self, $tv) = @_; | |
631 assert_ref($tv, 'Bio::EnsEMBL::Variation::TranscriptVariation'); | |
632 # we need to weaken the reference back to us to avoid a circular reference | |
633 weaken($tv->{base_variation_feature}); | |
634 $self->{transcript_variations}->{$tv->transcript_stable_id} = $tv; | |
635 } | |
636 | |
637 =head2 variation | |
638 | |
639 Arg [1] : (optional) Bio::EnsEMBL::Variation::Variation $variation | |
640 Example : $v = $vf->variation(); | |
641 Description: Getter/Setter for the variation associated with this feature. | |
642 If not set, and this VariationFeature has an associated adaptor | |
643 an attempt will be made to lazy-load the variation from the | |
644 database. | |
645 Returntype : Bio::EnsEMBL::Variation::Variation | |
646 Exceptions : throw on incorrect argument | |
647 Caller : general | |
648 Status : Stable | |
649 | |
650 =cut | |
651 | |
652 sub variation { | |
653 my $self = shift; | |
654 | |
655 if(@_) { | |
656 if(!ref($_[0]) || !$_[0]->isa('Bio::EnsEMBL::Variation::Variation')) { | |
657 throw("Bio::EnsEMBL::Variation::Variation argument expected"); | |
658 } | |
659 $self->{'variation'} = shift; | |
660 } | |
661 elsif(!defined($self->{'variation'}) && $self->adaptor() && | |
662 defined($self->{'_variation_id'})) { | |
663 # lazy-load from database on demand | |
664 my $va = $self->adaptor->db()->get_VariationAdaptor(); | |
665 $self->{'variation'} = $va->fetch_by_dbID($self->{'_variation_id'}); | |
666 } | |
667 | |
668 return $self->{'variation'}; | |
669 } | |
670 | |
671 =head2 consequence_type | |
672 | |
673 Arg [1] : (optional) String $term_type | |
674 Description: Get a list of all the unique consequence terms of this | |
675 VariationFeature. By default returns Ensembl display terms | |
676 (e.g. 'NON_SYNONYMOUS_CODING'). $term_type can also be 'label' | |
677 (e.g. 'Non-synonymous coding'), 'SO' (Sequence Ontology, e.g. | |
678 'non_synonymous_codon') or 'NCBI' (e.g. 'missense') | |
679 Returntype : listref of strings | |
680 Exceptions : none | |
681 Status : At Risk | |
682 | |
683 =cut | |
684 | |
685 sub consequence_type { | |
686 | |
687 my $self = shift; | |
688 my $term_type = shift; | |
689 | |
690 my $method_name; | |
691 | |
692 # delete cached term | |
693 if(defined($term_type)) { | |
694 delete $self->{consequence_types}; | |
695 $method_name = $term_type.($term_type eq 'label' ? '' : '_term'); | |
696 $method_name = 'SO_term' unless @{$self->get_all_OverlapConsequences} && $self->get_all_OverlapConsequences->[0]->can($method_name); | |
697 } | |
698 | |
699 $method_name ||= 'SO_term'; | |
700 | |
701 if (exists($self->{current_consequence_method}) && $self->{current_consequence_method} ne $method_name) { | |
702 delete $self->{consequence_type}; | |
703 } | |
704 | |
705 unless ($self->{consequence_types}) { | |
706 | |
707 # work out the terms from the OverlapConsequence objects | |
708 | |
709 $self->{consequence_types} = | |
710 [ map { $_->$method_name } @{ $self->get_all_OverlapConsequences } ]; | |
711 } | |
712 | |
713 $self->{current_consequence_method} = $method_name; | |
714 | |
715 return $self->{consequence_types}; | |
716 } | |
717 | |
718 =head2 get_all_OverlapConsequences | |
719 | |
720 Description: Get a list of all the unique OverlapConsequences of this VariationFeature, | |
721 calculating them on the fly from the TranscriptVariations if necessary | |
722 Returntype : listref of Bio::EnsEMBL::Variation::OverlapConsequence objects | |
723 Exceptions : none | |
724 Status : At Risk | |
725 | |
726 =cut | |
727 | |
728 sub get_all_OverlapConsequences { | |
729 my $self = shift; | |
730 | |
731 unless ($self->{overlap_consequences}) { | |
732 | |
733 # work them out and store them in a hash keyed by SO_term as we don't | |
734 # want duplicates from different VFOs | |
735 | |
736 my %overlap_cons; | |
737 | |
738 for my $vfo (@{ $self->get_all_TranscriptVariations }) { | |
739 for my $allele (@{ $vfo->get_all_alternate_VariationFeatureOverlapAlleles }) { | |
740 for my $cons (@{ $allele->get_all_OverlapConsequences }) { | |
741 $overlap_cons{$cons->SO_term} = $cons; | |
742 } | |
743 } | |
744 } | |
745 | |
746 # if we don't have any consequences we use a default from Constants.pm | |
747 # (currently set to the intergenic consequence) | |
748 | |
749 $self->{overlap_consequences} = [ | |
750 %overlap_cons ? values %overlap_cons : $DEFAULT_OVERLAP_CONSEQUENCE | |
751 ]; | |
752 } | |
753 | |
754 return $self->{overlap_consequences}; | |
755 } | |
756 | |
757 =head2 add_OverlapConsequence | |
758 | |
759 Arg [1] : Bio::EnsEMBL::Variation::OverlapConsequence instance | |
760 Description: Add an OverlapConsequence to this VariationFeature's list | |
761 Returntype : none | |
762 Exceptions : throws if the argument is the wrong type | |
763 Status : At Risk | |
764 | |
765 =cut | |
766 | |
767 sub add_OverlapConsequence { | |
768 my ($self, $oc) = @_; | |
769 assert_ref($oc, 'Bio::EnsEMBL::Variation::OverlapConsequence'); | |
770 push @{ $self->{overlap_consequences} ||= [] }, $oc; | |
771 } | |
772 | |
773 =head2 most_severe_OverlapConsequence | |
774 | |
775 Description: Get the OverlapConsequence considered (by Ensembl) to be the most severe | |
776 consequence of all the alleles of this VariationFeature | |
777 Returntype : Bio::EnsEMBL::Variation::OverlapConsequence | |
778 Exceptions : none | |
779 Status : At Risk | |
780 | |
781 =cut | |
782 | |
783 sub most_severe_OverlapConsequence { | |
784 my $self = shift; | |
785 | |
786 unless ($self->{_most_severe_consequence}) { | |
787 | |
788 my $highest; | |
789 | |
790 for my $cons (@{ $self->get_all_OverlapConsequences }) { | |
791 $highest ||= $cons; | |
792 if ($cons->rank < $highest->rank) { | |
793 $highest = $cons; | |
794 } | |
795 } | |
796 | |
797 $self->{_most_severe_consequence} = $highest; | |
798 } | |
799 | |
800 return $self->{_most_severe_consequence}; | |
801 } | |
802 | |
803 =head2 display_consequence | |
804 | |
805 Arg [1] : (optional) String $term_type | |
806 Description: Get the term for the most severe consequence of this | |
807 VariationFeature. By default returns Ensembl display terms | |
808 (e.g. 'NON_SYNONYMOUS_CODING'). $term_type can also be 'label' | |
809 (e.g. 'Non-synonymous coding'), 'SO' (Sequence Ontology, e.g. | |
810 'non_synonymous_codon') or 'NCBI' (e.g. 'missense') | |
811 Returntype : string | |
812 Exceptions : none | |
813 Status : At Risk | |
814 | |
815 =cut | |
816 | |
817 sub display_consequence { | |
818 my $self = shift; | |
819 my $term_type = shift; | |
820 | |
821 my $method_name; | |
822 | |
823 # delete cached term | |
824 if(defined($term_type)) { | |
825 $method_name = $term_type.($term_type eq 'label' ? '' : '_term'); | |
826 $method_name = 'SO_term' unless @{$self->get_all_OverlapConsequences} && $self->get_all_OverlapConsequences->[0]->can($method_name); | |
827 } | |
828 | |
829 $method_name ||= 'SO_term'; | |
830 | |
831 return $self->most_severe_OverlapConsequence->$method_name; | |
832 } | |
833 | |
834 =head2 add_consequence_type | |
835 | |
836 Status : Deprecated, use add_OverlapConsequence instead | |
837 | |
838 =cut | |
839 | |
840 sub add_consequence_type{ | |
841 my $self = shift; | |
842 warning('Deprecated method, use add_OverlapConsequence instead'); | |
843 return $self->add_OverlapConsequence(@_); | |
844 } | |
845 | |
846 =head2 get_consequence_type | |
847 | |
848 Status : Deprecated, use consequence_type instead | |
849 | |
850 =cut | |
851 | |
852 sub get_consequence_type { | |
853 my $self = shift; | |
854 warning('Deprecated method, use consequence_type instead'); | |
855 return $self->consequence_type; | |
856 } | |
857 | |
858 =head2 ambig_code | |
859 | |
860 Args : None | |
861 Example : my $ambiguity_code = $vf->ambig_code() | |
862 Description : Returns the ambigutiy code for the alleles in the VariationFeature | |
863 ReturnType : String $ambiguity_code | |
864 Exceptions : none | |
865 Caller : General | |
866 Status : At Risk | |
867 | |
868 =cut | |
869 | |
870 sub ambig_code{ | |
871 my $self = shift; | |
872 | |
873 return &ambiguity_code($self->allele_string()); | |
874 } | |
875 | |
876 =head2 var_class | |
877 | |
878 Args[1] : (optional) no_db - don't use the term from the database, always calculate it from the allele string | |
879 (used by the ensembl variation pipeline) | |
880 Example : my $variation_class = $vf->var_class | |
881 Description : returns the Ensembl term for the class of this variation | |
882 ReturnType : string | |
883 Exceptions : throws if we can't find a corresponding display term for an SO term | |
884 Caller : General | |
885 Status : At Risk | |
886 | |
887 =cut | |
888 | |
889 sub var_class { | |
890 | |
891 my $self = shift; | |
892 my $no_db = shift; | |
893 | |
894 unless ($self->{class_display_term}) { | |
895 | |
896 my $so_term = $self->class_SO_term(undef, $no_db); | |
897 | |
898 # convert the SO term to the ensembl display term | |
899 | |
900 $self->{class_display_term} = $self->is_somatic ? | |
901 $VARIATION_CLASSES{$so_term}->{somatic_display_term} : | |
902 $VARIATION_CLASSES{$so_term}->{display_term}; | |
903 } | |
904 | |
905 return $self->{class_display_term}; | |
906 } | |
907 | |
908 =head2 class_SO_term | |
909 | |
910 Args[1] : (optional) class_SO_term - the SO term for the class of this variation feature | |
911 Args[2] : (optional) no_db - don't use the term from the database, always calculate it from the allele string | |
912 (used by the ensembl variation pipeline) | |
913 Example : my $SO_variation_class = $vf->class_SO_term() | |
914 Description : Get/set the SO term for the class of this variation | |
915 ReturnType : string | |
916 Exceptions : none | |
917 Caller : General | |
918 Status : At Risk | |
919 | |
920 =cut | |
921 | |
922 sub class_SO_term { | |
923 my ($self, $class_SO_term, $no_db) = @_; | |
924 | |
925 $self->{class_SO_term} = $class_SO_term if $class_SO_term; | |
926 | |
927 if ($no_db || !$self->{class_SO_term}) { | |
928 $self->{class_SO_term} = SO_variation_class($self->allele_string); | |
929 } | |
930 | |
931 return $self->{class_SO_term}; | |
932 } | |
933 | |
934 =head2 get_all_validation_states | |
935 | |
936 Arg [1] : none | |
937 Example : my @vstates = @{$vf->get_all_validation_states()}; | |
938 Description: Retrieves all validation states for this variationFeature. Current | |
939 possible validation statuses are 'cluster','freq','submitter', | |
940 'doublehit', 'hapmap' | |
941 Returntype : reference to list of strings | |
942 Exceptions : none | |
943 Caller : general | |
944 Status : At Risk | |
945 | |
946 =cut | |
947 | |
948 sub get_all_validation_states { | |
949 my $self = shift; | |
950 return Bio::EnsEMBL::Variation::Utils::Sequence::get_all_validation_states($self->{'validation_code'}); | |
951 } | |
952 | |
953 | |
954 =head2 add_validation_state | |
955 | |
956 Arg [1] : string $state | |
957 Example : $vf->add_validation_state('cluster'); | |
958 Description: Adds a validation state to this variation. | |
959 Returntype : none | |
960 Exceptions : warning if validation state is not a recognised type | |
961 Caller : general | |
962 Status : At Risk | |
963 | |
964 =cut | |
965 | |
966 sub add_validation_state { | |
967 Bio::EnsEMBL::Variation::Utils::Sequence::add_validation_state(@_); | |
968 } | |
969 | |
970 =head2 source | |
971 | |
972 Arg [1] : string $source_name (optional) - the new value to set the source attribute to | |
973 Example : $source = $vf->source; | |
974 Description: Getter/Setter for the source attribute | |
975 Returntype : the source name as a string, | |
976 Exceptions : none | |
977 Caller : general | |
978 Status : At Risk | |
979 | |
980 =cut | |
981 | |
982 sub source { | |
983 my ($self, $source) = @_; | |
984 $self->{source} = $source if $source; | |
985 return $self->{source}; | |
986 } | |
987 | |
988 =head2 source_version | |
989 | |
990 Arg [1] : number $source_version (optional) - the new value to set the source version attribute to | |
991 Example : $source_version = $vf->source_version; | |
992 Description: Getter/Setter for the source version attribute | |
993 Returntype : the source version as a number | |
994 Exceptions : none | |
995 Caller : general | |
996 Status : At Risk | |
997 | |
998 =cut | |
999 | |
1000 sub source_version { | |
1001 my ($self, $source_version) = @_; | |
1002 $self->{source_version} = $source_version if $source_version; | |
1003 return $self->{source_version}; | |
1004 } | |
1005 | |
1006 =head2 is_somatic | |
1007 | |
1008 Arg [1] : boolean $is_somatic (optional) | |
1009 The new value to set the is_somatic flag to | |
1010 Example : $is_somatic = $vf->is_somatic | |
1011 Description: Getter/Setter for the is_somatic flag, which identifies this variation feature as either somatic or germline | |
1012 Returntype : boolean | |
1013 Exceptions : none | |
1014 Caller : general | |
1015 Status : Stable | |
1016 | |
1017 =cut | |
1018 | |
1019 sub is_somatic { | |
1020 my ($self, $is_somatic) = @_; | |
1021 $self->{'is_somatic'} = $is_somatic if defined $is_somatic; | |
1022 return $self->{'is_somatic'}; | |
1023 } | |
1024 | |
1025 =head2 is_tagged | |
1026 | |
1027 Args : None | |
1028 Example : my $populations = $vf->is_tagged(); | |
1029 Description : If the variation is tagged in any population, returns an array with the populations where the variation_feature | |
1030 is tagged (using a criteria of r2 > 0.99). Otherwise, returns null | |
1031 ReturnType : list of Bio::EnsEMBL::Variation::Population | |
1032 Exceptions : none | |
1033 Caller : general | |
1034 Status : At Risk | |
1035 | |
1036 =cut | |
1037 | |
1038 sub is_tagged{ | |
1039 my $self = shift; | |
1040 | |
1041 if ($self->adaptor()){ | |
1042 my $population_adaptor = $self->adaptor()->db()->get_PopulationAdaptor(); | |
1043 return $population_adaptor->fetch_tagged_Population($self); | |
1044 } | |
1045 } | |
1046 | |
1047 =head2 is_tag | |
1048 | |
1049 Args : None | |
1050 Example : my $populations = $vf->is_tag(); | |
1051 Description : Returns an array of populations in which this variation feature | |
1052 is a tag SNP. | |
1053 ReturnType : list of Bio::EnsEMBL::Variation::Population | |
1054 Exceptions : none | |
1055 Caller : general | |
1056 Status : At Risk | |
1057 | |
1058 =cut | |
1059 | |
1060 sub is_tag{ | |
1061 my $self = shift; | |
1062 | |
1063 if ($self->adaptor()){ | |
1064 my $population_adaptor = $self->adaptor()->db()->get_PopulationAdaptor(); | |
1065 return $population_adaptor->fetch_tag_Population($self); | |
1066 } | |
1067 } | |
1068 | |
1069 =head2 get_all_tagged_VariationFeatures | |
1070 | |
1071 Args : Bio::EnsEMBL::Variation::Population $pop (optional) | |
1072 Example : my $vfs = $vf->get_all_tagged_VariationFeatures(); | |
1073 Description : Returns an arrayref of variation features that are tagged by | |
1074 this variation feature, in the population $pop if specified. | |
1075 ReturnType : list of Bio::EnsEMBL::Variation::VariationFeature | |
1076 Exceptions : none | |
1077 Caller : general | |
1078 Status : At Risk | |
1079 | |
1080 =cut | |
1081 | |
1082 sub get_all_tagged_VariationFeatures { | |
1083 return $_[0]->adaptor->fetch_all_tagged_by_VariationFeature(@_); | |
1084 } | |
1085 | |
1086 =head2 get_all_tag_VariationFeatures | |
1087 | |
1088 Args : Bio::EnsEMBL::Variation::Population $pop (optional) | |
1089 Example : my $vfs = $vf->get_all_tag_VariationFeatures(); | |
1090 Description : Returns an arrayref of variation features that tag this | |
1091 variation feature, in the population $pop if specified. | |
1092 ReturnType : list of Bio::EnsEMBL::Variation::VariationFeature | |
1093 Exceptions : none | |
1094 Caller : general | |
1095 Status : At Risk | |
1096 | |
1097 =cut | |
1098 | |
1099 sub get_all_tag_VariationFeatures { | |
1100 return $_[0]->adaptor->fetch_all_tags_by_VariationFeature(@_); | |
1101 } | |
1102 | |
1103 =head2 get_all_tag_and_tagged_VariationFeatures | |
1104 | |
1105 Args : Bio::EnsEMBL::Variation::Population $pop (optional) | |
1106 Example : my $vfs = $vf->get_all_tag_and_tagged_VariationFeatures(); | |
1107 Description : Returns an arrayref of variation features that either tag or are | |
1108 tagged by this variation feature, in the population $pop if | |
1109 specified. | |
1110 ReturnType : list of Bio::EnsEMBL::Variation::VariationFeature | |
1111 Exceptions : none | |
1112 Caller : general | |
1113 Status : At Risk | |
1114 | |
1115 =cut | |
1116 | |
1117 sub get_all_tag_and_tagged_VariationFeatures { | |
1118 return $_[0]->adaptor->fetch_all_tags_and_tagged_by_VariationFeature(@_); | |
1119 } | |
1120 | |
1121 | |
1122 | |
1123 =head2 is_reference | |
1124 Arg : none | |
1125 Example : my $reference = $vf->is_reference() | |
1126 Description: Returns 1 if VF's slice is a reference slice else 0 | |
1127 Returntype : int | |
1128 Caller : general | |
1129 Status : At Risk | |
1130 | |
1131 =cut | |
1132 | |
1133 sub is_reference { | |
1134 my ($self) = @_; | |
1135 my $slice = $self->slice; | |
1136 | |
1137 if ( !defined( $self->{'is_reference'} ) ) { | |
1138 $self->{'is_reference'} = $slice->is_reference(); | |
1139 } | |
1140 | |
1141 return $self->{'is_reference'}; | |
1142 } | |
1143 | |
1144 =head2 convert_to_SNP | |
1145 | |
1146 Args : None | |
1147 Example : my $snp = $vf->convert_to_SNP() | |
1148 Description : Creates a Bio::EnsEMBL::SNP object from Bio::EnsEMBL::VariationFeature. Mainly used for | |
1149 backwards compatibility | |
1150 ReturnType : Bio::EnsEMBL::SNP | |
1151 Exceptions : None | |
1152 Caller : general | |
1153 Status : At Risk | |
1154 | |
1155 =cut | |
1156 | |
1157 sub convert_to_SNP{ | |
1158 my $self = shift; | |
1159 | |
1160 require Bio::EnsEMBL::SNP; #for backwards compatibility. It will only be loaded if the function is called | |
1161 | |
1162 my $snp = Bio::EnsEMBL::SNP->new_fast({ | |
1163 'dbID' => $self->variation()->dbID(), | |
1164 '_gsf_start' => $self->start, | |
1165 '_gsf_end' => $self->end, | |
1166 '_snp_strand' => $self->strand, | |
1167 '_gsf_score' => 1, | |
1168 '_type' => $self->var_class, | |
1169 '_validated' => $self->get_all_validation_states(), | |
1170 'alleles' => $self->allele_string, | |
1171 '_ambiguity_code' => $self->ambig_code, | |
1172 '_mapweight' => $self->map_weight, | |
1173 '_source' => $self->source | |
1174 }); | |
1175 return $snp; | |
1176 } | |
1177 | |
1178 =head2 get_all_LD_values | |
1179 | |
1180 Args : none | |
1181 Description : returns all LD values for this variation feature. This function will only work correctly if the variation | |
1182 database has been attached to the core database. | |
1183 ReturnType : Bio::EnsEMBL::Variation::LDFeatureContainer | |
1184 Exceptions : none | |
1185 Caller : snpview | |
1186 Status : At Risk | |
1187 : Variation database is under development. | |
1188 | |
1189 =cut | |
1190 | |
1191 sub get_all_LD_values{ | |
1192 my $self = shift; | |
1193 | |
1194 if ($self->adaptor()){ | |
1195 my $ld_adaptor = $self->adaptor()->db()->get_LDFeatureContainerAdaptor(); | |
1196 return $ld_adaptor->fetch_by_VariationFeature($self); | |
1197 } | |
1198 return {}; | |
1199 } | |
1200 | |
1201 =head2 get_all_LD_Populations | |
1202 | |
1203 Args : none | |
1204 Description : returns a list of populations that could produces LD values | |
1205 for this VariationFeature | |
1206 ReturnType : listref of Bio::EnsEMBL::Variation::Population objects | |
1207 Exceptions : none | |
1208 Caller : snpview | |
1209 Status : At Risk | |
1210 | |
1211 =cut | |
1212 | |
1213 sub get_all_LD_Populations{ | |
1214 my $self = shift; | |
1215 | |
1216 my $pa = $self->adaptor->db->get_PopulationAdaptor; | |
1217 return [] unless $pa; | |
1218 | |
1219 my $ld_pops = $pa->fetch_all_LD_Populations; | |
1220 return [] unless $ld_pops; | |
1221 | |
1222 my $sth = $self->adaptor->db->prepare(qq{ | |
1223 SELECT ip.population_sample_id, c.seq_region_start, c.genotypes | |
1224 FROM compressed_genotype_region c, individual_population ip | |
1225 WHERE c.sample_id = ip.individual_sample_id | |
1226 AND c.seq_region_id = ? | |
1227 AND c.seq_region_start < ? | |
1228 AND c.seq_region_end > ? | |
1229 }); | |
1230 | |
1231 my $this_vf_start = $self->seq_region_start; | |
1232 | |
1233 $sth->bind_param(1, $self->feature_Slice->get_seq_region_id); | |
1234 $sth->bind_param(2, $self->seq_region_end); | |
1235 $sth->bind_param(3, $this_vf_start); | |
1236 | |
1237 $sth->execute; | |
1238 | |
1239 my ($sample_id, $seq_region_start, $genotypes); | |
1240 $sth->bind_columns(\$sample_id, \$seq_region_start, \$genotypes); | |
1241 | |
1242 my %have_genotypes = (); | |
1243 | |
1244 while($sth->fetch()) { | |
1245 | |
1246 next if $have_genotypes{$sample_id}; | |
1247 | |
1248 if($seq_region_start == $this_vf_start) { | |
1249 $have_genotypes{$sample_id} = 1; | |
1250 next; | |
1251 } | |
1252 | |
1253 my @genotypes = unpack '(www)*', $genotypes; | |
1254 my $gt_start = $seq_region_start; | |
1255 | |
1256 while(my( $var_id, $gt_code, $gap ) = splice @genotypes, 0, 3 ) { | |
1257 if($gt_start == $this_vf_start) { | |
1258 $have_genotypes{$sample_id} = 1; | |
1259 last; | |
1260 } | |
1261 $gt_start += $gap + 1 if defined $gap; | |
1262 } | |
1263 } | |
1264 | |
1265 my @final_list = grep {$have_genotypes{$_->dbID}} @$ld_pops; | |
1266 | |
1267 return \@final_list; | |
1268 } | |
1269 | |
1270 =head2 get_all_sources | |
1271 | |
1272 Args : none | |
1273 Example : my @sources = @{$vf->get_all_sources()}; | |
1274 Description : returns a list of all the sources for this | |
1275 VariationFeature | |
1276 ReturnType : reference to list of strings | |
1277 Exceptions : none | |
1278 Caller : general | |
1279 Status : At Risk | |
1280 : Variation database is under development. | |
1281 =cut | |
1282 | |
1283 sub get_all_sources{ | |
1284 my $self = shift; | |
1285 | |
1286 my @sources; | |
1287 my %sources; | |
1288 if ($self->adaptor()){ | |
1289 map {$sources{$_}++} @{$self->adaptor()->get_all_synonym_sources($self)}; | |
1290 $sources{$self->source}++; | |
1291 @sources = keys %sources; | |
1292 return \@sources; | |
1293 } | |
1294 return \@sources; | |
1295 } | |
1296 | |
1297 =head2 ref_allele_string | |
1298 | |
1299 Args : none | |
1300 Example : $reference_allele_string = $self->ref_allele_string() | |
1301 Description: Getter for the reference allele_string, always the first. | |
1302 Returntype : string | |
1303 Exceptions : none | |
1304 Caller : general | |
1305 Status : Stable | |
1306 | |
1307 =cut | |
1308 | |
1309 sub ref_allele_string{ | |
1310 my $self = shift; | |
1311 | |
1312 my @alleles = split /[\|\\\/]/,$self->allele_string; | |
1313 return $alleles[0]; | |
1314 } | |
1315 | |
1316 | |
1317 =head2 get_all_VariationSets | |
1318 | |
1319 Args : none | |
1320 Example : my @vs = @{$vf->get_all_VariationSets()}; | |
1321 Description : returns a reference to a list of all the VariationSets this | |
1322 VariationFeature is a member of | |
1323 ReturnType : reference to list of Bio::EnsEMBL::Variation::VariationSets | |
1324 Exceptions : if no adaptor is attached to this object | |
1325 Caller : general | |
1326 Status : At Risk | |
1327 =cut | |
1328 | |
1329 sub get_all_VariationSets { | |
1330 my $self = shift; | |
1331 | |
1332 if (!$self->adaptor()) { | |
1333 throw('An adaptor must be attached in order to get all variation sets'); | |
1334 } | |
1335 my $vs_adaptor = $self->adaptor()->db()->get_VariationSetAdaptor(); | |
1336 my $variation_sets = $vs_adaptor->fetch_all_by_Variation($self->variation()); | |
1337 | |
1338 return $variation_sets; | |
1339 } | |
1340 | |
1341 | |
1342 =head2 get_all_Alleles | |
1343 | |
1344 Args : none | |
1345 Example : @alleles = @{$vf->get_all_Alleles} | |
1346 Description: Gets all Allele objects from the underlying variation object, | |
1347 with reference alleles first. | |
1348 Returntype : listref of Bio::EnsEMBL::Variation::Allele objects | |
1349 Exceptions : none | |
1350 Caller : general | |
1351 Status : Stable | |
1352 | |
1353 =cut | |
1354 | |
1355 sub get_all_Alleles{ | |
1356 my $self = shift; | |
1357 | |
1358 my @alleles = @{$self->variation->get_all_Alleles}; | |
1359 | |
1360 # put all alleles in a hash | |
1361 my %order = (); | |
1362 foreach my $allele(@alleles) { | |
1363 $order{$allele->allele} = 1; | |
1364 } | |
1365 | |
1366 $order{$self->ref_allele_string} = 2; | |
1367 | |
1368 # now sort them by population, submitter, allele | |
1369 my @new_alleles = sort { | |
1370 ($a->population ? $a->population->name : "") cmp ($b->population ? $b->population->name : "") || | |
1371 ($a->subsnp ? $a->subsnp : "") cmp ($b->subsnp ? $b->subsnp : "") || | |
1372 $order{$b->allele} <=> $order{$a->allele} | |
1373 } @alleles; | |
1374 | |
1375 return \@new_alleles; | |
1376 } | |
1377 | |
1378 | |
1379 =head2 get_all_PopulationGenotypes | |
1380 | |
1381 Args : none | |
1382 Example : @pop_gens = @{$vf->get_all_PopulationGenotypes} | |
1383 Description: Gets all PopulationGenotype objects from the underlying variation | |
1384 object, with reference genotypes first. | |
1385 Returntype : listref of Bio::EnsEMBL::Variation::PopulationGenotype objects | |
1386 Exceptions : none | |
1387 Caller : general | |
1388 Status : Stable | |
1389 | |
1390 =cut | |
1391 | |
1392 sub get_all_PopulationGenotypes{ | |
1393 my $self = shift; | |
1394 | |
1395 my @gens = @{$self->variation->get_all_PopulationGenotypes}; | |
1396 | |
1397 # put all alleles in a hash | |
1398 my %order = (); | |
1399 foreach my $gen(@gens) { | |
1400 # homs low priority, hets higher | |
1401 $order{$gen->allele1.$gen->allele2} = ($gen->allele1 eq $gen->allele2 ? 1 : 2); | |
1402 } | |
1403 | |
1404 # ref hom highest priority | |
1405 $order{$self->ref_allele_string x 2} = 3; | |
1406 | |
1407 # now sort them by population, submitter, genotype | |
1408 my @new_gens = sort { | |
1409 ($a->population ? $a->population->name : "") cmp ($b->population ? $b->population->name : "") || | |
1410 ($a->subsnp ? $a->subsnp : "") cmp ($b->subsnp ? $b->subsnp : "") || | |
1411 $order{$b->allele1.$b->allele2} <=> $order{$a->allele1.$a->allele2} | |
1412 } @gens; | |
1413 | |
1414 return \@new_gens; | |
1415 } | |
1416 | |
1417 =head2 get_all_hgvs_notations | |
1418 | |
1419 Arg [1] : Bio::EnsEMBL::Feature $ref_feature (optional) | |
1420 Get the HGVS notation of this VariationFeature relative to the slice it is on. If an optional reference feature is supplied, returns the coordinates | |
1421 relative to this feature. | |
1422 Arg [2] : string (Optional) | |
1423 Indicate whether the HGVS notation should be reported in genomic coordinates or cDNA coordinates. | |
1424 'g' -> Genomic position numbering | |
1425 'c' -> cDNA position numbering | |
1426 'p' -> protein position numbering | |
1427 Arg [3] : string (Optional) | |
1428 A name to use for the reference can be supplied. By default the name returned by the display_id() method of the reference feature will be used. | |
1429 Arg [4] : string (Optional) | |
1430 Return just the HGVS notation corresponding to this allele | |
1431 | |
1432 Example : my $vf = $variation_feature_adaptor->fetch_by_dbID(565770); | |
1433 my $tr = $transcript_adaptor->fetch_by_stable_id('ENST00000335295'); | |
1434 my $hgvs = $vf->get_all_hgvs_notations($tr,'p'); | |
1435 while (my ($allele,$hgvs_str) = each(%{$hgvs})) { | |
1436 print "Allele $allele :\t$hgvs_str\n"; # Will print 'Allele - : ENSP00000333994.3:p.Val34_Tyr36delinsAsp' | |
1437 } | |
1438 | |
1439 Description: Returns a reference to a hash with the allele as key and a string with the HGVS notation of this VariationFeature as value. By default uses the | |
1440 slice it is plcaed on as reference but a different reference feature can be supplied. | |
1441 Returntype : Hash reference | |
1442 Exceptions : Throws exception if VariationFeature can not be described relative to the feature_Slice of the supplied reference feature | |
1443 Caller : general | |
1444 Status : Experimental | |
1445 | |
1446 =cut | |
1447 sub get_all_hgvs_notations { | |
1448 | |
1449 my $self = shift; | |
1450 my $ref_feature = shift; | |
1451 my $numbering = shift; ## HGVS system g=genomic, c=coding, p=protein | |
1452 my $reference_name = shift; ## If the ref_feature is a slice, this is over-written | |
1453 my $use_allele = shift; ## optional single allele to check | |
1454 my $transcript_variation = shift; ## optional transcript variation - looked up for c|p if not supplied | |
1455 | |
1456 my %hgvs; | |
1457 | |
1458 ##### don't get them for HGMD mutations or CNV probes | |
1459 return {} if ($self->allele_string =~ /INS|DEL|HGMD|CNV/ig || $self->var_class() =~ /microsat/i); | |
1460 ##### By default, use genomic position numbering | |
1461 $numbering ||= 'g'; | |
1462 | |
1463 # If no reference feature is supplied, set it to the slice underlying this VariationFeature | |
1464 $ref_feature ||= $self->slice(); | |
1465 | |
1466 # Special parsing for LRG | |
1467 if (defined $reference_name && $reference_name =~ /^LRG_/) { | |
1468 # Remove version | |
1469 if ($reference_name =~ /(.+)\.\d+$/) { | |
1470 $reference_name = $1; | |
1471 } | |
1472 } | |
1473 | |
1474 ### Check/get transcript variation available for protein & coding | |
1475 if ($ref_feature->isa('Bio::EnsEMBL::Transcript')) { | |
1476 | |
1477 # Get a TranscriptVariation object for this VariationFeature and the supplied Transcript if it wasn't passed in the call | |
1478 $transcript_variation = $self->get_all_TranscriptVariations([$ref_feature])->[0] if (!defined($transcript_variation)); | |
1479 | |
1480 ##### call new TranscriptVariationAllele method for each allele | |
1481 } | |
1482 | |
1483 | |
1484 if ($numbering eq 'p') { | |
1485 | |
1486 #### If there is no transcript variation supplied and the variant | |
1487 #### is not in the translated region there is no protein change | |
1488 return {} if (!defined($transcript_variation) || | |
1489 !defined($transcript_variation->translation_start()) || | |
1490 !defined($transcript_variation->translation_end())); | |
1491 | |
1492 ##### call TranscriptVariationAllele method for each allele | |
1493 foreach my $transcriptVariationAllele (@{$transcript_variation->get_all_alternate_TranscriptVariationAlleles()} ){ | |
1494 | |
1495 my $allele_string = $transcriptVariationAllele->feature_seq(); | |
1496 my $hgvs_full_string = $transcriptVariationAllele->hgvs_protein(); | |
1497 | |
1498 if($allele_string ne (split/\//,$self->allele_string())[1] ){ | |
1499 reverse_comp(\$allele_string); ### hash returned relative to input variation feature strand regardless of transcript strand | |
1500 } | |
1501 $hgvs{$allele_string} = $hgvs_full_string ; | |
1502 } | |
1503 return \%hgvs; | |
1504 } | |
1505 | |
1506 elsif ( $numbering =~ m/c|n/) { ### coding or non- coding transcript | |
1507 | |
1508 return {} if (!defined $transcript_variation); | |
1509 | |
1510 foreach my $transcriptVariationAllele (@{$transcript_variation->get_all_alternate_TranscriptVariationAlleles()} ){ | |
1511 | |
1512 my $allele_string = $transcriptVariationAllele->feature_seq(); | |
1513 my $hgvs_full_string = $transcriptVariationAllele->hgvs_transcript(); | |
1514 | |
1515 if($allele_string ne (split/\//,$self->allele_string())[1] ){ | |
1516 reverse_comp(\$allele_string); ### hash returned relative to input variation feature strand regardless of transcript strand | |
1517 } | |
1518 $hgvs{$allele_string} = $hgvs_full_string ; | |
1519 } | |
1520 return \%hgvs; | |
1521 } | |
1522 | |
1523 elsif( $numbering =~ m/g/ ) { | |
1524 #### handling both alleles together locally for genomic class | |
1525 my $hgvs = $self->hgvs_genomic($ref_feature, $reference_name, $use_allele ); | |
1526 return $hgvs; | |
1527 } | |
1528 else{ | |
1529 warn("HGVS notation is not available for coordinate system: $numbering"); | |
1530 return {}; | |
1531 } | |
1532 } | |
1533 | |
1534 ### HGVS short flanking sequence required to check if HGVS variant class should be dup rather than ins | |
1535 sub _get_flank_seq{ | |
1536 | |
1537 my $self = shift; | |
1538 | |
1539 # Get the underlying slice and sequence | |
1540 my $ref_slice = $self->slice(); | |
1541 | |
1542 my @allele = split(/\//,$self->allele_string()); | |
1543 #### add flank at least as long as longest allele to allow checking | |
1544 my $add_length = 0; | |
1545 | |
1546 foreach my $al(@allele){ ## may have >2 alleles | |
1547 if(length($al) > $add_length){ | |
1548 $add_length = length $al ; | |
1549 } | |
1550 } | |
1551 $add_length++; | |
1552 | |
1553 my $ref_start = $add_length ; | |
1554 my $ref_end = $add_length + ($self->end() - $self->start()); | |
1555 my $seq_start = ($self->start() - $ref_start); | |
1556 | |
1557 # Should we be at the beginning of the sequence, adjust the coordinates to not cause an exception | |
1558 if ($seq_start < 0) { | |
1559 $ref_start += $seq_start; | |
1560 $ref_end += $seq_start; | |
1561 $seq_start = 0; | |
1562 } | |
1563 | |
1564 my $flank_seq = $ref_slice->subseq($seq_start +1 , $seq_start + $ref_end, 1); | |
1565 | |
1566 | |
1567 return ($flank_seq, $ref_start, $ref_end ); | |
1568 } | |
1569 | |
1570 #### format HGVS hash for genomic reference sequence | |
1571 ### Input: Variation feature | |
1572 ### Output: $hgvs_notation hash | |
1573 | |
1574 | |
1575 | |
1576 =head2 hgvs_genomic | |
1577 | |
1578 Arg [1] : Bio::EnsEMBL::Feature $ref_feature (optional) | |
1579 Get the HGVS notation of this VariationFeature relative to the slice it is on. If an optional reference feature is supplied, returns the coordinates | |
1580 relative to this feature. | |
1581 Arg [2] : string (Optional) | |
1582 A name to use for the reference can be supplied. By default the name returned by the display_id() method of the reference feature will be used. | |
1583 Arg [4] : string (Optional) | |
1584 Return just the HGVS notation corresponding to this allele | |
1585 | |
1586 | |
1587 | |
1588 Description: Returns a reference to a hash with the allele as key and a string with the genomic HGVS notation of this VariationFeature as value. By default uses the | |
1589 slice it is placed on as reference but a different reference feature can be supplied. | |
1590 Returntype : Hash reference | |
1591 Exceptions : Throws exception if VariationFeature can not be described relative to the feature_Slice of the supplied reference feature | |
1592 Caller : general | |
1593 Status : Experimental | |
1594 | |
1595 =cut | |
1596 sub hgvs_genomic{ | |
1597 | |
1598 my $self = shift; | |
1599 my $ref_feature = shift; ## can be a transcript | |
1600 my $reference_name = shift; ## If the ref_feature is a slice, this is over-written | |
1601 my $use_allele = shift; ## optional single allele to check | |
1602 | |
1603 my %hgvs; | |
1604 ########set up sequence reference | |
1605 my $ref_slice; #### need this for genomic notation | |
1606 | |
1607 if ($ref_feature->isa('Bio::EnsEMBL::Slice')) { | |
1608 $ref_slice = $ref_feature; | |
1609 } | |
1610 else { | |
1611 # get slice if not supplied | |
1612 $ref_slice = $ref_feature->feature_Slice; | |
1613 } | |
1614 | |
1615 # Create new VariationFeature on the slice of the reference feature (unless the reference feature is the slice the VF is on) | |
1616 my $tr_vf; | |
1617 if ( $self->slice->coord_system->name() eq "chromosome") { | |
1618 $tr_vf = $self; | |
1619 } | |
1620 else { | |
1621 $tr_vf = $self->transfer($ref_slice); | |
1622 } | |
1623 # Return undef if this VariationFeature could not be transferred | |
1624 return {} if (!defined($tr_vf)); | |
1625 | |
1626 | |
1627 # Return undef if this VariationFeature does not fall within the supplied feature. | |
1628 return {} if ($tr_vf->start < 1 || | |
1629 $tr_vf->end < 1 || | |
1630 $tr_vf->start > ($ref_feature->end - $ref_feature->start + 1) || | |
1631 $tr_vf->end > ($ref_feature->end - $ref_feature->start + 1)); | |
1632 | |
1633 ######### define reference sequence name ################################### | |
1634 | |
1635 # If the reference is a slice, use the seq_region_name as identifier | |
1636 $reference_name ||= $ref_feature->seq_region_name if ($ref_feature->isa('Bio::EnsEMBL::Slice')); | |
1637 | |
1638 # Use the feature's display id as reference name unless specified otherwise. | |
1639 # If the feature is a transcript or translation, append the version number as well | |
1640 $reference_name ||= $ref_feature->display_id() . ($ref_feature->isa('Bio::EnsEMBL::Transcript') && | |
1641 $ref_feature->display_id !~ /\.\d+$/ ? '.' . $ref_feature->version() : ''); | |
1642 | |
1643 | |
1644 ##### get short flank sequence for duplication checking & adjusted variation coordinates | |
1645 my ($ref_seq, $ref_start, $ref_end) = _get_flank_seq($tr_vf);; | |
1646 | |
1647 foreach my $allele ( split(/\//,$tr_vf->allele_string()) ) { | |
1648 | |
1649 ## If a particular allele was requested, ignore others | |
1650 next if (defined($use_allele) && $allele ne $use_allele); | |
1651 | |
1652 # Skip if the allele contains weird characters | |
1653 next if $allele =~ m/[^ACGT\-]/ig; | |
1654 | |
1655 ##### vf strand is relative to slice - if transcript feature slice, may need complimenting | |
1656 my $check_allele = $allele; | |
1657 if( $self->strand() <0 && $ref_slice->strand >0 || | |
1658 $self->strand() >0 && $ref_slice->strand < 0 | |
1659 ){ | |
1660 reverse_comp(\$check_allele); | |
1661 if($DEBUG ==1){print "***************Flipping alt allele $allele => $check_allele to match coding strand\n";} | |
1662 } | |
1663 | |
1664 ## work out chrom coord for hgvs string if transcript slice supplied | |
1665 my ($chr_start,$chr_end); | |
1666 if ( $tr_vf->slice->is_toplevel() == 1) { | |
1667 $chr_start = $tr_vf->seq_region_start(); | |
1668 $chr_end = $tr_vf->seq_region_end(); | |
1669 } | |
1670 else{ | |
1671 ## add feature start to start of var-in-feature | |
1672 if( $tr_vf->seq_region_start() < $tr_vf->seq_region_end()){ | |
1673 $chr_start = $tr_vf->start() + $tr_vf->seq_region_start() ; | |
1674 $chr_end = $tr_vf->end() + $tr_vf->seq_region_start() ; | |
1675 } | |
1676 else{ | |
1677 $chr_start = $tr_vf->seq_region_start() - $tr_vf->start() ; | |
1678 $chr_end = $tr_vf->seq_region_start() - $tr_vf->end() ; | |
1679 } | |
1680 } | |
1681 | |
1682 | |
1683 my $hgvs_notation = hgvs_variant_notation( $check_allele, ## alt allele in refseq strand orientation | |
1684 $ref_seq, ## substring of slice for ref allele extraction | |
1685 $ref_start, ## start on substring of slice for ref allele extraction | |
1686 $ref_end, | |
1687 $chr_start, ## start wrt seq region slice is on (eg. chrom) | |
1688 $chr_end, | |
1689 ""); | |
1690 | |
1691 | |
1692 # Skip if e.g. allele is identical to the reference slice | |
1693 next if (!defined($hgvs_notation)); | |
1694 | |
1695 # Add the name of the reference | |
1696 $hgvs_notation->{'ref_name'} = $reference_name; | |
1697 # Add the position_numbering scheme | |
1698 $hgvs_notation->{'numbering'} = 'g'; | |
1699 | |
1700 # Construct the HGVS notation from the data in the hash | |
1701 $hgvs_notation->{'hgvs'} = format_hgvs_string( $hgvs_notation); | |
1702 | |
1703 $hgvs{$allele} = $hgvs_notation->{'hgvs'}; | |
1704 } | |
1705 return \%hgvs; | |
1706 | |
1707 } | |
1708 | |
1709 | |
1710 | |
1711 sub length { | |
1712 my $self = shift; | |
1713 return $self->{'end'} - $self->{'start'} + 1; | |
1714 } | |
1715 | |
1716 =head2 summary_as_hash | |
1717 | |
1718 Example : $feature_summary = $feature->summary_as_hash(); | |
1719 Description : Extends Feature::summary_as_hash | |
1720 Retrieves a summary of this VariationFeature object. | |
1721 | |
1722 Returns : hashref of descriptive strings | |
1723 | |
1724 =cut | |
1725 | |
1726 sub summary_as_hash { | |
1727 my $self = shift; | |
1728 my $summary_ref = $self->SUPER::summary_as_hash; | |
1729 $summary_ref->{'consequence_type'} = $self->display_consequence; | |
1730 my @allele_list = split(/\//,$self->allele_string); | |
1731 $summary_ref->{'alt_alleles'} = \@allele_list; | |
1732 return $summary_ref; | |
1733 } | |
1734 | |
1735 1; |