comparison variant_effect_predictor/Bio/EnsEMBL/Variation/VariationFeatureOverlap.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::Variation::VariationFeatureOverlap
24
25 =head1 SYNOPSIS
26
27 use Bio::EnsEMBL::Variation::VariationFeatureOverlap;
28
29 my $vfo = Bio::EnsEMBL::Variation::VariationFeatureOverlap->new(
30 -feature => $feature,
31 -variation_feature => $var_feat
32 );
33
34 print "consequence type: ", (join ",", @{ $vfo->consequence_type }), "\n";
35 print "most severe consequence: ", $vfo->display_consequence, "\n";
36
37 =head1 DESCRIPTION
38
39 A VariationFeatureOverlap represents a VariationFeature which is in close
40 proximity to another Ensembl Feature. It is the superclass of feature-specific
41 objects such as TranscriptVariation and RegulatoryFeatureVariation, and has
42 methods common to all such objects. You will not normally instantiate this
43 class directly, instead instantiating one of the feature-specific subclasses.
44
45 =cut
46
47 package Bio::EnsEMBL::Variation::VariationFeatureOverlap;
48
49 use strict;
50 use warnings;
51
52 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
53 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
54 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
55 use Bio::EnsEMBL::Utils::Sequence qw(expand);
56 use Bio::EnsEMBL::Variation::Utils::Sequence qw(unambiguity_code);
57 use Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele;
58
59 use base qw(Bio::EnsEMBL::Variation::BaseVariationFeatureOverlap);
60
61 =head2 new
62
63 Arg [-FEATURE] :
64 The Bio::EnsEMBL::Feature associated with the given VariationFeature
65
66 Arg [-VARIATION_FEATURE] :
67 The Bio::EnsEMBL::VariationFeature associated with the given Feature
68
69 Arg [-ADAPTOR] :
70 A Bio::EnsEMBL::Variation::DBSQL::VariationFeatureOverlapAdaptor
71
72 Arg [-DISAMBIGUATE_SINGLE_NUCLEOTIDE_ALLELES] :
73 A flag indicating if ambiguous single nucleotide alleles should be disambiguated
74 when constructing the VariationFeatureOverlapAllele objects, e.g. a VariationFeature
75 with an allele string like 'T/M' would be treated as if it were 'T/A/C'. We limit
76 ourselves to single nucleotide alleles to avoid the combinatorial explosion if we
77 allowed longer alleles with potentially many ambiguous bases.
78
79 Example :
80 my $vfo = Bio::EnsEMBL::Variation::VariationFeatureOverlap->new(
81 -feature => $feature,
82 -variation_feature => $var_feat
83 );
84
85 Description: Constructs a new VariationFeatureOverlap instance given a VariationFeature
86 and a Feature
87 Returntype : A new Bio::EnsEMBL::Variation::VariationFeatureOverlap instance
88 Exceptions : throws unless both VARIATION_FEATURE and FEATURE are supplied, or if the
89 supplied ADAPTOR is the wrong class
90 Status : At Risk
91
92 =cut
93
# Construct a VariationFeatureOverlap together with one
# VariationFeatureOverlapAllele per allele of the variation feature.
#
# Named arguments read here (everything is also forwarded to the superclass):
#   -VARIATION_FEATURE  renamed to -BASE_VARIATION_FEATURE for the superclass
#   -ADAPTOR            optional; must be a VariationFeatureOverlapAdaptor
#   -REF_FEATURE        optional; feature the reference sequence is read from,
#                       defaults to the variation feature's slice
#   -DISAMBIGUATE_SINGLE_NUCLEOTIDE_ALLELES
#                       optional flag; expand ambiguous single-base alleles
#   -NO_REF_CHECK       optional flag; take the reference allele from the
#                       allele string instead of the reference sequence
#
# Returns the new object. Throws (via assert_ref) if the variation feature or
# the adaptor is of the wrong class.
sub new {

    my $class = shift;

    my %args = @_;

    # swap a '-variation_feature' argument for a '-base_variation_feature' one for the superclass
    # (the key is matched case-insensitively; keys() returns a copy, so deleting inside the loop is safe)

    for my $arg (keys %args) {
        if (lc($arg) eq '-variation_feature') {
            $args{'-base_variation_feature'} = delete $args{$arg};
        }
    }

    my $self = $class->SUPER::new(%args);

    my (
        $adaptor,
        $ref_feature,
        $disambiguate_sn_alleles,
        $no_ref_check,
    ) = rearrange([qw(
        ADAPTOR
        REF_FEATURE
        DISAMBIGUATE_SINGLE_NUCLEOTIDE_ALLELES
        NO_REF_CHECK
    )], %args);

    my $variation_feature = $self->base_variation_feature;

    assert_ref($variation_feature, 'Bio::EnsEMBL::Variation::VariationFeature');
    assert_ref($adaptor, 'Bio::EnsEMBL::Variation::DBSQL::VariationFeatureOverlapAdaptor') if $adaptor;

    $ref_feature ||= $variation_feature->slice;

    $self->{adaptor}     = $adaptor;
    $self->{ref_feature} = $ref_feature;

    my $ref_allele;

    # we take the reference allele sequence from the reference sequence, not from the allele string
    unless ($no_ref_check) {
        $ref_allele = $ref_feature->subseq(
            $variation_feature->start,
            $variation_feature->end,
            $variation_feature->strand
        );
    }

    # get the variation feature allele string, expand it, and split it into separate alleles

    my $allele_string = $variation_feature->allele_string;

    expand(\$allele_string);

    my @alleles = split /\//, $allele_string;

    # with -NO_REF_CHECK the first allele of the allele string is the reference;
    # an empty reference (e.g. an insertion) is represented as '-'
    $ref_allele = $alleles[0] if $no_ref_check;
    $ref_allele = '-' unless $ref_allele;

    if ($disambiguate_sn_alleles) {

        # if this flag is set, disambiguate any ambiguous single nucleotide alleles, so
        # e.g. an allele string like T/M would be equivalent to an allele string of T/A/C
        # we only do this for single nucleotide alleles to avoid the combinatorial explosion
        # of long allele strings with potentially many ambiguous bases (because ensembl
        # genomes want this functionality)

        my @possible_alleles;

        for my $allele (@alleles) {

            if (length($allele) == 1 && $allele !~ /^[ACGT-]+$/) {

                my $expansion = unambiguity_code($allele);

                if (defined $expansion && length $expansion) {
                    push @possible_alleles, split //, $expansion;
                }
                else {
                    # no expansion is known for this symbol: keep the allele as it is
                    # rather than silently dropping it from the allele list
                    push @possible_alleles, $allele;
                }
            }
            else {
                # the allele is either unambiguous or longer than 1 nucleotide, so add it unaltered
                push @possible_alleles, $allele;
            }
        }

        @alleles = @possible_alleles;
    }

    # make sure the alleles are unique

    # we also want to deal with alleles like (T)0 which expand into
    # an empty string and we want to treat this as a deletion, so
    # we replace any empty strings with '-'

    # de-duplicate keeping first-seen order: going through hash keys would make the
    # order of the alternate alleles (and anything derived from it, such as the
    # concatenated dbID) differ from run to run

    my %seen;
    @alleles = grep { !$seen{$_}++ } map { $_ || '-' } @alleles;

    # create an object representing the reference allele

    my $ref_vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
        -variation_feature_overlap => $self,
        -variation_feature_seq     => $ref_allele,
        -is_reference              => 1,
    );

    $self->add_VariationFeatureOverlapAllele($ref_vfoa);

    # create objects representing the alternate alleles

    for my $allele (@alleles) {

        # the reference allele already has its own object
        next if $allele eq $ref_allele;

        my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
            -variation_feature_overlap => $self,
            -variation_feature_seq     => $allele,
            -is_reference              => 0,
        );

        $self->add_VariationFeatureOverlapAllele($vfoa);
    }

    return $self;
}
216
# Lightweight constructor taking a ready-made hashref of attributes.
# A true 'variation_feature' entry is moved to the 'base_variation_feature'
# key before the hashref is handed to the superclass new_fast.
sub new_fast {
    my $class   = shift;
    my $hashref = shift;

    # swap a variation_feature argument for a base_variation_feature one
    $hashref->{base_variation_feature} = delete $hashref->{variation_feature}
        if $hashref->{variation_feature};

    return $class->SUPER::new_fast($hashref);
}
228
# Return an identifier for this overlap. There is no real dbID of our own, so
# the dbIDs of all alternate alleles are joined with '_' and the result is
# cached in $self->{dbID}.
sub dbID {
    my ($self) = @_;

    # a previously computed (or preset) true value is returned as it is
    return $self->{dbID} if $self->{dbID};

    my @allele_ids = map { $_->dbID } @{ $self->get_all_alternate_VariationFeatureOverlapAlleles };

    $self->{dbID} = join '_', @allele_ids;

    return $self->{dbID};
}
240
241 =head2 variation_feature
242
243 Arg [1] : (optional) A Bio::EnsEMBL::Variation::VariationFeature
244 Description: Get/set the associated VariationFeature, lazy-loading it if required
245 Returntype : Bio::EnsEMBL::Variation::VariationFeature
246 Exceptions : throws if the argument is the wrong type
247 Status : At Risk
248
249 =cut
250
# Get/set the associated VariationFeature.
#
# Arg [1]    : (optional) Bio::EnsEMBL::Variation::VariationFeature to store
# Returns    : whatever base_variation_feature() currently holds
# Exceptions : assert_ref throws if the argument is of the wrong class
#
# When only a variation feature dbID is stored on the object
# ($self->{_variation_feature_id}) and an adaptor is attached, the feature is
# fetched through the adaptor on first access and the stored id is removed.
sub variation_feature {
    my ($self, $variation_feature) = @_;

    if ($variation_feature) {
        assert_ref($variation_feature, 'Bio::EnsEMBL::Variation::VariationFeature');
        $self->base_variation_feature($variation_feature);

        # an explicitly supplied feature supersedes any pending lazy-load;
        # otherwise the stored id would trigger the fetch below and replace
        # the object the caller has just set
        delete $self->{_variation_feature_id};
    }

    if (my $vf_id = $self->{_variation_feature_id}) {

        # lazy-load the VariationFeature

        if (my $adap = $self->{adaptor}) {
            if (my $vfa = $adap->db->get_VariationFeatureAdaptor) {
                if (my $vf = $vfa->fetch_by_dbID($vf_id)) {
                    $self->base_variation_feature($vf);

                    # the id is only dropped once the fetch has succeeded, so
                    # a failed lookup is retried on the next call
                    delete $self->{_variation_feature_id};
                }
            }
        }
    }

    return $self->base_variation_feature;
}
275
# Get the dbID of the variation feature, using the VariationFeature object
# if we have one, or the internal hash value if we don't.
#
# The object is looked up under 'base_variation_feature' first, because
# new_fast stores it under that key; the older 'variation_feature' key is
# still honoured as a fallback.
#
# Returns the dbID, or undef when neither an object nor an id is stored.
sub _variation_feature_id {
    my $self = shift;

    my $vf = $self->{base_variation_feature} || $self->{variation_feature};

    return $vf->dbID if $vf;

    if (my $id = $self->{_variation_feature_id}) {
        return $id;
    }

    # explicit undef (not a bare return) so that list-context callers keep
    # receiving exactly one value, as before
    return undef;
}
293
# Look up the allele object registered for the given allele sequence.
#
# Arg [1]  : allele sequence string, as returned by variation_feature_seq
# Returns  : the object stored for that sequence in $self->{_alleles_by_seq},
#            or undef if there is none
sub get_VariationFeatureOverlapAllele_for_allele_seq {
    my ($self, $allele_seq) = @_;

    # fetch the index first: a chained lookup would create an empty
    # _alleles_by_seq hash on the object as a side effect of a mere read
    my $alleles_by_seq = $self->{_alleles_by_seq};

    return undef unless $alleles_by_seq;

    return $alleles_by_seq->{$allele_seq};
}
298
299 =head2 add_VariationFeatureOverlapAllele
300
301 Arg [1] : A Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance
302 Description: Add an allele to this VariationFeatureOverlap
303 Returntype : none
304 Exceptions : throws if the argument is not the expected type
305 Status : At Risk
306
307 =cut
308
# Register an allele object with this overlap: it is type-checked, passed to
# add_BaseVariationFeatureOverlapAllele, and indexed by its
# variation_feature_seq in $self->{_alleles_by_seq}.
sub add_VariationFeatureOverlapAllele {
    my $self = shift;
    my $vfoa = shift;

    assert_ref($vfoa, 'Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele');

    $self->add_BaseVariationFeatureOverlapAllele($vfoa);

    # index by sequence so the allele can be found again from its sequence alone
    my $allele_seq = $vfoa->variation_feature_seq;
    $self->{_alleles_by_seq}->{$allele_seq} = $vfoa;
}
318
319 =head2 get_reference_VariationFeatureOverlapAllele
320
321 Description: Get the object representing the reference allele of this VariationFeatureOverlap
322 Returntype : Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance
323 Exceptions : none
324 Status : At Risk
325
326 =cut
327
# Thin wrapper: forwards the call, with any extra arguments, to
# get_reference_BaseVariationFeatureOverlapAllele and returns its result.
sub get_reference_VariationFeatureOverlapAllele {
    my ($self, @args) = @_;
    return $self->get_reference_BaseVariationFeatureOverlapAllele(@args);
}
332
333 =head2 get_all_alternate_VariationFeatureOverlapAlleles
334
335 Description: Get a list of the alternate alleles of this VariationFeatureOverlap
336 Returntype : listref of Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele objects
337 Exceptions : none
338 Status : At Risk
339
340 =cut
341
# Thin wrapper: forwards the call, with any extra arguments, to
# get_all_alternate_BaseVariationFeatureOverlapAlleles and returns its result.
sub get_all_alternate_VariationFeatureOverlapAlleles {
    my ($self, @args) = @_;
    return $self->get_all_alternate_BaseVariationFeatureOverlapAlleles(@args);
}
346
347 =head2 get_all_VariationFeatureOverlapAlleles
348
349 Description: Get a list of all the alleles, both reference and alternate, of this
350 VariationFeatureOverlap
351 Returntype : listref of Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele objects
352 Exceptions : none
353 Status : At Risk
354
355 =cut
356
# Thin wrapper: forwards the call, with any extra arguments, to
# get_all_BaseVariationFeatureOverlapAlleles and returns its result.
sub get_all_VariationFeatureOverlapAlleles {
    my ($self, @args) = @_;
    return $self->get_all_BaseVariationFeatureOverlapAlleles(@args);
}
361
# Calls _convert_to_sara on the stored reference allele object and then makes
# that object the only entry of $self->{alt_alleles}.
sub _convert_to_sara {
    my ($self) = @_;

    my $reference = $self->{reference_allele};

    $reference->_convert_to_sara;

    # the converted reference allele replaces the whole alternate allele list
    $self->{alt_alleles} = [ $reference ];
}
370
# Restrict the alternate alleles to those whose variation_feature_seq has a
# true entry in the $keep_alleles hashref. If none match, the existing list is
# kept. When $keep_alleles has exactly one key, the first alternate allele is
# also stored as the reference allele.
sub _rearrange_alleles {
    my ($self, $keep_alleles) = @_;

    # fix alt alleles
    my $current_alts = $self->{alt_alleles};
    my @kept_alts;

    for my $alt (@$current_alts) {
        push @kept_alts, $alt if $keep_alleles->{ $alt->variation_feature_seq };
    }

    $self->{alt_alleles} = @kept_alts ? \@kept_alts : $current_alts;

    # copy to ref allele if homozygous non-ref
    $self->{reference_allele} = $self->{alt_alleles}->[0] if keys(%$keep_alleles) == 1;
}
383
384 1;