Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Variation/VariationFeatureOverlap.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::Variation::VariationFeatureOverlap | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 use Bio::EnsEMBL::Variation::VariationFeatureOverlap; | |
28 | |
29 my $vfo = Bio::EnsEMBL::Variation::VariationFeatureOverlap->new( | |
30 -feature => $feature, | |
31 -variation_feature => $var_feat | |
32 ); | |
33 | |
34 print "consequence type: ", (join ",", @{ $vfo->consequence_type }), "\n"; | |
35 print "most severe consequence: ", $vfo->display_consequence, "\n"; | |
36 | |
37 =head1 DESCRIPTION | |
38 | |
39 A VariationFeatureOverlap represents a VariationFeature which is in close | |
40 proximity to another Ensembl Feature. It is the superclass of feature-specific | |
41 objects such as TranscriptVariation and RegulatoryFeatureVariation, and has | |
42 methods common to all such objects. You will not normally instantiate this | |
43 class directly, instead instantiating one of the feature-specific subclasses. | |
44 | |
45 =cut | |
46 | |
47 package Bio::EnsEMBL::Variation::VariationFeatureOverlap; | |
48 | |
49 use strict; | |
50 use warnings; | |
51 | |
52 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref); | |
53 use Bio::EnsEMBL::Utils::Exception qw(throw warning); | |
54 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
55 use Bio::EnsEMBL::Utils::Sequence qw(expand); | |
56 use Bio::EnsEMBL::Variation::Utils::Sequence qw(unambiguity_code); | |
57 use Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele; | |
58 | |
59 use base qw(Bio::EnsEMBL::Variation::BaseVariationFeatureOverlap); | |
60 | |
61 =head2 new | |
62 | |
63 Arg [-FEATURE] : | |
64 The Bio::EnsEMBL::Feature associated with the given VariationFeature | |
65 | |
66 Arg [-VARIATION_FEATURE] : | |
67 The Bio::EnsEMBL::Variation::VariationFeature associated with the given Feature | |
68 | |
69 Arg [-ADAPTOR] : | |
70 A Bio::EnsEMBL::Variation::DBSQL::VariationFeatureOverlapAdaptor | |
71 | |
72 Arg [-DISAMBIGUATE_SINGLE_NUCLEOTIDE_ALLELES] : | |
73 A flag indicating if ambiguous single nucleotide alleles should be disambiguated | |
74 when constructing the VariationFeatureOverlapAllele objects, e.g. a VariationFeature | |
75 with an allele string like 'T/M' would be treated as if it were 'T/A/C'. We limit | |
76 ourselves to single nucleotide alleles to avoid the combinatorial explosion if we | |
77 allowed longer alleles with potentially many ambiguous bases. | |
78 | |
79 Example : | |
80 my $vfo = Bio::EnsEMBL::Variation::VariationFeatureOverlap->new( | |
81 -feature => $feature, | |
82 -variation_feature => $var_feat | |
83 ); | |
84 | |
85 Description: Constructs a new VariationFeatureOverlap instance given a VariationFeature | |
86 and a Feature | |
87 Returntype : A new Bio::EnsEMBL::Variation::VariationFeatureOverlap instance | |
88 Exceptions : throws unless both VARIATION_FEATURE and FEATURE are supplied, or if the | |
89 supplied ADAPTOR is the wrong class | |
90 Status : At Risk | |
91 | |
92 =cut | |
93 | |
# Constructor: builds the overlap object and one VariationFeatureOverlapAllele
# per allele (the reference allele first, then each distinct alternate allele).
#
# Besides the arguments described in the POD for new(), two further named
# arguments are read here:
#   -REF_FEATURE  : object whose subseq() method supplies the reference
#                   sequence; defaults to the VariationFeature's slice
#   -NO_REF_CHECK : if true, the reference allele is taken from the first
#                   allele of the allele string instead of from -REF_FEATURE
sub new {
    my $class = shift;
    my %args  = @_;

    # swap a '-variation_feature' argument for a '-base_variation_feature' one for the superclass
    for my $arg (keys %args) {
        if (lc($arg) eq '-variation_feature') {
            $args{'-base_variation_feature'} = delete $args{$arg};
        }
    }

    my $self = $class->SUPER::new(%args);

    my (
        $adaptor,
        $ref_feature,
        $disambiguate_sn_alleles,
        $no_ref_check,
    ) = rearrange([qw(
        ADAPTOR
        REF_FEATURE
        DISAMBIGUATE_SINGLE_NUCLEOTIDE_ALLELES
        NO_REF_CHECK
    )], %args);

    my $variation_feature = $self->base_variation_feature;

    assert_ref($variation_feature, 'Bio::EnsEMBL::Variation::VariationFeature');
    assert_ref($adaptor, 'Bio::EnsEMBL::Variation::DBSQL::VariationFeatureOverlapAdaptor') if $adaptor;

    $ref_feature ||= $variation_feature->slice;

    $self->{adaptor}     = $adaptor;
    $self->{ref_feature} = $ref_feature;

    my $ref_allele;

    # we take the reference allele sequence from the reference sequence, not from the allele string
    unless ($no_ref_check) {
        $ref_allele = $ref_feature->subseq(
            $variation_feature->start,
            $variation_feature->end,
            $variation_feature->strand
        );
    }

    # get the variation feature allele string, expand it, and split it into separate alleles
    my $allele_string = $variation_feature->allele_string;

    expand(\$allele_string);

    my @alleles = split /\//, $allele_string;

    # with -NO_REF_CHECK the first listed allele stands in for the reference;
    # an empty/false reference sequence is represented as '-'
    $ref_allele = $alleles[0] if $no_ref_check;
    $ref_allele = '-' unless $ref_allele;

    if ($disambiguate_sn_alleles) {

        # if this flag is set, disambiguate any ambiguous single nucleotide alleles, so
        # e.g. an allele string like T/M would be equivalent to an allele string of T/A/C.
        # We only do this for single nucleotide alleles to avoid the combinatorial explosion
        # of long allele strings with potentially many ambiguous bases.

        my @possible_alleles;

        for my $allele (@alleles) {

            if ($allele !~ /^[ACGT-]+$/ && length($allele) == 1) {
                for my $possible ( split //, unambiguity_code($allele) ) {
                    push @possible_alleles, $possible;
                }
            }
            else {
                # the allele is either unambiguous or longer than 1 nucleotide, so add it unaltered
                push @possible_alleles, $allele;
            }
        }

        @alleles = @possible_alleles;
    }

    # make sure the alleles are unique, keeping them in the order in which they
    # first appear so that the alternate alleles are always added in a stable,
    # reproducible order (de-duplicating through hash keys would return them in
    # an arbitrary order).
    #
    # we also want to deal with alleles like (T)0 which expand into an empty
    # string and we want to treat this as a deletion, so we replace any empty
    # strings with '-'

    my %seen;
    @alleles = grep { !$seen{$_}++ } map { $_ || '-' } @alleles;

    # create an object representing the reference allele

    my $ref_vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
        -variation_feature_overlap  => $self,
        -variation_feature_seq      => $ref_allele,
        -is_reference               => 1,
    );

    $self->add_VariationFeatureOverlapAllele($ref_vfoa);

    # create objects representing the alternate alleles

    for my $allele (@alleles) {

        # the reference allele already has its own object
        next if $allele eq $ref_allele;

        my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
            -variation_feature_overlap  => $self,
            -variation_feature_seq      => $allele,
            -is_reference               => 0,
        );

        $self->add_VariationFeatureOverlapAllele($vfoa);
    }

    return $self;
}
216 | |
# Fast constructor: takes a hashref of attributes and hands it to the
# superclass new_fast(), after renaming a true 'variation_feature' entry
# to 'base_variation_feature'. The caller's hashref is modified in place.
sub new_fast {
    my $class   = shift;
    my $hashref = shift;

    # swap a variation_feature argument for a base_variation_feature one
    $hashref->{base_variation_feature} = delete $hashref->{variation_feature}
        if $hashref->{variation_feature};

    return $class->SUPER::new_fast($hashref);
}
228 | |
# Returns an identifier for this object. A true value already stored under
# 'dbID' is returned as-is; otherwise the identifier is built by joining the
# dbIDs of all alternate alleles with '_' and cached for later calls.
sub dbID {
    my ($self) = @_;

    return $self->{dbID} if $self->{dbID};

    # we don't really have a dbID, so concatenate all the dbIDs of our alleles
    my @allele_ids = map { $_->dbID } @{ $self->get_all_alternate_VariationFeatureOverlapAlleles };
    $self->{dbID} = join '_', @allele_ids;

    return $self->{dbID};
}
240 | |
241 =head2 variation_feature | |
242 | |
243 Arg [1] : (optional) A Bio::EnsEMBL::Variation::VariationFeature | |
244 Description: Get/set the associated VariationFeature, lazy-loading it if required | |
245 Returntype : Bio::EnsEMBL::Variation::VariationFeature | |
246 Exceptions : throws if the argument is the wrong type | |
247 Status : At Risk | |
248 | |
249 =cut | |
250 | |
# Get/set the associated VariationFeature.
#
# Setter: a supplied VariationFeature is type-checked and stored through
# base_variation_feature(). Any pending '_variation_feature_id' is dropped at
# that point, so the explicitly supplied object is not replaced by a lazily
# fetched one, either in this call or in a later one.
#
# Getter: if a '_variation_feature_id' is pending and an adaptor is attached,
# the VariationFeature is fetched by that dbID, stored, and the pending id is
# removed. If any step of the lookup yields nothing, the pending id is kept.
#
# Returns whatever base_variation_feature() currently holds.
sub variation_feature {
    my ($self, $variation_feature) = @_;

    if ($variation_feature) {
        assert_ref($variation_feature, 'Bio::EnsEMBL::Variation::VariationFeature');
        $self->base_variation_feature($variation_feature);

        # the caller's object wins over any pending lazy-load
        delete $self->{_variation_feature_id};
    }
    elsif (my $vf_id = $self->{_variation_feature_id}) {

        # lazy-load the VariationFeature

        if (my $adap = $self->{adaptor}) {
            if (my $vfa = $adap->db->get_VariationFeatureAdaptor) {
                if (my $vf = $vfa->fetch_by_dbID($vf_id)) {
                    $self->base_variation_feature($vf);
                    delete $self->{_variation_feature_id};
                }
            }
        }
    }

    return $self->base_variation_feature;
}
275 | |
# Get the dbID of the variation feature, using the VariationFeature object
# if we have one, or the internal '_variation_feature_id' hash value if we
# don't. Returns undef when neither is available.
#
# The object is looked up under 'base_variation_feature' first, because
# new_fast() renames 'variation_feature' to that key; the old
# 'variation_feature' key is still honoured as a fallback.
sub _variation_feature_id {
    my $self = shift;

    my $vf = $self->{base_variation_feature} || $self->{variation_feature};

    if ($vf) {
        return $vf->dbID;
    }
    elsif (my $id = $self->{_variation_feature_id}) {
        return $id;
    }
    else {
        # explicit undef kept so existing callers see the same return value
        return undef;
    }
}
293 | |
# Look up the allele object registered under the given allele sequence in the
# '_alleles_by_seq' index (filled by add_VariationFeatureOverlapAllele).
# Returns undef if no allele is registered under that sequence.
sub get_VariationFeatureOverlapAllele_for_allele_seq {
    my $self       = shift;
    my $allele_seq = shift;

    return $self->{_alleles_by_seq}{$allele_seq};
}
298 | |
299 =head2 add_VariationFeatureOverlapAllele | |
300 | |
301 Arg [1] : A Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance | |
302 Description: Add an allele to this VariationFeatureOverlap | |
303 Returntype : none | |
304 Exceptions : throws if the argument is not the expected type | |
305 Status : At Risk | |
306 | |
307 =cut | |
308 | |
# Register an allele with this overlap: type-check it, hand it to
# add_BaseVariationFeatureOverlapAllele(), and index it by its
# variation_feature_seq so it can be found again by sequence.
sub add_VariationFeatureOverlapAllele {
    my $self = shift;
    my $vfoa = shift;

    assert_ref($vfoa, 'Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele');

    $self->add_BaseVariationFeatureOverlapAllele($vfoa);

    # a later allele with the same sequence replaces the earlier index entry
    my $seq = $vfoa->variation_feature_seq;
    $self->{_alleles_by_seq}->{$seq} = $vfoa;
}
318 | |
319 =head2 get_reference_VariationFeatureOverlapAllele | |
320 | |
321 Description: Get the object representing the reference allele of this VariationFeatureOverlap | |
322 Returntype : Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance | |
323 Exceptions : none | |
324 Status : At Risk | |
325 | |
326 =cut | |
327 | |
# Thin wrapper: forwards any arguments to
# get_reference_BaseVariationFeatureOverlapAllele() and returns its result.
sub get_reference_VariationFeatureOverlapAllele {
    my ($self, @args) = @_;
    return $self->get_reference_BaseVariationFeatureOverlapAllele(@args);
}
332 | |
333 =head2 get_all_alternate_VariationFeatureOverlapAlleles | |
334 | |
335 Description: Get a list of the alternate alleles of this VariationFeatureOverlap | |
336 Returntype : listref of Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele objects | |
337 Exceptions : none | |
338 Status : At Risk | |
339 | |
340 =cut | |
341 | |
# Thin wrapper: forwards any arguments to
# get_all_alternate_BaseVariationFeatureOverlapAlleles() and returns its result.
sub get_all_alternate_VariationFeatureOverlapAlleles {
    my ($self, @args) = @_;
    return $self->get_all_alternate_BaseVariationFeatureOverlapAlleles(@args);
}
346 | |
347 =head2 get_all_VariationFeatureOverlapAlleles | |
348 | |
349 Description: Get a list of all the alleles, both reference and alternate, of this | |
350 VariationFeatureOverlap | |
351 Returntype : listref of Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele objects | |
352 Exceptions : none | |
353 Status : At Risk | |
354 | |
355 =cut | |
356 | |
# Thin wrapper: forwards any arguments to
# get_all_BaseVariationFeatureOverlapAlleles() and returns its result.
sub get_all_VariationFeatureOverlapAlleles {
    my ($self, @args) = @_;
    return $self->get_all_BaseVariationFeatureOverlapAlleles(@args);
}
361 | |
# Calls _convert_to_sara() on the stored reference allele object, then makes
# that same object the only entry in the alternate allele list.
sub _convert_to_sara {
    my ($self) = @_;

    my $reference = $self->{reference_allele};
    $reference->_convert_to_sara;

    # the converted reference allele is now the only "alternate" allele
    $self->{alt_alleles} = [ $reference ];
}
370 | |
# Restrict the alternate alleles to those whose variation_feature_seq has a
# true value in the $keep_alleles hashref. If nothing would remain, the
# existing alternate allele list is left as it is. When $keep_alleles has
# exactly one key, the first alternate allele also becomes the reference allele.
sub _rearrange_alleles {
    my ($self, $keep_alleles) = @_;

    # fix alt alleles
    my $current = $self->{alt_alleles};
    my @kept    = grep { $keep_alleles->{ $_->variation_feature_seq } } @$current;

    $self->{alt_alleles} = @kept ? \@kept : $current;

    # copy to ref allele if homozygous non-ref
    $self->{reference_allele} = $self->{alt_alleles}->[0] if scalar keys %$keep_alleles == 1;
}
383 | |
384 1; |