0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::Variation::VariationFeatureOverlap
|
|
24
|
|
25 =head1 SYNOPSIS
|
|
26
|
|
27 use Bio::EnsEMBL::Variation::VariationFeatureOverlap;
|
|
28
|
|
29 my $vfo = Bio::EnsEMBL::Variation::VariationFeatureOverlap->new(
|
|
30 -feature => $feature,
|
|
31 -variation_feature => $var_feat
|
|
32 );
|
|
33
|
|
34 print "consequence type: ", (join ",", @{ $vfo->consequence_type }), "\n";
|
|
35 print "most severe consequence: ", $vfo->display_consequence, "\n";
|
|
36
|
|
37 =head1 DESCRIPTION
|
|
38
|
|
39 A VariationFeatureOverlap represents a VariationFeature which is in close
|
|
40 proximity to another Ensembl Feature. It is the superclass of feature-specific
|
|
41 objects such as TranscriptVariation and RegulatoryFeatureVariation, and has
|
|
42 methods common to all such objects. You will not normally instantiate this
|
|
43 class directly, instead instantiating one of the feature-specific subclasses.
|
|
44
|
|
45 =cut
|
|
46
|
|
47 package Bio::EnsEMBL::Variation::VariationFeatureOverlap;
|
|
48
|
|
49 use strict;
|
|
50 use warnings;
|
|
51
|
|
52 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
|
|
53 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
|
|
54 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
55 use Bio::EnsEMBL::Utils::Sequence qw(expand);
|
|
56 use Bio::EnsEMBL::Variation::Utils::Sequence qw(unambiguity_code);
|
|
57 use Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele;
|
|
58
|
|
59 use base qw(Bio::EnsEMBL::Variation::BaseVariationFeatureOverlap);
|
|
60
|
|
61 =head2 new
|
|
62
|
|
63 Arg [-FEATURE] :
|
|
64 The Bio::EnsEMBL::Feature associated with the given VariationFeature
|
|
65
|
|
66 Arg [-VARIATION_FEATURE] :
|
|
67 The Bio::EnsEMBL::Variation::VariationFeature associated with the given Feature
|
|
68
|
|
69 Arg [-ADAPTOR] :
|
|
70 A Bio::EnsEMBL::Variation::DBSQL::VariationFeatureOverlapAdaptor
|
|
71
|
|
72 Arg [-DISAMBIGUATE_SINGLE_NUCLEOTIDE_ALLELES] :
|
|
73 A flag indicating if ambiguous single nucleotide alleles should be disambiguated
|
|
74 when constructing the VariationFeatureOverlapAllele objects, e.g. a VariationFeature
|
|
75 with an allele string like 'T/M' would be treated as if it were 'T/A/C'. We limit
|
|
76 ourselves to single nucleotide alleles to avoid the combinatorial explosion if we
|
|
77 allowed longer alleles with potentially many ambiguous bases.
|
|
78
|
|
79 Example :
|
|
80 my $vfo = Bio::EnsEMBL::Variation::VariationFeatureOverlap->new(
|
|
81 -feature => $feature,
|
|
82 -variation_feature => $var_feat
|
|
83 );
|
|
84
|
|
85 Description: Constructs a new VariationFeatureOverlap instance given a VariationFeature
|
|
86 and a Feature
|
|
87 Returntype : A new Bio::EnsEMBL::Variation::VariationFeatureOverlap instance
|
|
88 Exceptions : throws unless both VARIATION_FEATURE and FEATURE are supplied, or if the
|
|
89 supplied ADAPTOR is the wrong class
|
|
90 Status : At Risk
|
|
91
|
|
92 =cut
|
|
93
|
|
sub new {

    # Build the overlap object through the superclass, then attach one
    # VariationFeatureOverlapAllele for the reference allele and one for each
    # distinct alternate allele found in the VariationFeature's allele string.

    my ($class, %args) = @_;

    # swap a '-variation_feature' argument (matched case-insensitively) for a
    # '-base_variation_feature' one for the superclass

    for my $arg (keys %args) {
        if (lc($arg) eq '-variation_feature') {
            $args{'-base_variation_feature'} = delete $args{$arg};
        }
    }

    my $self = $class->SUPER::new(%args);

    my (
        $adaptor,
        $ref_feature,
        $disambiguate_sn_alleles,
        $no_ref_check,
    ) = rearrange([qw(
            ADAPTOR
            REF_FEATURE
            DISAMBIGUATE_SINGLE_NUCLEOTIDE_ALLELES
            NO_REF_CHECK
        )], %args);

    my $variation_feature = $self->base_variation_feature;

    assert_ref($variation_feature, 'Bio::EnsEMBL::Variation::VariationFeature');
    assert_ref($adaptor, 'Bio::EnsEMBL::Variation::DBSQL::VariationFeatureOverlapAdaptor') if $adaptor;

    # default the reference sequence source to the slice of the variation feature
    $ref_feature ||= $variation_feature->slice;

    $self->{adaptor}     = $adaptor;
    $self->{ref_feature} = $ref_feature;

    my $ref_allele;

    # we take the reference allele sequence from the reference sequence, not from the allele string
    unless ($no_ref_check) {
        $ref_allele = $ref_feature->subseq(
            $variation_feature->start,
            $variation_feature->end,
            $variation_feature->strand
        );
    }

    # get the variation feature allele string, expand it, and split it into separate alleles

    my $allele_string = $variation_feature->allele_string;

    expand(\$allele_string);

    my @alleles = split /\//, $allele_string;

    # with NO_REF_CHECK the first allele of the allele string is trusted as the reference
    $ref_allele = $alleles[0] if $no_ref_check;

    # an empty reference sequence is represented as a deletion
    $ref_allele = '-' unless $ref_allele;

    if ($disambiguate_sn_alleles) {

        # if this flag is set, disambiguate any ambiguous single nucleotide alleles, so
        # e.g. an allele string like T/M would be equivalent to an allele string of T/A/C
        # we only do this for single nucleotide alleles to avoid the combinatorial explosion
        # of long allele strings with potentially many ambiguous bases (because ensembl
        # genomes want this functionality)

        my @possible_alleles;

        for my $allele (@alleles) {

            my $expansion;

            if (length($allele) == 1 && $allele !~ /^[ACGT-]+$/) {
                $expansion = unambiguity_code($allele);
            }

            if (defined $expansion && length $expansion) {
                push @possible_alleles, split //, $expansion;
            }
            else {
                # the allele is unambiguous, longer than 1 nucleotide, or a symbol for
                # which no expansion came back, so add it unaltered rather than lose it
                # NOTE(review): assumes unambiguity_code returns undef/'' for symbols
                # it does not know - confirm against its implementation
                push @possible_alleles, $allele;
            }
        }

        @alleles = @possible_alleles;
    }

    # make sure the alleles are unique

    # we also want to deal with alleles like (T)0 which expand into
    # an empty string and we want to treat this as a deletion, so
    # we replace any empty strings with '-'

    # de-duplicate while keeping the order of first appearance: going through
    # hash keys would hand the alleles back in an order that can differ from
    # run to run, and with it the order of the alternate allele objects

    my %seen;
    @alleles = grep { !$seen{$_}++ } map { $_ || '-' } @alleles;

    # create an object representing the reference allele

    my $ref_vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
        -variation_feature_overlap => $self,
        -variation_feature_seq     => $ref_allele,
        -is_reference              => 1,
    );

    $self->add_VariationFeatureOverlapAllele($ref_vfoa);

    # create objects representing the alternate alleles

    for my $allele (@alleles) {

        # the reference allele has already been added above
        next if $allele eq $ref_allele;

        my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
            -variation_feature_overlap => $self,
            -variation_feature_seq     => $allele,
            -is_reference              => 0,
        );

        $self->add_VariationFeatureOverlapAllele($vfoa);
    }

    return $self;
}
|
|
216
|
|
# Lightweight constructor taking a ready-made hashref of attributes. A
# 'variation_feature' entry is renamed (in the caller's hashref) to
# 'base_variation_feature' before the hashref is handed to the superclass.
sub new_fast {
    my $class   = shift;
    my $hashref = shift;

    # the superclass is given this object under the base_variation_feature key
    $hashref->{base_variation_feature} = delete $hashref->{variation_feature}
        if $hashref->{variation_feature};

    return $class->SUPER::new_fast($hashref);
}
|
|
228
|
|
# Return an identifier for this overlap. There is no real dbID, so the first
# call builds one by joining the dbIDs of the alternate alleles with '_' and
# caches it in $self->{dbID}.
sub dbID {
    my ($self) = @_;

    # already set or computed earlier
    return $self->{dbID} if $self->{dbID};

    my @allele_ids = map { $_->dbID } @{ $self->get_all_alternate_VariationFeatureOverlapAlleles };

    $self->{dbID} = join '_', @allele_ids;

    return $self->{dbID};
}
|
|
240
|
|
241 =head2 variation_feature
|
|
242
|
|
243 Arg [1] : (optional) A Bio::EnsEMBL::Variation::VariationFeature
|
|
244 Description: Get/set the associated VariationFeature, lazy-loading it if required
|
|
245 Returntype : Bio::EnsEMBL::Variation::VariationFeature
|
|
246 Exceptions : throws if the argument is the wrong type
|
|
247 Status : At Risk
|
|
248
|
|
249 =cut
|
|
250
|
|
sub variation_feature {
    my ($self, $variation_feature) = @_;

    # setter: validate and store through the superclass accessor
    if ($variation_feature) {
        assert_ref($variation_feature, 'Bio::EnsEMBL::Variation::VariationFeature');
        $self->base_variation_feature($variation_feature);
    }

    my $vf_id = $self->{_variation_feature_id};

    # lazy-load the VariationFeature, but only when none is attached yet, so
    # that a pending id can never replace an object the caller has just set
    # (or one that is already present)

    if ($vf_id && !$self->base_variation_feature) {

        my $adap = $self->{adaptor};
        my $vfa  = $adap && $adap->db->get_VariationFeatureAdaptor;
        my $vf   = $vfa  && $vfa->fetch_by_dbID($vf_id);

        if ($vf) {
            $self->base_variation_feature($vf);

            # the id is only needed until the object has been loaded
            delete $self->{_variation_feature_id};
        }
    }

    return $self->base_variation_feature;
}
|
|
275
|
|
sub _variation_feature_id {

    # get the dbID of the variation feature, using the VariationFeature object
    # if we have one, or the internal hash value if we don't

    my $self = shift;

    # this class hands the object on under 'base_variation_feature' (see
    # new_fast), so look there first; the 'variation_feature' key is still
    # honoured for hashes that carry it
    # NOTE(review): assumes the superclass accessor stores the object under
    # $self->{base_variation_feature} - confirm against the base class

    my $vf = $self->{base_variation_feature} || $self->{variation_feature};

    return $vf->dbID if $vf;

    if (my $id = $self->{_variation_feature_id}) {
        return $id;
    }

    # explicit undef is kept so existing callers see the same value
    return undef;
}
|
|
293
|
|
# Look up the allele object registered under the given allele sequence in
# the _alleles_by_seq index (filled by add_VariationFeatureOverlapAllele).
# Returns undef when no allele with that sequence is registered.
sub get_VariationFeatureOverlapAllele_for_allele_seq {
    my $self       = shift;
    my $allele_seq = shift;

    my $vfoa = $self->{_alleles_by_seq}{$allele_seq};

    return $vfoa;
}
|
|
298
|
|
299 =head2 add_VariationFeatureOverlapAllele
|
|
300
|
|
301 Arg [1] : A Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance
|
|
302 Description: Add an allele to this VariationFeatureOverlap
|
|
303 Returntype : none
|
|
304 Exceptions : throws if the argument is not the expected type
|
|
305 Status : At Risk
|
|
306
|
|
307 =cut
|
|
308
|
|
sub add_VariationFeatureOverlapAllele {
    my $self = shift;
    my ($vfoa) = @_;

    # only genuine VariationFeatureOverlapAllele objects may be added
    assert_ref($vfoa, 'Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele');

    # let the superclass record the allele
    $self->add_BaseVariationFeatureOverlapAllele($vfoa);

    # index the allele by its sequence for
    # get_VariationFeatureOverlapAllele_for_allele_seq
    my $seq = $vfoa->variation_feature_seq;
    $self->{_alleles_by_seq}{$seq} = $vfoa;
}
|
|
318
|
|
319 =head2 get_reference_VariationFeatureOverlapAllele
|
|
320
|
|
321 Description: Get the object representing the reference allele of this VariationFeatureOverlap
|
|
322 Returntype : Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance
|
|
323 Exceptions : none
|
|
324 Status : At Risk
|
|
325
|
|
326 =cut
|
|
327
|
|
sub get_reference_VariationFeatureOverlapAllele {
    my ($self, @args) = @_;

    # thin alias: forward any arguments to the superclass accessor
    return $self->get_reference_BaseVariationFeatureOverlapAllele(@args);
}
|
|
332
|
|
333 =head2 get_all_alternate_VariationFeatureOverlapAlleles
|
|
334
|
|
335 Description: Get a list of the alternate alleles of this VariationFeatureOverlap
|
|
336 Returntype : listref of Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele objects
|
|
337 Exceptions : none
|
|
338 Status : At Risk
|
|
339
|
|
340 =cut
|
|
341
|
|
sub get_all_alternate_VariationFeatureOverlapAlleles {
    my ($self, @args) = @_;

    # thin alias: forward any arguments to the superclass accessor
    return $self->get_all_alternate_BaseVariationFeatureOverlapAlleles(@args);
}
|
|
346
|
|
347 =head2 get_all_VariationFeatureOverlapAlleles
|
|
348
|
|
349 Description: Get a list of all the alleles, both reference and alternate, of this
|
|
350 VariationFeatureOverlap
|
|
351 Returntype : listref of Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele objects
|
|
352 Exceptions : none
|
|
353 Status : At Risk
|
|
354
|
|
355 =cut
|
|
356
|
|
sub get_all_VariationFeatureOverlapAlleles {
    my ($self, @args) = @_;

    # thin alias: forward any arguments to the superclass accessor
    return $self->get_all_BaseVariationFeatureOverlapAlleles(@args);
}
|
|
361
|
|
# Internal: call _convert_to_sara on the reference allele object and then make
# that object the sole entry of the alt_alleles list.
sub _convert_to_sara {
    my ($self) = @_;

    my $reference = $self->{reference_allele};
    $reference->_convert_to_sara;

    # the converted reference allele becomes the only alternate allele
    $self->{alt_alleles} = [ $reference ];
}
|
|
370
|
|
# Internal: restrict the alternate alleles to those whose sequence is a true
# key in the $keep_alleles hashref. If nothing matches, the existing list is
# left in place. When $keep_alleles has exactly one key, the first alternate
# allele is also installed as the reference allele.
sub _rearrange_alleles {
    my ($self, $keep_alleles) = @_;

    # fix alt alleles
    my $current = $self->{alt_alleles};
    my @kept    = grep { $keep_alleles->{ $_->variation_feature_seq } } @$current;

    $self->{alt_alleles} = @kept ? \@kept : $current;

    # copy to ref allele if homozygous non-ref
    $self->{reference_allele} = $self->{alt_alleles}->[0] if keys(%$keep_alleles) == 1;
}
|
|
383
|
|
384 1;
|