0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele
|
|
24
|
|
25 =head1 SYNOPSIS
|
|
26
|
|
27 use Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele;
|
|
28
|
|
29 my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
|
|
30 -variation_feature_overlap => $vfo,
|
|
31 -variation_feature_seq => 'A',
|
|
32 -is_reference => 0,
|
|
33 );
|
|
34
|
|
35 print "sequence with respect to the feature: ", $vfoa->feature_seq, "\n";
|
|
36 print "sequence with respect to the variation feature: ", $vfoa->variation_feature_seq, "\n";
|
|
37 print "consequence SO terms: ", (join ",", map { $_->SO_term } @{ $vfoa->get_all_OverlapConsequences }), "\n";
|
|
38
|
|
39 =head1 DESCRIPTION
|
|
40
|
|
41 A VariationFeatureOverlapAllele object represents a single allele of a
|
|
42 VariationFeatureOverlap. It is the super-class of various feature-specific allele
|
|
43 classes such as TranscriptVariationAllele and RegulatoryFeatureVariationAllele and
|
|
44 contains methods not specific to any particular feature type. Ordinarily you will
|
|
45 not create these objects yourself, but instead you would create e.g. a
|
|
46 TranscriptVariation object which will then create VariationFeatureOverlapAlleles
|
|
47 based on the allele string of the associated VariationFeature.
|
|
48
|
|
49 =cut
|
|
50
|
|
51 package Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele;
|
|
52
|
|
53 use strict;
|
|
54 use warnings;
|
|
55
|
|
56 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
57 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
|
|
58 use Bio::EnsEMBL::Utils::Exception qw(throw);
|
|
59 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
|
|
60
|
|
61 use base qw(Bio::EnsEMBL::Variation::BaseVariationFeatureOverlapAllele);
|
|
62
|
|
63 our $UNAMBIGUOUS_NUCLEOTIDES = qr/^[ACGT-]+$/i;
|
|
64
|
|
65 our $ALL_NUCLEOTIDES = qr/^[ACGTUMRWSYKVHDBXN-]+$/i;
|
|
66
|
|
67 our $SPECIFIED_LENGTH = qr /(\d+) BP (INSERTION|DELETION)/i;
|
|
68
|
|
69 =head2 new
|
|
70
|
|
71 Arg [-VARIATION_FEATURE_OVERLAP] :
|
|
72 The Bio::EnsEMBL::VariationFeatureOverlap with which this allele is
|
|
73 associated
|
|
74
|
|
75 Arg [-VARIATION_FEATURE_SEQ] :
|
|
76 The allele sequence with respect to the associated VariationFeature
|
|
77
|
|
78 Arg [-IS_REFERENCE] :
|
|
79 A flag indicating if this allele is the reference allele or not
|
|
80
|
|
81 Example :
|
|
82 my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
|
|
83 -variation_feature_ovelap => $vfo,
|
|
84 -variation_feature_seq => 'A',
|
|
85 -is_reference => 0
|
|
86 );
|
|
87
|
|
88 Description: Constructs a new VariationFeatureOverlapAllele instance given a
|
|
89 VariationFeatureOverlap and the sequence of the allele
|
|
90 Returntype : A new Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance
|
|
91 Exceptions : throws unless both VARIATION_FEATURE_OVERLAP and VARIATION_FEATURE_SEQ
|
|
92 are supplied
|
|
93 Status : At Risk
|
|
94
|
|
95 =cut
|
|
96
|
|
97 sub new {
|
|
98 my $class = shift;
|
|
99
|
|
100 my %args = @_;
|
|
101
|
|
102 # swap a '-variation_feature_overlap' argument for a '-base_variation_feature_overlap'
|
|
103 # and a '-variation_feature' for a '-base_variation_feature' for the superclass
|
|
104
|
|
105 for my $arg (keys %args) {
|
|
106 if (lc($arg) eq '-variation_feature_overlap') {
|
|
107 $args{'-base_variation_feature_overlap'} = delete $args{$arg};
|
|
108 }
|
|
109 }
|
|
110
|
|
111 my $self = $class->SUPER::new(%args);
|
|
112
|
|
113 assert_ref($self->base_variation_feature_overlap, 'Bio::EnsEMBL::Variation::VariationFeatureOverlap');
|
|
114
|
|
115 my (
|
|
116 $variation_feature_seq,
|
|
117 ) = rearrange([qw(
|
|
118 VARIATION_FEATURE_SEQ
|
|
119 )], %args);
|
|
120
|
|
121
|
|
122 throw("Allele sequence required (variation "+$self->variation_feature->variation_name+")")
|
|
123 unless $variation_feature_seq;
|
|
124
|
|
125 $self->{variation_feature_seq} = $variation_feature_seq;
|
|
126
|
|
127 return $self;
|
|
128 }
|
|
129
|
|
130 sub new_fast {
|
|
131 my ($class, $hashref) = @_;
|
|
132
|
|
133 # swap a variation_feature_overlap argument for a base_variation_feature_overlap one
|
|
134
|
|
135 if ($hashref->{variation_feature_overlap}) {
|
|
136 $hashref->{base_variation_feature_overlap} = delete $hashref->{variation_feature_overlap};
|
|
137 }
|
|
138
|
|
139 # and call the superclass
|
|
140
|
|
141 return $class->SUPER::new_fast($hashref);
|
|
142 }
|
|
143
|
|
144 =head2 dbID
|
|
145
|
|
146 Description: Get/set the dbID of this VariationFeatureOverlapAllele
|
|
147 Returntype : integer
|
|
148 Exceptions : none
|
|
149 Status : At Risk
|
|
150
|
|
151 =cut
|
|
152
|
|
153 sub dbID {
|
|
154 my ($self, $dbID) = @_;
|
|
155 $self->{dbID} = $dbID if defined $dbID;
|
|
156 return $self->{dbID};
|
|
157 }
|
|
158
|
|
159 =head2 variation_feature_overlap
|
|
160
|
|
161 Description: Get/set the associated VariationFeatureOverlap
|
|
162 Returntype : Bio::EnsEMBL::Variation::VariationFeatureOverlap
|
|
163 Exceptions : throws if the argument is the wrong type
|
|
164 Status : At Risk
|
|
165
|
|
166 =cut
|
|
167
|
|
168 sub variation_feature_overlap {
|
|
169 my ($self, $variation_feature_overlap) = @_;
|
|
170
|
|
171 if ($variation_feature_overlap) {
|
|
172 assert_ref($variation_feature_overlap, 'Bio::EnsEMBL::Variation::VariationFeatureOverlap');
|
|
173 }
|
|
174
|
|
175 return $self->base_variation_feature_overlap($variation_feature_overlap);
|
|
176 }
|
|
177
|
|
178 =head2 variation_feature
|
|
179
|
|
180 Description: Get the associated VariationFeature
|
|
181 Returntype : Bio::EnsEMBL::Variation::VariationFeature
|
|
182 Exceptions : none
|
|
183 Status : At Risk
|
|
184
|
|
185 =cut
|
|
186
|
|
187 sub variation_feature {
|
|
188 my $self = shift;
|
|
189 return $self->variation_feature_overlap->variation_feature;
|
|
190 }
|
|
191
|
|
192 =head2 feature_seq
|
|
193
|
|
194 Description: Get the sequence of this allele relative to the associated Feature.
|
|
195 This will be the same as the variation_feature_seq when the associated
|
|
196 VariationFeature is on the same strand as the Feature, or the reverse
|
|
197 complement when the strands differ.
|
|
198 Returntype : string
|
|
199 Exceptions : none
|
|
200 Status : At Risk
|
|
201
|
|
202 =cut
|
|
203
|
|
204 sub feature_seq {
|
|
205 my $self = shift;
|
|
206
|
|
207 unless ($self->{feature_seq}) {
|
|
208
|
|
209 # check if we need to reverse complement the variation_feature_seq
|
|
210
|
|
211 if (($self->variation_feature->strand != $self->feature->strand) && $self->seq_is_dna) {
|
|
212 my $vf_seq = $self->variation_feature_seq;
|
|
213 reverse_comp(\$vf_seq);
|
|
214 $self->{feature_seq} = $vf_seq;
|
|
215 }
|
|
216 else {
|
|
217 $self->{feature_seq} = $self->{variation_feature_seq};
|
|
218 }
|
|
219 }
|
|
220
|
|
221 return $self->{feature_seq};
|
|
222 }
|
|
223
|
|
224 =head2 variation_feature_seq
|
|
225
|
|
226 Args [1] : The allele sequence relative to the VariationFeature
|
|
227 Description: Get/set the sequence of this allele relative to the associated VariationFeature.
|
|
228 Returntype : string
|
|
229 Exceptions : none
|
|
230 Status : At Risk
|
|
231
|
|
232 =cut
|
|
233
|
|
234 sub variation_feature_seq {
|
|
235 # the sequence of this allele relative to the variation feature
|
|
236 my ($self, $variation_feature_seq) = @_;
|
|
237 $self->{variation_feature_seq} = $variation_feature_seq if $variation_feature_seq;
|
|
238 return $self->{variation_feature_seq};
|
|
239 }
|
|
240
|
|
241 =head2 seq_is_unambiguous_dna
|
|
242
|
|
243 Description: identify if the sequence of this allele is unambiguous DNA
|
|
244 i.e. if we can meaningfully translate it
|
|
245 Returntype : bool
|
|
246 Exceptions : none
|
|
247 Status : At Risk
|
|
248
|
|
249 =cut
|
|
250
|
|
251 sub seq_is_unambiguous_dna {
|
|
252 my $self = shift;
|
|
253
|
|
254 unless (defined $self->{seq_is_unambiguous_dna}) {
|
|
255 $self->{seq_is_unambiguous_dna} =
|
|
256 $self->{variation_feature_seq} =~ /$UNAMBIGUOUS_NUCLEOTIDES/ ? 1 : 0;
|
|
257 }
|
|
258
|
|
259 return $self->{seq_is_unambiguous_dna};
|
|
260 }
|
|
261
|
|
262 =head2 seq_is_dna
|
|
263
|
|
264 Description: identify if the sequence of this allele is DNA including ambiguity
|
|
265 codes, use seq_is_unambiguous_dna to check for alleles that do not
|
|
266 include ambiguity codes
|
|
267 Returntype : bool
|
|
268 Exceptions : none
|
|
269 Status : At Risk
|
|
270
|
|
271 =cut
|
|
272
|
|
273 sub seq_is_dna {
|
|
274 my $self = shift;
|
|
275
|
|
276 unless (defined $self->{seq_is_dna}) {
|
|
277 $self->{seq_is_dna} =
|
|
278 $self->{variation_feature_seq} =~ /$ALL_NUCLEOTIDES/ ? 1 : 0;
|
|
279 }
|
|
280
|
|
281 return $self->{seq_is_dna};
|
|
282 }
|
|
283
|
|
284 =head2 seq_length
|
|
285
|
|
286 Description: return the length of this allele sequence, this is better than
|
|
287 just using length($vfoa->feature_seq) because we check if the
|
|
288 sequence is valid DNA, and also look for allele strings like
|
|
289 "(3 BP INSERTION)" to determine the length
|
|
290 Returntype : int or undef if we cannot determine the length
|
|
291 Exceptions : none
|
|
292 Status : At Risk
|
|
293
|
|
294 =cut
|
|
295
|
|
296 sub seq_length {
|
|
297 my $self = shift;
|
|
298
|
|
299 my $seq = $self->variation_feature_seq;
|
|
300
|
|
301 if ($self->seq_is_dna) {
|
|
302 if ($seq eq '-') {
|
|
303 return 0;
|
|
304 }
|
|
305 else {
|
|
306 return length($seq);
|
|
307 }
|
|
308 }
|
|
309 elsif ($seq =~ /$SPECIFIED_LENGTH/) {
|
|
310 return $1;
|
|
311 }
|
|
312
|
|
313 return undef;
|
|
314 }
|
|
315
|
|
316 =head2 allele_string
|
|
317
|
|
318 Description: Return a '/' delimited string of the reference allele variation_feature_seq
|
|
319 and the variation_feature_seq of this allele
|
|
320 Returntype : string
|
|
321 Exceptions : none
|
|
322 Status : At Risk
|
|
323
|
|
324 =cut
|
|
325
|
|
326 sub allele_string {
|
|
327 my $self = shift;
|
|
328
|
|
329 my $ref = $self->variation_feature_overlap->get_reference_VariationFeatureOverlapAllele->variation_feature_seq;
|
|
330
|
|
331 # for the HGMDs and CNV probes where the alleles are artificially set to be
|
|
332 # the same, just return the reference sequence
|
|
333
|
|
334 if ($ref eq $self->variation_feature_seq) {
|
|
335 return $ref;
|
|
336 }
|
|
337 else {
|
|
338 return $ref.'/'.$self->variation_feature_seq;
|
|
339 }
|
|
340 }
|
|
341
|
|
342
|
|
343 sub _convert_to_sara {
|
|
344 my $self = shift;
|
|
345
|
|
346 my $oc = Bio::EnsEMBL::Variation::OverlapConsequence->new_fast({
|
|
347 'label' => 'SARA',
|
|
348 'description' => 'Same as reference allele',
|
|
349 'rank' => '99',
|
|
350 'display_term' => 'SARA',
|
|
351 'SO_term' => 'SARA',
|
|
352 });
|
|
353
|
|
354 $self->add_OverlapConsequence($oc);
|
|
355
|
|
356 return $self;
|
|
357 }
|
|
358
|
|
359 1;
|
|
360
|