comparison variant_effect_predictor/Bio/EnsEMBL/Variation/VariationFeatureOverlapAllele.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele
24
25 =head1 SYNOPSIS
26
27 use Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele;
28
29 my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
30 -variation_feature_overlap => $vfo,
31 -variation_feature_seq => 'A',
32 -is_reference => 0,
33 );
34
35 print "sequence with respect to the feature: ", $vfoa->feature_seq, "\n";
36 print "sequence with respect to the variation feature: ", $vfoa->variation_feature_seq, "\n";
37 print "consequence SO terms: ", (join ",", map { $_->SO_term } @{ $vfoa->get_all_OverlapConsequences }), "\n";
38
39 =head1 DESCRIPTION
40
41 A VariationFeatureOverlapAllele object represents a single allele of a
42 VariationFeatureOverlap. It is the super-class of various feature-specific allele
43 classes such as TranscriptVariationAllele and RegulatoryFeatureVariationAllele and
44 contains methods not specific to any particular feature type. Ordinarily you will
45 not create these objects yourself, but instead you would create e.g. a
46 TranscriptVariation object which will then create VariationFeatureOverlapAlleles
47 based on the allele string of the associated VariationFeature.
48
49 =cut
50
51 package Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele;
52
53 use strict;
54 use warnings;
55
56 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
57 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
58 use Bio::EnsEMBL::Utils::Exception qw(throw);
59 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
60
61 use base qw(Bio::EnsEMBL::Variation::BaseVariationFeatureOverlapAllele);
62
63 our $UNAMBIGUOUS_NUCLEOTIDES = qr/^[ACGT-]+$/i;
64
65 our $ALL_NUCLEOTIDES = qr/^[ACGTUMRWSYKVHDBXN-]+$/i;
66
67 our $SPECIFIED_LENGTH = qr /(\d+) BP (INSERTION|DELETION)/i;
68
69 =head2 new
70
71 Arg [-VARIATION_FEATURE_OVERLAP] :
72 The Bio::EnsEMBL::VariationFeatureOverlap with which this allele is
73 associated
74
75 Arg [-VARIATION_FEATURE_SEQ] :
76 The allele sequence with respect to the associated VariationFeature
77
78 Arg [-IS_REFERENCE] :
79 A flag indicating if this allele is the reference allele or not
80
81 Example :
82 my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
83 -variation_feature_ovelap => $vfo,
84 -variation_feature_seq => 'A',
85 -is_reference => 0
86 );
87
88 Description: Constructs a new VariationFeatureOverlapAllele instance given a
89 VariationFeatureOverlap and the sequence of the allele
90 Returntype : A new Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance
91 Exceptions : throws unless both VARIATION_FEATURE_OVERLAP and VARIATION_FEATURE_SEQ
92 are supplied
93 Status : At Risk
94
95 =cut
96
97 sub new {
98 my $class = shift;
99
100 my %args = @_;
101
102 # swap a '-variation_feature_overlap' argument for a '-base_variation_feature_overlap'
103 # and a '-variation_feature' for a '-base_variation_feature' for the superclass
104
105 for my $arg (keys %args) {
106 if (lc($arg) eq '-variation_feature_overlap') {
107 $args{'-base_variation_feature_overlap'} = delete $args{$arg};
108 }
109 }
110
111 my $self = $class->SUPER::new(%args);
112
113 assert_ref($self->base_variation_feature_overlap, 'Bio::EnsEMBL::Variation::VariationFeatureOverlap');
114
115 my (
116 $variation_feature_seq,
117 ) = rearrange([qw(
118 VARIATION_FEATURE_SEQ
119 )], %args);
120
121
122 throw("Allele sequence required (variation "+$self->variation_feature->variation_name+")")
123 unless $variation_feature_seq;
124
125 $self->{variation_feature_seq} = $variation_feature_seq;
126
127 return $self;
128 }
129
130 sub new_fast {
131 my ($class, $hashref) = @_;
132
133 # swap a variation_feature_overlap argument for a base_variation_feature_overlap one
134
135 if ($hashref->{variation_feature_overlap}) {
136 $hashref->{base_variation_feature_overlap} = delete $hashref->{variation_feature_overlap};
137 }
138
139 # and call the superclass
140
141 return $class->SUPER::new_fast($hashref);
142 }
143
144 =head2 dbID
145
146 Description: Get/set the dbID of this VariationFeatureOverlapAllele
147 Returntype : integer
148 Exceptions : none
149 Status : At Risk
150
151 =cut
152
153 sub dbID {
154 my ($self, $dbID) = @_;
155 $self->{dbID} = $dbID if defined $dbID;
156 return $self->{dbID};
157 }
158
159 =head2 variation_feature_overlap
160
161 Description: Get/set the associated VariationFeatureOverlap
162 Returntype : Bio::EnsEMBL::Variation::VariationFeatureOverlap
163 Exceptions : throws if the argument is the wrong type
164 Status : At Risk
165
166 =cut
167
168 sub variation_feature_overlap {
169 my ($self, $variation_feature_overlap) = @_;
170
171 if ($variation_feature_overlap) {
172 assert_ref($variation_feature_overlap, 'Bio::EnsEMBL::Variation::VariationFeatureOverlap');
173 }
174
175 return $self->base_variation_feature_overlap($variation_feature_overlap);
176 }
177
178 =head2 variation_feature
179
180 Description: Get the associated VariationFeature
181 Returntype : Bio::EnsEMBL::Variation::VariationFeature
182 Exceptions : none
183 Status : At Risk
184
185 =cut
186
187 sub variation_feature {
188 my $self = shift;
189 return $self->variation_feature_overlap->variation_feature;
190 }
191
192 =head2 feature_seq
193
194 Description: Get the sequence of this allele relative to the associated Feature.
195 This will be the same as the variation_feature_seq when the associated
196 VariationFeature is on the same strand as the Feature, or the reverse
197 complement when the strands differ.
198 Returntype : string
199 Exceptions : none
200 Status : At Risk
201
202 =cut
203
204 sub feature_seq {
205 my $self = shift;
206
207 unless ($self->{feature_seq}) {
208
209 # check if we need to reverse complement the variation_feature_seq
210
211 if (($self->variation_feature->strand != $self->feature->strand) && $self->seq_is_dna) {
212 my $vf_seq = $self->variation_feature_seq;
213 reverse_comp(\$vf_seq);
214 $self->{feature_seq} = $vf_seq;
215 }
216 else {
217 $self->{feature_seq} = $self->{variation_feature_seq};
218 }
219 }
220
221 return $self->{feature_seq};
222 }
223
224 =head2 variation_feature_seq
225
226 Args [1] : The allele sequence relative to the VariationFeature
227 Description: Get/set the sequence of this allele relative to the associated VariationFeature.
228 Returntype : string
229 Exceptions : none
230 Status : At Risk
231
232 =cut
233
234 sub variation_feature_seq {
235 # the sequence of this allele relative to the variation feature
236 my ($self, $variation_feature_seq) = @_;
237 $self->{variation_feature_seq} = $variation_feature_seq if $variation_feature_seq;
238 return $self->{variation_feature_seq};
239 }
240
241 =head2 seq_is_unambiguous_dna
242
243 Description: identify if the sequence of this allele is unambiguous DNA
244 i.e. if we can meaningfully translate it
245 Returntype : bool
246 Exceptions : none
247 Status : At Risk
248
249 =cut
250
251 sub seq_is_unambiguous_dna {
252 my $self = shift;
253
254 unless (defined $self->{seq_is_unambiguous_dna}) {
255 $self->{seq_is_unambiguous_dna} =
256 $self->{variation_feature_seq} =~ /$UNAMBIGUOUS_NUCLEOTIDES/ ? 1 : 0;
257 }
258
259 return $self->{seq_is_unambiguous_dna};
260 }
261
262 =head2 seq_is_dna
263
264 Description: identify if the sequence of this allele is DNA including ambiguity
265 codes, use seq_is_unambiguous_dna to check for alleles that do not
266 include ambiguity codes
267 Returntype : bool
268 Exceptions : none
269 Status : At Risk
270
271 =cut
272
273 sub seq_is_dna {
274 my $self = shift;
275
276 unless (defined $self->{seq_is_dna}) {
277 $self->{seq_is_dna} =
278 $self->{variation_feature_seq} =~ /$ALL_NUCLEOTIDES/ ? 1 : 0;
279 }
280
281 return $self->{seq_is_dna};
282 }
283
284 =head2 seq_length
285
286 Description: return the length of this allele sequence, this is better than
287 just using length($vfoa->feature_seq) because we check if the
288 sequence is valid DNA, and also look for allele strings like
289 "(3 BP INSERTION)" to determine the length
290 Returntype : int or undef if we cannot determine the length
291 Exceptions : none
292 Status : At Risk
293
294 =cut
295
296 sub seq_length {
297 my $self = shift;
298
299 my $seq = $self->variation_feature_seq;
300
301 if ($self->seq_is_dna) {
302 if ($seq eq '-') {
303 return 0;
304 }
305 else {
306 return length($seq);
307 }
308 }
309 elsif ($seq =~ /$SPECIFIED_LENGTH/) {
310 return $1;
311 }
312
313 return undef;
314 }
315
316 =head2 allele_string
317
318 Description: Return a '/' delimited string of the reference allele variation_feature_seq
319 and the variation_feature_seq of this allele
320 Returntype : string
321 Exceptions : none
322 Status : At Risk
323
324 =cut
325
326 sub allele_string {
327 my $self = shift;
328
329 my $ref = $self->variation_feature_overlap->get_reference_VariationFeatureOverlapAllele->variation_feature_seq;
330
331 # for the HGMDs and CNV probes where the alleles are artificially set to be
332 # the same, just return the reference sequence
333
334 if ($ref eq $self->variation_feature_seq) {
335 return $ref;
336 }
337 else {
338 return $ref.'/'.$self->variation_feature_seq;
339 }
340 }
341
342
343 sub _convert_to_sara {
344 my $self = shift;
345
346 my $oc = Bio::EnsEMBL::Variation::OverlapConsequence->new_fast({
347 'label' => 'SARA',
348 'description' => 'Same as reference allele',
349 'rank' => '99',
350 'display_term' => 'SARA',
351 'SO_term' => 'SARA',
352 });
353
354 $self->add_OverlapConsequence($oc);
355
356 return $self;
357 }
358
359 1;
360