Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Variation/VariationFeatureOverlapAllele.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 use Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele; | |
28 | |
29 my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new( | |
30 -variation_feature_overlap => $vfo, | |
31 -variation_feature_seq => 'A', | |
32 -is_reference => 0, | |
33 ); | |
34 | |
35 print "sequence with respect to the feature: ", $vfoa->feature_seq, "\n"; | |
36 print "sequence with respect to the variation feature: ", $vfoa->variation_feature_seq, "\n"; | |
37 print "consequence SO terms: ", (join ",", map { $_->SO_term } @{ $vfoa->get_all_OverlapConsequences }), "\n"; | |
38 | |
39 =head1 DESCRIPTION | |
40 | |
41 A VariationFeatureOverlapAllele object represents a single allele of a | |
42 VariationFeatureOverlap. It is the super-class of various feature-specific allele | |
43 classes such as TranscriptVariationAllele and RegulatoryFeatureVariationAllele and | |
44 contains methods not specific to any particular feature type. Ordinarily you will | |
45 not create these objects yourself, but instead you would create e.g. a | |
46 TranscriptVariation object which will then create VariationFeatureOverlapAlleles | |
47 based on the allele string of the associated VariationFeature. | |
48 | |
49 =cut | |
50 | |
51 package Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele; | |
52 | |
53 use strict; | |
54 use warnings; | |
55 | |
56 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
57 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref); | |
58 use Bio::EnsEMBL::Utils::Exception qw(throw); | |
59 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp); | |
60 | |
61 use base qw(Bio::EnsEMBL::Variation::BaseVariationFeatureOverlapAllele); | |
62 | |
# Matches a string consisting only of one or more of A, C, G, T or '-'
# (case-insensitive). Used by seq_is_unambiguous_dna.
our $UNAMBIGUOUS_NUCLEOTIDES = qr/^[ACGT-]+$/i;

# Matches a string consisting only of one or more of the characters
# A, C, G, T, U, M, R, W, S, Y, K, V, H, D, B, X, N or '-' (case-insensitive).
# Used by seq_is_dna.
our $ALL_NUCLEOTIDES = qr/^[ACGTUMRWSYKVHDBXN-]+$/i;

# Matches allele descriptions such as "3 BP INSERTION" anywhere in a string
# (unanchored, case-insensitive). Capture 1 is the run of digits and capture 2
# is INSERTION or DELETION. seq_length returns capture 1.
our $SPECIFIED_LENGTH = qr /(\d+) BP (INSERTION|DELETION)/i;
68 | |
69 =head2 new | |
70 | |
71 Arg [-VARIATION_FEATURE_OVERLAP] : | |
72 The Bio::EnsEMBL::Variation::VariationFeatureOverlap with which this allele is | |
73 associated | |
74 | |
75 Arg [-VARIATION_FEATURE_SEQ] : | |
76 The allele sequence with respect to the associated VariationFeature | |
77 | |
78 Arg [-IS_REFERENCE] : | |
79 A flag indicating if this allele is the reference allele or not | |
80 | |
81 Example : | |
82 my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new( | |
83 -variation_feature_overlap => $vfo, | |
84 -variation_feature_seq => 'A', | |
85 -is_reference => 0 | |
86 ); | |
87 | |
88 Description: Constructs a new VariationFeatureOverlapAllele instance given a | |
89 VariationFeatureOverlap and the sequence of the allele | |
90 Returntype : A new Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance | |
91 Exceptions : throws unless both VARIATION_FEATURE_OVERLAP and VARIATION_FEATURE_SEQ | |
92 are supplied | |
93 Status : At Risk | |
94 | |
95 =cut | |
96 | |
# Construct a new allele object from a list of named arguments.
#
# A '-variation_feature_overlap' argument (matched case-insensitively) is
# renamed to '-base_variation_feature_overlap' before all arguments are handed
# to the superclass constructor. The resulting overlap object is then checked
# with assert_ref against Bio::EnsEMBL::Variation::VariationFeatureOverlap,
# and the VARIATION_FEATURE_SEQ argument is stored on the object.
#
# Throws if no (true) VARIATION_FEATURE_SEQ argument was supplied.
sub new {
    my $class = shift;

    my %args = @_;

    # swap a '-variation_feature_overlap' argument for a
    # '-base_variation_feature_overlap' one for the superclass; 'keys' builds
    # its list before the loop starts, so renaming entries here is safe

    for my $arg (keys %args) {
        if (lc($arg) eq '-variation_feature_overlap') {
            $args{'-base_variation_feature_overlap'} = delete $args{$arg};
        }
    }

    my $self = $class->SUPER::new(%args);

    assert_ref($self->base_variation_feature_overlap, 'Bio::EnsEMBL::Variation::VariationFeatureOverlap');

    my ($variation_feature_seq) = rearrange([qw(VARIATION_FEATURE_SEQ)], %args);

    # the message must be joined with '.' (string concatenation); '+' is
    # numeric addition in Perl and would replace the whole message with a number

    throw("Allele sequence required (variation ".$self->variation_feature->variation_name.")")
        unless $variation_feature_seq;

    $self->{variation_feature_seq} = $variation_feature_seq;

    return $self;
}
129 | |
# Constructor variant that takes a single hashref.
#
# If the hashref holds a true 'variation_feature_overlap' entry, that entry is
# moved to the 'base_variation_feature_overlap' key (the caller's hashref is
# modified in place). The hashref is then passed to the superclass new_fast,
# whose return value is returned unchanged.
sub new_fast {
    my $class   = shift;
    my $hashref = shift;

    # rename the key to its 'base_' form before delegating
    $hashref->{base_variation_feature_overlap} = delete $hashref->{variation_feature_overlap}
        if $hashref->{variation_feature_overlap};

    return $class->SUPER::new_fast($hashref);
}
143 | |
144 =head2 dbID | |
145 | |
146 Description: Get/set the dbID of this VariationFeatureOverlapAllele | |
147 Returntype : integer | |
148 Exceptions : none | |
149 Status : At Risk | |
150 | |
151 =cut | |
152 | |
# Get/set the dbID stored on this object.
#
# A defined argument replaces the stored value; an absent or undef argument
# leaves it untouched. Always returns the (possibly updated) stored value.
sub dbID {
    my $self   = shift;
    my $new_id = shift;

    if (defined $new_id) {
        $self->{dbID} = $new_id;
    }

    return $self->{dbID};
}
158 | |
159 =head2 variation_feature_overlap | |
160 | |
161 Description: Get/set the associated VariationFeatureOverlap | |
162 Returntype : Bio::EnsEMBL::Variation::VariationFeatureOverlap | |
163 Exceptions : throws if the argument is the wrong type | |
164 Status : At Risk | |
165 | |
166 =cut | |
167 | |
# Get/set the associated VariationFeatureOverlap.
#
# A true argument is first checked with assert_ref against
# Bio::EnsEMBL::Variation::VariationFeatureOverlap. The argument (which may be
# undef) is then forwarded to base_variation_feature_overlap, and that
# method's return value is returned.
sub variation_feature_overlap {
    my $self = shift;
    my $vfo  = shift;

    assert_ref($vfo, 'Bio::EnsEMBL::Variation::VariationFeatureOverlap') if $vfo;

    return $self->base_variation_feature_overlap($vfo);
}
177 | |
178 =head2 variation_feature | |
179 | |
180 Description: Get the associated VariationFeature | |
181 Returntype : Bio::EnsEMBL::Variation::VariationFeature | |
182 Exceptions : none | |
183 Status : At Risk | |
184 | |
185 =cut | |
186 | |
# Return the variation feature of the associated VariationFeatureOverlap.
sub variation_feature {
    my ($self) = @_;

    my $vfo = $self->variation_feature_overlap;

    return $vfo->variation_feature;
}
191 | |
192 =head2 feature_seq | |
193 | |
194 Description: Get the sequence of this allele relative to the associated Feature. | |
195 This will be the same as the variation_feature_seq when the associated | |
196 VariationFeature is on the same strand as the Feature, or the reverse | |
197 complement when the strands differ. | |
198 Returntype : string | |
199 Exceptions : none | |
200 Status : At Risk | |
201 | |
202 =cut | |
203 | |
# Return the allele sequence relative to the associated feature.
#
# The result is cached in $self->{feature_seq}; a true cached value is
# returned immediately. Otherwise, when the variation feature and the feature
# report different strands and seq_is_dna is true, the cached value is the
# reverse complement of variation_feature_seq; in every other case it is a
# copy of the stored variation_feature_seq.
sub feature_seq {
    my ($self) = @_;

    return $self->{feature_seq} if $self->{feature_seq};

    my $strands_differ = $self->variation_feature->strand != $self->feature->strand;

    # seq_is_dna is only consulted when the strands differ
    if ($strands_differ && $self->seq_is_dna) {
        my $flipped = $self->variation_feature_seq;
        reverse_comp(\$flipped);    # modifies $flipped in place
        $self->{feature_seq} = $flipped;
    }
    else {
        $self->{feature_seq} = $self->{variation_feature_seq};
    }

    return $self->{feature_seq};
}
223 | |
224 =head2 variation_feature_seq | |
225 | |
226 Args [1] : The allele sequence relative to the VariationFeature | |
227 Description: Get/set the sequence of this allele relative to the associated VariationFeature. | |
228 Returntype : string | |
229 Exceptions : none | |
230 Status : At Risk | |
231 | |
232 =cut | |
233 | |
# Get/set the allele sequence relative to the variation feature.
#
# Only a true argument replaces the stored sequence; undef, '' and 0 are
# ignored. Always returns the (possibly updated) stored sequence.
sub variation_feature_seq {
    my $self    = shift;
    my $new_seq = shift;

    if ($new_seq) {
        $self->{variation_feature_seq} = $new_seq;
    }

    return $self->{variation_feature_seq};
}
240 | |
241 =head2 seq_is_unambiguous_dna | |
242 | |
243 Description: identify if the sequence of this allele is unambiguous DNA | |
244 i.e. if we can meaningfully translate it | |
245 Returntype : bool | |
246 Exceptions : none | |
247 Status : At Risk | |
248 | |
249 =cut | |
250 | |
# Return 1 if the stored variation_feature_seq matches
# $UNAMBIGUOUS_NUCLEOTIDES, otherwise 0.
#
# The answer is computed on first use and cached in
# $self->{seq_is_unambiguous_dna}; later calls return the cached value.
sub seq_is_unambiguous_dna {
    my ($self) = @_;

    if (!defined $self->{seq_is_unambiguous_dna}) {
        my $matched = $self->{variation_feature_seq} =~ /$UNAMBIGUOUS_NUCLEOTIDES/;
        $self->{seq_is_unambiguous_dna} = $matched ? 1 : 0;
    }

    return $self->{seq_is_unambiguous_dna};
}
261 | |
262 =head2 seq_is_dna | |
263 | |
264 Description: identify if the sequence of this allele is DNA including ambiguity | |
265 codes, use seq_is_unambiguous_dna to check for alleles that do not | |
266 include ambiguity codes | |
267 Returntype : bool | |
268 Exceptions : none | |
269 Status : At Risk | |
270 | |
271 =cut | |
272 | |
# Return 1 if the stored variation_feature_seq matches $ALL_NUCLEOTIDES,
# otherwise 0.
#
# The answer is computed on first use and cached in $self->{seq_is_dna};
# later calls return the cached value.
sub seq_is_dna {
    my ($self) = @_;

    if (!defined $self->{seq_is_dna}) {
        my $matched = $self->{variation_feature_seq} =~ /$ALL_NUCLEOTIDES/;
        $self->{seq_is_dna} = $matched ? 1 : 0;
    }

    return $self->{seq_is_dna};
}
283 | |
284 =head2 seq_length | |
285 | |
286 Description: return the length of this allele sequence, this is better than | |
287 just using length($vfoa->feature_seq) because we check if the | |
288 sequence is valid DNA, and also look for allele strings like | |
289 "(3 BP INSERTION)" to determine the length | |
290 Returntype : int or undef if we cannot determine the length | |
291 Exceptions : none | |
292 Status : At Risk | |
293 | |
294 =cut | |
295 | |
# Return the length of this allele's sequence.
#
# When seq_is_dna is true: 0 for the allele '-', otherwise the string length
# of variation_feature_seq. Otherwise, when the sequence matches
# $SPECIFIED_LENGTH (e.g. "3 BP INSERTION"), the captured number is returned.
# In every other case undef is returned.
sub seq_length {
    my ($self) = @_;

    my $allele = $self->variation_feature_seq;

    if ($self->seq_is_dna) {
        return $allele eq '-' ? 0 : length($allele);
    }

    return $1 if $allele =~ /$SPECIFIED_LENGTH/;

    # explicit undef (rather than a bare return) so that list-context callers
    # still receive exactly one element
    return undef;
}
315 | |
316 =head2 allele_string | |
317 | |
318 Description: Return a '/' delimited string of the reference allele variation_feature_seq | |
319 and the variation_feature_seq of this allele | |
320 Returntype : string | |
321 Exceptions : none | |
322 Status : At Risk | |
323 | |
324 =cut | |
325 | |
# Return the reference allele's variation_feature_seq and this allele's
# variation_feature_seq joined by '/'.
#
# When both sequences are equal (the original comment cites HGMDs and CNV
# probes, whose alleles are artificially set to be the same) only the
# reference sequence is returned.
sub allele_string {
    my ($self) = @_;

    my $ref_allele = $self->variation_feature_overlap
                          ->get_reference_VariationFeatureOverlapAllele
                          ->variation_feature_seq;

    my $own_seq = $self->variation_feature_seq;

    return $ref_allele if $ref_allele eq $own_seq;

    return $ref_allele.'/'.$own_seq;
}
341 | |
342 | |
# Add a 'SARA' ("Same as reference allele") OverlapConsequence to this allele
# via add_OverlapConsequence, and return $self.
sub _convert_to_sara {
    my ($self) = @_;

    my %sara_fields = (
        'label'        => 'SARA',
        'description'  => 'Same as reference allele',
        'rank'         => '99',
        'display_term' => 'SARA',
        'SO_term'      => 'SARA',
    );

    my $consequence = Bio::EnsEMBL::Variation::OverlapConsequence->new_fast(\%sara_fields);

    $self->add_OverlapConsequence($consequence);

    return $self;
}
358 | |
359 1; | |
360 |