Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Variation/LDFeatureContainer.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 # Ensembl module for Bio::EnsEMBL::Variation::LDFeatureContainer | |
22 # | |
23 # Copyright (c) 2004 Ensembl | |
24 # | |
25 | |
26 | |
27 =head1 NAME | |
28 | |
29 Bio::EnsEMBL::Variation::LDFeatureContainer - A container with all the ld values for quick access | |
30 | |
31 =head1 SYNOPSIS | |
32 | |
33 # LDFeature Container representing all the LD values for a certain contig | |
34 $ldContainer = Bio::EnsEMBL::Variation::LDFeatureContainer->new | |
35 (-name => NT_085213, | |
36 -ldContainer => $ldhash, | |
37 -variation_features => $vfhash); | |
38 | |
39 ... | |
40 | |
41 #get the d_prime values for a certain pair of variation_features | |
42 d_prime = $ldContainer->get_d_prime($variation_feature_1,$variation_feature_2); | |
43 #get the list of variation in the container | |
44 $variations = $ldContainer->get_variations(); | |
45 | |
46 ... | |
47 | |
48 =head1 DESCRIPTION | |
49 | |
50 This is a class representing the LD information for a certain region | |
51 from the ensembl-variation database. | |
52 See B<Bio::EnsEMBL::Variation::Variation>. | |
53 | |
54 =head1 METHODS | |
55 | |
56 =cut | |
57 | |
58 use strict; | |
59 use warnings; | |
60 | |
61 package Bio::EnsEMBL::Variation::LDFeatureContainer; | |
62 | |
63 use Bio::EnsEMBL::Storable; | |
64 use Bio::EnsEMBL::Utils::Exception qw(throw warning); | |
65 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
66 | |
67 use vars qw(@ISA); | |
68 | |
69 @ISA = qw(Bio::EnsEMBL::Storable); | |
70 | |
71 =head2 new | |
72 | |
73 Arg [-NAME] : | |
74 string - name of the feature object that the LD container refers to. (chr1,NT_08542,...) | |
75 | |
76 Arg [-LDCONTAINER] : | |
77 reference - hash containing all the LD information present, with the key | |
78 (variation_feature_1-variation_feature_2) to access the information | |
79 | |
80 Arg [-VARIATIONFEATURES] : | |
81 reference - hash containing all the Bio::EnsEMBL::Variation::VariationFeature objects that are present in the Container | |
82 | |
83 Example : | |
84 $ldContainer = Bio::EnsEMBL::Variation::LDFeatureContainer->new | |
85 (-name => 'chr1' | |
86 -ldContainer => {'variation_feature_1-variation_feature_2' => { 'sample_id_1' => | |
87 { 'd_prime' => 0.5, | |
88 'r2' => 0.421, | |
89 'sample_count' => 120 | |
90 }, | |
91 'sample_id_2' => | |
92 { 'd_prime' => 0.3, | |
93 'r2' => 0.321, | |
94 'sample_count' => 35 | |
95 } | |
96 } | |
97 | |
98 } | |
99 -variationFeatures => hash of Bio::EnsEMBL::Variation::VariationFeature | |
100 ); | |
101 | |
102 | |
103 Description: Constructor. Instantiates a new LDFeatureContainer object. | |
104 Returntype : Bio::EnsEMBL::Variation::LDFeatureContainer | |
105 Exceptions : none | |
106 Caller : general | |
107 Status : At Risk | |
108 | |
109 =cut | |
110 | |
111 sub new { | |
112 my $caller = shift; | |
113 my $class = ref($caller) || $caller; | |
114 | |
115 my ($ldContainer,$name,$variationFeatures) = rearrange([qw(LDCONTAINER NAME VARIATIONFEATURES)], @_); | |
116 if (defined($ldContainer) && ref($ldContainer ne 'HASH')){ | |
117 throw("Reference to a hash object expected as a LDContainer"); | |
118 } | |
119 $ldContainer ||= {}; | |
120 | |
121 return bless {'name' => $name, | |
122 'ldContainer' => $ldContainer, | |
123 'variationFeatures' => $variationFeatures}, $class; | |
124 } | |
125 | |
126 =head2 name | |
127 | |
128 Arg [1] : string $newval (optional) | |
129 The new value to set the name attribute to | |
130 Example : $name = $obj->name() | |
131 Description: Getter/Setter for the name attribute. | |
132 Returntype : string | |
133 Exceptions : none | |
134 Caller : general | |
135 Status : At Risk | |
136 | |
137 =cut | |
138 | |
139 sub name{ | |
140 my $self = shift; | |
141 return $self->{'name'} = shift if(@_); | |
142 return $self->{'name'}; | |
143 } | |
144 | |
145 | |
146 =head2 get_variations | |
147 | |
148 Example : $variations = $obj->get_variations() | |
149 Description : Gets all the variation objects contained in the LDFeatureContainer | |
150 ReturnType : list of Bio::EnsEMBL::Variation::Variation | |
151 Exceptions : none | |
152 Caller : general | |
153 Status : At Risk | |
154 | |
155 =cut | |
156 | |
157 sub get_variations{ | |
158 my $self = shift; | |
159 my @variations; | |
160 | |
161 foreach my $variation_feature (keys %{$self->{'variationFeatures'}}){ | |
162 push @variations,$self->{'variationFeatures'}->{$variation_feature}->variation(); | |
163 } | |
164 return \@variations; | |
165 } | |
166 | |
167 =head2 get_r_square | |
168 | |
169 Arg [1] : Bio::EnsEMBL::Variation::VariationFeature $variationFeature | |
170 Arg [2] : Bio::EnsEMBL::Variation::VariationFeature $variationFeature | |
171 Arg [3] : (optional) int - sample_id of population you want to get the r_square value | |
172 Example : $r_square = $obj->get_r_square($vf1,$vf2,$sample_id); | |
173 Description : Get the r_square value for a pair of variation features in the given population. If no population is provided, | |
174 return the r_square for the default population with more sample counts (in case more than 1) | |
175 ReturnType : float | |
176 Exceptions : throw on incorrect arguments | |
177 Caller : general | |
178 Status : At Risk | |
179 | |
180 =cut | |
181 | |
182 sub get_r_square{ | |
183 my $self = shift; | |
184 my $variation_feature_1 = shift; | |
185 my $variation_feature_2 = shift; | |
186 my $sample_id = shift; | |
187 | |
188 $sample_id ||= 0; #in case no population provided, to avoid warning in the hash | |
189 my $r_square; | |
190 my $key; | |
191 | |
192 #check if the default poppulation has been calculated, otherwise, find it | |
193 if (! defined $self->{'_default_population'}){ | |
194 $self->{'_default_population'} = $self->_get_major_population; | |
195 } | |
196 #first of all, check that both arguments have been properly provided | |
197 if (!ref($variation_feature_1) || !$variation_feature_1->isa('Bio::EnsEMBL::Variation::VariationFeature') || !ref($variation_feature_2) || !$variation_feature_2->isa('Bio::EnsEMBL::Variation::VariationFeature')){ | |
198 throw("Bio::EnsEMBL::Variation::VariationFeature arguments expected"); | |
199 } | |
200 else{ | |
201 #check if the ldContainer does not contain pairwise information for the variation features provided | |
202 if (!exists $self->{'ldContainer'}->{$variation_feature_1->dbID() . '-' . $variation_feature_2->dbID()} && !exists $self->{'ldContainer'}->{$variation_feature_2->dbID() . '-' . $variation_feature_1->dbID()}){ | |
203 warning("variation features have no pairwise ld information"); | |
204 } | |
205 else{ | |
206 #find out the key in the ld Hash: vf1 - vf2 or vf2 - vf1 | |
207 if (exists $self->{'ldContainer'}->{$variation_feature_1->dbID() . '-' . $variation_feature_2->dbID()}){ | |
208 $key = $variation_feature_1->dbID() . '-' . $variation_feature_2->dbID(); | |
209 } | |
210 else{ | |
211 $key = $variation_feature_2->dbID() . '-' . $variation_feature_1->dbID(); | |
212 } | |
213 #and finally, if population provided or the only population | |
214 if (exists $self->{'ldContainer'}->{$key}->{$sample_id}){ | |
215 $r_square = $self->{'ldContainer'}->{$key}->{$sample_id}->{'r2'} | |
216 } | |
217 else{ | |
218 if (exists $self->{'ldContainer'}->{$key}->{$self->{'_default_population'}}){ | |
219 #there was no population provided, return the r_square for the default population | |
220 $r_square = $self->{'ldContainer'}->{$key}->{$self->{'_default_population'}}->{'r2'}; | |
221 } | |
222 else{ | |
223 warning("variation features have no pairwise ld information for default population ", $self->{'_default_population'}); | |
224 } | |
225 } | |
226 } | |
227 | |
228 } | |
229 return $r_square; | |
230 } | |
231 | |
232 =head2 get_d_prime | |
233 | |
234 Arg [1] : Bio::EnsEMBL::Variation::VariationFeature $variationFeature | |
235 Arg [2] : Bio::EnsEMBL::Variation::VariationFeature $variationFeature | |
236 Arg [3] : (optional) int - sample_id of population you want to get the d_prime value | |
237 Example : $d_prime = $obj->get_d_prime($vf1,$vf2,$sample_id); | |
238 Description : Get the d_prime value for a pair of variation features for a known or unknown population. In case of an unknown population, the default | |
239 poulation is used | |
240 ReturnType : float | |
241 Exceptions : throw on incorrect arguments | |
242 Caller : general | |
243 Status : At Risk | |
244 | |
245 =cut | |
246 | |
247 sub get_d_prime{ | |
248 my $self = shift; | |
249 my $variation_feature_1 = shift; | |
250 my $variation_feature_2 = shift; | |
251 my $sample_id = shift; | |
252 | |
253 $sample_id ||= 0; #in case no population provided, to avoid warning in the hash | |
254 my $d_prime; | |
255 my $key; | |
256 | |
257 if (! defined $self->{'_default_population'}){ | |
258 $self->{'_default_population'} = $self->_get_major_population; | |
259 } | |
260 #first of all, check that both arguments have been properly provided | |
261 if (!ref($variation_feature_1) || !$variation_feature_1->isa('Bio::EnsEMBL::Variation::VariationFeature') || !ref($variation_feature_2) || !$variation_feature_2->isa('Bio::EnsEMBL::Variation::VariationFeature')){ | |
262 throw("Bio::EnsEMBL::Variation::VariationFeature arguments expected"); | |
263 } | |
264 else{ | |
265 #check if the ldContainer does not contain pairwise information for the variation features provided | |
266 if (!exists $self->{'ldContainer'}->{$variation_feature_1->dbID() . '-' . $variation_feature_2->dbID()} && !exists $self->{'ldContainer'}->{$variation_feature_2->dbID() . '-' . $variation_feature_1->dbID()}){ | |
267 warning("variation features have no pairwise ld information"); | |
268 } | |
269 else{ | |
270 #find out the key in the ld Hash: vf1 - vf2 or vf2 - vf1 | |
271 if (exists $self->{'ldContainer'}->{$variation_feature_1->dbID() . '-' . $variation_feature_2->dbID()}){ | |
272 $key = $variation_feature_1->dbID() . '-' . $variation_feature_2->dbID(); | |
273 } | |
274 else{ | |
275 $key = $variation_feature_2->dbID() . '-' . $variation_feature_1->dbID(); | |
276 } | |
277 #and finally, if population provided or the only population | |
278 if (exists $self->{'ldContainer'}->{$key}->{$sample_id}){ | |
279 $d_prime = $self->{'ldContainer'}->{$key}->{$sample_id}->{'d_prime'}; | |
280 } | |
281 else{ | |
282 if (exists $self->{'ldContainer'}->{$key}->{$self->{'_default_population'}}){ | |
283 #there was no population provided, return the r_square for the default population | |
284 $d_prime = $self->{'ldContainer'}->{$key}->{$self->{'_default_population'}}->{'d_prime'}; | |
285 } | |
286 else{ | |
287 warning("variation features have no pairwise ld information for default population ", $self->{'_default_population'}); | |
288 } | |
289 } | |
290 } | |
291 } | |
292 return $d_prime; | |
293 } | |
294 | |
295 | |
296 =head2 get_all_ld_values | |
297 | |
298 Example : $ld_values = $obj->get_all_ld_values(); | |
299 Description : Get all the information contained in the LDFeatureContainer object | |
300 ReturnType : reference to list of hashes [{variation1 => Bio::EnsEMBL::Variation::VariationFeature, variation2=>Bio::EnsEMBL::Variation::VariationFeature, d_prime=>d_prime, r2=>r2, sample_count=>sample_count, sample_id=>population_sample_id}] | |
301 Exceptions : no exceptions | |
302 Caller : general | |
303 Status : At Risk | |
304 | |
305 =cut | |
306 | |
307 | |
308 sub get_all_ld_values{ | |
309 my $self = shift; | |
310 my @ld_values; #contains ALL the ld values in the container | |
311 | |
312 #the keys in the ldContainer hash | |
313 my $variation_feature_id_1; | |
314 my $variation_feature_id_2; | |
315 | |
316 if (! defined $self->{'_default_population'}){ | |
317 $self->{'_default_population'} = $self->_get_major_population; | |
318 } | |
319 foreach my $key_ld (keys %{$self->{'ldContainer'}}){ | |
320 my %ld_value; #contains a single ld value in the container {variation_feature variation_feature d_prime r2 snp_distance_count} | |
321 ($variation_feature_id_1, $variation_feature_id_2) = split /-/,$key_ld; #get the variation_features ids | |
322 if (exists $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}){ | |
323 #add the information to the ld_value hash | |
324 $ld_value{'variation1'} = $self->{'variationFeatures'}->{$variation_feature_id_1}; | |
325 $ld_value{'variation2'} = $self->{'variationFeatures'}->{$variation_feature_id_2}; | |
326 $ld_value{'d_prime'} = $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}->{'d_prime'}; | |
327 $ld_value{'r2'} = $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}->{'r2'}; | |
328 $ld_value{'sample_count'} = $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}->{'sample_count'}; | |
329 $ld_value{'sample_id'} = $self->{'_default_population'}; | |
330 push @ld_values, \%ld_value; | |
331 } | |
332 } | |
333 return \@ld_values; | |
334 } | |
335 | |
336 | |
337 =head2 get_all_r_square_values | |
338 | |
339 Example : $r_square_values = $obj->get_all_r_square_values(); | |
340 Description : Get all r_square values contained in the LDFeatureContainer object | |
341 ReturnType : reference to list of [{variation1=>Bio::EnsEMBL::Variation::VariationFeature, variation2=>Bio::EnsEMBL::Variation::VariationFeature, r2=>r2, sample_id=>population_sample_id}] | |
342 Exceptions : no exceptions | |
343 Caller : general | |
344 Status : At Risk | |
345 | |
346 =cut | |
347 | |
348 | |
349 sub get_all_r_square_values{ | |
350 my $self = shift; | |
351 my @r_squares; #contains ALL the r2 values in the container | |
352 | |
353 #the keys in the ldContainer hash | |
354 my $variation_feature_id_1; | |
355 my $variation_feature_id_2; | |
356 | |
357 if (! defined $self->{'_default_population'}){ | |
358 $self->{'_default_population'} = $self->_get_major_population; | |
359 } | |
360 foreach my $key_ld (keys %{$self->{'ldContainer'}}){ | |
361 my %r_square; #contains a single r2 value in the container {variation_feature r2 population_id} | |
362 ($variation_feature_id_1, $variation_feature_id_2) = split /-/,$key_ld; #get the variation_features ids | |
363 if (exists $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}){ | |
364 $r_square{'variation1'} = $self->{'variationFeatures'}->{$variation_feature_id_1}; | |
365 $r_square{'variation2'} = $self->{'variationFeatures'}->{$variation_feature_id_2}; | |
366 $r_square{'r2'} = $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}->{'r2'}; | |
367 $r_square{'sample_id'} = $self->{'_default_population'}; | |
368 #and add all the ld information to the final list | |
369 push @r_squares, \%r_square; | |
370 } | |
371 } | |
372 return \@r_squares; | |
373 } | |
374 | |
375 =head2 get_all_d_prime_values | |
376 | |
377 Example : $d_prime_values = $obj->get_all_d_prime_values(); | |
378 Description : Get all d_prime values contained in the LDFeatureContainer object | |
379 ReturnType : reference to list of [{variation1=>Bio::EnsEMBL::Variation::VariationFeature, variation2=>Bio::EnsEMBL::Variation::VariationFeature, d_prime=>d_prime, sample_id=>population_sample_id}] | |
380 Exceptions : no exceptions | |
381 Caller : general | |
382 Status : At Risk | |
383 | |
384 =cut | |
385 | |
386 | |
387 sub get_all_d_prime_values{ | |
388 my $self = shift; | |
389 my @d_primes; #contains ALL the d_prime values in the container | |
390 | |
391 #the keys in the ldContainer hash | |
392 my $variation_feature_id_1; | |
393 my $variation_feature_id_2; | |
394 | |
395 if (! defined $self->{'_default_population'}){ | |
396 $self->{'_default_population'} = $self->_get_major_population; | |
397 } | |
398 foreach my $key_ld (keys %{$self->{'ldContainer'}}){ | |
399 my %d_prime; #contains a single d_prime value in the container {variation_feature d_prime population_id} | |
400 ($variation_feature_id_1, $variation_feature_id_2) = split /-/,$key_ld; #get the variation_features ids | |
401 #add the information to the ld_value array | |
402 if (exists $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}){ | |
403 $d_prime{'variation1'} = $self->{'variationFeatures'}->{$variation_feature_id_1}; | |
404 $d_prime{'variation2'} = $self->{'variationFeatures'}->{$variation_feature_id_2}; | |
405 $d_prime{'d_prime'} = $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}->{'d_prime'}; | |
406 $d_prime{'sample_id'} = $self->{'_default_population'}; | |
407 #and add all the ld information to the final list if exists the value | |
408 push @d_primes, \%d_prime; | |
409 } | |
410 | |
411 } | |
412 return \@d_primes; | |
413 } | |
414 | |
415 =head2 get_all_populations | |
416 | |
417 Arg [1] : (optional) Bio::EnsEMBL::Variation::VariationFeature $variationFeature | |
418 Arg [2] : (optional) Bio::EnsEMBL::Variation::VariationFeature $variationFeature | |
419 Example : $populations = $obj->get_all_populations($vf1,$vf2); | |
420 Description : If no arguments provided, returns ALL the populations present in the container. When 2 variation features provided, returns the | |
421 population/populations where these variation features occurs | |
422 ReturnType : ref to an array of int | |
423 Exceptions : throw on incorrect arguments | |
424 Caller : general | |
425 Status : At Risk | |
426 | |
427 =cut | |
428 | |
429 sub get_all_populations{ | |
430 my $self = shift; | |
431 my $variation_feature_1 = shift; | |
432 my $variation_feature_2 = shift; | |
433 my %populations; | |
434 my @populations; | |
435 my $key; | |
436 | |
437 #if no variation provided, return ALL the populations in the container | |
438 if (! defined($variation_feature_1) && ! defined($variation_feature_2)){ | |
439 foreach my $key (keys %{$self->{'ldContainer'}}){ | |
440 map {$populations{$_}++} keys %{$self->{'ldContainer'}->{$key}}; | |
441 } | |
442 @populations = keys %populations; | |
443 } | |
444 else{ | |
445 #first, check if both arguments have been properly provided | |
446 if (!ref($variation_feature_1) || !$variation_feature_1->isa('Bio::EnsEMBL::Variation::VariationFeature') || !ref($variation_feature_2) || !$variation_feature_2->isa('Bio::EnsEMBL::Variation::VariationFeature')){ | |
447 throw("Bio::EnsEMBL::Variation::VariationFeature arguments expected"); | |
448 } | |
449 #if the variation_features are correct, return the list of populations | |
450 else{ | |
451 #find out the key in the ld Hash: vf1 - vf2 or vf2 - vf1 | |
452 if (exists $self->{'ldContainer'}->{$variation_feature_1->dbID() . '-' . $variation_feature_2->dbID()}){ | |
453 $key = $variation_feature_1->dbID() . '-' . $variation_feature_2->dbID(); | |
454 } | |
455 else{ | |
456 $key = $variation_feature_2->dbID() . '-' . $variation_feature_1->dbID(); | |
457 } | |
458 @populations = keys %{$self->{'ldContainer'}->{$key}}; | |
459 } | |
460 } | |
461 | |
462 return \@populations; | |
463 } | |
464 | |
465 #returns from the container the population_id with the maximum number of pairwise_ld | |
466 sub _get_populations { | |
467 my $self = shift; | |
468 my %populations; | |
469 | |
470 foreach my $key (keys %{$self->{'ldContainer'}}){ | |
471 map {$populations{$_}++} keys %{$self->{'ldContainer'}->{$key}}; | |
472 } | |
473 my @sorted_populations = sort{$populations{$b} <=> $populations{$a}} keys %populations; | |
474 return @sorted_populations; | |
475 } | |
476 | |
477 sub _get_major_population { | |
478 my( $pop ) = $_[0]->_get_populations; | |
479 return $pop; | |
480 } | |
481 1; | |
482 |