0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 # Ensembl module for Bio::EnsEMBL::Variation::LDFeatureContainer
|
|
22 #
|
|
23 # Copyright (c) 2004 Ensembl
|
|
24 #
|
|
25
|
|
26
|
|
27 =head1 NAME
|
|
28
|
|
29 Bio::EnsEMBL::Variation::LDFeatureContainer - A container with all the ld values for quick access
|
|
30
|
|
31 =head1 SYNOPSIS
|
|
32
|
|
33 # LDFeature Container representing all the LD values for a certain contig
|
|
34 $ldContainer = Bio::EnsEMBL::Variation::LDFeatureContainer->new
|
|
35 (-name => NT_085213,
|
|
36 -ldContainer => $ldhash,
|
|
37 -variation_features => $vfhash);
|
|
38
|
|
39 ...
|
|
40
|
|
41 #get the d_prime values for a certain pair of variation_features
|
|
42 d_prime = $ldContainer->get_d_prime($variation_feature_1,$variation_feature_2);
|
|
43 #get the list of variation in the container
|
|
44 $variations = $ldContainer->get_variations();
|
|
45
|
|
46 ...
|
|
47
|
|
48 =head1 DESCRIPTION
|
|
49
|
|
50 This is a class representing the LD information for a certain region
|
|
51 from the ensembl-variation database.
|
|
52 See B<Bio::EnsEMBL::Variation::Variation>.
|
|
53
|
|
54 =head1 METHODS
|
|
55
|
|
56 =cut
|
|
57
|
|
58 use strict;
|
|
59 use warnings;
|
|
60
|
|
61 package Bio::EnsEMBL::Variation::LDFeatureContainer;
|
|
62
|
|
63 use Bio::EnsEMBL::Storable;
|
|
64 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
|
|
65 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
66
|
|
67 use vars qw(@ISA);
|
|
68
|
|
69 @ISA = qw(Bio::EnsEMBL::Storable);
|
|
70
|
|
71 =head2 new
|
|
72
|
|
73 Arg [-NAME] :
|
|
74 string - name of the feature object that the LD container refers to. (chr1,NT_08542,...)
|
|
75
|
|
76 Arg [-LDCONTAINER] :
|
|
77 reference - hash containing all the LD information present, with the key
|
|
78 (variation_feature_1-variation_feature_2) to access the information
|
|
79
|
|
80 Arg [-VARIATIONFEATURES] :
|
|
81 reference - hash containing all the Bio::EnsEMBL::Variation::VariationFeature objects that are present in the Container
|
|
82
|
|
83 Example :
|
|
84 $ldContainer = Bio::EnsEMBL::Variation::LDFeatureContainer->new
|
|
85 (-name => 'chr1'
|
|
86 -ldContainer => {'variation_feature_1-variation_feature_2' => { 'sample_id_1' =>
|
|
87 { 'd_prime' => 0.5,
|
|
88 'r2' => 0.421,
|
|
89 'sample_count' => 120
|
|
90 },
|
|
91 'sample_id_2' =>
|
|
92 { 'd_prime' => 0.3,
|
|
93 'r2' => 0.321,
|
|
94 'sample_count' => 35
|
|
95 }
|
|
96 }
|
|
97
|
|
98 }
|
|
99 -variationFeatures => hash of Bio::EnsEMBL::Variation::VariationFeature
|
|
100 );
|
|
101
|
|
102
|
|
103 Description: Constructor. Instantiates a new LDFeatureContainer object.
|
|
104 Returntype : Bio::EnsEMBL::Variation::LDFeatureContainer
|
|
105 Exceptions : none
|
|
106 Caller : general
|
|
107 Status : At Risk
|
|
108
|
|
109 =cut
|
|
110
|
|
111 sub new {
|
|
112 my $caller = shift;
|
|
113 my $class = ref($caller) || $caller;
|
|
114
|
|
115 my ($ldContainer,$name,$variationFeatures) = rearrange([qw(LDCONTAINER NAME VARIATIONFEATURES)], @_);
|
|
116 if (defined($ldContainer) && ref($ldContainer ne 'HASH')){
|
|
117 throw("Reference to a hash object expected as a LDContainer");
|
|
118 }
|
|
119 $ldContainer ||= {};
|
|
120
|
|
121 return bless {'name' => $name,
|
|
122 'ldContainer' => $ldContainer,
|
|
123 'variationFeatures' => $variationFeatures}, $class;
|
|
124 }
|
|
125
|
|
126 =head2 name
|
|
127
|
|
128 Arg [1] : string $newval (optional)
|
|
129 The new value to set the name attribute to
|
|
130 Example : $name = $obj->name()
|
|
131 Description: Getter/Setter for the name attribute.
|
|
132 Returntype : string
|
|
133 Exceptions : none
|
|
134 Caller : general
|
|
135 Status : At Risk
|
|
136
|
|
137 =cut
|
|
138
|
|
139 sub name{
|
|
140 my $self = shift;
|
|
141 return $self->{'name'} = shift if(@_);
|
|
142 return $self->{'name'};
|
|
143 }
|
|
144
|
|
145
|
|
146 =head2 get_variations
|
|
147
|
|
148 Example : $variations = $obj->get_variations()
|
|
149 Description : Gets all the variation objects contained in the LDFeatureContainer
|
|
150 ReturnType : list of Bio::EnsEMBL::Variation::Variation
|
|
151 Exceptions : none
|
|
152 Caller : general
|
|
153 Status : At Risk
|
|
154
|
|
155 =cut
|
|
156
|
|
157 sub get_variations{
|
|
158 my $self = shift;
|
|
159 my @variations;
|
|
160
|
|
161 foreach my $variation_feature (keys %{$self->{'variationFeatures'}}){
|
|
162 push @variations,$self->{'variationFeatures'}->{$variation_feature}->variation();
|
|
163 }
|
|
164 return \@variations;
|
|
165 }
|
|
166
|
|
167 =head2 get_r_square
|
|
168
|
|
169 Arg [1] : Bio::EnsEMBL::Variation::VariationFeature $variationFeature
|
|
170 Arg [2] : Bio::EnsEMBL::Variation::VariationFeature $variationFeature
|
|
171 Arg [3] : (optional) int - sample_id of population you want to get the r_square value
|
|
172 Example : $r_square = $obj->get_r_square($vf1,$vf2,$sample_id);
|
|
173 Description : Get the r_square value for a pair of variation features in the given population. If no population is provided,
|
|
174 return the r_square for the default population with more sample counts (in case more than 1)
|
|
175 ReturnType : float
|
|
176 Exceptions : throw on incorrect arguments
|
|
177 Caller : general
|
|
178 Status : At Risk
|
|
179
|
|
180 =cut
|
|
181
|
|
182 sub get_r_square{
|
|
183 my $self = shift;
|
|
184 my $variation_feature_1 = shift;
|
|
185 my $variation_feature_2 = shift;
|
|
186 my $sample_id = shift;
|
|
187
|
|
188 $sample_id ||= 0; #in case no population provided, to avoid warning in the hash
|
|
189 my $r_square;
|
|
190 my $key;
|
|
191
|
|
192 #check if the default poppulation has been calculated, otherwise, find it
|
|
193 if (! defined $self->{'_default_population'}){
|
|
194 $self->{'_default_population'} = $self->_get_major_population;
|
|
195 }
|
|
196 #first of all, check that both arguments have been properly provided
|
|
197 if (!ref($variation_feature_1) || !$variation_feature_1->isa('Bio::EnsEMBL::Variation::VariationFeature') || !ref($variation_feature_2) || !$variation_feature_2->isa('Bio::EnsEMBL::Variation::VariationFeature')){
|
|
198 throw("Bio::EnsEMBL::Variation::VariationFeature arguments expected");
|
|
199 }
|
|
200 else{
|
|
201 #check if the ldContainer does not contain pairwise information for the variation features provided
|
|
202 if (!exists $self->{'ldContainer'}->{$variation_feature_1->dbID() . '-' . $variation_feature_2->dbID()} && !exists $self->{'ldContainer'}->{$variation_feature_2->dbID() . '-' . $variation_feature_1->dbID()}){
|
|
203 warning("variation features have no pairwise ld information");
|
|
204 }
|
|
205 else{
|
|
206 #find out the key in the ld Hash: vf1 - vf2 or vf2 - vf1
|
|
207 if (exists $self->{'ldContainer'}->{$variation_feature_1->dbID() . '-' . $variation_feature_2->dbID()}){
|
|
208 $key = $variation_feature_1->dbID() . '-' . $variation_feature_2->dbID();
|
|
209 }
|
|
210 else{
|
|
211 $key = $variation_feature_2->dbID() . '-' . $variation_feature_1->dbID();
|
|
212 }
|
|
213 #and finally, if population provided or the only population
|
|
214 if (exists $self->{'ldContainer'}->{$key}->{$sample_id}){
|
|
215 $r_square = $self->{'ldContainer'}->{$key}->{$sample_id}->{'r2'}
|
|
216 }
|
|
217 else{
|
|
218 if (exists $self->{'ldContainer'}->{$key}->{$self->{'_default_population'}}){
|
|
219 #there was no population provided, return the r_square for the default population
|
|
220 $r_square = $self->{'ldContainer'}->{$key}->{$self->{'_default_population'}}->{'r2'};
|
|
221 }
|
|
222 else{
|
|
223 warning("variation features have no pairwise ld information for default population ", $self->{'_default_population'});
|
|
224 }
|
|
225 }
|
|
226 }
|
|
227
|
|
228 }
|
|
229 return $r_square;
|
|
230 }
|
|
231
|
|
232 =head2 get_d_prime
|
|
233
|
|
234 Arg [1] : Bio::EnsEMBL::Variation::VariationFeature $variationFeature
|
|
235 Arg [2] : Bio::EnsEMBL::Variation::VariationFeature $variationFeature
|
|
236 Arg [3] : (optional) int - sample_id of population you want to get the d_prime value
|
|
237 Example : $d_prime = $obj->get_d_prime($vf1,$vf2,$sample_id);
|
|
238 Description : Get the d_prime value for a pair of variation features for a known or unknown population. In case of an unknown population, the default
|
|
239 poulation is used
|
|
240 ReturnType : float
|
|
241 Exceptions : throw on incorrect arguments
|
|
242 Caller : general
|
|
243 Status : At Risk
|
|
244
|
|
245 =cut
|
|
246
|
|
247 sub get_d_prime{
|
|
248 my $self = shift;
|
|
249 my $variation_feature_1 = shift;
|
|
250 my $variation_feature_2 = shift;
|
|
251 my $sample_id = shift;
|
|
252
|
|
253 $sample_id ||= 0; #in case no population provided, to avoid warning in the hash
|
|
254 my $d_prime;
|
|
255 my $key;
|
|
256
|
|
257 if (! defined $self->{'_default_population'}){
|
|
258 $self->{'_default_population'} = $self->_get_major_population;
|
|
259 }
|
|
260 #first of all, check that both arguments have been properly provided
|
|
261 if (!ref($variation_feature_1) || !$variation_feature_1->isa('Bio::EnsEMBL::Variation::VariationFeature') || !ref($variation_feature_2) || !$variation_feature_2->isa('Bio::EnsEMBL::Variation::VariationFeature')){
|
|
262 throw("Bio::EnsEMBL::Variation::VariationFeature arguments expected");
|
|
263 }
|
|
264 else{
|
|
265 #check if the ldContainer does not contain pairwise information for the variation features provided
|
|
266 if (!exists $self->{'ldContainer'}->{$variation_feature_1->dbID() . '-' . $variation_feature_2->dbID()} && !exists $self->{'ldContainer'}->{$variation_feature_2->dbID() . '-' . $variation_feature_1->dbID()}){
|
|
267 warning("variation features have no pairwise ld information");
|
|
268 }
|
|
269 else{
|
|
270 #find out the key in the ld Hash: vf1 - vf2 or vf2 - vf1
|
|
271 if (exists $self->{'ldContainer'}->{$variation_feature_1->dbID() . '-' . $variation_feature_2->dbID()}){
|
|
272 $key = $variation_feature_1->dbID() . '-' . $variation_feature_2->dbID();
|
|
273 }
|
|
274 else{
|
|
275 $key = $variation_feature_2->dbID() . '-' . $variation_feature_1->dbID();
|
|
276 }
|
|
277 #and finally, if population provided or the only population
|
|
278 if (exists $self->{'ldContainer'}->{$key}->{$sample_id}){
|
|
279 $d_prime = $self->{'ldContainer'}->{$key}->{$sample_id}->{'d_prime'};
|
|
280 }
|
|
281 else{
|
|
282 if (exists $self->{'ldContainer'}->{$key}->{$self->{'_default_population'}}){
|
|
283 #there was no population provided, return the r_square for the default population
|
|
284 $d_prime = $self->{'ldContainer'}->{$key}->{$self->{'_default_population'}}->{'d_prime'};
|
|
285 }
|
|
286 else{
|
|
287 warning("variation features have no pairwise ld information for default population ", $self->{'_default_population'});
|
|
288 }
|
|
289 }
|
|
290 }
|
|
291 }
|
|
292 return $d_prime;
|
|
293 }
|
|
294
|
|
295
|
|
296 =head2 get_all_ld_values
|
|
297
|
|
298 Example : $ld_values = $obj->get_all_ld_values();
|
|
299 Description : Get all the information contained in the LDFeatureContainer object
|
|
300 ReturnType : reference to list of hashes [{variation1 => Bio::EnsEMBL::Variation::VariationFeature, variation2=>Bio::EnsEMBL::Variation::VariationFeature, d_prime=>d_prime, r2=>r2, sample_count=>sample_count, sample_id=>population_sample_id}]
|
|
301 Exceptions : no exceptions
|
|
302 Caller : general
|
|
303 Status : At Risk
|
|
304
|
|
305 =cut
|
|
306
|
|
307
|
|
308 sub get_all_ld_values{
|
|
309 my $self = shift;
|
|
310 my @ld_values; #contains ALL the ld values in the container
|
|
311
|
|
312 #the keys in the ldContainer hash
|
|
313 my $variation_feature_id_1;
|
|
314 my $variation_feature_id_2;
|
|
315
|
|
316 if (! defined $self->{'_default_population'}){
|
|
317 $self->{'_default_population'} = $self->_get_major_population;
|
|
318 }
|
|
319 foreach my $key_ld (keys %{$self->{'ldContainer'}}){
|
|
320 my %ld_value; #contains a single ld value in the container {variation_feature variation_feature d_prime r2 snp_distance_count}
|
|
321 ($variation_feature_id_1, $variation_feature_id_2) = split /-/,$key_ld; #get the variation_features ids
|
|
322 if (exists $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}){
|
|
323 #add the information to the ld_value hash
|
|
324 $ld_value{'variation1'} = $self->{'variationFeatures'}->{$variation_feature_id_1};
|
|
325 $ld_value{'variation2'} = $self->{'variationFeatures'}->{$variation_feature_id_2};
|
|
326 $ld_value{'d_prime'} = $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}->{'d_prime'};
|
|
327 $ld_value{'r2'} = $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}->{'r2'};
|
|
328 $ld_value{'sample_count'} = $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}->{'sample_count'};
|
|
329 $ld_value{'sample_id'} = $self->{'_default_population'};
|
|
330 push @ld_values, \%ld_value;
|
|
331 }
|
|
332 }
|
|
333 return \@ld_values;
|
|
334 }
|
|
335
|
|
336
|
|
337 =head2 get_all_r_square_values
|
|
338
|
|
339 Example : $r_square_values = $obj->get_all_r_square_values();
|
|
340 Description : Get all r_square values contained in the LDFeatureContainer object
|
|
341 ReturnType : reference to list of [{variation1=>Bio::EnsEMBL::Variation::VariationFeature, variation2=>Bio::EnsEMBL::Variation::VariationFeature, r2=>r2, sample_id=>population_sample_id}]
|
|
342 Exceptions : no exceptions
|
|
343 Caller : general
|
|
344 Status : At Risk
|
|
345
|
|
346 =cut
|
|
347
|
|
348
|
|
349 sub get_all_r_square_values{
|
|
350 my $self = shift;
|
|
351 my @r_squares; #contains ALL the r2 values in the container
|
|
352
|
|
353 #the keys in the ldContainer hash
|
|
354 my $variation_feature_id_1;
|
|
355 my $variation_feature_id_2;
|
|
356
|
|
357 if (! defined $self->{'_default_population'}){
|
|
358 $self->{'_default_population'} = $self->_get_major_population;
|
|
359 }
|
|
360 foreach my $key_ld (keys %{$self->{'ldContainer'}}){
|
|
361 my %r_square; #contains a single r2 value in the container {variation_feature r2 population_id}
|
|
362 ($variation_feature_id_1, $variation_feature_id_2) = split /-/,$key_ld; #get the variation_features ids
|
|
363 if (exists $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}){
|
|
364 $r_square{'variation1'} = $self->{'variationFeatures'}->{$variation_feature_id_1};
|
|
365 $r_square{'variation2'} = $self->{'variationFeatures'}->{$variation_feature_id_2};
|
|
366 $r_square{'r2'} = $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}->{'r2'};
|
|
367 $r_square{'sample_id'} = $self->{'_default_population'};
|
|
368 #and add all the ld information to the final list
|
|
369 push @r_squares, \%r_square;
|
|
370 }
|
|
371 }
|
|
372 return \@r_squares;
|
|
373 }
|
|
374
|
|
375 =head2 get_all_d_prime_values
|
|
376
|
|
377 Example : $d_prime_values = $obj->get_all_d_prime_values();
|
|
378 Description : Get all d_prime values contained in the LDFeatureContainer object
|
|
379 ReturnType : reference to list of [{variation1=>Bio::EnsEMBL::Variation::VariationFeature, variation2=>Bio::EnsEMBL::Variation::VariationFeature, d_prime=>d_prime, sample_id=>population_sample_id}]
|
|
380 Exceptions : no exceptions
|
|
381 Caller : general
|
|
382 Status : At Risk
|
|
383
|
|
384 =cut
|
|
385
|
|
386
|
|
387 sub get_all_d_prime_values{
|
|
388 my $self = shift;
|
|
389 my @d_primes; #contains ALL the d_prime values in the container
|
|
390
|
|
391 #the keys in the ldContainer hash
|
|
392 my $variation_feature_id_1;
|
|
393 my $variation_feature_id_2;
|
|
394
|
|
395 if (! defined $self->{'_default_population'}){
|
|
396 $self->{'_default_population'} = $self->_get_major_population;
|
|
397 }
|
|
398 foreach my $key_ld (keys %{$self->{'ldContainer'}}){
|
|
399 my %d_prime; #contains a single d_prime value in the container {variation_feature d_prime population_id}
|
|
400 ($variation_feature_id_1, $variation_feature_id_2) = split /-/,$key_ld; #get the variation_features ids
|
|
401 #add the information to the ld_value array
|
|
402 if (exists $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}){
|
|
403 $d_prime{'variation1'} = $self->{'variationFeatures'}->{$variation_feature_id_1};
|
|
404 $d_prime{'variation2'} = $self->{'variationFeatures'}->{$variation_feature_id_2};
|
|
405 $d_prime{'d_prime'} = $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}->{'d_prime'};
|
|
406 $d_prime{'sample_id'} = $self->{'_default_population'};
|
|
407 #and add all the ld information to the final list if exists the value
|
|
408 push @d_primes, \%d_prime;
|
|
409 }
|
|
410
|
|
411 }
|
|
412 return \@d_primes;
|
|
413 }
|
|
414
|
|
415 =head2 get_all_populations
|
|
416
|
|
417 Arg [1] : (optional) Bio::EnsEMBL::Variation::VariationFeature $variationFeature
|
|
418 Arg [2] : (optional) Bio::EnsEMBL::Variation::VariationFeature $variationFeature
|
|
419 Example : $populations = $obj->get_all_populations($vf1,$vf2);
|
|
420 Description : If no arguments provided, returns ALL the populations present in the container. When 2 variation features provided, returns the
|
|
421 population/populations where these variation features occurs
|
|
422 ReturnType : ref to an array of int
|
|
423 Exceptions : throw on incorrect arguments
|
|
424 Caller : general
|
|
425 Status : At Risk
|
|
426
|
|
427 =cut
|
|
428
|
|
429 sub get_all_populations{
|
|
430 my $self = shift;
|
|
431 my $variation_feature_1 = shift;
|
|
432 my $variation_feature_2 = shift;
|
|
433 my %populations;
|
|
434 my @populations;
|
|
435 my $key;
|
|
436
|
|
437 #if no variation provided, return ALL the populations in the container
|
|
438 if (! defined($variation_feature_1) && ! defined($variation_feature_2)){
|
|
439 foreach my $key (keys %{$self->{'ldContainer'}}){
|
|
440 map {$populations{$_}++} keys %{$self->{'ldContainer'}->{$key}};
|
|
441 }
|
|
442 @populations = keys %populations;
|
|
443 }
|
|
444 else{
|
|
445 #first, check if both arguments have been properly provided
|
|
446 if (!ref($variation_feature_1) || !$variation_feature_1->isa('Bio::EnsEMBL::Variation::VariationFeature') || !ref($variation_feature_2) || !$variation_feature_2->isa('Bio::EnsEMBL::Variation::VariationFeature')){
|
|
447 throw("Bio::EnsEMBL::Variation::VariationFeature arguments expected");
|
|
448 }
|
|
449 #if the variation_features are correct, return the list of populations
|
|
450 else{
|
|
451 #find out the key in the ld Hash: vf1 - vf2 or vf2 - vf1
|
|
452 if (exists $self->{'ldContainer'}->{$variation_feature_1->dbID() . '-' . $variation_feature_2->dbID()}){
|
|
453 $key = $variation_feature_1->dbID() . '-' . $variation_feature_2->dbID();
|
|
454 }
|
|
455 else{
|
|
456 $key = $variation_feature_2->dbID() . '-' . $variation_feature_1->dbID();
|
|
457 }
|
|
458 @populations = keys %{$self->{'ldContainer'}->{$key}};
|
|
459 }
|
|
460 }
|
|
461
|
|
462 return \@populations;
|
|
463 }
|
|
464
|
|
465 #returns from the container the population_id with the maximum number of pairwise_ld
|
|
466 sub _get_populations {
|
|
467 my $self = shift;
|
|
468 my %populations;
|
|
469
|
|
470 foreach my $key (keys %{$self->{'ldContainer'}}){
|
|
471 map {$populations{$_}++} keys %{$self->{'ldContainer'}->{$key}};
|
|
472 }
|
|
473 my @sorted_populations = sort{$populations{$b} <=> $populations{$a}} keys %populations;
|
|
474 return @sorted_populations;
|
|
475 }
|
|
476
|
|
477 sub _get_major_population {
|
|
478 my( $pop ) = $_[0]->_get_populations;
|
|
479 return $pop;
|
|
480 }
|
|
481 1;
|
|
482
|