annotate dir_plugins/LD.pm @ 3:49397129aec0 draft

Uploaded
author dvanzessen
date Mon, 15 Jul 2019 05:20:39 -0400
parents e545d0a25ffe
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
1 =head1 LICENSE
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
2
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
4 Copyright [2016-2018] EMBL-European Bioinformatics Institute
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
5
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
6 Licensed under the Apache License, Version 2.0 (the "License");
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
7 you may not use this file except in compliance with the License.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
8 You may obtain a copy of the License at
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
9
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
10 http://www.apache.org/licenses/LICENSE-2.0
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
11
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
12 Unless required by applicable law or agreed to in writing, software
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
13 distributed under the License is distributed on an "AS IS" BASIS,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
15 See the License for the specific language governing permissions and
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
16 limitations under the License.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
17
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
18 =head1 CONTACT
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
19
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
20 Ensembl <http://www.ensembl.org/info/about/contact/index.html>
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
21
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
22 =cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
23
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
24 =head1 NAME
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
25
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
26 LD
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
27
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
28 =head1 SYNOPSIS
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
29
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
30 mv LD.pm ~/.vep/Plugins
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
31 ./vep -i variations.vcf --plugin LD,1000GENOMES:phase_3:CEU,0.8
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
32
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
33 =head1 DESCRIPTION
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
34
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
35 This is a plugin for the Ensembl Variant Effect Predictor (VEP) that
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
36 finds variants in linkage disequilibrium with any overlapping existing
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
37 variants from the Ensembl variation databases. You can configure the
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
38 population used to calculate the r2 value, and the r2 cutoff used by
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
39 passing arguments to the plugin via the VEP command line (separated
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
40 by commas). This plugin adds a single new entry to the Extra column
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
41 with a comma-separated list of linked variant IDs and the associated
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
42 r2 values, e.g.:
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
43
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
44 LinkedVariants=rs123:0.879,rs234:0.943
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
45
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
46 If no arguments are supplied, the default population used is the CEU
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
47 sample from the 1000 Genomes Project phase 3, and the default r2
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
48 cutoff used is 0.8.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
49
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
50 WARNING: Calculating LD is a relatively slow procedure, so this will
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
51 slow VEP down considerably when running on large numbers of
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
52 variants. Consider running vep followed by filter_vep to get a smaller
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
53 input set:
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
54
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
55 ./vep -i input.vcf -cache -vcf -o input_vep.vcf
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
56 ./filter_vep -i input_vep.vcf -filter "Consequence is missense_variant" > input_vep_filtered.vcf
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
57 ./vep -i input_vep_filtered.vcf -cache -plugin LD
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
58
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
59 =cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
60
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
61 =head1 INSTALLATION
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
62
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
63 LD calculation requires additional installation steps.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
64
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
65 The JSON perl library is required; see VEP's installation instructions
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
66 for guidance: http://www.ensembl.org/info/docs/tools/vep/script/vep_download.html#additional
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
67
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
68 A binary from the ensembl-variation git repository must be compiled and either
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
69 added to your PATH or specified on the command line. In the ensembl-vep
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
70 directory:
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
71
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
72 export HTSLIB_DIR=${PWD}/htslib
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
73 git clone https://github.com/Ensembl/ensembl-variation
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
74 cd ensembl-variation/C_code
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
75 make
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
76
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
77 You may EITHER add this path to your PATH environment variable (add this line
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
78 to your $HOME/.bashrc to make the change permanent):
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
79
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
80 export PATH=${PATH}:${PWD}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
81
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
82 OR you may specify the full path to the ld_vcf binary on the vep command line:
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
83
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
84 ./vep -i variations.vcf --plugin LD,1000GENOMES:phase_3:CEU,0.8,$PWD/ensembl-variation/C_code/ld_vcf
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
85
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
86 =cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
87
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
88 =head1 DATA
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
89
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
90 By default genotype data to calculate LD is retrieved from tabix-indexed
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
91 VCF files hosted on Ensembl's FTP servers. It is possible to download this
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
92 data to your local machine and have the LD plugin read genotype data from
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
93 there instead, giving faster performance and reducing network traffic.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
94
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
95 These commands show how to get the data files for GRCh38.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
96
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
97 mkdir variation_genotype
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
98 cd variation_genotype
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
99 lftp -e "mget ALL.chr*.phase3_shapeit2_mvncall_integrated_v3plus_nounphased.rsID.genotypes.GRCh38_dbSNP.vcf.gz*" ftp://ftp.ensembl.org/pub/data_files/homo_sapiens/GRCh38/variation_genotype/
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
100 cd ..
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
101
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
102 For GRCh37 replace the lftp command with:
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
103
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
104 lftp -e "mget ALL.chr*.phase3_shapeit2_mvncall_integrated_v3plus_nounphased.rsID.genotypes.vcf.gz*" ftp://ftp.ensembl.org/pub/data_files/homo_sapiens/GRCh37/variation_genotype/
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
105
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
106 We must now modify the JSON configuration file used to find the data. Starting
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
107 in the ensembl-vep directory:
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
108
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
109 perl -pi -e "s|ftp://ftp.ensembl.org/pub/data_files/homo_sapiens/GRCh38|$PWD|" Bio/EnsEMBL/Variation/DBSQL/vcf_config.json
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
110
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
111 Or for GRCh37:
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
112
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
113 perl -pi -e "s|ftp://ftp.ensembl.org/pub/data_files/homo_sapiens/GRCh37|$PWD|" Bio/EnsEMBL/Variation/DBSQL/vcf_config.json
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
114
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
115 =cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
116
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
117 package LD;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
118
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
119 use strict;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
120 use warnings;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
121
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
122 use Bio::EnsEMBL::Registry;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
123
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
124 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
125
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
126 sub feature_types {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
127 return ['Feature','Intergenic'];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
128 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
129
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
130 sub get_header_info {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
131
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
132 my $self = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
133
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
134 return {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
135 LinkedVariants => "Variants in LD (r2 >= ".$self->{r2_cutoff}.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
136 ") with overlapping existing variants from the ".
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
137 $self->{pop}->name." population",
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
138 };
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
139 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
140
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
141 sub new {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
142 my $class = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
143
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
144 my $self = $class->SUPER::new(@_);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
145
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
146 if ($self->config->{offline}) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
147 warn "Warning: a connection to the database is required to calculate LD\n";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
148 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
149
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
150 my $reg = 'Bio::EnsEMBL::Registry';
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
151
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
152 # turn on the check for existing variants
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
153
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
154 $self->config->{check_existing} = 1;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
155
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
156 # fetch our population
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
157
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
158 my ($pop_name, $r2_cutoff, $ld_binary) = @{ $self->params };
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
159
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
160 # set some defaults
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
161
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
162 $pop_name ||= '1000GENOMES:phase_3:CEU';
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
163
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
164 $r2_cutoff = 0.8 unless defined $r2_cutoff;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
165
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
166 my $pop_adap = $reg->get_adaptor('human', 'variation', 'population')
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
167 || die "Failed to get population adaptor\n";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
168
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
169 my $valid_pops = $pop_adap->fetch_all_LD_Populations();
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
170 my ($pop) = grep {$_->name eq $pop_name} @$valid_pops;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
171 die "Invalid population '$pop_name'; valid populations are:\n".join(", ", map {$_->name} @$valid_pops)."\n" unless $pop;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
172
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
173 $self->{pop} = $pop;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
174 $self->{r2_cutoff} = $r2_cutoff;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
175
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
176 # prefetch the necessary adaptors
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
177
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
178 my $ld_adap = $reg->get_adaptor('human', 'variation', 'ldfeaturecontainer')
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
179 || die "Failed to get LD adaptor\n";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
180 $ld_adap->db->use_vcf(1);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
181 my $var_adap = $reg->get_adaptor('human', 'variation', 'variation')
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
182 || die "Failed to get variation adaptor\n";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
183
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
184 my $var_feat_adap = $reg->get_adaptor('human', 'variation', 'variationfeature')
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
185 || die "Failed to get variation feature adaptor\n";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
186
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
187 if($ld_binary) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
188 die("Specified LD binary \"$ld_binary\" does not exist\n") unless -e $ld_binary;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
189 $Bio::EnsEMBL::Variation::DBSQL::LDFeatureContainerAdaptor::VCF_BINARY_FILE = $ld_binary;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
190 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
191
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
192 $self->{ld_adap} = $ld_adap;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
193 $self->{var_adap} = $var_adap;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
194 $self->{var_feat_adap} = $var_feat_adap;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
195
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
196 return $self;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
197 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
198
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
199 sub run {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
200 my ($self, $vfoa, $line_hash) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
201
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
202 # fetch the existing variants from the line hash
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
203 return {} unless $line_hash->{Existing_variation};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
204
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
205 my @vars = ref($line_hash->{Existing_variation}) eq 'ARRAY' ? @{$line_hash->{Existing_variation}} : split(',', $line_hash->{Existing_variation});
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
206
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
207 my @linked;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
208
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
209
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
210 for my $var (@vars) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
211
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
212 # check cache
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
213 my $res;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
214
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
215 if($self->{cache}) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
216 ($res) = grep {$_->{var} eq $var} @{$self->{cache}};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
217 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
218
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
219 unless($res) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
220 my @this_linked;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
221
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
222 # fetch a variation for each overlapping variant ID
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
223 if (my $v = $self->{var_adap}->fetch_by_name($var)) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
224
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
225 # and fetch the associated variation features
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
226
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
227 for my $vf (@{ $self->{var_feat_adap}->fetch_all_by_Variation($v) }) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
228
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
229 # we're only interested in variation features that overlap our variant
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
230
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
231 if ($vf->slice->name eq $vfoa->variation_feature->slice->name) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
232
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
233 # fetch an LD feature container for this variation feature and our preconfigured population
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
234 if (my $ldfc = $self->{ld_adap}->fetch_by_VariationFeature($vf, $self->{pop})) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
235
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
236 # loop over all the linked variants
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
237 # we pass 1 to get_all_ld_values() so that it doesn't lazy load
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
238 # VariationFeature objects - we only need the name here anyway
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
239 for my $result (@{ $ldfc->get_all_ld_values(1) }) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
240
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
241 # apply our r2 cutoff
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
242
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
243 if ($result->{r2} >= $self->{r2_cutoff}) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
244
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
245 my $v1 = $result->{variation_name1};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
246 my $v2 = $result->{variation_name2};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
247
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
248 # I'm not sure which of these are the query variant, so just check the names
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
249
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
250 my $linked = $v1 eq $var ? $v2 : $v1;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
251
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
252 push @this_linked, sprintf("%s:%.3f", $linked, $result->{r2});
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
253 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
254 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
255 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
256 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
257 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
258 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
259
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
260 # cache it
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
261 $res = {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
262 var => $var,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
263 linked => \@this_linked
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
264 };
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
265
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
266 push @{$self->{cache}}, $res;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
267 shift @{$self->{cache}} while scalar @{$self->{cache}} > 50;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
268 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
269
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
270 push @linked, @{$res->{linked}};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
271 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
272
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
273 return scalar @linked ? {LinkedVariants => join(',', @linked)} : {};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
274 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
275
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
276 1;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
277