annotate dir_plugins/ExAC.pm @ 10:f594c6bed58f draft default tip

Uploaded
author dvanzessen
date Tue, 21 Apr 2020 11:40:19 +0000
parents e545d0a25ffe
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
1 =head1 LICENSE
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
2
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
4 Copyright [2016-2018] EMBL-European Bioinformatics Institute
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
5
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
6 Licensed under the Apache License, Version 2.0 (the "License");
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
7 you may not use this file except in compliance with the License.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
8 You may obtain a copy of the License at
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
9
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
10 http://www.apache.org/licenses/LICENSE-2.0
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
11
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
12 Unless required by applicable law or agreed to in writing, software
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
13 distributed under the License is distributed on an "AS IS" BASIS,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
15 See the License for the specific language governing permissions and
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
16 limitations under the License.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
17
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
18 =head1 CONTACT
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
19
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
20 Ensembl <http://www.ensembl.org/info/about/contact/index.html>
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
21
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
22 =cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
23
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
24 =head1 NAME
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
25
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
26 ExAC
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
27
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
28 =head1 SYNOPSIS
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
29
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
30 mv ExAC.pm ~/.vep/Plugins
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
31 ./vep -i variations.vcf --plugin ExAC,/path/to/ExAC/ExAC.r0.3.sites.vep.vcf.gz
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
32 ./vep -i variations.vcf --plugin ExAC,/path/to/ExAC/ExAC.r0.3.sites.vep.vcf.gz,AC
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
33 ./vep -i variations.vcf --plugin ExAC,/path/to/ExAC/ExAC.r0.3.sites.vep.vcf.gz,,AN
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
34 ./vep -i variations.vcf --plugin ExAC,/path/to/ExAC/ExAC.r0.3.sites.vep.vcf.gz,AC,AN
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
35
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
36
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
37
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
38 =head1 DESCRIPTION
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
39
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
40 A VEP plugin that retrieves ExAC allele frequencies.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
41
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
42 Visit ftp://ftp.broadinstitute.org/pub/ExAC_release/current to download the latest ExAC VCF.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
43
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
44 Note that the currently available version of the ExAC data file (0.3) is only available
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
45 on the GRCh37 assembly; therefore it can only be used with this plugin when using the
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
46 VEP on GRCh37. See http://www.ensembl.org/info/docs/tools/vep/script/vep_other.html#assembly
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
47
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
48 The tabix utility must be installed in your path to use this plugin.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
49
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
50 The plugin takes 3 command line arguments. The second and third arguments are optional. If AC is specified as the second
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
51 argument, allele counts per population will be included in the output. If AN is specified as the third argument, allele-specific
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
52 chromosome counts will be included in output.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
53
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
54
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
55 =cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
56
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
57 package ExAC;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
58
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
59 use strict;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
60 use warnings;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
61
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
62 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
63 use Bio::EnsEMBL::Variation::Utils::Sequence qw(get_matched_variant_alleles);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
64
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
65 use Bio::EnsEMBL::Variation::Utils::VEP qw(parse_line get_slice);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
66
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
67 use Bio::EnsEMBL::Variation::Utils::BaseVepPlugin;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
68
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
69 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
70
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Constructor.
#
# Delegates to the parent class constructor, then reads the plugin parameters:
#   params->[0]  path or URL of the tabix-indexed ExAC VCF (required)
#   params->[1]  'AC' to also report allele counts    (sets $self->{display_ac})
#   params->[2]  'AN' to also report allele numbers   (sets $self->{display_an})
#
# Dies if tabix is not on the PATH, if no file was given, or if a local file
# or its .tbi index does not exist. Returns the new object with
# $self->{file} set.
sub new {
  my $class = shift;

  my $self = $class->SUPER::new(@_);

  # test tabix
  die "ERROR: tabix does not seem to be in your path\n" unless `which tabix 2>&1` =~ /tabix$/;

  my $params = $self->params;

  # get ExAC file; fail early with a clear message instead of running the
  # checks below on an undefined value
  my $file = $params->[0];
  die "ERROR: No ExAC file specified; usage: --plugin ExAC,/path/to/ExAC.vcf.gz[,AC][,AN]\n"
    unless defined($file) && length($file);

  # get AC,AN options (an empty or missing parameter switches the option off)
  $self->{display_ac} = (defined($params->[1]) && $params->[1] eq 'AC') ? 1 : 0;
  $self->{display_an} = (defined($params->[2]) && $params->[2] eq 'AN') ? 1 : 0;

  # remote files?
  if($file =~ m{tp://}) {

    # Probe the remote file with tabix. The list form of open bypasses the
    # shell, so URLs containing '&', '?', spaces or quotes are passed to
    # tabix unchanged. tabix's own stderr goes straight to our STDERR.
    if(open(my $probe, '-|', 'tabix', '-f', $file, '1:1-1')) {
      my $remote_test = do { local $/; <$probe> };
      close $probe;
      print STDERR "$remote_test\n" if defined($remote_test) && length($remote_test);
    }
    else {
      # best-effort probe only: report, but do not abort
      warn "WARNING: could not run tabix on remote file $file: $!\n";
    }

    # A stricter check was disabled in the original code:
    # if($remote_test && $remote_test !~ /get_local_version/) {
    #   die "$remote_test\nERROR: Could not find file or index file for remote annotation file $file\n";
    # }
  }

  # check files exist
  else {
    die "ERROR: ExAC file $file not found; you can download it from ftp://ftp.broadinstitute.org/pub/ExAC_release/current\n" unless -e $file;
    die "ERROR: Tabix index file ${file}.tbi not found - perhaps you need to create it first?\n" unless -e $file.'.tbi';
  }

  $self->{file} = $file;

  return $self;
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
116
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Returns an array ref naming the feature types this plugin declares:
# 'Feature' and 'Intergenic'.
sub feature_types {
  return ['Feature', 'Intergenic'];
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
120
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Returns a hash ref mapping output field names (ExAC_AF*, and optionally
# ExAC_AC* / ExAC_AN*) to their descriptions.
#
# On the first call the VCF header is read through "tabix -h" and every
# INFO line whose ID is AC or AC_<POP> contributes one set of fields; the
# matching INFO keys ('AC', 'AC_AFR', ...) are appended to $self->{headers}.
# The result is stored in $self->{header_info} and returned as-is on later
# calls.
#
# Dies if tabix cannot be started or if no AC header line is found.
sub get_header_info {
  my $self = shift;

  # already parsed on an earlier call
  return $self->{header_info} if exists($self->{header_info});

  # List-form pipe open: no shell is involved, so the file name is passed to
  # tabix verbatim whatever characters it contains.
  open(my $tabix, '-|', 'tabix', '-f', '-h', $self->{file}, '1:1-1')
    or die "ERROR: Could not run tabix on ".$self->{file}.": $!\n";

  my %headers = ();

  while(my $line = <$tabix>) {
    next unless $line =~ /ID\=AC(\_[A-Zdj]+)?\,.*\"(.+)\"/;
    my ($pop, $desc) = ($1, $2);

    # the header describes a count, but the reported value is a frequency
    $desc =~ s/Counts?/frequency/i;
    $pop ||= '';

    $headers{'ExAC_AF'.$pop} = 'ExAC '.$desc;

    if($self->{display_ac}) {
      $headers{'ExAC_AC'.$pop} = 'ExAC'.$pop.' Allele count';
    }
    if($self->{display_an}) {
      $headers{'ExAC_AN'.$pop} = 'ExAC'.$pop.' Allele number';
    }

    # store this header on self
    push @{$self->{headers}}, 'AC'.$pop;
  }

  # The exit status of tabix is deliberately not checked here; a failed run
  # yields no headers and is reported by the check below.
  close $tabix;

  die "ERROR: No valid headers found in ExAC VCF file\n" unless scalar keys %headers;

  $self->{header_info} = \%headers;

  return $self->{header_info};
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
163
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Looks up ExAC data for one allele.
#
# Arguments: the plugin object and $tva, an object providing
# variation_feature and variation_feature_seq.
#
# The ExAC file is queried with tabix for the variant's region widened by one
# base on each side. Results for the most recent region are kept in
# $self->{cache} and reused when the same region is requested again.
#
# Returns a hash ref of ExAC_* fields for the allele, or an empty hash ref
# when nothing was found.
sub run {
  my ($self, $tva) = @_;

  # make sure headers have been loaded
  $self->get_header_info();

  my $vf = $tva->variation_feature;

  # get allele, reverse comp if needed
  my $allele = $tva->variation_feature_seq;
  reverse_comp(\$allele) if $vf->{strand} < 0;

  # strip a leading "chr" only; the name is otherwise passed through as-is
  my $vf_chr = $vf->{chr};
  $vf_chr =~ s/^chr//;

  # adjust coords to account for VCF-like storage of indels
  my $pos_string = sprintf("%s:%i-%i", $vf_chr, $vf->{start} - 1, $vf->{end} + 1);

  # A cached entry is reused only if it is for the same region and is either
  # empty or already holds this allele; otherwise the file is read again.
  # The cache is overwritten at the end of this sub in every case.
  my $cached = $self->{cache} ? $self->{cache}->{$pos_string} : undef;
  my $use_cache = defined($cached) && (!(scalar keys %$cached) || defined($cached->{$allele}));

  my $data = {};

  if($use_cache) {
    $data = $cached;
  }

  # read from file
  else {
    my ($vf_ref_allele) = split /\//, $vf->allele_string;

    # List-form pipe open: no shell, no bareword handle, and the line is read
    # into a lexical so the caller's $_ is left alone.
    if(open(my $tabix, '-|', 'tabix', '-f', $self->{file}, $pos_string)) {

      LINE:
      while(my $line = <$tabix>) {
        chomp $line;
        $line =~ s/\r$//;

        # parse VCF line into a VariationFeature object
        my ($vcf_vf) = @{parse_line({format => 'vcf', minimal => 1}, $line)};

        # check parsed OK
        next LINE unless $vcf_vf && $vcf_vf->isa('Bio::EnsEMBL::Variation::VariationFeature');

        my @vcf_alleles = split /\//, $vcf_vf->allele_string;
        my $ref_allele  = shift @vcf_alleles;
        my $vcf_vf_start = $vcf_vf->{start};

        if($vcf_vf_start != $vf->{start} || $vcf_vf->{end} != $vf->{end}) {
          my $matched_alleles = get_matched_variant_alleles(
            {ref => $vf_ref_allele, alts => [$allele],     pos => $vf->{start}},
            {ref => $ref_allele,    alts => \@vcf_alleles, pos => $vcf_vf_start}
          );

          next LINE unless (@$matched_alleles);

          # We only match one alt allele from the input VF against alleles
          # from the VCF line. b_allele is the matched allele from the VCF alt
          # alleles; $allele is reassigned so that the lookup at the end of
          # this sub uses the same key as the stored data.
          $allele = $matched_alleles->[0]->{b_allele};
        }

        $self->_store_line_counts($data, $line, $ref_allele, \@vcf_alleles);
      }

      # exit status deliberately ignored: a failed query simply yields no data
      close $tabix;
    }
    else {
      warn "WARNING: could not run tabix on ".$self->{file}." for $pos_string: $!\n";
    }
  }

  # overwrite cache
  $self->{cache} = {$pos_string => $data};

  return defined($data->{$allele}) ? $data->{$allele} : {};
}

# Reads the AC/AN pairs for every key in $self->{headers} from one VCF line
# and stores ExAC_AC*/ExAC_AN*/ExAC_AF* values in $data, keyed by allele.
#   $data         hash ref filled in place: allele => { field => value }
#   $line         the chomped VCF line
#   $ref_allele   reference allele of the VCF record
#   $vcf_alleles  array ref of the record's ALT alleles, in VCF order
sub _store_line_counts {
  my ($self, $data, $line, $ref_allele, $vcf_alleles) = @_;

  HEADER:
  foreach my $ac_key (@{$self->{headers} || []}) {

    # AN and AF keys mirror the AC key, e.g. AC_AFR -> AN_AFR / AF_AFR
    (my $an_key = $ac_key) =~ s/AC/AN/;
    (my $af_key = $ac_key) =~ s/AC/AF/;

    # Keys are anchored to a field boundary so that "AC=" cannot match inside
    # another key such as "MLEAC=".
    next HEADER unless $line =~ /(?:^|[\t;])\Q$ac_key\E=([0-9,]+)/;
    my @ac = split /,/, $1;
    next HEADER unless scalar @ac == scalar @$vcf_alleles;

    next HEADER unless $line =~ /(?:^|[\t;])\Q$an_key\E=([0-9,]+)/;
    my @an = split /,/, $1;
    next HEADER unless @an;

    my $total_ac = 0;
    my $an;

    foreach my $alt (@$vcf_alleles) {
      my $ac = shift @ac;

      # if fewer AN values than alleles are listed, the last one is reused
      $an = shift @an if @an;

      $total_ac += $ac;

      $data->{$alt}->{'ExAC_'.$ac_key} = $ac if $self->{display_ac};
      $data->{$alt}->{'ExAC_'.$an_key} = $an if $self->{display_an};
      $data->{$alt}->{'ExAC_'.$af_key} = sprintf("%.3g", $ac / $an) if $an;
    }

    # nothing to derive for the reference allele if no ALT allele was processed
    next HEADER unless defined($an);

    # The reference allele's count is whatever the ALT alleles leave over
    # (AN - sum of AC), which keeps it consistent with the frequency below.
    $data->{$ref_allele}->{'ExAC_'.$ac_key} = $an - $total_ac if $self->{display_ac};
    $data->{$ref_allele}->{'ExAC_'.$an_key} = $an if $self->{display_an};
    $data->{$ref_allele}->{'ExAC_'.$af_key} = sprintf("%.3g", 1 - ($total_ac / $an)) if $an;
  }

  return;
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
291
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
292 1;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
293