comparison dir_plugins/Conservation.pm @ 3:49397129aec0 draft

Uploaded
author dvanzessen
date Mon, 15 Jul 2019 05:20:39 -0400
parents e545d0a25ffe
children
comparison
equal deleted inserted replaced
2:17c98d091710 3:49397129aec0
1 =head1 LICENSE
2
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4 Copyright [2016-2018] EMBL-European Bioinformatics Institute
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17
18 =head1 CONTACT
19
20 Ensembl <http://www.ensembl.org/info/about/contact/index.html>
21
22 =cut
23
24 =head1 NAME
25
26 Conservation
27
28 =head1 SYNOPSIS
29
30 mv Conservation.pm ~/.vep/Plugins
31 ./vep -i variations.vcf --plugin Conservation,GERP_CONSERVATION_SCORE,mammals
32
33 =head1 DESCRIPTION
34
35 This is a plugin for the Ensembl Variant Effect Predictor (VEP) that
36 retrieves a conservation score from the Ensembl Compara databases
37 for variant positions. You can specify the method link type and
38 species sets as command line options, the default is to fetch GERP
39 scores from the EPO 35 way mammalian alignment (please refer to the
40 Compara documentation for more details of available analyses).
41
42 If a variant affects multiple nucleotides the average score for the
43 position will be returned, and for insertions the average score of
44 the 2 flanking bases will be returned.
45
46 The plugin requires the ensembl-compara API module to be installed;
47 see http://www.ensembl.org/info/docs/api/index.html
48
49 =cut
50
51 package Conservation;
52
53 use strict;
54 use warnings;
55
56 use Bio::EnsEMBL::Registry;
57 use Bio::EnsEMBL::Slice;
58
59 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
60
61 sub version {
62 return '2.3';
63 }
64
65 sub feature_types {
66 return ['Feature','Intergenic'];
67 }
68
69 sub get_header_info {
70
71 my $self = shift;
72
73 return {
74 Conservation => "The conservation score for this site (method_link_type=\"".
75 $self->{method_link_type}."\", species_set=\"".$self->{species_set}."\")",
76 };
77 }
78
79 sub new {
80 my $class = shift;
81
82 my $self = $class->SUPER::new(@_);
83
84 my $params = $self->params;
85
86 # REST API passes 1 as first param
87 shift @$params if $params->[0] && $params->[0] eq '1';
88
89 $self->{method_link_type} = $params->[0] || 'GERP_CONSERVATION_SCORE';
90 $self->{species_set} = $params->[1] || 'mammals';
91
92 my $config = $self->{config};
93 my $reg = 'Bio::EnsEMBL::Registry';
94
95 # reconnect to DB without species param
96 if($config->{host}) {
97 $reg->load_registry_from_db(
98 -host => $config->{host},
99 -user => $config->{user},
100 -pass => $config->{password},
101 -port => $config->{port},
102 -db_version => $config->{db_version},
103 -no_cache => $config->{no_slice_cache},
104 );
105 }
106
107 my $mlss_adap = $config->{mlssa} ||= $reg->get_adaptor('Multi', 'compara', 'MethodLinkSpeciesSet')
108 or die "Failed to connect to compara database\n";
109
110 $self->{mlss} = $mlss_adap->fetch_by_method_link_type_species_set_name($self->{method_link_type}, $self->{species_set})
111 or die "Failed to fetch MLSS for ".$self->{method_link_type}." and ".$self->{species_set}."\n";
112
113 $self->{cs_adap} = $config->{cosa} ||= $reg->get_adaptor('Multi', 'compara', 'ConservationScore')
114 or die "Failed to fetch conservation adaptor\n";
115
116 return $self;
117 }
118
119 sub run {
120 my ($self, $bvfoa) = @_;
121
122 my $bvf = $bvfoa->base_variation_feature;
123
124 # we cache the score on the BaseVariationFeature so we don't have to
125 # fetch it multiple times if this variant overlaps multiple Features
126
127 unless (exists $bvf->{_conservation_score}) {
128
129 my $slice;
130
131 my $true_snp = 0;
132
133 if ($bvf->{end} >= $bvf->{start}) {
134
135 if ($bvf->{start} == $bvf->{end}) {
136
137 # work around a bug in the compara API that means you can't fetch
138 # conservation scores for 1bp slices by creating a 2bp slice for
139 # SNPs and then ignoring the score returned for the second position
140
141 my $s = $bvf->slice;
142
143 $slice = Bio::EnsEMBL::Slice->new(
144 -seq_region_name => $s->seq_region_name,
145 -seq_region_length => $s->seq_region_length,
146 -coord_system => $s->coord_system,
147 -start => $bvf->{start},
148 -end => $bvf->{end} + 1,
149 -strand => $bvf->{strand},
150 -adaptor => $s->adaptor
151 );
152
153 $true_snp = 1;
154 }
155 else {
156
157 # otherwise, just get a slice that covers our variant feature
158
159 $slice = $bvf->feature_Slice;
160 }
161 }
162 else {
163
164 # this is an insertion, we return the average score of the flanking
165 # bases, so we create a 2bp slice around the insertion site
166
167 my $s = $bvf->slice;
168
169 $slice = Bio::EnsEMBL::Slice->new(
170 -seq_region_name => $s->seq_region_name,
171 -seq_region_length => $s->seq_region_length,
172 -coord_system => $s->coord_system,
173 -start => $bvf->{end},
174 -end => $bvf->{start},
175 -strand => $bvf->{strand},
176 -adaptor => $s->adaptor
177 );
178 }
179
180 my $scores = $self->{cs_adap}->fetch_all_by_MethodLinkSpeciesSet_Slice(
181 $self->{mlss}, # our MLSS for the conservation metric and the set of species
182 $slice, # our slice
183 ($slice->end - $slice->start + 1), # the number of scores we want back (one for each base)
184 );
185
186 if (@$scores > 0) {
187
188 # we use the simple average of the diff_scores as the overall score
189
190 pop @$scores if $true_snp; # get rid of our spurious second score for SNPs
191
192 if (@$scores > 0) {
193
194 my $tot_score = 0;
195
196 $tot_score += $_->diff_score for @$scores;
197
198 $tot_score /= @$scores;
199
200 $bvf->{_conservation_score} = sprintf "%.3f", $tot_score;
201 }
202 else {
203 $bvf->{_conservation_score} = undef;
204 }
205 }
206 else {
207 $bvf->{_conservation_score} = undef;
208 }
209 }
210
211 if (defined $bvf->{_conservation_score}) {
212 return {
213 Conservation => $bvf->{_conservation_score}
214 };
215 }
216 else {
217 return {};
218 }
219 }
220
221 1;
222