annotate dir_plugins/LocalID.pm @ 3:49397129aec0 draft

Uploaded
author dvanzessen
date Mon, 15 Jul 2019 05:20:39 -0400
parents e545d0a25ffe
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
1 =head1 LICENSE
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
2
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
4 Copyright [2016-2018] EMBL-European Bioinformatics Institute
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
5
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
6 Licensed under the Apache License, Version 2.0 (the "License");
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
7 you may not use this file except in compliance with the License.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
8 You may obtain a copy of the License at
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
9
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
10 http://www.apache.org/licenses/LICENSE-2.0
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
11
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
12 Unless required by applicable law or agreed to in writing, software
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
13 distributed under the License is distributed on an "AS IS" BASIS,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
15 See the License for the specific language governing permissions and
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
16 limitations under the License.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
17
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
18 =head1 CONTACT
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
19
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
20 Ensembl <http://www.ensembl.org/info/about/contact/index.html>
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
21
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
22 =cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
23
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
24 =head1 NAME
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
25
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
26 LocalID
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
27
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
28 =head1 SYNOPSIS
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
29
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
30 mv LocalID.pm ~/.vep/Plugins
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
31
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
32 ## first run create database
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
33
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
34 # EITHER create from Ensembl variation database
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
35 # VERY slow but includes variant synonyms, if not required see next command
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
36 ./vep -i variant_ids.txt --plugin LocalID,create_db=1 -safe
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
37
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
38 # OR create from cache directory
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
39 # faster but does not include synonyms
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
40 # parameter passed to from_cache may be full path to cache e.g. $HOME/.vep/homo_sapiens/88_GRCh38
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
41 # cache may be tabix converted or in default state (http://www.ensembl.org/info/docs/tools/vep/script/vep_cache.html#convert)
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
42 ./vep -i variant_ids.txt --plugin LocalID,create_db=1,from_cache=1 -safe
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
43
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
44 # subsequent runs
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
45 ./vep -i variant_ids.txt --plugin LocalID
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
46
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
47 # db file can be specified with db=[file]
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
48 # default file name is $HOME/.vep/[species]_[version]_[assembly].variant_ids.sqlite3
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
49 ./vep -i variant_ids.txt --plugin LocalID,db=my_db_file.txt
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
50
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
51 =head1 DESCRIPTION
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
52
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
53 The LocalID plugin allows you to use variant IDs as input without making a database connection.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
54
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
55 Requires sqlite3.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
56
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
57 A local sqlite3 database is used to look up variant IDs; this is generated either from Ensembl's
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
58 public database (very slow, but includes synonyms), or from a VEP cache file (faster, excludes
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
59 synonyms).
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
60
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
61 NB this plugin is NOT compatible with the ensembl-tools variant_effect_predictor.pl version of VEP.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
62
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
63 =cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
64
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
65 package LocalID;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
66
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
67 use strict;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
68 use warnings;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
69
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
70 use Bio::EnsEMBL::Variation::Utils::BaseVepPlugin;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
71 use Bio::EnsEMBL::VEP::Parser::ID;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
72 use Bio::EnsEMBL::VEP::Constants;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
73 use Bio::EnsEMBL::VEP::Utils qw(get_compressed_filehandle);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
74
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
75
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
76 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
77
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Plugin constructor.
#
# Decides which sqlite3 ID database file to use (the "db" plugin parameter,
# or a default name derived from directory, species, version and assembly),
# builds that database first when "create_db" is set, and stores the file
# path in the config under "_localid_db_file".
#
# Dies when the version or assembly needed for the default name cannot be
# determined, or when the database file does not exist after the optional
# build step.
sub new {
  my ($class, @plugin_args) = @_;

  my $self = $class->SUPER::new(@plugin_args);

  my $param_hash = $self->params_to_hash();
  my $config     = $self->{config};
  my $species    = $config->{species} || 'homo_sapiens';

  my $db = $param_hash->{db};

  if(!$db) {
    # take the first true value from an ordered list of version sources;
    # the registry is consulted only when everything before it is false
    my $version = $config->{db_version} || $config->{cache_version};
    $version ||= $Bio::EnsEMBL::VEP::Constants::VEP_VERSION;
    $version ||= 'Bio::EnsEMBL::Registry'->software_version;

    my $assembly = $config->{assembly};
    my $dir      = $param_hash->{dir} || $config->{dir};

    if(!$version) {
      die(qq{ERROR: Unable to determine software version - if using --offline, add --cache_version [version] or add the ID database name to your --plugin string as "db=[file]"\n});
    }
    if(!$assembly) {
      die(qq{ERROR: Unable to determine assembly version - if using --offline, add --assembly [version] or add the ID database name to your --plugin string as "db=[file]"\n});
    }

    $db = sprintf("%s/%s_%i_%s.variant_ids.sqlite3", $dir, $species, $version, $assembly);
  }

  # create DB?
  if($param_hash->{create_db}) {
    $self->create_db($db, $species, $param_hash);
  }

  if(!-e $db) {
    die("ERROR: DB file $db not found - you need to download or create it first, see documentation in plugin file\n");
  }

  # remember the location in the config
  $self->config->{_localid_db_file} = $db;

  return $self;
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
112
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Build the sqlite3 variant ID database at $db.
#
# Arguments:
#   $db         - path of the database file to create; must not exist yet
#   $species    - species name, used to locate the cache directory or the
#                 variation database
#   $param_hash - plugin parameters; a true "from_cache" selects the cache
#                 as data source ("1" means derive the cache directory from
#                 the config, anything else is used as the directory path)
#
# The variant rows are first dumped to a temporary file next to $db, then
# bulk-loaded with the sqlite3 command line tool and finally indexed by ID.
# Dies on any failure. As before, the temporary file is left in place when
# the import fails so that it can be inspected.
sub create_db {
  my ($self, $db, $species, $param_hash) = @_;

  # requires sqlite3 command line tool
  die("ERROR: sqlite3 command not found in path\n") unless `which sqlite3` =~ /\/sqlite3/;

  my $config = $self->{config};

  die("ERROR: DB file $db already exists - remove and re-run to overwrite\n") if -e $db;

  print STDERR "## LocalID plugin\n # Creating database of variant IDs - this may take some time\n" unless $config->{quiet};

  # three-argument open so that the file name can never be read as a mode
  my $tmpfile = "$db.tmp$$";
  open(my $tmp_handle, '>', $tmpfile) or die "ERROR: Unable to write to $tmpfile\n";

  if(my $cache_dir = $param_hash->{from_cache}) {

    # attempt to interpret cache dir from command line opts
    if($cache_dir eq '1') {
      my $version =
        $config->{cache_version} ||
        $config->{db_version} ||
        $Bio::EnsEMBL::VEP::Constants::VEP_VERSION ||
        ($config->{reg} ? $config->{reg}->software_version : undef);
      my $assembly = $config->{assembly};
      my $dir      = $config->{dir_cache} || $config->{dir};

      $cache_dir = "$dir/$species/${version}_$assembly";
    }

    print STDERR " # attempting to create from $cache_dir\n" unless $config->{quiet};
    $self->_tmp_file_from_cache($cache_dir, $tmp_handle);
  }
  else {
    print STDERR " # attempting to create from variation database for $species\n" unless $config->{quiet};
    $self->_tmp_file_from_var_db($species, $tmp_handle);
  }

  # buffered write errors (e.g. disk full) only surface at close; without
  # this check a truncated dump would be loaded as if it were complete
  close($tmp_handle) or die "ERROR: Failed to finish writing $tmpfile: $!\n";

  # create database
  my $dbh = DBI->connect("dbi:SQLite:dbname=$db","","")
    or die("ERROR: Unable to open SQLite database $db\n");
  $dbh->do("CREATE TABLE ids(id, chr, start, end, alleles, strand)")
    or die("ERROR: Unable to create table ids in $db\n");

  # load tmp file into table
  print STDERR " # loading database\n" unless $config->{quiet};

  # quote both arguments for the shell so that spaces or metacharacters in
  # the database path cannot split or alter the command
  # NOTE(review): sqlite3 tokenises the .import line itself, so a $tmpfile
  # containing whitespace is presumably still unsupported - confirm
  my $shell_quote = sub {
    my $arg = shift;
    $arg =~ s/'/'\\''/g;
    return "'$arg'";
  };
  my $cmd = join(' ', 'sqlite3', $shell_quote->($db), $shell_quote->(".import $tmpfile ids"));

  # any output at all is treated as a failure, as is a non-zero exit status
  my $import_output = `$cmd 2>&1`;
  die("ERROR: Failed to import $tmpfile to $db\n") if $import_output || $? != 0;
  unlink($tmpfile);

  # index
  print STDERR " # indexing database\n" unless $config->{quiet};
  $dbh->do("CREATE INDEX id_idx ON ids(id)")
    or die("ERROR: Unable to index table ids in $db\n");

  $dbh->disconnect;

  print STDERR " # successfully built database $db\n" unless $config->{quiet};
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
169
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Dump variant rows from a VEP cache directory to the temporary load file.
#
# Arguments:
#   $cache_dir  - cache directory holding info.txt and one sub-directory
#                 per chromosome
#   $tmp_handle - open write handle; receives one "|"-separated line per
#                 variant: id, chr, start, end, alleles, strand
#
# The column layout of the variant files is read from the "variation_cols"
# line of info.txt. Per chromosome directory, files matching "all_vars" are
# used when present, otherwise every other file with "var" in its name;
# index files (.tbi/.csi) are ignored.
#
# Dies if the cache directory, info.txt or a chromosome directory cannot be
# read, or if info.txt does not define the variation_name and start columns.
sub _tmp_file_from_cache {
  my ($self, $cache_dir, $tmp_handle) = @_;

  die("ERROR: Cache dir $cache_dir not found or not a directory\n") unless -d $cache_dir;

  # read info
  open(my $info_fh, '<', "$cache_dir/info.txt") or die("ERROR: No info.txt file found in $cache_dir\n");

  my %cols;
  while(my $info_line = <$info_fh>) {
    next unless $info_line =~ /^variation_cols/;
    chomp $info_line;
    my $col_list = (split("\t", $info_line))[1];
    my @tmp_cols = split(',', defined($col_list) ? $col_list : '');
    $cols{$tmp_cols[$_]} = $_ for 0..$#tmp_cols;
    last;
  }
  close $info_fh;

  # without these two columns no usable row can be written; an undefined
  # index would otherwise silently read column 0 instead
  foreach my $required (qw(variation_name start)) {
    die("ERROR: No $required column listed under variation_cols in $cache_dir/info.txt\n") unless defined $cols{$required};
  }

  # resolve the column positions once rather than for every line
  my ($name_idx, $start_idx, $end_idx, $allele_idx, $strand_idx) =
    @cols{qw(variation_name start end allele_string strand)};

  # get all chromosome dirs
  opendir(my $cache_dh, $cache_dir) or die("ERROR: Could not read dir $cache_dir\n");
  my @chrs = grep {-d $cache_dir.'/'.$_ && !/^\./} readdir $cache_dh;
  closedir $cache_dh;

  foreach my $chr(@chrs) {
    # an unreadable chromosome directory must not be skipped silently, the
    # resulting database would lack every variant on that chromosome
    opendir(my $chr_dh, "$cache_dir/$chr") or die("ERROR: Could not read dir $cache_dir/$chr\n");
    my @all_files = grep {/var/ && !/\.(tb|cs)i$/} readdir $chr_dh;
    closedir $chr_dh;

    my @files = grep {/all_vars/} @all_files;
    @files = @all_files unless @files;

    foreach my $file(@files) {
      my $fh = get_compressed_filehandle($cache_dir.'/'.$chr.'/'.$file, 1);

      my $delim;

      while(my $line = <$fh>) {
        # the first line of each file decides between tab and space delimiters
        unless($delim) {
          $delim = $line =~ /\t/ ? "\t" : " ";
        }

        chomp $line;

        # "." marks an empty field
        my @split = map {($_ || '') eq '.' ? undef : $_} split($delim, $line);

        my $name    = $split[$name_idx];
        my $start   = $split[$start_idx];
        my $end     = (defined($end_idx)    && $split[$end_idx])    || $start;
        my $alleles = (defined($allele_idx) && $split[$allele_idx]) || '';
        my $strand  = (defined($strand_idx) && $split[$strand_idx]) || 1;

        # id, chr, start, end, alleles, strand
        print $tmp_handle join("|",
          map {defined($_) ? $_ : ''} ($name, $chr, $start, $end, $alleles, $strand)
        )."\n";
      }

      close $fh;
    }
  }
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
230
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Dump variant rows from the species' variation database to the temporary
# load file.
#
# Arguments:
#   $species    - species name used to fetch the variation adaptor from the
#                 registry
#   $tmp_handle - open write handle; receives one "|"-separated line per
#                 row: name, seq region name, start, end, allele string,
#                 strand
#
# Two passes are made: first the names in the variation table, then the
# names in the variation_synonym table.
sub _tmp_file_from_var_db {
  my ($self, $species, $tmp_handle) = @_;
  my $config = $self->{config};

  my $var_dbc = Bio::EnsEMBL::Registry->get_adaptor($species, 'variation', 'variation')->db->dbc;

  # runs the location query with names taken from the given table and
  # writes every row to the load file
  my $dump_names_from = sub {
    my $name_table = shift;

    my $sth = $var_dbc->prepare(qq{
      SELECT v.name, s.name, vf.seq_region_start, vf.seq_region_end, vf.allele_string, vf.seq_region_strand
      FROM $name_table v, variation_feature vf, seq_region s
      WHERE v.variation_id = vf.variation_id
      AND vf.seq_region_id = s.seq_region_id
    }, {mysql_use_result => 1});

    my ($id, $chr, $start, $end, $alleles, $strand);
    $sth->execute();
    $sth->bind_columns(\$id, \$chr, \$start, \$end, \$alleles, \$strand);

    while($sth->fetch()) {
      print $tmp_handle join("|", ($id, $chr, $start, $end, $alleles, $strand))."\n";
    }

    $sth->finish();
  };

  $dump_names_from->('variation');

  # do synonyms
  print STDERR "Processing synonyms\n" unless $config->{quiet};
  $dump_names_from->('variation_synonym');
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
264
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Per-line plugin hook. Always returns a reference to a new, empty hash.
sub run {
  my %no_annotations = ();
  return \%no_annotations;
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
268
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
269 1;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
270
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
271
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
272
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
273
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
274 ###########################################
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
275 ### Redefine methods in existing module ###
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
276 ###########################################
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
277
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
278 package Bio::EnsEMBL::VEP::Parser::ID;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
279
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
280 no warnings qw(redefine);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Constructor; accepts a class name or an existing object as invocant and
# hands all remaining arguments to the parent constructor unchanged.
sub new {
  my ($caller, @ctor_args) = @_;

  my $class = ref($caller) ? ref($caller) : $caller;

  my $parser = $class->SUPER::new(@ctor_args);

  return $parser;
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
289
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Read the next variant ID from the input and turn it into
# VariationFeature objects using the local sqlite3 ID database.
#
# Returns an empty array reference when the input has no further record;
# otherwise returns whatever post_process_vfs() yields for the list of
# features built from the rows stored under that ID (one per row).
sub create_VariationFeatures {
  my ($self) = @_;

  my $parser = $self->parser;
  $parser->next();

  $self->skip_empty_lines();

  if(!$parser->{record}) {
    return [];
  }

  $self->line_number($self->line_number + 1);

  # remove whitespace
  my $variant_id = $parser->get_value;
  $variant_id =~ s/\s+//g;

  # the statement handle and the adaptor are created once and then reused
  my $id_dbh = $self->id_db;
  $self->{_id_sth} ||= $id_dbh->prepare("SELECT chr, start, end, alleles, strand FROM ids WHERE id = ?");
  $self->{_var_ad} ||= $self->get_adaptor('variation', 'VariationFeature');

  my $lookup_sth = $self->{_id_sth};
  my $vf_adaptor = $self->{_var_ad};

  my ($chr, $start, $end, $alleles, $strand);
  $lookup_sth->execute($variant_id);
  $lookup_sth->bind_columns(\$chr, \$start, \$end, \$alleles, \$strand);

  my @vfs;
  while($lookup_sth->fetch) {
    push @vfs, Bio::EnsEMBL::Variation::VariationFeature->new_fast({
      start          => $start,
      end            => $end,
      allele_string  => $alleles,
      strand         => $strand,
      map_weight     => 1,
      adaptor        => $vf_adaptor,
      variation_name => $variant_id,
      chr            => $chr,
    });
  }

  $lookup_sth->finish();

  return $self->post_process_vfs(\@vfs);
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
331
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Return the DBI handle for the sqlite3 ID database, connecting on first
# use and reusing the same handle afterwards.
#
# The file location is read from the "_localid_db_file" config parameter.
# Throws if that parameter is not set, if the file does not exist, or if
# the connection cannot be established. A failed connection is never
# cached, so the problem is reported here instead of surfacing later as a
# method call on an undefined handle.
sub id_db {
  my $self = shift;

  unless($self->{_id_db}) {
    my $db = $self->config->{_params}->{_localid_db_file};

    throw("ERROR: ID database not defined or detected - possible plugin compile failure\n") unless $db;
    throw("ERROR: ID database file $db not found - you need to download or create it first, see documentation in plugin file\n") unless -e $db;

    my $dbh = DBI->connect("dbi:SQLite:dbname=$db","","");
    throw("ERROR: Unable to connect to ID database file $db\n") unless $dbh;

    $self->{_id_db} = $dbh;
  }

  return $self->{_id_db};
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
343
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
344 1;