annotate variant_effect_predictor/variant_effect_predictor.pl @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 #!/usr/bin/perl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 Variant Effect Predictor - a script to predict the consequences of genomic variants
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 http://www.ensembl.org/info/docs/variation/vep/vep_script.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 Version 2.6
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 by Will McLaren (wm2@ebi.ac.uk)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 use Getopt::Long;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 use FileHandle;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 use FindBin qw($Bin);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 use lib $Bin;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 use Bio::EnsEMBL::Variation::Utils::Sequence qw(unambiguity_code);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 use Bio::EnsEMBL::Variation::Utils::VEP qw(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 parse_line
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 vf_to_consequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 validate_vf
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 convert_to_vcf
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 load_dumped_adaptor_cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 dump_adaptor_cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 get_all_consequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 get_slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 build_full_cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 read_cache_info
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 get_time
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 debug
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 @OUTPUT_COLS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 @REG_FEAT_TYPES
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 %FILTER_SHORTCUTS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# global vars
my $VERSION = '2.6';


# Headers that would normally go in the extra field, keyed on the config
# parameter used to turn each one on. Each value is an array ref because a
# single switch may enable more than one header (e.g. hgvs, numbers).
my %extra_headers = (
    protein         => [qw(ENSP)],
    canonical       => [qw(CANONICAL)],
    ccds            => [qw(CCDS)],
    hgvs            => [qw(HGVSc HGVSp)],
    hgnc            => [qw(HGNC)],
    sift            => [qw(SIFT)],
    polyphen        => [qw(PolyPhen)],
    numbers         => [qw(EXON INTRON)],
    domains         => [qw(DOMAINS)],
    regulatory      => [qw(MOTIF_NAME MOTIF_POS HIGH_INF_POS MOTIF_SCORE_CHANGE)],
    cell_type       => [qw(CELL_TYPE)],
    individual      => [qw(IND)],
    xref_refseq     => [qw(RefSeq)],
    check_svs       => [qw(SV)],
    check_frequency => [qw(FREQS)],
    gmaf            => [qw(GMAF)],
    user            => [qw(DISTANCE)],
);

# Human-readable description for each header named in %extra_headers.
# NOTE(review): "identifer" (ENSP, DOMAINS) is misspelt; the text is left
# untouched here because it is presumably written verbatim to output headers -
# confirm against the code that consumes this hash before correcting it.
my %extra_descs = (
    CANONICAL          => 'Indicates if transcript is canonical for this gene',
    CCDS               => 'Indicates if transcript is a CCDS transcript',
    HGNC               => 'HGNC gene identifier',
    ENSP               => 'Ensembl protein identifer',
    HGVSc              => 'HGVS coding sequence name',
    HGVSp              => 'HGVS protein sequence name',
    SIFT               => 'SIFT prediction',
    PolyPhen           => 'PolyPhen prediction',
    EXON               => 'Exon number(s) / total',
    INTRON             => 'Intron number(s) / total',
    DOMAINS            => 'The source and identifer of any overlapping protein domains',
    MOTIF_NAME         => 'The source and identifier of a transcription factor binding profile (TFBP) aligned at this position',
    MOTIF_POS          => 'The relative position of the variation in the aligned TFBP',
    HIGH_INF_POS       => 'A flag indicating if the variant falls in a high information position of the TFBP',
    MOTIF_SCORE_CHANGE => 'The difference in motif score of the reference and variant sequences for the TFBP',
    CELL_TYPE          => 'List of cell types and classifications for regulatory feature',
    IND                => 'Individual name',
    SV                 => 'IDs of overlapping structural variants',
    FREQS              => 'Frequencies of overlapping variants used in filtering',
    GMAF               => 'Minor allele and frequency of existing variation in 1000 Genomes Phase 1',
    DISTANCE           => 'Shortest distance from variant to transcript',
);

# set output autoflush for progress bars
$| = 1;

# configure from command line opts; the sub receives the argument count
my $config = configure(scalar @ARGV);

# run the main sub routine
main($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# this is the main sub-routine - it needs the configured $config hash
#
# Reads the input from $config->{in_file_handle} one line at a time, turns
# each data line into variation features via parse_line(), validates them and
# either converts them (convert mode), buffers them for batch annotation
# (whole_genome mode) or annotates and prints them one at a time.
#
# Config keys read here: quiet, verbose, in_file_handle, vcf, individual,
#   cache, whole_genome, check_ref, convert, buffer_size, filter,
#   filter_count, count_queries.
# Config keys written here: start_time, last_time, line_number, headers,
#   ind_cols, out_file_handle.
#
# Dies if --individual was requested and the VCF #CHROM header carries no
# sample columns or lacks one of the requested sample names.
sub main {
    my $config = shift;

    debug("Starting...") unless defined $config->{quiet};

    $config->{start_time} = time();
    $config->{last_time}  = time();

    # create a hash to hold slices so we don't get the same one twice
    my %slice_cache;

    my @vfs;
    my $vf_count       = 0;    # variants since the last buffer flush
    my $total_vf_count = 0;    # variants seen over the whole run
    my $in_file_handle = $config->{in_file_handle};

    # initialize line number in config
    $config->{line_number} = 0;

    # read the file; the line is held in a lexical so that none of the
    # subs called below can clobber it through the global $_
    LINE:
    while (my $line = <$in_file_handle>) {
        chomp $line;

        $config->{line_number}++;

        # header line?
        if ($line =~ /^\#/) {

            # retain header lines if we are outputting VCF
            push @{ $config->{headers} }, $line if defined $config->{vcf};

            # line with sample labels in VCF
            if (defined($config->{individual}) && $line =~ /^#CHROM/) {
                my @split = split /\s+/, $line;

                # the first nine columns are fixed; samples start at index 9
                die("ERROR: No individual data found in VCF\n") if scalar @split <= 9;

                # map each sample name to its column index
                my %ind_cols = map { ($split[$_] => $_) } 9 .. $#split;

                my $wanted   = $config->{individual};
                my $want_all = scalar(@$wanted) == 1 && $wanted->[0] =~ /^all$/i;

                if ($want_all) {
                    $config->{ind_cols} = \%ind_cols;
                }
                else {
                    my %new_ind_cols;

                    # check we have specified individual(s)
                    foreach my $ind (@$wanted) {
                        die("ERROR: Individual named \"$ind\" not found in VCF\n") unless defined $ind_cols{$ind};
                        $new_ind_cols{$ind} = $ind_cols{$ind};
                    }

                    $config->{ind_cols} = \%new_ind_cols;
                }
            }

            next LINE;
        }

        # configure output file (only on the first data line, so that any
        # header lines collected above are already in $config)
        $config->{out_file_handle} ||= get_out_file_handle($config);

        # some lines (pileup) may actually parse out into more than one variant
        VARIANT:
        foreach my $vf (@{ parse_line($config, $line) }) {

            $vf->{_line} = $line;

            # now get the slice
            if (!defined $vf->{slice}) {
                my $slice;

                # don't get slices if we're using cache
                # we can steal them from transcript objects later
                my $need_slice =
                       (!defined($config->{cache}) && !defined($config->{whole_genome}))
                    || defined($config->{check_ref})
                    || defined($config->{convert});

                if ($need_slice) {

                    # reuse the slice if we have fetched it already
                    $slice = $slice_cache{ $vf->{chr} };

                    # if not, create a new one and remember it
                    if (!defined $slice) {
                        $slice = get_slice($config, $vf->{chr});

                        # if failed, warn and skip this variant
                        if (!defined $slice) {
                            warn("WARNING: Could not fetch slice named ".$vf->{chr}." on line ".$config->{line_number}."\n") unless defined $config->{quiet};
                            next VARIANT;
                        }

                        $slice_cache{ $vf->{chr} } = $slice;
                    }
                }

                $vf->{slice} = $slice;
            }

            # validate the VF
            next VARIANT unless validate_vf($config, $vf);

            # make a name if one doesn't exist
            $vf->{variation_name} ||= $vf->{chr}.'_'.$vf->{start}.'_'.($vf->{allele_string} || $vf->{class_SO_term});

            # jump out to convert here
            if (defined $config->{convert}) {
                convert_vf($config, $vf);
                next VARIANT;
            }

            if (defined $config->{whole_genome}) {
                push @vfs, $vf;
                $vf_count++;
                $total_vf_count++;

                # buffer full: annotate and print it, then start a new one
                if ($vf_count == $config->{buffer_size}) {
                    _process_vf_buffer($config, \@vfs, $vf_count, $total_vf_count);

                    @vfs      = ();
                    $vf_count = 0;
                }
            }
            else {
                print_line($config, $_) foreach @{ vf_to_consequences($config, $vf) };
                $vf_count++;
                $total_vf_count++;

                # progress message every ten variants in verbose mode
                debug("Processed $vf_count variants") if $vf_count =~ /0$/ && defined($config->{verbose});
            }
        }
    }

    if (defined $config->{whole_genome}) {

        # finish off the rest of the buffer
        _process_vf_buffer($config, \@vfs, $vf_count, $total_vf_count) if scalar @vfs;

        # Report the filter summary whenever at least one variant was read.
        # It used to live inside the leftover-buffer branch and was therefore
        # lost when the variant count was an exact multiple of buffer_size.
        debug($config->{filter_count}, "/$total_vf_count variants remain after filtering")
            if $total_vf_count && defined($config->{filter}) && !defined($config->{quiet});
    }

    debug("Executed ", defined($Bio::EnsEMBL::DBSQL::StatementHandle::count_queries) ? $Bio::EnsEMBL::DBSQL::StatementHandle::count_queries : 'unknown number of', " SQL statements") if defined($config->{count_queries}) && !defined($config->{quiet});

    debug("Finished!") unless defined $config->{quiet};
}

# Annotates and prints one buffer of variants (whole-genome mode), then logs
# throughput figures unless $config->{quiet} is set.
#
#   $config         - the script configuration hash ref
#   $vfs            - array ref of buffered variation features
#   $vf_count       - number of variants in this buffer
#   $total_vf_count - number of variants read so far in the whole run
#
# Updates $config->{last_time}; the caller is responsible for emptying the
# buffer and resetting its per-buffer counter afterwards.
sub _process_vf_buffer {
    my ($config, $vfs, $vf_count, $total_vf_count) = @_;

    debug("Read $vf_count variants into buffer") unless defined($config->{quiet});

    print_line($config, $_) foreach @{ get_all_consequences($config, $vfs) };

    # calculate stats; "|| 1" avoids dividing by zero within the same second
    my $total_rate = sprintf("%.0f vars/sec", $total_vf_count / ((time() - $config->{start_time}) || 1));
    my $rate       = sprintf("%.0f vars/sec", $vf_count / ((time() - $config->{last_time}) || 1));
    $config->{last_time} = time();

    debug("Processed $total_vf_count total variants ($rate, $total_rate total)") unless defined($config->{quiet});

    return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 # sets up configuration hash that is used throughout the script
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 sub configure {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 my $args = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 my $config = {};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 GetOptions(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 $config,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 'help', # displays help message
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 # input options,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 'config=s', # config file name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 'input_file|i=s', # input file name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 'format=s', # input file format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302 # DB options
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 'species=s', # species e.g. human, homo_sapiens
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 'registry=s', # registry file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 'host=s', # database host
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 'port=s', # database port
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 'user=s', # database user name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 'password=s', # database password
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 'db_version=i', # Ensembl database version to use e.g. 62
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 'genomes', # automatically sets DB params for e!Genomes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 'refseq', # use otherfeatures RefSeq DB instead of Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 #'no_disconnect', # disables disconnect_when_inactive
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 # runtime options
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 'most_severe', # only return most severe consequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 'summary', # only return one line per variation with all consquence types
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 'per_gene', # only return most severe per gene
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 'buffer_size=i', # number of variations to read in before analysis
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 'chunk_size=s', # size in bases of "chunks" used in internal hash structure
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 'failed=i', # include failed variations when finding existing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 'no_whole_genome', # disables now default whole-genome mode
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 'whole_genome', # proxy for whole genome mode - now just warns user
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 'gp', # read coords from GP part of INFO column in VCF (probably only relevant to 1KG)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 'chr=s', # analyse only these chromosomes, e.g. 1-5,10,MT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 'check_ref', # check supplied reference allele against DB
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 'check_existing', # find existing co-located variations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 'check_svs', # find overlapping structural variations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 'check_alleles', # only attribute co-located if alleles are the same
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 'check_frequency', # enable frequency checking
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 'gmaf', # add global MAF of existing var
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 'freq_filter=s', # exclude or include
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 'freq_freq=f', # frequency to filter on
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 'freq_gt_lt=s', # gt or lt (greater than or less than)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 'freq_pop=s', # population to filter on
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 'allow_non_variant', # allow non-variant VCF lines through
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 'individual=s', # give results by genotype for individuals
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 'phased', # force VCF genotypes to be interpreted as phased
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 'fork=i', # fork into N processes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 # verbosity options
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 'verbose|v', # print out a bit more info while running
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 'quiet', # print nothing to STDOUT (unless using -o stdout)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 'no_progress', # don't display progress bars
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 # output options
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 'everything|e', # switch on EVERYTHING :-)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 'output_file|o=s', # output file name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 'force_overwrite', # force overwrite of output file if already exists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 'terms|t=s', # consequence terms to use e.g. NCBI, SO
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 'coding_only', # only return results for consequences in coding regions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351 'canonical', # indicates if transcript is canonical
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 'ccds', # output CCDS identifer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353 'xref_refseq', # output refseq mrna xref
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 'protein', # add e! protein ID to extra column
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 'hgnc', # add HGNC gene ID to extra column
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356 'hgvs', # add HGVS names to extra column
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 'sift=s', # SIFT predictions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358 'polyphen=s', # PolyPhen predictions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 'condel=s', # Condel predictions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 'regulatory', # enable regulatory stuff
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361 'cell_type=s', # filter cell types for regfeats
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 'convert=s', # convert input to another format (doesn't run VEP)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363 'filter=s', # run in filtering mode
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364 'no_intergenic', # don't print out INTERGENIC consequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365 'gvf', # produce gvf output
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 'vcf', # produce vcf output
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 'original', # produce output in input format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 'no_consequences', # don't calculate consequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 'lrg', # enable LRG-based features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 'fields=s', # define your own output fields
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 'domains', # output overlapping protein features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 'numbers', # include exon and intron numbers
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374 # cache stuff
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 'cache', # use cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376 'write_cache', # enables writing to the cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 'build=s', # builds cache from DB from scratch; arg is either all (all top-level seqs) or a list of chrs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378 'no_adaptor_cache', # don't write adaptor cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 'prefetch', # prefetch exons, translation, introns, codon table etc for each transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 'strip', # strips adaptors etc from objects before caching them
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 'rebuild=s', # rebuilds cache by reading in existing then redumping - probably don't need to use this any more
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382 'dir=s', # dir where cache is found (defaults to $HOME/.vep/)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 'cache_region_size=i', # size of region in bases for each cache file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384 'no_slice_cache', # tell API not to cache features on slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385 'standalone', # standalone renamed offline
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 'offline', # offline mode uses minimal set of modules installed in same dir, no DB connection
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387 'skip_db_check', # don't compare DB parameters with cached
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388 'compress=s', # by default we use zcat to decompress; user may want to specify gzcat or "gzip -dc"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 'custom=s' => ($config->{custom} ||= []), # specify custom tabixed bgzipped file with annotation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 'tmpdir=s', # tmp dir used for BigWig retrieval
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391 'plugin=s' => ($config->{plugin} ||= []), # specify a method in a module in the plugins directory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393 # debug
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 'cluck', # these two need some mods to Bio::EnsEMBL::DBSQL::StatementHandle to work. Clucks callback trace and SQL
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395 'count_queries', # counts SQL queries executed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396 'admin', # allows me to build off public hosts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 'debug', # print out debug info
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398 'tabix', # experimental use tabix cache files
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399 ) or die "ERROR: Failed to parse command-line flags\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401 # print usage message if requested or no args supplied
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402 if(defined($config->{help}) || !$args) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403 &usage;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 exit(0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407 # dir is where the cache and plugins live
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408 $config->{dir} ||= join '/', ($ENV{'HOME'}, '.vep');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410 # dir gets set to the specific cache directory later on, so take a copy to use
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411 # when configuring plugins
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 $config->{toplevel_dir} = $config->{dir};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 # ini file?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416 my $ini_file = $config->{dir}.'/vep.ini';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418 if(-e $ini_file) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419 read_config_from_file($config, $ini_file);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422 # config file?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423 if(defined $config->{config}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424 read_config_from_file($config, $config->{config});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427 # can't be both quiet and verbose
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428 die "ERROR: Can't be both quiet and verbose!\n" if defined($config->{quiet}) && defined($config->{verbose});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430 # check forking
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431 if(defined($config->{fork})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432 die "ERROR: Fork number must be greater than 1\n" if $config->{fork} <= 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434 # check we can use MIME::Base64
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435 eval q{ use MIME::Base64; };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 if($@) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438 debug("WARNING: Unable to load MIME::Base64, forking disabled") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 delete $config->{fork};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443 # try a practice fork
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444 my $pid = fork;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 if(!defined($pid)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447 debug("WARNING: Fork test failed, forking disabled") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448 delete $config->{fork};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 elsif($pid) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451 waitpid($pid, 0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453 elsif($pid == 0) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454 exit(0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459 # check file format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460 if(defined $config->{format}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461 die "ERROR: Unrecognised input format specified \"".$config->{format}."\"\n" unless $config->{format} =~ /^(pileup|vcf|guess|hgvs|ensembl|id|vep)$/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464 # check convert format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465 if(defined $config->{convert}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466 die "ERROR: Unrecognised output format for conversion specified \"".$config->{convert}."\"\n" unless $config->{convert} =~ /vcf|ensembl|pileup|hgvs/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469 # check if user still using --standalone
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470 if(defined $config->{standalone}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471 die "ERROR: --standalone replaced by --offline\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474 # connection settings for Ensembl Genomes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475 if($config->{genomes}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476 $config->{host} ||= 'mysql.ebi.ac.uk';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477 $config->{port} ||= 4157;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480 # connection settings for main Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482 $config->{species} ||= "homo_sapiens";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483 $config->{host} ||= 'ensembldb.ensembl.org';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484 $config->{port} ||= 5306;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 # refseq or core?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488 if(defined($config->{refseq})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489 $config->{core_type} = 'otherfeatures';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
492 $config->{core_type} = 'core';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
495 # output term
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
496 if(defined $config->{terms}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
497 die "ERROR: Unrecognised consequence term type specified \"".$config->{terms}."\" - must be one of ensembl, so, ncbi\n" unless $config->{terms} =~ /ensembl|display|so|ncbi/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
498 if($config->{terms} =~ /ensembl|display/i) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499 $config->{terms} = 'display';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502 $config->{terms} = uc($config->{terms});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506 # everything?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507 if(defined($config->{everything})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508 my %everything = (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509 sift => 'b',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510 polyphen => 'b',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511 ccds => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512 hgvs => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513 hgnc => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514 numbers => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515 domains => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516 regulatory => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517 canonical => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518 protein => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519 gmaf => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522 $config->{$_} = $everything{$_} for keys %everything;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524 # these ones won't work with offline
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525 delete $config->{hgvs} if defined($config->{offline});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
526 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
527
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
528 # check nsSNP tools
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
529 foreach my $tool(grep {defined $config->{lc($_)}} qw(SIFT PolyPhen Condel)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
530 die "ERROR: Unrecognised option for $tool \"", $config->{lc($tool)}, "\" - must be one of p (prediction), s (score) or b (both)\n" unless $config->{lc($tool)} =~ /^(s|p|b)/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
531
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532 die "ERROR: $tool not available for this species\n" unless $config->{species} =~ /human|homo/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534 die "ERROR: $tool functionality is now available as a VEP Plugin - see http://www.ensembl.org/info/docs/variation/vep/vep_script.html#plugins\n" if $tool eq 'Condel';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537 # force quiet if outputting to STDOUT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538 if(defined($config->{output_file}) && $config->{output_file} =~ /stdout/i) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
539 delete $config->{verbose} if defined($config->{verbose});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
540 $config->{quiet} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
541 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
542
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
543 # individual(s) specified?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
544 if(defined($config->{individual})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
545 $config->{individual} = [split /\,/, $config->{individual}];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
546
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
547 # force allow_non_variant
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
548 $config->{allow_non_variant} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
549 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
550
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
551 # summarise options if verbose
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
552 if(defined $config->{verbose}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
553 my $header =<<INTRO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
554 #----------------------------------#
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
555 # ENSEMBL VARIANT EFFECT PREDICTOR #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
556 #----------------------------------#
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
557
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
558 version $VERSION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
559
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
560 By Will McLaren (wm2\@ebi.ac.uk)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
561
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
562 Configuration options:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
563
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
564 INTRO
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
565 print $header;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
566
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
567 my $max_length = (sort {$a <=> $b} map {length($_)} keys %$config)[-1];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
568
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
569 foreach my $key(sort keys %$config) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
570 next if ref($config->{$key}) eq 'ARRAY' && scalar @{$config->{$key}} == 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
571 print $key.(' ' x (($max_length - length($key)) + 4)).(ref($config->{$key}) eq 'ARRAY' ? join "\t", @{$config->{$key}} : $config->{$key})."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
572 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
573
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
574 print "\n".("-" x 20)."\n\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
575 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
576
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
577 # check custom annotations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
578 for my $i(0..$#{$config->{custom}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
579 my $custom = $config->{custom}->[$i];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
580
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
581 my ($filepath, $shortname, $format, $type, $coords) = split /\,/, $custom;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
582 $type ||= 'exact';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
583 $format ||= 'bed';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
584 $coords ||= 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
585
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
586 # check type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
587 die "ERROR: Type $type for custom annotation file $filepath is not allowed (must be one of \"exact\", \"overlap\")\n" unless $type =~ /exact|overlap/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
588
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
589 # check format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
590 die "ERROR: Format $format for custom annotation file $filepath is not allowed (must be one of \"bed\", \"vcf\", \"gtf\", \"gff\", \"bigwig\")\n" unless $format =~ /bed|vcf|gff|gtf|bigwig/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
591
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
592 # bigwig format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
593 if($format eq 'bigwig') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
594 # check for bigWigToWig
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
595 die "ERROR: bigWigToWig does not seem to be in your path - this is required to use bigwig format custom annotations\n" unless `which bigWigToWig 2>&1` =~ /bigWigToWig$/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
596 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
597
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
598 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
599 # check for tabix
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
600 die "ERROR: tabix does not seem to be in your path - this is required to use custom annotations\n" unless `which tabix 2>&1` =~ /tabix$/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
601
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
602 # remote files?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
603 if($filepath =~ /tp\:\/\//) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
604 my $remote_test = `tabix $filepath 1:1-1 2>&1`;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
605 if($remote_test =~ /fail/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
606 die "$remote_test\nERROR: Could not find file or index file for remote annotation file $filepath\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
607 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
608 elsif($remote_test =~ /get_local_version/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
609 debug("Downloaded tabix index file for remote annotation file $filepath") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
610 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
611 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
612
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
613 # check files exist
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
614 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
615 die "ERROR: Custom annotation file $filepath not found\n" unless -e $filepath;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
616 die "ERROR: Tabix index file $filepath\.tbi not found - perhaps you need to create it first?\n" unless -e $filepath.'.tbi';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
617 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
618 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
619
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
620 $config->{custom}->[$i] = {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
621 'file' => $filepath,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
622 'name' => $shortname || 'CUSTOM'.($i + 1),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
623 'type' => $type,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
624 'format' => $format,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
625 'coords' => $coords,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
626 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
627 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
628
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
629 # check if using filter and original
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
630 die "ERROR: You must also provide output filters using --filter to use --original\n" if defined($config->{original}) && !defined($config->{filter});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
631
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
632 # filter by consequence?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
633 if(defined($config->{filter})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
634
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
635 my %filters = map {$_ => 1} split /\,/, $config->{filter};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
636
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
637 # add in shortcuts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
638 foreach my $filter(keys %filters) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
639 my $value = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
640 if($filter =~ /^no_/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
641 delete $filters{$filter};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
642 $filter =~ s/^no_//g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
643 $value = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
644 $filters{$filter} = $value;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
645 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
646
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
647 if(defined($FILTER_SHORTCUTS{$filter})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
648 delete $filters{$filter};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
649 $filters{$_} = $value for keys %{$FILTER_SHORTCUTS{$filter}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
650 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
651 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
652
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
653 $config->{filter} = \%filters;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
654
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
655 $config->{filter_count} = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
656 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
657
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
658 # set defaults
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
659 $config->{user} ||= 'anonymous';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
660 $config->{buffer_size} ||= 5000;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
661 $config->{chunk_size} ||= '50kb';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
662 $config->{output_file} ||= "variant_effect_output.txt";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
663 $config->{tmpdir} ||= '/tmp';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
664 $config->{format} ||= 'guess';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
665 $config->{terms} ||= 'SO';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
666 $config->{cache_region_size} ||= 1000000;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
667 $config->{compress} ||= 'zcat';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
668
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
669 # regulatory has to be on for cell_type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
670 if(defined($config->{cell_type})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
671 $config->{regulatory} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
672 $config->{cell_type} = [split /\,/, $config->{cell_type}] if defined($config->{cell_type});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
673 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
674
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
675 # can't use a whole bunch of options with most_severe
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
676 if(defined($config->{most_severe})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
677 foreach my $flag(qw(no_intergenic protein hgnc sift polyphen coding_only ccds canonical xref_refseq numbers domains summary)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
678 die "ERROR: --most_severe is not compatible with --$flag\n" if defined($config->{$flag});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
679 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
680 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
681
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
682 # can't use a whole bunch of options with summary
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
683 if(defined($config->{summary})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
684 foreach my $flag(qw(no_intergenic protein hgnc sift polyphen coding_only ccds canonical xref_refseq numbers domains most_severe)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
685 die "ERROR: --summary is not compatible with --$flag\n" if defined($config->{$flag});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
686 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
687 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
688
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
689 # frequency filtering
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
690 if(defined($config->{check_frequency})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
691 foreach my $flag(qw(freq_freq freq_filter freq_pop freq_gt_lt)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
692 die "ERROR: To use --check_frequency you must also specify flag --$flag\n" unless defined $config->{$flag};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
693 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
694
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
695 # need to set check_existing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
696 $config->{check_existing} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
697 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
698
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
699 $config->{check_existing} = 1 if defined $config->{check_alleles} || defined $config->{gmaf};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
700
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
701 # warn users still using whole_genome flag
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
702 if(defined($config->{whole_genome})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
703 debug("INFO: Whole-genome mode is now the default run-mode for the script. To disable it, use --no_whole_genome") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
704 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
705
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
706 $config->{whole_genome} = 1 unless defined $config->{no_whole_genome};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
707 $config->{failed} = 0 unless defined $config->{failed};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
708 $config->{chunk_size} =~ s/mb?/000000/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
709 $config->{chunk_size} =~ s/kb?/000/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
710 $config->{cache_region_size} =~ s/mb?/000000/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
711 $config->{cache_region_size} =~ s/kb?/000/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
712
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
713 # cluck and display executed SQL?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
714 $Bio::EnsEMBL::DBSQL::StatementHandle::cluck = 1 if defined($config->{cluck});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
715
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
716 # offline needs cache, can't use HGVS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
717 if(defined($config->{offline})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
718 $config->{cache} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
719
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
720 #die("ERROR: Cannot generate HGVS coordinates in offline mode\n") if defined($config->{hgvs});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
721 die("ERROR: Cannot use HGVS as input in offline mode\n") if $config->{format} eq 'hgvs';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
722 die("ERROR: Cannot use variant identifiers as input in offline mode\n") if $config->{format} eq 'id';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
723 die("ERROR: Cannot do frequency filtering in offline mode\n") if defined($config->{check_frequency});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
724 die("ERROR: Cannot retrieve overlapping structural variants in offline mode\n") if defined($config->{check_sv});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
725 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
726
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
727 # write_cache needs cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
728 $config->{cache} = 1 if defined $config->{write_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
729
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
730 # no_slice_cache, prefetch and whole_genome have to be on to use cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
731 if(defined($config->{cache})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
732 $config->{prefetch} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
733 $config->{no_slice_cache} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
734 $config->{whole_genome} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
735 $config->{strip} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
736 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
737
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
738 $config->{build} = $config->{rebuild} if defined($config->{rebuild});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
739
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
740 # force options for full build
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
741 if(defined($config->{build})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
742 $config->{prefetch} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
743 $config->{hgnc} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
744 $config->{no_slice_cache} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
745 $config->{cache} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
746 $config->{strip} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
747 $config->{write_cache} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
748 $config->{cell_type} = 1 if defined($config->{regulatory});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
749 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
750
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
751 # connect to databases
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
752 $config->{reg} = &connect_to_dbs($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
753
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
754 # complete dir with species name and db_version
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
755 $config->{dir} .= '/'.(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
756 join '/', (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
757 defined($config->{offline}) ? $config->{species} : ($config->{reg}->get_alias($config->{species}) || $config->{species}),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
758 $config->{db_version} || $config->{reg}->software_version
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
759 )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
760 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
761
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
762 # warn user cache directory doesn't exist
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
763 if(!-e $config->{dir}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
764
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
765 # if using write_cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
766 if(defined($config->{write_cache})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
767 debug("INFO: Cache directory ", $config->{dir}, " not found - it will be created") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
768 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
769
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
770 # want to read cache, not found
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
771 elsif(defined($config->{cache})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
772 die("ERROR: Cache directory ", $config->{dir}, " not found");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
773 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
774 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
775
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
776 if(defined($config->{cache})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
777 # read cache info
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
778 if(read_cache_info($config)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
779 debug("Read existing cache info") unless defined $config->{quiet};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
780 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
781 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
782
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
783 # we configure plugins here because they can sometimes switch on the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
784 # regulatory config option
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
785 configure_plugins($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
786
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
787 # include regulatory modules if requested
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
788 if(defined($config->{regulatory})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
789 # do the use statements here so that users don't have to have the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
790 # funcgen API installed to use the rest of the script
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
791 eval q{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
792 use Bio::EnsEMBL::Funcgen::DBSQL::RegulatoryFeatureAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
793 use Bio::EnsEMBL::Funcgen::DBSQL::MotifFeatureAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
794 use Bio::EnsEMBL::Funcgen::MotifFeature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
795 use Bio::EnsEMBL::Funcgen::RegulatoryFeature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
796 use Bio::EnsEMBL::Funcgen::BindingMatrix;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
797 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
798
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
799 if($@) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
800 die("ERROR: Ensembl Funcgen API must be installed to use --regulatory or plugins that deal with regulatory features\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
801 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
802 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
803
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
804 # user defined custom output fields
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
805 if(defined($config->{fields})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
806 $config->{fields} = [split ',', $config->{fields}];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
807 debug("Output fields redefined (".scalar @{$config->{fields}}." defined)") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
808 $config->{fields_redefined} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
809 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
810 $config->{fields} ||= \@OUTPUT_COLS;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
811
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
812 # suppress warnings that the FeatureAdaptors emit if using no_slice_cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
813 Bio::EnsEMBL::Utils::Exception::verbose(1999) if defined($config->{no_slice_cache});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
814
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
815 # get adaptors (don't get them in offline mode)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
816 unless(defined($config->{offline})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
817
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
818 if(defined($config->{cache}) && !defined($config->{write_cache})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
819
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
820 # try and load adaptors from cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
821 if(!&load_dumped_adaptor_cache($config)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
822 &get_adaptors($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
823 &dump_adaptor_cache($config) if defined($config->{write_cache}) && !defined($config->{no_adaptor_cache});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
824 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
825
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
826 # check cached adaptors match DB params
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
827 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
828 my $dbc = $config->{sa}->{dbc};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
829
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
830 my $ok = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
831
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
832 if($dbc->{_host} ne $config->{host}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
833
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
834 # ens-livemirror, useastdb and ensembldb should all have identical DBs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
835 unless(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
836 (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
837 $dbc->{_host} eq 'ens-livemirror'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
838 || $dbc->{_host} eq 'ensembldb.ensembl.org'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
839 || $dbc->{_host} eq 'useastdb.ensembl.org'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
840 ) && (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
841 $config->{host} eq 'ens-livemirror'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
842 || $config->{host} eq 'ensembldb.ensembl.org'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
843 || $config->{host} eq 'useastdb.ensembl.org'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
844 )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
845 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
846 $ok = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
847 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
848
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
849 unless(defined($config->{skip_db_check})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
850 # but we still need to reconnect
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
851 debug("INFO: Defined host ", $config->{host}, " is different from cached ", $dbc->{_host}, " - reconnecting to host") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
852
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
853 &get_adaptors($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
854 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
855 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
856
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
857 if(!$ok) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
858 if(defined($config->{skip_db_check})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
859 debug("INFO: Defined host ", $config->{host}, " is different from cached ", $dbc->{_host}) unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
860 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
861 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
862 die "ERROR: Defined host ", $config->{host}, " is different from cached ", $dbc->{_host}, ". If you are sure this is OK, rerun with -skip_db_check flag set";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
863 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
864 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
865 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
866 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
867 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
868 &get_adaptors($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
869 &dump_adaptor_cache($config) if defined($config->{write_cache}) && !defined($config->{no_adaptor_cache});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
870 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
871
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
872 # reg adaptors (only fetches if not retrieved from cache already)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
873 &get_reg_adaptors($config) if defined($config->{regulatory});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
874 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
875
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
876 # check cell types
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
877 if(defined($config->{cell_type}) && !defined($config->{build})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
878 my $cls = '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
879
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
880 if(defined($config->{cache})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
881 $cls = $config->{cache_cell_types};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
882 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
883 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
884 my $cta = $config->{RegulatoryFeature_adaptor}->db->get_CellTypeAdaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
885 $cls = join ",", map {$_->name} @{$cta->fetch_all};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
886 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
887
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
888 foreach my $cl(@{$config->{cell_type}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
889 die "ERROR: cell type $cl not recognised; available cell types are:\n$cls\n" unless $cls =~ /(^|,)$cl(,|$)/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
890 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
891 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
892
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
893 # get terminal width for progress bars
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
894 unless(defined($config->{quiet})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
895 my $width;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
896
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
897 # module may not be installed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
898 eval q{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
899 use Term::ReadKey;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
900 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
901
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
902 if(!$@) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
903 my ($w, $h);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
904
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
905 # module may be installed, but e.g.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
906 eval {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
907 #($w, $h) = GetTerminalSize();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
908 $w = 167;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
909 $h = 30;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
910 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
911
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
912 $width = $w if defined $w;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
913 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
914
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
915 $width ||= 60;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
916 $width -= 12;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
917 $config->{terminal_width} = $width;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
918 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
919
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
920 # jump out to build cache if requested
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
921 if(defined($config->{build})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
922
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
923 if($config->{host} =~ /^(ensembl|useast)db\.ensembl\.org$/ && !defined($config->{admin})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
924 die("ERROR: Cannot build cache using public database server ", $config->{host}, "\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
925 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
926
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
927 # build the cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
928 debug("Building cache for ".$config->{species}) unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
929 build_full_cache($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
930
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
931 # exit script
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
932 debug("Finished building cache") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
933 exit(0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
934 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
935
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
936
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
937 # warn user DB will be used for SIFT/PolyPhen/HGVS/frequency/LRG
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
938 if(defined($config->{cache})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
939
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
940 # these four definitely depend on the DB
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
941 foreach my $param(grep {defined $config->{$_}} qw(hgvs check_frequency lrg check_sv)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
942 debug("INFO: Database will be accessed when using --$param") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
943 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
944
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
945 # as does using HGVS or IDs as input
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
946 debug("INFO: Database will be accessed when using --format ", $config->{format}) if ($config->{format} eq 'id' || $config->{format} eq 'hgvs') && !defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
947
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
948 # the rest may be in the cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
949 foreach my $param(grep {defined $config->{$_}} qw(sift polyphen regulatory)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
950 next if defined($config->{'cache_'.$param});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
951 debug("INFO: Database will be accessed when using --$param; consider using the complete cache containing $param data (see documentation for details)") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
952 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
953 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
954
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
955 # get list of chrs if supplied
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
956 if(defined($config->{chr})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
957 my %chrs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
958
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
959 foreach my $val(split /\,/, $config->{chr}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
960 my @nnn = split /\-/, $val;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
961
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
962 foreach my $chr($nnn[0]..$nnn[-1]) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
963 $chrs{$chr} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
964 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
965 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
966
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
967 $config->{chr} = \%chrs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
968 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
969
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
970 # get input file handle
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
971 $config->{in_file_handle} = &get_in_file_handle($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
972
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
973 return $config;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
974 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
975
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Reads options from a plain-text config file into the $config hashref.
#
# Arguments:
#   $config - hashref of options; modified in place
#   $file   - path of the config file to read
#
# File format: one option per line, written as "key value" or "key=value".
# Leading dashes on the key are optional ("key", "-key" and "--key" are
# equivalent). Blank lines and lines whose first non-blank character is '#'
# are ignored.
#
# Merge rules:
#   - if $config->{key} already holds an array ref, every value found on the
#     line is appended to that array
#   - otherwise the first value on the line is stored, but only when
#     $config->{key} is currently false, so options that were set before
#     this sub is called take precedence over the file
#
# After reading, if output_file names STDOUT, verbose is removed and quiet
# is forced on so that status messages cannot mix with the output.
#
# Dies if the file cannot be opened.
sub read_config_from_file {
    my $config = shift;
    my $file   = shift;

    # three-arg open on a lexical handle: the file name can never be
    # interpreted as an open mode or a pipe, and no global handle is used
    open(my $config_fh, '<', $file) or die "ERROR: Could not open config file \"$file\"\n";

    # read into a named lexical so the caller's $_ is left untouched
    while(my $line = <$config_fh>) {

        # leading whitespace would otherwise become an empty first field
        $line =~ s/^\s+//;

        next if $line =~ /^\#/;

        my @split = split /\s+|\=/, $line;
        my $key = shift @split;

        # blank / whitespace-only lines leave nothing to use as a key
        next unless defined($key) && length($key);

        # strip every leading dash, not just the first one
        $key =~ s/^\-+//;
        next unless length($key);

        if(defined($config->{$key}) && ref($config->{$key}) eq 'ARRAY') {
            push @{$config->{$key}}, @split;
        }
        else {
            # ||= keeps any true value that was already set
            $config->{$key} ||= $split[0];
        }
    }

    close($config_fh);

    # force quiet if outputting to STDOUT
    if(defined($config->{output_file}) && $config->{output_file} =~ /stdout/i) {
        delete $config->{verbose} if defined($config->{verbose});
        $config->{quiet} = 1;
    }

    debug("Read configuration from $file") unless defined($config->{quiet});
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1007
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Loads, instantiates and validates the plugins requested by the user.
#
# Arguments:
#   $config - hashref of options. Reads {plugin}, {toplevel_dir} and {quiet};
#             resets {plugins} to a fresh array ref holding the accepted
#             plugin objects; sets {regulatory} to 1 when an accepted plugin
#             lists a motif/regulatory feature type.
#
# $config->{plugin} is normally an array ref of strings of the form
# "Module[,param1,param2,...]"; a single plain string or an undefined value
# is tolerated as well.
#
# A plugin is skipped (with a message unless quiet) when
#   - its module cannot be loaded,
#   - its constructor dies, or
#   - it lacks any of the required methods.
# A missing or mismatching version only produces a warning.
sub configure_plugins {

    my $config = shift;

    $config->{plugins} = [];

    # accept an array ref, a lone string or nothing at all
    my $requested = $config->{plugin};
    my @plugins =
        ref($requested) eq 'ARRAY'                  ? @$requested
      : (defined($requested) && length($requested)) ? ($requested)
      :                                               ();

    return unless @plugins;

    # add the Plugins directory onto @INC
    unshift @INC, $config->{toplevel_dir}."/Plugins";

    PLUGIN:
    for my $plugin (@plugins) {

        # parse out the module name and parameters
        my ($module, @params) = split /,/, $plugin;

        # check we can use the module
        # NOTE(review): $module comes straight from the user-supplied
        # option and is interpolated into a string eval; this is only
        # safe while the option value is trusted
        eval qq{
            use $module;
        };
        if ($@) {
            debug("Failed to compile plugin $module: $@") unless defined($config->{quiet});
            next PLUGIN;
        }

        # now check we can instantiate it, passing any parameters to the constructor
        my $instance;

        eval {
            $instance = $module->new($config, @params);
        };
        if ($@) {
            debug("Failed to instantiate plugin $module: $@") unless defined($config->{quiet});
            next PLUGIN;
        }

        # check that the versions match (only the major number is compared)
        my $plugin_version;

        if ($instance->can('version')) {
            $plugin_version = $instance->version;
        }

        my $version_ok = 1;

        if ($plugin_version) {
            my ($plugin_major, $plugin_minor, $plugin_maintenance) = split /\./, $plugin_version;
            my ($major, $minor, $maintenance) = split /\./, $VERSION;

            if ($plugin_major != $major) {
                debug("Warning: plugin $plugin version ($plugin_version) does not match the current VEP version ($VERSION)") unless defined($config->{quiet});
                $version_ok = 0;
            }
        }
        else {
            debug("Warning: plugin $plugin does not define a version number") unless defined($config->{quiet});
            $version_ok = 0;
        }

        debug("You may experience unexpected behaviour with this plugin") unless defined($config->{quiet}) || $version_ok;

        # check that it implements all necessary methods; every missing
        # method is reported, then the whole plugin is skipped (a bare
        # "next" in a loop over the method names would only move on to the
        # next method name and the plugin would still be registered)
        my @missing = grep { !$instance->can($_) }
            qw(run get_header_info check_feature_type check_variant_feature_type);

        if (@missing) {
            unless (defined($config->{quiet})) {
                for my $required (@missing) {
                    debug("Plugin $module doesn't implement a required method '$required', does it inherit from BaseVepPlugin?");
                }
            }
            next PLUGIN;
        }

        # all's good, so save the instance in our list of plugins
        push @{ $config->{plugins} }, $instance;

        debug("Loaded plugin: $module") unless defined($config->{quiet});

        # for convenience, check if the plugin wants regulatory stuff and
        # turn on the config option if so
        if (grep { $_ =~ /motif|regulatory/i } @{ $instance->feature_types }) {
            debug("Fetching regulatory features for plugin: $module") unless defined($config->{quiet});
            $config->{regulatory} = 1;
        }
    }

    return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1099
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Returns the Ensembl registry, first loading database connection details
# into it unless the run is offline.
#
# Arguments : $config - hashref of options; this sub reads offline, registry,
#                       quiet, verbose, no_slice_cache, host, user, password,
#                       port, db_version and species
# Returns   : the registry class name, 'Bio::EnsEMBL::Registry'
sub connect_to_dbs {
    my $config = shift;

    my $registry = 'Bio::EnsEMBL::Registry';

    # offline mode: hand the registry back without loading anything into it
    return $registry if defined($config->{offline});

    if(defined($config->{registry})) {

        # connection details come from a registry file
        debug("Loading DB config from registry file ", $config->{registry}) unless defined($config->{quiet});

        $registry->load_all(
            $config->{registry},
            $config->{verbose},
            undef,
            $config->{no_slice_cache}
        );
    }
    else {

        # the species is only passed on when it starts with letters, an
        # underscore and more letters (e.g. homo_sapiens); otherwise undef
        my $species_filter = $config->{species} =~ /^[a-z]+\_[a-z]+/i ? $config->{species} : undef;

        # connection details come straight from the individual options
        $registry->load_registry_from_db(
            -host       => $config->{host},
            -user       => $config->{user},
            -pass       => $config->{password},
            -port       => $config->{port},
            -db_version => $config->{db_version},
            -species    => $species_filter,
            -verbose    => $config->{verbose},
            -no_cache   => $config->{no_slice_cache},
        );
    }

    # any failure of this call is deliberately ignored
    eval { $registry->set_reconnect_when_lost() };

    # nothing more to do unless we were asked to report what we connected to
    return $registry unless defined($config->{verbose});

    # fetch both meta containers so the schema versions can be reported
    my $core_meta      = $registry->get_adaptor($config->{species}, 'core', 'metacontainer');
    my $variation_meta = $registry->get_adaptor($config->{species}, 'variation', 'metacontainer');

    if($core_meta && $variation_meta) {
        debug(
            "Connected to core version ", $core_meta->get_schema_version, " database ",
            "and variation version ", $variation_meta->get_schema_version, " database"
        );
    }

    return $registry;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1151
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Fetches the variation and core adaptors from the registry and stores them
# in $config.
#
# Arguments : $config - hashref; reads reg, species and core_type, and writes
#                       vfa, svfa, tva, pfpma, va, sa, ga, ta, mca and csa
# Dies      : if $config->{reg} is not defined, or if no slice adaptor could
#             be obtained
sub get_adaptors {
    my $config = shift;

    die "ERROR: No registry" unless defined $config->{reg};

    my $registry = $config->{reg};
    my $species  = $config->{species};

    # config key => adaptor name, looked up in the 'variation' group
    my @variation_adaptors = (
        [ vfa   => 'variationfeature' ],
        [ svfa  => 'structuralvariationfeature' ],
        [ tva   => 'transcriptvariation' ],
        [ pfpma => 'proteinfunctionpredictionmatrix' ],
        [ va    => 'variation' ],
    );

    for my $pair (@variation_adaptors) {
        my ($key, $adaptor_name) = @$pair;
        $config->{$key} = $registry->get_adaptor($species, 'variation', $adaptor_name);
    }

    # no variation feature adaptor: substitute fake adaptors for the three
    # that have a new_fake constructor
    if(!defined($config->{vfa})) {
        my @fake_adaptors = (
            [ vfa  => 'Bio::EnsEMBL::Variation::DBSQL::VariationFeatureAdaptor' ],
            [ svfa => 'Bio::EnsEMBL::Variation::DBSQL::StructuralVariationFeatureAdaptor' ],
            [ tva  => 'Bio::EnsEMBL::Variation::DBSQL::TranscriptVariationAdaptor' ],
        );

        for my $pair (@fake_adaptors) {
            my ($key, $class) = @$pair;
            $config->{$key} = $class->new_fake($species);
        }
    }

    # config key => adaptor name, looked up in the configured core group
    my @core_adaptors = (
        [ sa  => 'slice' ],
        [ ga  => 'gene' ],
        [ ta  => 'transcript' ],
        [ mca => 'metacontainer' ],
        [ csa => 'coordsystem' ],
    );

    for my $pair (@core_adaptors) {
        my ($key, $adaptor_name) = @$pair;
        $config->{$key} = $registry->get_adaptor($species, $config->{core_type}, $adaptor_name);
    }

    # call once up front so the schema version is cached (result unused here)
    $config->{mca}->get_schema_version if defined $config->{mca};

    # a slice adaptor is mandatory - can't continue without a core DB
    die("ERROR: Could not connect to core database\n") unless defined $config->{sa};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1183
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Fetches a funcgen adaptor for every type in @REG_FEAT_TYPES and stores it
# in $config under "<type>_adaptor".
#
# Arguments : $config - hashref; reads reg and species
# Note      : types whose adaptor is already stored are skipped. As soon as
#             one adaptor cannot be fetched, $config->{regulatory} is deleted
#             and the remaining types are not attempted.
sub get_reg_adaptors {
    my $config = shift;

    TYPE:
    foreach my $feature_type (@REG_FEAT_TYPES) {
        my $slot = $feature_type.'_adaptor';

        # already have this one
        next TYPE if defined($config->{$slot});

        my $fetched = $config->{reg}->get_adaptor($config->{species}, 'funcgen', $feature_type);

        # no adaptor available: switch the regulatory option off and give up
        unless(defined($fetched)) {
            delete $config->{regulatory};
            last TYPE;
        }

        $config->{$slot} = $fetched;
    }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1201
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Opens the handle that input is read from.
#
# Arguments : $config - hashref; reads input_file, compress and quiet
# Returns   : a FileHandle opened for reading when input_file is set
#             (through the decompression command in $config->{compress} for
#             names ending in .gz), otherwise the string 'STDIN'
# Dies      : if input_file does not exist, cannot be opened, or ends in .gz
#             while no decompression command is configured
sub get_in_file_handle {
    my $config = shift;

    # no file specified - try to read data off command line
    unless(defined($config->{input_file})) {
        debug("Reading input from STDIN (or maybe you forgot to specify an input file?)...") unless defined $config->{quiet};

        # returned as the handle name, exactly as before, so that callers
        # see the same value
        return 'STDIN';
    }

    my $input_file = $config->{input_file};

    # check defined input file exists
    die("ERROR: Could not find input file ", $input_file, "\n") unless -e $input_file;

    # define the filehandle to read input from
    my $in_file_handle = FileHandle->new;

    if($input_file =~ /\.gz$/) {

        # The decompression command may carry its own arguments
        # (e.g. "gzip -dc"), so split it into words. The file name is then
        # passed as a separate argument and never goes through a shell:
        # names containing spaces or shell metacharacters are read as-is.
        my @decompress = defined($config->{compress}) ? split(' ', $config->{compress}) : ();

        # with an empty command the file itself would be run, so refuse
        die("ERROR: No decompression command configured to read input file ", $input_file, "\n") unless @decompress;

        open($in_file_handle, '-|', @decompress, $input_file)
            or die("ERROR: Could not read from input file ", $input_file, "\n");
    }
    else {

        # explicit read mode: the file name is never interpreted as a mode
        # or a pipe
        open($in_file_handle, '<', $input_file)
            or die("ERROR: Could not read from input file ", $input_file, "\n");
    }

    return $in_file_handle;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1230
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Opens the output handle and writes the header appropriate to the output
# format.
#
# Arguments : $config - hashref; reads output_file, convert, gvf, original,
#                       vcf, headers, fields, fields_redefined, custom, mca,
#                       cache, dir, reg and (via get_plugin_headers) plugins
# Returns   : the handle output is written to: *STDOUT when output_file
#             matches /stdout/i, otherwise a FileHandle opened for writing
# Dies      : if the output file cannot be opened for writing
# Notes     : - in VCF mode without user-defined fields, $config->{fields} is
#               replaced by the list of fields written into the CSQ header
#             - in VCF mode with existing headers, the new INFO lines are
#               inserted into $config->{headers} before the #CHROM line
sub get_out_file_handle {
    my $config = shift;

    # define filehandle to write to
    my $out_file_handle = FileHandle->new;

    # An existing output file is overwritten without complaint: the check
    # that used to die unless force_overwrite was set is disabled in this
    # revision of the script, and that behaviour is kept.

    if($config->{output_file} =~ /stdout/i) {
        $out_file_handle = *STDOUT;
    }
    else {
        # explicit write mode, so the name is never interpreted as a mode
        open($out_file_handle, '>', $config->{output_file})
            or die("ERROR: Could not write to output file ", $config->{output_file}, "\n");
    }

    # define headers for a VCF file
    my @vcf_headers = (
        '#CHROM',
        'POS',
        'ID',
        'REF',
        'ALT',
        'QUAL',
        'FILTER',
        'INFO'
    );

    # file conversion, don't want to add normal headers
    if(defined($config->{convert})) {

        # header for VCF
        if($config->{convert} =~ /vcf/i) {
            print $out_file_handle "##fileformat=VCFv4.0\n";
            print $out_file_handle join "\t", @vcf_headers;
            print $out_file_handle "\n";
        }

        return $out_file_handle;
    }

    # GVF output, no header; "original" output re-emits the input headers
    elsif(defined($config->{gvf}) || defined($config->{original})) {
        print $out_file_handle join "\n", @{$config->{headers}} if defined($config->{headers}) && defined($config->{original});
        return $out_file_handle;
    }

    elsif(defined($config->{vcf})) {

        # create an info string for the VCF header
        my @new_headers;

        # if the user has defined the fields themselves, we don't need to worry
        if(defined $config->{fields_redefined}) {
            @new_headers = @{$config->{fields}};
        }
        else {
            @new_headers = (

                # get default headers, minus variation name and location (already encoded in VCF)
                grep {
                    $_ ne 'Uploaded_variation' and
                    $_ ne 'Location' and
                    $_ ne 'Extra'
                } @{$config->{fields}},

                # get extra headers
                map {@{$extra_headers{$_}}}
                grep {defined $config->{$_}}
                keys %extra_headers
            );

            # plugin headers: take the key from each "## key<TAB>: value" line.
            # Lines that do not have that shape are skipped, so a stale $1
            # from an earlier match is never added.
            foreach my $plugin_header(split /\n/, get_plugin_headers($config)) {
                if($plugin_header =~ /\#\# (.+?)\t\:.+/) {
                    push @new_headers, $1;
                }
            }

            # redefine the main headers list in config
            $config->{fields} = \@new_headers;
        }

        # add the newly defined headers as a header to the VCF
        my $string = join '|', @{$config->{fields}};
        my @vcf_info_strings = ('##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: '.$string.'">');

        # add custom headers
        foreach my $custom(@{$config->{custom}}) {
            push @vcf_info_strings, '##INFO=<ID='.$custom->{name}.',Number=.,Type=String,Description="'.$custom->{file}.' ('.$custom->{type}.')">';
        }

        # if this is already a VCF file, we need to add our new headers in the right place
        if(defined($config->{headers})) {

            for my $i(0..$#{$config->{headers}}) {
                if($config->{headers}->[$i] =~ /^\#CHROM\s+POS\s+ID/) {
                    splice(@{$config->{headers}}, $i, 0, @vcf_info_strings);

                    # Stop here: the splice has pushed the #CHROM line to a
                    # higher index, and carrying on could match it again and
                    # insert the INFO lines a second time.
                    last;
                }
            }

            print $out_file_handle join "\n", @{$config->{headers}};
            print $out_file_handle "\n";
        }

        else {
            print $out_file_handle "##fileformat=VCFv4.0\n";
            print $out_file_handle join "\n", @vcf_info_strings;
            print $out_file_handle "\n";
            print $out_file_handle join "\t", @vcf_headers;
            print $out_file_handle "\n";
        }

        return $out_file_handle;
    }

    # make header
    my $time = get_time();

    # declared unconditionally: "my ... if ..." has undefined behaviour
    my $db_string = defined($config->{mca})
        ? $config->{mca}->dbc->dbname." on ".$config->{mca}->dbc->host
        : '';
    $db_string .= "\n## Using cache in ".$config->{dir} if defined($config->{cache});

    my $version_string =
        "Using API version ".$config->{reg}->software_version.
        ", DB version ".(defined $config->{mca} && $config->{mca}->get_schema_version ? $config->{mca}->get_schema_version : '?');

    # add key for extra column headers based on config
    my $extra_column_keys = join "\n",
        map {'## '.$_.' : '.$extra_descs{$_}}
        sort map {@{$extra_headers{$_}}}
        grep {defined $config->{$_}}
        keys %extra_headers;

    my $header =<<HEAD;
## ENSEMBL VARIANT EFFECT PREDICTOR v$VERSION
## Output produced at $time
## Connected to $db_string
## $version_string
## Extra column keys:
$extra_column_keys
HEAD

    $header .= get_plugin_headers($config);

    # add headers
    print $out_file_handle $header;

    # add custom data defs
    if(defined($config->{custom})) {
        foreach my $custom(@{$config->{custom}}) {
            print $out_file_handle '## '.$custom->{name}."\t: ".$custom->{file}.' ('.$custom->{type}.")\n";
        }
    }

    # add column headers
    print $out_file_handle '#', (join "\t", @{$config->{fields}});
    print $out_file_handle "\n";

    return $out_file_handle;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1390
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Builds the header text contributed by the loaded plugins.
#
# Arguments : $config - hashref; reads plugins (a list of plugin objects)
# Returns   : a string with one "## <key>\t: <value>\n" line for every entry
#             of the hash each plugin returns from get_header_info; an empty
#             string if there is nothing to report
sub get_plugin_headers {

    my $config = shift;

    my $header = "";

    for my $plugin (@{ $config->{plugins} }) {

        # plugins that return a false value contribute nothing
        if (my $hdr = $plugin->get_header_info) {

            # Keys are sorted so that the order of the lines is the same on
            # every run. Hash order is not, and the VCF writer derives its
            # field order from these lines.
            for my $key (sort keys %$hdr) {
                my $val = $hdr->{$key};

                $header .= "## $key\t: $val\n";
            }
        }
    }

    return $header;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1409
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Converts one variation feature to the format named in $config->{convert}
# and writes it to the output handle as a tab-separated line.
#
# Arguments : $config - hashref; reads convert and out_file_handle
#             $vf     - the variation feature, passed on to the converter
# Note      : the converter is the sub "convert_to_<format>" (format name
#             lower-cased); it must return an arrayref of fields. Nothing is
#             written when that list is empty.
sub convert_vf {
    my ($config, $vf) = @_;

    # look the converter up by name
    my $converter = \&{ 'convert_to_'.lc($config->{convert}) };

    my $fields = $converter->($config, $vf);
    my $out    = $config->{out_file_handle};

    if(@$fields) {
        print {$out} join("\t", @$fields);
        print {$out} "\n";
    }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1426
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Converts a variation feature to the fields of an Ensembl-format line.
#
# Arguments : $config - unused here
#             $vf     - variation feature object
# Returns   : arrayref of chromosome, start, end, allele string, strand and
#             variation name; the chromosome is $vf->{chr} when that is true,
#             otherwise $vf->seq_region_name
sub convert_to_ensembl {
    my ($config, $vf) = @_;

    my @columns = (
        $vf->{chr} || $vf->seq_region_name,
        map { $vf->$_ } qw(start end allele_string strand variation_name)
    );

    return \@columns;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1441
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1442
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Converts a variation feature to the columns of a pileup-format line.
#
# Arguments:
#   $config - hashref; {line_number} and {quiet} are read when a variant
#             cannot be converted
#   $vf     - object providing allele_string, start and seq_region_name
#             methods; its {chr} key is used for the chromosome when true
#
# Returns an arrayref:
#   - balanced substitution: [chr, start, first allele, other alleles]
#   - insertion/deletion:    [chr, start - 1, '*', alleles], each allele
#     prefixed with '+' (insertion) or '-' (deletion)
#   - [] when the allele lengths differ but the allele string contains no
#     '-' (a warning is issued unless $config->{quiet} is defined)
#   Multiple alleles are joined with "/".
sub convert_to_pileup {
    my $config = shift;
    my $vf     = shift;

    my @alleles = split /\//, $vf->allele_string;

    # look for imbalance in the allele string
    # NB: foreach aliases the array elements, so the '-' characters are
    # stripped from a copy; stripping in place would turn the '-' placeholder
    # into '' and the insertion test below could never succeed
    my %allele_lengths;
    foreach my $allele (@alleles) {
        (my $bases = $allele) =~ s/\-//g;
        $allele_lengths{length($bases)} = 1;
    }

    # balanced sub
    if(scalar(keys %allele_lengths) <= 1) {

        # alleles are reported without any '-' characters
        s/\-//g for @alleles;

        my $ref_allele = shift @alleles;

        return [
            $vf->{chr} || $vf->seq_region_name,
            $vf->start,
            $ref_allele,
            (join "/", @alleles),
        ];
    }

    # unbalanced but no '-' placeholder: cannot be expressed as a pileup indel
    if($vf->allele_string !~ /\-/) {
        warn "WARNING: Unable to convert variant to pileup format on line number ", $config->{line_number} unless defined($config->{quiet});
        return [];
    }

    # in/del
    my $sign;

    # insertion?
    if($alleles[0] eq '-') {
        shift @alleles;
        $sign = '+';
    }

    # deletion
    else {
        @alleles = grep {$_ ne '-'} @alleles;
        $sign = '-';
    }

    foreach my $allele (@alleles) {
        $allele =~ s/\-//g;
        $allele = $sign.$allele;
    }

    # drop alleles that were nothing but a placeholder
    @alleles = grep {$_ ne '-' && $_ ne '+'} @alleles;

    return [
        $vf->{chr} || $vf->seq_region_name,
        $vf->start - 1,
        '*',
        (join "/", @alleles),
    ];
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1508
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Converts a variation feature to HGVS notations.
#
# Collects the values returned by $vf->get_all_hgvs_notations() with no
# arguments, then, when $vf->get_all_TranscriptVariations returns a defined
# value, the 'c' notations for every transcript followed by the 'p'
# notations for every transcript.
#
# Returns an arrayref holding ONE string: all notations joined by newlines
# (hackily returns many lines in a single element).
sub convert_to_hgvs {
    my ($config, $vf) = @_;

    # ensure we have a slice
    $vf->{slice} = get_slice($config, $vf->{chr}) unless $vf->{slice};

    my $transcript_variations = $vf->get_all_TranscriptVariations;

    my @notations = values %{ $vf->get_all_hgvs_notations() };

    if(defined $transcript_variations) {

        # all 'c' notations first, then all 'p' notations
        foreach my $type ('c', 'p') {
            foreach my $tv (@$transcript_variations) {
                push @notations, values %{ $vf->get_all_hgvs_notations($tv->transcript, $type) };
            }
        }
    }

    my $joined = join "\n", @notations;

    return [$joined];
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1528
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Prints one line of output to $config->{out_file_handle}.
#
# Arguments:
#   $config - hashref; reads {out_file_handle} and, for hash lines, {fields}
#             (arrayref of column names)
#   $line   - either a hashref of column name => value, whose {Extra} entry
#             is a hashref of key => value pairs, or a reference to a
#             ready-made string (gvf/vcf output). Nothing is printed when
#             $line is undefined.
#
# For hash lines one tab-separated row is written with a column per entry in
# {fields}; a field not defined in the line is looked up in the Extra hash,
# and '-' is printed when it is defined in neither.
#
# Side effect: $line->{Extra} is replaced by its flattened "key=value;..."
# string form.
sub print_line {
    my $config = shift;
    my $line = shift;
    return unless defined($line);

    my $output;

    # normal
    if(ref($line) eq 'HASH') {
        my %extra;

        # only dereference Extra when it really is a hash; it may be missing,
        # or already flattened if this line has been printed before
        if(ref($line->{Extra}) eq 'HASH') {
            %extra = %{$line->{Extra}};
            $line->{Extra} = join ';', map { $_.'='.$extra{$_} } keys %extra;
        }
        elsif(!defined($line->{Extra})) {
            $line->{Extra} = '';
        }

        # if the fields have been redefined we need to search through in case
        # any of the defined fields are actually part of the Extra hash
        $output = join "\t", map {
            (defined $line->{$_} ? $line->{$_} : (defined $extra{$_} ? $extra{$_} : '-'))
        } @{$config->{fields}};
    }

    # gvf/vcf
    else {
        $output = $$line;
    }

    my $fh = $config->{out_file_handle};
    print $fh "$output\n";
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1558
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Outputs the usage message.
#
# Builds the help text from an interpolating heredoc (the file-level $VERSION
# is substituted; "\@" and "\$" are escaped so they print literally) and
# prints it to the currently selected output handle. Takes no arguments.
sub usage {
    my $usage =<<END;
#----------------------------------#
# ENSEMBL VARIANT EFFECT PREDICTOR #
#----------------------------------#

version $VERSION

By Will McLaren (wm2\@ebi.ac.uk)

http://www.ensembl.org/info/docs/variation/vep/vep_script.html

Usage:
perl variant_effect_predictor.pl [arguments]

Options
=======

--help Display this message and quit
--verbose Display verbose output as the script runs [default: off]
--quiet Suppress status and warning messages [default: off]
--no_progress Suppress progress bars [default: off]

--config Load configuration from file. Any command line options
specified overwrite those in the file [default: off]

--everything Shortcut switch to turn on commonly used options. See web
documentation for details [default: off]

--fork [num_forks] Use forking to improve script runtime [default: off]

-i | --input_file Input file - if not specified, reads from STDIN. Files
may be gzip compressed.
--format Specify input file format - one of "ensembl", "pileup",
"vcf", "hgvs", "id" or "guess" to try and work out format.
-o | --output_file Output file. Write to STDOUT by specifying -o STDOUT - this
will force --quiet [default: "variant_effect_output.txt"]
--force_overwrite Force overwriting of output file [default: quit if file
exists]
--original Writes output as it was in input - must be used with --filter
since no consequence data is added [default: off]
--vcf Write output as VCF [default: off]
--gvf Write output as GVF [default: off]
--fields [field list] Define a custom output format by specifying a comma-separated
list of field names. Field names normally present in the
"Extra" field may also be specified, including those added by
plugin modules. Can also be used to configure VCF output
columns [default: off]

--species [species] Species to use [default: "human"]

-t | --terms Type of consequence terms to output - one of "SO", "ensembl"
[default: SO]

--sift=[p|s|b] Add SIFT [p]rediction, [s]core or [b]oth [default: off]
--polyphen=[p|s|b] Add PolyPhen [p]rediction, [s]core or [b]oth [default: off]

NB: SIFT and PolyPhen predictions are currently available for human only
NB: Condel support has been moved to a VEP plugin module - see documentation

--regulatory Look for overlaps with regulatory regions. The script can
also call if a variant falls in a high information position
within a transcription factor binding site. Output lines have
a Feature type of RegulatoryFeature or MotifFeature
[default: off]
--cell_type [types] Report only regulatory regions that are found in the given cell
type(s). Can be a single cell type or a comma-separated list.
The functional type in each cell type is reported under
CELL_TYPE in the output. To retrieve a list of cell types, use
"--cell_type list" [default: off]

NB: Regulatory consequences are currently available for human and mouse only

--custom [file list] Add custom annotations from tabix-indexed files. See
documentation for full details [default: off]
--plugin [plugin_name] Use named plugin module [default: off]
--hgnc Add HGNC gene identifiers to output [default: off]
--hgvs Output HGVS identifiers (coding and protein). Requires database
connection [default: off]
--ccds Output CCDS transcript identifiers [default: off]
--xref_refseq Output aligned RefSeq mRNA identifier for transcript. NB: the
RefSeq and Ensembl transcripts aligned in this way MAY NOT, AND
FREQUENTLY WILL NOT, match exactly in sequence, exon structure
and protein product [default: off]
--protein Output Ensembl protein identifier [default: off]
--canonical Indicate if the transcript for this consequence is the canonical
transcript for this gene [default: off]
--domains Include details of any overlapping protein domains [default: off]
--numbers Include exon & intron numbers [default: off]

--no_intergenic Excludes intergenic consequences from the output [default: off]
--coding_only Only return consequences that fall in the coding region of
transcripts [default: off]
--most_severe Output only the most severe consequence per variation.
Transcript-specific columns will be left blank. [default: off]
--summary Output only a comma-separated list of all consequences per
variation. Transcript-specific columns will be left blank.
[default: off]
--per_gene Output only the most severe consequence per gene. Where more
than one transcript has the same consequence, the transcript
chosen is arbitrary. [default: off]


--check_ref If specified, checks supplied reference allele against stored
entry in Ensembl Core database [default: off]
--check_existing If specified, checks for existing co-located variations in the
Ensembl Variation database [default: off]
--failed [0|1] Include (1) or exclude (0) variants that have been flagged as
failed by Ensembl when checking for existing variants.
[default: exclude]
--check_alleles If specified, the alleles of existing co-located variations
are compared to the input; an existing variation will only
be reported if no novel allele is in the input (strand is
accounted for) [default: off]
--check_svs Report overlapping structural variants [default: off]

--filter [filters] Filter output by consequence type. Use this to output only
variants that have at least one consequence type matching the
filter. Multiple filters can be used separated by ",". By
combining this with --original it is possible to run the VEP
iteratively to progressively filter a set of variants. See
documentation for full details [default: off]

--check_frequency Turns on frequency filtering. Use this to include or exclude
variants based on the frequency of co-located existing
variants in the Ensembl Variation database. You must also
specify all of the following --freq flags [default: off]
--freq_pop [pop] Name of the population to use e.g. hapmap_ceu for CEU HapMap,
1kg_yri for YRI 1000 genomes. See documentation for more
details
--freq_freq [freq] Frequency to use in filter. Must be a number between 0 and 0.5
--freq_gt_lt [gt|lt] Specify whether the frequency should be greater than (gt) or
less than (lt) --freq_freq
--freq_filter Specify whether variants that pass the above should be included
[exclude|include] or excluded from analysis
--gmaf Include global MAF of existing variant from 1000 Genomes
Phase 1 in output

--individual [id] Consider only alternate alleles present in the genotypes of the
specified individual(s). May be a single individual, a comma-
separated list or "all" to assess all individuals separately.
Each individual and variant combination is given on a separate
line of output. Only works with VCF files containing individual
genotype data; individual IDs are taken from column headers.
--allow_non_variant Prints out non-variant lines when using VCF input
--phased Force VCF individual genotypes to be interpreted as phased.
For use with plugins that depend on phased state.

--chr [list] Select a subset of chromosomes to analyse from your file. Any
data not on this chromosome in the input will be skipped. The
list can be comma separated, with "-" characters representing
a range e.g. 1-5,8,15,X [default: off]
--gp If specified, tries to read GRCh37 position from GP field in the
INFO column of a VCF file. Only applies when VCF is the input
format and human is the species [default: off]

--convert Convert the input file to the output format specified.
[ensembl|vcf|pileup] Converted output is written to the file specified in
--output_file. No consequence calculation is carried out when
doing file conversion. [default: off]

--refseq Use the otherfeatures database to retrieve transcripts - this
database contains RefSeq transcripts (as well as CCDS and
Ensembl EST alignments) [default: off]
--host Manually define database host [default: "ensembldb.ensembl.org"]
-u | --user Database username [default: "anonymous"]
--port Database port [default: 5306]
--password Database password [default: no password]
--genomes Sets DB connection params for Ensembl Genomes [default: off]
--registry Registry file to use defines DB connections [default: off]
Defining a registry file overrides above connection settings.
--db_version=[number] Force script to load DBs from a specific Ensembl version. Not
advised due to likely incompatibilities between API and DB

--no_whole_genome Run in old-style, non-whole genome mode [default: off]
--buffer_size Sets the number of variants sent in each batch [default: 5000]
Increasing buffer size can retrieve results more quickly
but requires more memory. Only applies to whole genome mode.

--cache Enables read-only use of cache [default: off]
--dir [directory] Specify the base cache directory to use [default: "\$HOME/.vep/"]
--write_cache Enable writing to cache [default: off]
--build [all|list] Build a complete cache for the selected species. Build for all
chromosomes with --build all, or a list of chromosomes (see
--chr). DO NOT USE WHEN CONNECTED TO PUBLIC DB SERVERS AS THIS
VIOLATES OUR FAIR USAGE POLICY [default: off]

--compress Specify utility to decompress cache files - may be "gzcat" or
"gzip -dc" Only use if default does not work [default: zcat]

--skip_db_check ADVANCED! Force the script to use a cache built from a different
database than specified with --host. Only use this if you are
sure the hosts are compatible (e.g. ensembldb.ensembl.org and
useastdb.ensembl.org) [default: off]
--cache_region_size ADVANCED! The size in base-pairs of the region covered by one
file in the cache. [default: 1MB]
END

    print $usage;
}