annotate variant_effect_predictor/variant_effect_predictor.pl @ 1:d6778b5d8382 draft default tip

Deleted selected files
author willmclaren
date Fri, 03 Aug 2012 10:05:43 -0400
parents 21066c0abaf5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 #!/usr/bin/perl
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 =head1 LICENSE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5 Copyright (c) 1999-2012 The European Bioinformatics Institute and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 Genome Research Limited. All rights reserved.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8 This software is distributed under a modified Apache license.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 For license details, please see
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 http://www.ensembl.org/info/about/code_licence.html
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 =head1 CONTACT
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15 Please email comments or questions to the public Ensembl
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16 developers list at <dev@ensembl.org>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18 Questions may also be sent to the Ensembl help desk at
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 <helpdesk@ensembl.org>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25 Variant Effect Predictor - a script to predict the consequences of genomic variants
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27 http://www.ensembl.org/info/docs/variation/vep/vep_script.html
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29 Version 2.6
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31 by Will McLaren (wm2@ebi.ac.uk)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35 use Getopt::Long;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36 use FileHandle;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37 use FindBin qw($Bin);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38 use lib $Bin;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40 use Bio::EnsEMBL::Variation::Utils::Sequence qw(unambiguity_code);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41 use Bio::EnsEMBL::Variation::Utils::VEP qw(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42 parse_line
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43 vf_to_consequences
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44 validate_vf
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45 convert_to_vcf
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46 load_dumped_adaptor_cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47 dump_adaptor_cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48 get_all_consequences
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49 get_slice
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50 build_full_cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51 read_cache_info
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52 get_time
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53 debug
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54 @OUTPUT_COLS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55 @REG_FEAT_TYPES
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56 %FILTER_SHORTCUTS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
57 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
58
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
59 # global vars
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
60 my $VERSION = '2.6';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
61
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
62
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
63 # define headers that would normally go in the extra field
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
64 # keyed on the config parameter used to turn it on
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
65 my %extra_headers = (
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
66 protein => ['ENSP'],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
67 canonical => ['CANONICAL'],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
68 ccds => ['CCDS'],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
69 hgvs => ['HGVSc','HGVSp'],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
70 hgnc => ['HGNC'],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
71 sift => ['SIFT'],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
72 polyphen => ['PolyPhen'],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
73 numbers => ['EXON','INTRON'],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
74 domains => ['DOMAINS'],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
75 regulatory => ['MOTIF_NAME','MOTIF_POS','HIGH_INF_POS','MOTIF_SCORE_CHANGE'],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
76 cell_type => ['CELL_TYPE'],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
77 individual => ['IND'],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
78 xref_refseq => ['RefSeq'],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
79 check_svs => ['SV'],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
80 check_frequency => ['FREQS'],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
81 gmaf => ['GMAF'],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
82 user => ['DISTANCE'],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
83 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
84
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
85 my %extra_descs = (
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
86 'CANONICAL' => 'Indicates if transcript is canonical for this gene',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
87 'CCDS' => 'Indicates if transcript is a CCDS transcript',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
88 'HGNC' => 'HGNC gene identifier',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
89 'ENSP' => 'Ensembl protein identifer',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
90 'HGVSc' => 'HGVS coding sequence name',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
91 'HGVSp' => 'HGVS protein sequence name',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
92 'SIFT' => 'SIFT prediction',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
93 'PolyPhen' => 'PolyPhen prediction',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
94 'EXON' => 'Exon number(s) / total',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
95 'INTRON' => 'Intron number(s) / total',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
96 'DOMAINS' => 'The source and identifer of any overlapping protein domains',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
97 'MOTIF_NAME' => 'The source and identifier of a transcription factor binding profile (TFBP) aligned at this position',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
98 'MOTIF_POS' => 'The relative position of the variation in the aligned TFBP',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
99 'HIGH_INF_POS' => 'A flag indicating if the variant falls in a high information position of the TFBP',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
100 'MOTIF_SCORE_CHANGE' => 'The difference in motif score of the reference and variant sequences for the TFBP',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
101 'CELL_TYPE' => 'List of cell types and classifications for regulatory feature',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
102 'IND' => 'Individual name',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
103 'SV' => 'IDs of overlapping structural variants',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
104 'FREQS' => 'Frequencies of overlapping variants used in filtering',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
105 'GMAF' => 'Minor allele and frequency of existing variation in 1000 Genomes Phase 1',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
106 'DISTANCE' => 'Shortest distance from variant to transcript',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
107 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
108
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
109 # set output autoflush for progress bars
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
110 $| = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
111
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
112 # configure from command line opts
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
113 my $config = &configure(scalar @ARGV);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
114
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
115 # run the main sub routine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
116 &main($config);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
117
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
118 # this is the main sub-routine - it needs the configured $config hash
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
119 sub main {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
120 my $config = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
121
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
122 debug("Starting...") unless defined $config->{quiet};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
123
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
124 $config->{start_time} = time();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
125 $config->{last_time} = time();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
126
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
127 my $tr_cache = {};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
128 my $rf_cache = {};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
129
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
130 # create a hash to hold slices so we don't get the same one twice
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
131 my %slice_cache = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
132
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
133 my @vfs;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
134 my ($vf_count, $total_vf_count);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
135 my $in_file_handle = $config->{in_file_handle};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
136
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
137 # initialize line number in config
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
138 $config->{line_number} = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
139
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
140 # read the file
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
141 while(<$in_file_handle>) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
142 chomp;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
143
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
144 $config->{line_number}++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
145
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
146 # header line?
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
147 if(/^\#/) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
148
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
149 # retain header lines if we are outputting VCF
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
150 if(defined($config->{vcf})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
151 push @{$config->{headers}}, $_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
152 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
153
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
154 # line with sample labels in VCF
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
155 if(defined($config->{individual}) && /^#CHROM/) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
156 my @split = split /\s+/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
157
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
158 # no individuals
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
159 die("ERROR: No individual data found in VCF\n") if scalar @split <= 9;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
160
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
161 # get individual column indices
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
162 my %ind_cols = map {$split[$_] => $_} (9..$#split);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
163
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
164 # all?
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
165 if(scalar @{$config->{individual}} == 1 && $config->{individual}->[0] =~ /^all$/i) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
166 $config->{ind_cols} = \%ind_cols;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
167 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
168 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
169 my %new_ind_cols;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
170
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
171 # check we have specified individual(s)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
172 foreach my $ind(@{$config->{individual}}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
173 die("ERROR: Individual named \"$ind\" not found in VCF\n") unless defined $ind_cols{$ind};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
174 $new_ind_cols{$ind} = $ind_cols{$ind};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
175 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
176
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
177 $config->{ind_cols} = \%new_ind_cols;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
178 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
179 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
180
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
181 next;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
182 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
183
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
184 # configure output file
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
185 $config->{out_file_handle} ||= &get_out_file_handle($config);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
186
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
187 # some lines (pileup) may actually parse out into more than one variant
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
188 foreach my $vf(@{&parse_line($config, $_)}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
189
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
190 $vf->{_line} = $_ ;#if defined($config->{vcf}) || defined($config->{original});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
191
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
192 # now get the slice
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
193 if(!defined($vf->{slice})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
194 my $slice;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
195
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
196 # don't get slices if we're using cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
197 # we can steal them from transcript objects later
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
198 if((!defined($config->{cache}) && !defined($config->{whole_genome})) || defined($config->{check_ref}) || defined($config->{convert})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
199
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
200 # check if we have fetched this slice already
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
201 if(defined $slice_cache{$vf->{chr}}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
202 $slice = $slice_cache{$vf->{chr}};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
203 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
204
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
205 # if not create a new one
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
206 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
207
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
208 $slice = &get_slice($config, $vf->{chr});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
209
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
210 # if failed, warn and skip this line
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
211 if(!defined($slice)) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
212 warn("WARNING: Could not fetch slice named ".$vf->{chr}." on line ".$config->{line_number}."\n") unless defined $config->{quiet};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
213 next;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
214 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
215
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
216 # store the slice in the cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
217 $slice_cache{$vf->{chr}} = $slice;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
218 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
219 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
220
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
221 $vf->{slice} = $slice;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
222 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
223
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
224 # validate the VF
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
225 next unless validate_vf($config, $vf);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
226
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
227 # make a name if one doesn't exist
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
228 $vf->{variation_name} ||= $vf->{chr}.'_'.$vf->{start}.'_'.($vf->{allele_string} || $vf->{class_SO_term});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
229
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
230 # jump out to convert here
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
231 if(defined($config->{convert})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
232 &convert_vf($config, $vf);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
233 next;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
234 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
235
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
236 if(defined $config->{whole_genome}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
237 push @vfs, $vf;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
238 $vf_count++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
239 $total_vf_count++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
240
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
241 if($vf_count == $config->{buffer_size}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
242 debug("Read $vf_count variants into buffer") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
243
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
244 print_line($config, $_) foreach @{get_all_consequences($config, \@vfs)};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
245
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
246 # calculate stats
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
247 my $total_rate = sprintf("%.0f vars/sec", $total_vf_count / ((time() - $config->{start_time}) || 1));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
248 my $rate = sprintf("%.0f vars/sec", $vf_count / ((time() - $config->{last_time}) || 1));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
249 $config->{last_time} = time();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
250
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
251 debug("Processed $total_vf_count total variants ($rate, $total_rate total)") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
252
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
253 @vfs = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
254 $vf_count = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
255 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
256 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
257 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
258 print_line($config, $_) foreach @{vf_to_consequences($config, $vf)};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
259 $vf_count++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
260 $total_vf_count++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
261 debug("Processed $vf_count variants") if $vf_count =~ /0$/ && defined($config->{verbose});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
262 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
263 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
264 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
265
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
266 # if in whole-genome mode, finish off the rest of the buffer
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
267 if(defined $config->{whole_genome} && scalar @vfs) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
268 debug("Read $vf_count variants into buffer") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
269
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
270 print_line($config, $_) foreach @{get_all_consequences($config, \@vfs)};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
271
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
272 # calculate stats
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
273 my $total_rate = sprintf("%.0f vars/sec", $total_vf_count / ((time() - $config->{start_time}) || 1));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
274 my $rate = sprintf("%.0f vars/sec", $vf_count / ((time() - $config->{last_time}) || 1));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
275 $config->{last_time} = time();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
276
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
277 debug("Processed $total_vf_count total variants ($rate, $total_rate total)") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
278
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
279 debug($config->{filter_count}, "/$total_vf_count variants remain after filtering") if defined($config->{filter}) && !defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
280 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
281
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
282 debug("Executed ", defined($Bio::EnsEMBL::DBSQL::StatementHandle::count_queries) ? $Bio::EnsEMBL::DBSQL::StatementHandle::count_queries : 'unknown number of', " SQL statements") if defined($config->{count_queries}) && !defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
283
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
284 debug("Finished!") unless defined $config->{quiet};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
285 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
286
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
287 # sets up configuration hash that is used throughout the script
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
288 sub configure {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
289 my $args = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
290
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
291 my $config = {};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
292
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
293 GetOptions(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
294 $config,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
295 'help', # displays help message
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
296
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
297 # input options
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
298 'config=s', # config file name
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
299 'input_file|i=s', # input file name
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
300 'format=s', # input file format
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
301
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
302 # DB options
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
303 'species=s', # species e.g. human, homo_sapiens
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
304 'registry=s', # registry file
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
305 'host=s', # database host
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
306 'port=s', # database port
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
307 'user=s', # database user name
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
308 'password=s', # database password
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
309 'db_version=i', # Ensembl database version to use e.g. 62
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
310 'genomes', # automatically sets DB params for e!Genomes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
311 'refseq', # use otherfeatures RefSeq DB instead of Ensembl
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
312 #'no_disconnect', # disables disconnect_when_inactive
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
313
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
314 # runtime options
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
315 'most_severe', # only return most severe consequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
316 'summary', # only return one line per variation with all consequence types
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
317 'per_gene', # only return most severe per gene
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
318 'buffer_size=i', # number of variations to read in before analysis
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
319 'chunk_size=s', # size in bases of "chunks" used in internal hash structure
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
320 'failed=i', # include failed variations when finding existing
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
321 'no_whole_genome', # disables now default whole-genome mode
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
322 'whole_genome', # proxy for whole genome mode - now just warns user
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
323 'gp', # read coords from GP part of INFO column in VCF (probably only relevant to 1KG)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
324 'chr=s', # analyse only these chromosomes, e.g. 1-5,10,MT
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
325 'check_ref', # check supplied reference allele against DB
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
326 'check_existing', # find existing co-located variations
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
327 'check_svs', # find overlapping structural variations
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
328 'check_alleles', # only attribute co-located if alleles are the same
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
329 'check_frequency', # enable frequency checking
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
330 'gmaf', # add global MAF of existing var
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
331 'freq_filter=s', # exclude or include
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
332 'freq_freq=f', # frequency to filter on
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
333 'freq_gt_lt=s', # gt or lt (greater than or less than)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
334 'freq_pop=s', # population to filter on
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
335 'allow_non_variant', # allow non-variant VCF lines through
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
336 'individual=s', # give results by genotype for individuals
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
337 'phased', # force VCF genotypes to be interpreted as phased
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
338 'fork=i', # fork into N processes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
339
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
340 # verbosity options
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
341 'verbose|v', # print out a bit more info while running
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
342 'quiet', # print nothing to STDOUT (unless using -o stdout)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
343 'no_progress', # don't display progress bars
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
344
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
345 # output options
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
346 'everything|e', # switch on EVERYTHING :-)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
347 'output_file|o=s', # output file name
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
348 'force_overwrite', # force overwrite of output file if already exists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
349 'terms|t=s', # consequence terms to use e.g. NCBI, SO
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
350 'coding_only', # only return results for consequences in coding regions
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
351 'canonical', # indicates if transcript is canonical
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
352 'ccds', # output CCDS identifier
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
353 'xref_refseq', # output refseq mrna xref
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
354 'protein', # add e! protein ID to extra column
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
355 'hgnc', # add HGNC gene ID to extra column
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
356 'hgvs', # add HGVS names to extra column
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
357 'sift=s', # SIFT predictions
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
358 'polyphen=s', # PolyPhen predictions
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
359 'condel=s', # Condel predictions
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
360 'regulatory', # enable regulatory stuff
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
361 'cell_type=s', # filter cell types for regfeats
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
362 'convert=s', # convert input to another format (doesn't run VEP)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
363 'filter=s', # run in filtering mode
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
364 'no_intergenic', # don't print out INTERGENIC consequences
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
365 'gvf', # produce gvf output
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
366 'vcf', # produce vcf output
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
367 'original', # produce output in input format
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
368 'no_consequences', # don't calculate consequences
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
369 'lrg', # enable LRG-based features
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
370 'fields=s', # define your own output fields
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
371 'domains', # output overlapping protein features
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
372 'numbers', # include exon and intron numbers
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
373
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
374 # cache stuff
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
375 'cache', # use cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
376 'write_cache', # enables writing to the cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
377 'build=s', # builds cache from DB from scratch; arg is either all (all top-level seqs) or a list of chrs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
378 'no_adaptor_cache', # don't write adaptor cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
379 'prefetch', # prefetch exons, translation, introns, codon table etc for each transcript
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
380 'strip', # strips adaptors etc from objects before caching them
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
381 'rebuild=s', # rebuilds cache by reading in existing then redumping - probably don't need to use this any more
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
382 'dir=s', # dir where cache is found (defaults to $HOME/.vep/)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
383 'cache_region_size=i', # size of region in bases for each cache file
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
384 'no_slice_cache', # tell API not to cache features on slice
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
385 'standalone', # standalone renamed offline
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
386 'offline', # offline mode uses minimal set of modules installed in same dir, no DB connection
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
387 'skip_db_check', # don't compare DB parameters with cached
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
388 'compress=s', # by default we use zcat to decompress; user may want to specify gzcat or "gzip -dc"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
389 'custom=s' => ($config->{custom} ||= []), # specify custom tabixed bgzipped file with annotation
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
390 'tmpdir=s', # tmp dir used for BigWig retrieval
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
391 'plugin=s' => ($config->{plugin} ||= []), # specify a method in a module in the plugins directory
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
392
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
393 # debug
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
394 'cluck', # these two need some mods to Bio::EnsEMBL::DBSQL::StatementHandle to work. Clucks callback trace and SQL
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
395 'count_queries', # counts SQL queries executed
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
396 'admin', # allows me to build off public hosts
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
397 'debug', # print out debug info
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
398 'tabix', # experimental use tabix cache files
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
399 ) or die "ERROR: Failed to parse command-line flags\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
400
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
401 # print usage message if requested or no args supplied
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
402 if(defined($config->{help}) || !$args) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
403 &usage;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
404 exit(0);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
405 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
406
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
407 # dir is where the cache and plugins live
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
408 $config->{dir} ||= join '/', ($ENV{'HOME'}, '.vep');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
409
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
410 # dir gets set to the specific cache directory later on, so take a copy to use
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
411 # when configuring plugins
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
412
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
413 $config->{toplevel_dir} = $config->{dir};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
414
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
415 # ini file?
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
416 my $ini_file = $config->{dir}.'/vep.ini';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
417
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
418 if(-e $ini_file) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
419 read_config_from_file($config, $ini_file);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
420 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
421
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
422 # config file?
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
423 if(defined $config->{config}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
424 read_config_from_file($config, $config->{config});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
425 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
426
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
427 # can't be both quiet and verbose
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
428 die "ERROR: Can't be both quiet and verbose!\n" if defined($config->{quiet}) && defined($config->{verbose});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
429
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
430 # check forking
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
431 if(defined($config->{fork})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
432 die "ERROR: Fork number must be greater than 1\n" if $config->{fork} <= 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
433
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
434 # check we can use MIME::Base64
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
435 eval q{ use MIME::Base64; };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
436
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
437 if($@) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
438 debug("WARNING: Unable to load MIME::Base64, forking disabled") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
439 delete $config->{fork};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
440 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
441 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
442
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
443 # try a practice fork
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
444 my $pid = fork;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
445
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
446 if(!defined($pid)) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
447 debug("WARNING: Fork test failed, forking disabled") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
448 delete $config->{fork};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
449 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
450 elsif($pid) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
451 waitpid($pid, 0);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
452 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
453 elsif($pid == 0) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
454 exit(0);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
455 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
456 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
457 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
458
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
459 # check file format
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
460 if(defined $config->{format}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
461 die "ERROR: Unrecognised input format specified \"".$config->{format}."\"\n" unless $config->{format} =~ /^(pileup|vcf|guess|hgvs|ensembl|id|vep)$/i;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
462 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
463
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
464 # check convert format
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
465 if(defined $config->{convert}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
466 die "ERROR: Unrecognised output format for conversion specified \"".$config->{convert}."\"\n" unless $config->{convert} =~ /vcf|ensembl|pileup|hgvs/i;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
467 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
468
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
469 # check if user still using --standalone
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
470 if(defined $config->{standalone}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
471 die "ERROR: --standalone replaced by --offline\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
472 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
473
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
474 # connection settings for Ensembl Genomes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
475 if($config->{genomes}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
476 $config->{host} ||= 'mysql.ebi.ac.uk';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
477 $config->{port} ||= 4157;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
478 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
479
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
480 # connection settings for main Ensembl
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
481 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
482 $config->{species} ||= "homo_sapiens";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
483 $config->{host} ||= 'ensembldb.ensembl.org';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
484 $config->{port} ||= 5306;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
485 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
486
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
487 # refseq or core?
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
488 if(defined($config->{refseq})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
489 $config->{core_type} = 'otherfeatures';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
490 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
491 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
492 $config->{core_type} = 'core';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
493 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
494
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
495 # output term
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
496 if(defined $config->{terms}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
497 die "ERROR: Unrecognised consequence term type specified \"".$config->{terms}."\" - must be one of ensembl, so, ncbi\n" unless $config->{terms} =~ /ensembl|display|so|ncbi/i;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
498 if($config->{terms} =~ /ensembl|display/i) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
499 $config->{terms} = 'display';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
500 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
501 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
502 $config->{terms} = uc($config->{terms});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
503 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
504 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
505
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
506 # everything?
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
507 if(defined($config->{everything})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
508 my %everything = (
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
509 sift => 'b',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
510 polyphen => 'b',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
511 ccds => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
512 hgvs => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
513 hgnc => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
514 numbers => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
515 domains => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
516 regulatory => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
517 canonical => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
518 protein => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
519 gmaf => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
520 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
521
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
522 $config->{$_} = $everything{$_} for keys %everything;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
523
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
524 # these ones won't work with offline
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
525 delete $config->{hgvs} if defined($config->{offline});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
526 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
527
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
528 # check nsSNP tools
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
529 foreach my $tool(grep {defined $config->{lc($_)}} qw(SIFT PolyPhen Condel)) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
530 die "ERROR: Unrecognised option for $tool \"", $config->{lc($tool)}, "\" - must be one of p (prediction), s (score) or b (both)\n" unless $config->{lc($tool)} =~ /^(s|p|b)/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
531
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
532 die "ERROR: $tool not available for this species\n" unless $config->{species} =~ /human|homo/i;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
533
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
534 die "ERROR: $tool functionality is now available as a VEP Plugin - see http://www.ensembl.org/info/docs/variation/vep/vep_script.html#plugins\n" if $tool eq 'Condel';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
535 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
536
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
537 # force quiet if outputting to STDOUT
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
538 if(defined($config->{output_file}) && $config->{output_file} =~ /stdout/i) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
539 delete $config->{verbose} if defined($config->{verbose});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
540 $config->{quiet} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
541 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
542
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
543 # individual(s) specified?
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
544 if(defined($config->{individual})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
545 $config->{individual} = [split /\,/, $config->{individual}];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
546
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
547 # force allow_non_variant
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
548 $config->{allow_non_variant} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
549 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
550
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
551 # summarise options if verbose
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
552 if(defined $config->{verbose}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
553 my $header =<<INTRO;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
554 #----------------------------------#
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
555 # ENSEMBL VARIANT EFFECT PREDICTOR #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
556 #----------------------------------#
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
557
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
558 version $VERSION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
559
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
560 By Will McLaren (wm2\@ebi.ac.uk)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
561
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
562 Configuration options:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
563
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
564 INTRO
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
565 print $header;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
566
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
567 my $max_length = (sort {$a <=> $b} map {length($_)} keys %$config)[-1];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
568
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
569 foreach my $key(sort keys %$config) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
570 next if ref($config->{$key}) eq 'ARRAY' && scalar @{$config->{$key}} == 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
571 print $key.(' ' x (($max_length - length($key)) + 4)).(ref($config->{$key}) eq 'ARRAY' ? join "\t", @{$config->{$key}} : $config->{$key})."\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
572 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
573
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
574 print "\n".("-" x 20)."\n\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
575 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
576
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
577 # check custom annotations
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
578 for my $i(0..$#{$config->{custom}}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
579 my $custom = $config->{custom}->[$i];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
580
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
581 my ($filepath, $shortname, $format, $type, $coords) = split /\,/, $custom;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
582 $type ||= 'exact';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
583 $format ||= 'bed';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
584 $coords ||= 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
585
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
586 # check type
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
587 die "ERROR: Type $type for custom annotation file $filepath is not allowed (must be one of \"exact\", \"overlap\")\n" unless $type =~ /exact|overlap/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
588
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
589 # check format
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
590 die "ERROR: Format $format for custom annotation file $filepath is not allowed (must be one of \"bed\", \"vcf\", \"gtf\", \"gff\", \"bigwig\")\n" unless $format =~ /bed|vcf|gff|gtf|bigwig/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
591
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
592 # bigwig format
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
593 if($format eq 'bigwig') {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
594 # check for bigWigToWig
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
595 die "ERROR: bigWigToWig does not seem to be in your path - this is required to use bigwig format custom annotations\n" unless `which bigWigToWig 2>&1` =~ /bigWigToWig$/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
596 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
597
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
598 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
599 # check for tabix
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
600 die "ERROR: tabix does not seem to be in your path - this is required to use custom annotations\n" unless `which tabix 2>&1` =~ /tabix$/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
601
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
602 # remote files?
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
603 if($filepath =~ /tp\:\/\//) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
604 my $remote_test = `tabix $filepath 1:1-1 2>&1`;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
605 if($remote_test =~ /fail/) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
606 die "$remote_test\nERROR: Could not find file or index file for remote annotation file $filepath\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
607 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
608 elsif($remote_test =~ /get_local_version/) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
609 debug("Downloaded tabix index file for remote annotation file $filepath") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
610 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
611 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
612
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
613 # check files exist
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
614 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
615 die "ERROR: Custom annotation file $filepath not found\n" unless -e $filepath;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
616 die "ERROR: Tabix index file $filepath\.tbi not found - perhaps you need to create it first?\n" unless -e $filepath.'.tbi';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
617 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
618 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
619
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
620 $config->{custom}->[$i] = {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
621 'file' => $filepath,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
622 'name' => $shortname || 'CUSTOM'.($i + 1),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
623 'type' => $type,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
624 'format' => $format,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
625 'coords' => $coords,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
626 };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
627 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
628
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
629 # check if using filter and original
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
630 die "ERROR: You must also provide output filters using --filter to use --original\n" if defined($config->{original}) && !defined($config->{filter});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
631
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
632 # filter by consequence?
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
633 if(defined($config->{filter})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
634
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
635 my %filters = map {$_ => 1} split /\,/, $config->{filter};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
636
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
637 # add in shortcuts
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
638 foreach my $filter(keys %filters) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
639 my $value = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
640 if($filter =~ /^no_/) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
641 delete $filters{$filter};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
642 $filter =~ s/^no_//g;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
643 $value = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
644 $filters{$filter} = $value;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
645 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
646
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
647 if(defined($FILTER_SHORTCUTS{$filter})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
648 delete $filters{$filter};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
649 $filters{$_} = $value for keys %{$FILTER_SHORTCUTS{$filter}};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
650 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
651 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
652
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
653 $config->{filter} = \%filters;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
654
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
655 $config->{filter_count} = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
656 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
657
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
658 # set defaults
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
659 $config->{user} ||= 'anonymous';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
660 $config->{buffer_size} ||= 5000;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
661 $config->{chunk_size} ||= '50kb';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
662 $config->{output_file} ||= "variant_effect_output.txt";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
663 $config->{tmpdir} ||= '/tmp';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
664 $config->{format} ||= 'guess';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
665 $config->{terms} ||= 'SO';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
666 $config->{cache_region_size} ||= 1000000;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
667 $config->{compress} ||= 'zcat';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
668
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
669 # regulatory has to be on for cell_type
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
670 if(defined($config->{cell_type})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
671 $config->{regulatory} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
672 $config->{cell_type} = [split /\,/, $config->{cell_type}] if defined($config->{cell_type});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
673 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
674
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
675 # can't use a whole bunch of options with most_severe
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
676 if(defined($config->{most_severe})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
677 foreach my $flag(qw(no_intergenic protein hgnc sift polyphen coding_only ccds canonical xref_refseq numbers domains summary)) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
678 die "ERROR: --most_severe is not compatible with --$flag\n" if defined($config->{$flag});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
679 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
680 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
681
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
682 # can't use a whole bunch of options with summary
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
683 if(defined($config->{summary})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
684 foreach my $flag(qw(no_intergenic protein hgnc sift polyphen coding_only ccds canonical xref_refseq numbers domains most_severe)) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
685 die "ERROR: --summary is not compatible with --$flag\n" if defined($config->{$flag});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
686 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
687 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
688
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
689 # frequency filtering
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
690 if(defined($config->{check_frequency})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
691 foreach my $flag(qw(freq_freq freq_filter freq_pop freq_gt_lt)) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
692 die "ERROR: To use --check_frequency you must also specify flag --$flag\n" unless defined $config->{$flag};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
693 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
694
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
695 # need to set check_existing
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
696 $config->{check_existing} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
697 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
698
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
699 $config->{check_existing} = 1 if defined $config->{check_alleles} || defined $config->{gmaf};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
700
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
701 # warn users still using whole_genome flag
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
702 if(defined($config->{whole_genome})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
703 debug("INFO: Whole-genome mode is now the default run-mode for the script. To disable it, use --no_whole_genome") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
704 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
705
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
706 $config->{whole_genome} = 1 unless defined $config->{no_whole_genome};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
707 $config->{failed} = 0 unless defined $config->{failed};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
708 $config->{chunk_size} =~ s/mb?/000000/i;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
709 $config->{chunk_size} =~ s/kb?/000/i;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
710 $config->{cache_region_size} =~ s/mb?/000000/i;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
711 $config->{cache_region_size} =~ s/kb?/000/i;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
712
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
713 # cluck and display executed SQL?
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
714 $Bio::EnsEMBL::DBSQL::StatementHandle::cluck = 1 if defined($config->{cluck});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
715
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
716 # offline needs cache, can't use HGVS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
717 if(defined($config->{offline})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
718 $config->{cache} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
719
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
720 #die("ERROR: Cannot generate HGVS coordinates in offline mode\n") if defined($config->{hgvs});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
721 die("ERROR: Cannot use HGVS as input in offline mode\n") if $config->{format} eq 'hgvs';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
722 die("ERROR: Cannot use variant identifiers as input in offline mode\n") if $config->{format} eq 'id';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
723 die("ERROR: Cannot do frequency filtering in offline mode\n") if defined($config->{check_frequency});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
724 die("ERROR: Cannot retrieve overlapping structural variants in offline mode\n") if defined($config->{check_sv});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
725 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
726
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
727 # write_cache needs cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
728 $config->{cache} = 1 if defined $config->{write_cache};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
729
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
730 # no_slice_cache, prefetch and whole_genome have to be on to use cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
731 if(defined($config->{cache})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
732 $config->{prefetch} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
733 $config->{no_slice_cache} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
734 $config->{whole_genome} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
735 $config->{strip} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
736 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
737
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
738 $config->{build} = $config->{rebuild} if defined($config->{rebuild});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
739
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
740 # force options for full build
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
741 if(defined($config->{build})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
742 $config->{prefetch} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
743 $config->{hgnc} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
744 $config->{no_slice_cache} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
745 $config->{cache} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
746 $config->{strip} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
747 $config->{write_cache} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
748 $config->{cell_type} = 1 if defined($config->{regulatory});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
749 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
750
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
751 # connect to databases
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
752 $config->{reg} = &connect_to_dbs($config);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
753
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
754 # complete dir with species name and db_version
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
755 $config->{dir} .= '/'.(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
756 join '/', (
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
757 defined($config->{offline}) ? $config->{species} : ($config->{reg}->get_alias($config->{species}) || $config->{species}),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
758 $config->{db_version} || $config->{reg}->software_version
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
759 )
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
760 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
761
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
762 # warn user cache directory doesn't exist
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
763 if(!-e $config->{dir}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
764
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
765 # if using write_cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
766 if(defined($config->{write_cache})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
767 debug("INFO: Cache directory ", $config->{dir}, " not found - it will be created") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
768 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
769
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
770 # want to read cache, not found
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
771 elsif(defined($config->{cache})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
772 die("ERROR: Cache directory ", $config->{dir}, " not found");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
773 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
774 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
775
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
776 if(defined($config->{cache})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
777 # read cache info
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
778 if(read_cache_info($config)) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
779 debug("Read existing cache info") unless defined $config->{quiet};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
780 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
781 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
782
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
783 # we configure plugins here because they can sometimes switch on the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
784 # regulatory config option
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
785 configure_plugins($config);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
786
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
787 # include regulatory modules if requested
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
788 if(defined($config->{regulatory})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
789 # do the use statements here so that users don't have to have the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
790 # funcgen API installed to use the rest of the script
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
791 eval q{
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
792 use Bio::EnsEMBL::Funcgen::DBSQL::RegulatoryFeatureAdaptor;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
793 use Bio::EnsEMBL::Funcgen::DBSQL::MotifFeatureAdaptor;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
794 use Bio::EnsEMBL::Funcgen::MotifFeature;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
795 use Bio::EnsEMBL::Funcgen::RegulatoryFeature;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
796 use Bio::EnsEMBL::Funcgen::BindingMatrix;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
797 };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
798
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
799 if($@) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
800 die("ERROR: Ensembl Funcgen API must be installed to use --regulatory or plugins that deal with regulatory features\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
801 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
802 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
803
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
804 # user defined custom output fields
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
805 if(defined($config->{fields})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
806 $config->{fields} = [split ',', $config->{fields}];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
807 debug("Output fields redefined (".scalar @{$config->{fields}}." defined)") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
808 $config->{fields_redefined} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
809 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
810 $config->{fields} ||= \@OUTPUT_COLS;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
811
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
812 # suppress warnings that the FeatureAdaptors spit if using no_slice_cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
813 Bio::EnsEMBL::Utils::Exception::verbose(1999) if defined($config->{no_slice_cache});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
814
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
815 # get adaptors (don't get them in offline mode)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
816 unless(defined($config->{offline})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
817
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
818 if(defined($config->{cache}) && !defined($config->{write_cache})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
819
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
820 # try and load adaptors from cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
821 if(!&load_dumped_adaptor_cache($config)) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
822 &get_adaptors($config);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
823 &dump_adaptor_cache($config) if defined($config->{write_cache}) && !defined($config->{no_adaptor_cache});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
824 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
825
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
826 # check cached adaptors match DB params
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
827 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
828 my $dbc = $config->{sa}->{dbc};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
829
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
830 my $ok = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
831
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
832 if($dbc->{_host} ne $config->{host}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
833
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
834 # ens-livemirror, useastdb and ensembldb should all have identical DBs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
835 unless(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
836 (
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
837 $dbc->{_host} eq 'ens-livemirror'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
838 || $dbc->{_host} eq 'ensembldb.ensembl.org'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
839 || $dbc->{_host} eq 'useastdb.ensembl.org'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
840 ) && (
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
841 $config->{host} eq 'ens-livemirror'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
842 || $config->{host} eq 'ensembldb.ensembl.org'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
843 || $config->{host} eq 'useastdb.ensembl.org'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
844 )
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
845 ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
846 $ok = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
847 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
848
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
849 unless(defined($config->{skip_db_check})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
850 # but we still need to reconnect
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
851 debug("INFO: Defined host ", $config->{host}, " is different from cached ", $dbc->{_host}, " - reconnecting to host") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
852
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
853 &get_adaptors($config);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
854 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
855 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
856
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
857 if(!$ok) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
858 if(defined($config->{skip_db_check})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
859 debug("INFO: Defined host ", $config->{host}, " is different from cached ", $dbc->{_host}) unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
860 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
861 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
862 die "ERROR: Defined host ", $config->{host}, " is different from cached ", $dbc->{_host}, ". If you are sure this is OK, rerun with -skip_db_check flag set";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
863 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
864 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
865 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
866 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
867 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
868 &get_adaptors($config);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
869 &dump_adaptor_cache($config) if defined($config->{write_cache}) && !defined($config->{no_adaptor_cache});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
870 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
871
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
872 # reg adaptors (only fetches if not retrieved from cache already)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
873 &get_reg_adaptors($config) if defined($config->{regulatory});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
874 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
875
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
876 # check cell types
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
877 if(defined($config->{cell_type}) && !defined($config->{build})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
878 my $cls = '';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
879
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
880 if(defined($config->{cache})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
881 $cls = $config->{cache_cell_types};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
882 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
883 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
884 my $cta = $config->{RegulatoryFeature_adaptor}->db->get_CellTypeAdaptor();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
885 $cls = join ",", map {$_->name} @{$cta->fetch_all};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
886 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
887
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
888 foreach my $cl(@{$config->{cell_type}}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
889 die "ERROR: cell type $cl not recognised; available cell types are:\n$cls\n" unless $cls =~ /(^|,)$cl(,|$)/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
890 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
891 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
892
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
893 # get terminal width for progress bars
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
894 unless(defined($config->{quiet})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
895 my $width;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
896
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
897 # module may not be installed
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
898 eval q{
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
899 use Term::ReadKey;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
900 };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
901
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
902 if(!$@) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
903 my ($w, $h);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
904
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
905 # module may be installed, but e.g.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
906 eval {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
907 ($w, $h) = GetTerminalSize();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
908 };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
909
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
910 $width = $w if defined $w;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
911 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
912
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
913 $width ||= 60;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
914 $width -= 12;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
915 $config->{terminal_width} = $width;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
916 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
917
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
918 # jump out to build cache if requested
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
919 if(defined($config->{build})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
920
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
921 if($config->{host} =~ /^(ensembl|useast)db\.ensembl\.org$/ && !defined($config->{admin})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
922 die("ERROR: Cannot build cache using public database server ", $config->{host}, "\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
923 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
924
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
925 # build the cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
926 debug("Building cache for ".$config->{species}) unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
927 build_full_cache($config);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
928
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
929 # exit script
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
930 debug("Finished building cache") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
931 exit(0);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
932 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
933
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
934
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
935 # warn user DB will be used for SIFT/PolyPhen/HGVS/frequency/LRG
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
936 if(defined($config->{cache})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
937
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
938 # these options definitely depend on the DB
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
939 foreach my $param(grep {defined $config->{$_}} qw(hgvs check_frequency lrg check_sv)) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
940 debug("INFO: Database will be accessed when using --$param") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
941 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
942
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
943 # as does using HGVS or IDs as input
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
944 debug("INFO: Database will be accessed when using --format ", $config->{format}) if ($config->{format} eq 'id' || $config->{format} eq 'hgvs') && !defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
945
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
946 # the rest may be in the cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
947 foreach my $param(grep {defined $config->{$_}} qw(sift polyphen regulatory)) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
948 next if defined($config->{'cache_'.$param});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
949 debug("INFO: Database will be accessed when using --$param; consider using the complete cache containing $param data (see documentation for details)") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
950 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
951 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
952
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
953 # get list of chrs if supplied
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
954 if(defined($config->{chr})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
955 my %chrs;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
956
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
957 foreach my $val(split /\,/, $config->{chr}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
958 my @nnn = split /\-/, $val;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
959
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
960 foreach my $chr($nnn[0]..$nnn[-1]) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
961 $chrs{$chr} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
962 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
963 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
964
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
965 $config->{chr} = \%chrs;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
966 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
967
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
968 # get input file handle
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
969 $config->{in_file_handle} = &get_in_file_handle($config);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
970
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
971 return $config;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
972 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
973
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
974 # reads config from a file
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub read_config_from_file {
    # Merges options read from a plain-text config file into $config.
    #
    # Arguments:
    #   $config - hashref of options; modified in place
    #   $file   - path of the config file to read
    #
    # File format: one option per line, written as "key value",
    # "key=value" or "key = value". Leading dashes on the key are
    # dropped, so "--species" and "species" are equivalent. Lines whose
    # first non-blank character is "#" and blank lines are ignored.
    #
    # Values already present (and true) in $config are kept; the file only
    # fills in options that are not set yet. If the existing value is an
    # array reference, every value on the line is appended to it instead.
    #
    # Dies if the file cannot be opened.
    my $config = shift;
    my $file   = shift;

    # three-argument open with a lexical handle: the file name is never
    # interpreted as an open mode or a pipe, and no global handle is used
    open(my $config_fh, '<', $file) or die "ERROR: Could not open config file \"$file\"\n";

    # a lexical line variable keeps the caller's $_ untouched
    while(my $line = <$config_fh>) {

        # skip comments (possibly indented) and blank lines
        next if $line =~ /^\s*\#/;
        next unless $line =~ /\S/;

        # leading whitespace would otherwise produce an empty first field
        $line =~ s/^\s+//;

        # fields are separated by whitespace or by "=" with optional
        # surrounding whitespace; trailing newline is consumed by the split
        my @split = split /\s*\=\s*|\s+/, $line;
        my $key = shift @split;

        # strip every leading dash so "-key" and "--key" both map to "key"
        $key =~ s/^\-+// if defined($key);

        # nothing usable on this line (e.g. it started with "=")
        next unless defined($key) && length($key);

        if(defined($config->{$key}) && ref($config->{$key}) eq 'ARRAY') {
            # multi-valued option: append everything given on this line
            push @{$config->{$key}}, @split;
        }
        else {
            # existing settings take precedence over the file
            $config->{$key} ||= $split[0];
        }
    }

    close $config_fh;

    # force quiet if outputting to STDOUT
    if(defined($config->{output_file}) && $config->{output_file} =~ /stdout/i) {
        delete $config->{verbose} if defined($config->{verbose});
        $config->{quiet} = 1;
    }

    debug("Read configuration from $file") unless defined($config->{quiet});
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1005
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1006 # configures custom VEP plugins
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1007 sub configure_plugins {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1008
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1009 my $config = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1010
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1011 $config->{plugins} = [];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1012
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1013 if (my @plugins = @{ $config->{plugin} }) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1014
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1015 # add the Plugins directory onto @INC
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1016
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1017 unshift @INC, $config->{toplevel_dir}."/Plugins";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1018
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1019 for my $plugin (@plugins) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1020
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1021 # parse out the module name and parameters
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1022
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1023 my ($module, @params) = split /,/, $plugin;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1024
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1025 # check we can use the module
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1026
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1027 eval qq{
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1028 use $module;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1029 };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1030 if ($@) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1031 debug("Failed to compile plugin $module: $@") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1032 next;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1033 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1034
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1035 # now check we can instantiate it, passing any parameters to the constructor
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1036
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1037 my $instance;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1038
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1039 eval {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1040 $instance = $module->new($config, @params);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1041 };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1042 if ($@) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1043 debug("Failed to instantiate plugin $module: $@") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1044 next;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1045 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1046
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1047 # check that the versions match
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1048
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1049 my $plugin_version;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1050
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1051 if ($instance->can('version')) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1052 $plugin_version = $instance->version;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1053 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1054
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1055 my $version_ok = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1056
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1057 if ($plugin_version) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1058 my ($plugin_major, $plugin_minor, $plugin_maintenance) = split /\./, $plugin_version;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1059 my ($major, $minor, $maintenance) = split /\./, $VERSION;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1060
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1061 if ($plugin_major != $major) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1062 debug("Warning: plugin $plugin version ($plugin_version) does not match the current VEP version ($VERSION)") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1063 $version_ok = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1064 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1065 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1066 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1067 debug("Warning: plugin $plugin does not define a version number") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1068 $version_ok = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1069 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1070
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1071 debug("You may experience unexpected behaviour with this plugin") unless defined($config->{quiet}) || $version_ok;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1072
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1073 # check that it implements all necessary methods
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1074
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1075 for my $required(qw(run get_header_info check_feature_type check_variant_feature_type)) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1076 unless ($instance->can($required)) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1077 debug("Plugin $module doesn't implement a required method '$required', does it inherit from BaseVepPlugin?") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1078 next;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1079 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1080 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1081
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1082 # all's good, so save the instance in our list of plugins
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1083
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1084 push @{ $config->{plugins} }, $instance;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1085
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1086 debug("Loaded plugin: $module") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1087
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1088 # for convenience, check if the plugin wants regulatory stuff and turn on the config option if so
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1089
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1090 if (grep { $_ =~ /motif|regulatory/i } @{ $instance->feature_types }) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1091 debug("Fetching regulatory features for plugin: $module") unless defined($config->{quiet});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1092 $config->{regulatory} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1093 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1094 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1095 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1096 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1097
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# connects to DBs (not done in offline mode)
#
# Args   : $config - hashref of run options; reads offline, registry, quiet,
#                    verbose, no_slice_cache, host, user, password, port,
#                    db_version and species
# Returns: the registry class name 'Bio::EnsEMBL::Registry', on which the
#          loading methods below are invoked as class methods
sub connect_to_dbs {
    my $config = shift;

    # get registry
    my $reg = 'Bio::EnsEMBL::Registry';

    # offline mode: nothing to load, hand the registry straight back
    return $reg if defined($config->{offline});

    if (defined($config->{registry})) {

        # DB options come from the registry file the user supplied
        unless (defined($config->{quiet})) {
            debug("Loading DB config from registry file ", $config->{registry});
        }

        $reg->load_all(
            $config->{registry},
            $config->{verbose},
            undef,
            $config->{no_slice_cache}
        );
    }
    else {

        # no registry file, so connect to the DB server by hand; the species
        # is only forwarded when it starts with letters, "_", letters
        my $species_arg =
            $config->{species} =~ /^[a-z]+\_[a-z]+/i ? $config->{species} : undef;

        $reg->load_registry_from_db(
            -host       => $config->{host},
            -user       => $config->{user},
            -pass       => $config->{password},
            -port       => $config->{port},
            -db_version => $config->{db_version},
            -species    => $species_arg,
            -verbose    => $config->{verbose},
            -no_cache   => $config->{no_slice_cache},
        );
    }

    # any error raised here is trapped by the eval and ignored
    eval { $reg->set_reconnect_when_lost() };

    if (defined($config->{verbose})) {

        # meta container adaptors are fetched only to report schema versions
        my $core_mca = $reg->get_adaptor($config->{species}, 'core', 'metacontainer');
        my $var_mca  = $reg->get_adaptor($config->{species}, 'variation', 'metacontainer');

        if ($core_mca && $var_mca) {
            debug(
                "Connected to core version ", $core_mca->get_schema_version, " database ",
                "and variation version ", $var_mca->get_schema_version, " database"
            );
        }
    }

    return $reg;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1149
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# get adaptors from DB
#
# Fetches the variation and core adaptors from the registry held in
# $config->{reg} and stores each one in $config under a short key
# (vfa, svfa, tva, pfpma, va, sa, ga, ta, mca, csa).
#
# Args   : $config - hashref of run options; reads reg, species, core_type
# Dies   : if $config->{reg} is not defined, or if no slice adaptor could
#          be obtained
sub get_adaptors {
    my $config = shift;

    die "ERROR: No registry" unless defined $config->{reg};

    # [ config key, adaptor type ] pairs, kept in the order they are fetched
    my @variation_adaptors = (
        [ vfa   => 'variationfeature' ],
        [ svfa  => 'structuralvariationfeature' ],
        [ tva   => 'transcriptvariation' ],
        [ pfpma => 'proteinfunctionpredictionmatrix' ],
        [ va    => 'variation' ],
    );

    foreach my $entry (@variation_adaptors) {
        my ($key, $adaptor_type) = @{$entry};
        $config->{$key} = $config->{reg}->get_adaptor($config->{species}, 'variation', $adaptor_type);
    }

    # get fake ones for species with no var DB
    unless (defined($config->{vfa})) {
        my $species = $config->{species};

        $config->{vfa}  = Bio::EnsEMBL::Variation::DBSQL::VariationFeatureAdaptor->new_fake($species);
        $config->{svfa} = Bio::EnsEMBL::Variation::DBSQL::StructuralVariationFeatureAdaptor->new_fake($species);
        $config->{tva}  = Bio::EnsEMBL::Variation::DBSQL::TranscriptVariationAdaptor->new_fake($species);
    }

    # these come from whichever DB group $config->{core_type} names
    my @core_adaptors = (
        [ sa  => 'slice' ],
        [ ga  => 'gene' ],
        [ ta  => 'transcript' ],
        [ mca => 'metacontainer' ],
        [ csa => 'coordsystem' ],
    );

    foreach my $entry (@core_adaptors) {
        my ($key, $adaptor_type) = @{$entry};
        $config->{$key} = $config->{reg}->get_adaptor($config->{species}, $config->{core_type}, $adaptor_type);
    }

    # cache schema version
    if (defined $config->{mca}) {
        $config->{mca}->get_schema_version;
    }

    # check we got slice adaptor - can't continue without a core DB
    die("ERROR: Could not connect to core database\n") unless defined $config->{sa};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1181
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# gets regulatory adaptors
#
# For every feature type in @REG_FEAT_TYPES, fetches the matching 'funcgen'
# adaptor from $config->{reg} and stores it as $config->{"<type>_adaptor"}.
# Types that already have an adaptor in $config are left alone. As soon as
# one adaptor cannot be obtained, $config->{regulatory} is deleted and the
# remaining types are not attempted.
#
# Args   : $config - hashref of run options; reads reg and species
sub get_reg_adaptors {
    my $config = shift;

    TYPE:
    foreach my $feature_type (@REG_FEAT_TYPES) {
        my $slot = $feature_type.'_adaptor';

        # already fetched on an earlier call
        next TYPE if defined($config->{$slot});

        my $found = $config->{reg}->get_adaptor($config->{species}, 'funcgen', $feature_type);

        unless (defined($found)) {
            # no adaptor available: switch regulatory handling off and give up
            delete $config->{regulatory};
            last TYPE;
        }

        $config->{$slot} = $found;
    }
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1199
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# gets file handle for input
#
# Args   : $config - hashref of run options; reads input_file, compress
#                    and quiet
# Returns: a FileHandle open for reading when $config->{input_file} is set
#          (read through the $config->{compress} command if the name ends
#          in ".gz"); otherwise the string 'STDIN'
# Dies   : if the named input file does not exist or cannot be opened
sub get_in_file_handle {
    my $config = shift;

    # define the filehandle to read input from
    my $in_file_handle = FileHandle->new;

    if(defined($config->{input_file})) {

        # check defined input file exists
        die("ERROR: Could not find input file ", $config->{input_file}, "\n") unless -e $config->{input_file};

        if($config->{input_file} =~ /\.gz$/){

            # read through the decompression command as a pipe
            # NOTE(review): the file name is interpolated unquoted into a
            # shell command line, so names containing spaces or shell
            # metacharacters will not be read correctly - left unchanged
            # because $config->{compress} may itself carry arguments
            $in_file_handle->open($config->{compress}." ". $config->{input_file} . " | " ) or die("ERROR: Could not read from input file ", $config->{input_file}, "\n");
        }
        else {

            # explicit read mode: the name is taken literally, so leading
            # mode characters or surrounding whitespace in it cannot change
            # how the file is opened
            $in_file_handle->open($config->{input_file}, '<') or die("ERROR: Could not read from input file ", $config->{input_file}, "\n");
        }
    }

    # no file specified - try to read data off command line
    else {
        $in_file_handle = 'STDIN';
        debug("Reading input from STDIN (or maybe you forgot to specify an input file?)...") unless defined $config->{quiet};
    }

    return $in_file_handle;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1228
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# gets file handle for output and adds header
#
# Opens the destination named by $config->{output_file} (or uses STDOUT),
# writes the header appropriate to the output mode and returns the handle.
#
# Header written, by mode (first match wins):
#   convert        - a minimal VCF header if $config->{convert} matches
#                    /vcf/i, otherwise nothing
#   gvf / original - the stored input headers, only when "original" is set
#   vcf            - VCF header including a CSQ INFO line listing the output
#                    fields; unless fields_redefined is set this also
#                    replaces $config->{fields}, and when input headers are
#                    present the INFO lines are spliced into
#                    $config->{headers} in place
#   default        - a "##" comment block followed by a "#"-prefixed row of
#                    column names
#
# Args   : $config - hashref of run options
# Returns: handle open for writing (a FileHandle object, or the STDOUT glob)
# Dies   : if the output file exists and force_overwrite is not set, or if
#          the output file cannot be opened for writing
sub get_out_file_handle {
    my $config = shift;

    # define filehandle to write to
    my $out_file_handle = FileHandle->new;

    # check if file exists
    if(-e $config->{output_file} && !defined($config->{force_overwrite})) {
        die("ERROR: Output file ", $config->{output_file}, " already exists. Specify a different output file with --output_file or overwrite existing file with --force_overwrite\n");
    }

    # NOTE(review): this match is unanchored, so any output file name that
    # merely contains "stdout" is also sent to STDOUT - confirm whether
    # that is intended before tightening it
    if($config->{output_file} =~ /stdout/i) {
        $out_file_handle = *STDOUT;
    }
    else {
        # explicit write mode so the file name is always taken literally
        $out_file_handle->open($config->{output_file}, '>') or die("ERROR: Could not write to output file ", $config->{output_file}, "\n");
    }

    # define headers for a VCF file
    my @vcf_headers = (
        '#CHROM',
        'POS',
        'ID',
        'REF',
        'ALT',
        'QUAL',
        'FILTER',
        'INFO'
    );

    # file conversion, don't want to add normal headers
    if(defined($config->{convert})) {
        # header for VCF
        if($config->{convert} =~ /vcf/i) {
            print $out_file_handle "##fileformat=VCFv4.0\n";
            print $out_file_handle join "\t", @vcf_headers;
            print $out_file_handle "\n";
        }

        return $out_file_handle;
    }

    # GVF output, no header
    elsif(defined($config->{gvf}) || defined($config->{original})) {
        print $out_file_handle join "\n", @{$config->{headers}} if defined($config->{headers}) && defined($config->{original});
        return $out_file_handle;
    }

    elsif(defined($config->{vcf})) {

        # create an info string for the VCF header
        my @new_headers;

        # if the user has defined the fields themselves, we don't need to worry
        if(defined $config->{fields_redefined}) {
            @new_headers = @{$config->{fields}};
        }
        else {
            @new_headers = (

                # get default headers, minus variation name and location (already encoded in VCF)
                grep {
                    $_ ne 'Uploaded_variation' and
                    $_ ne 'Location' and
                    $_ ne 'Extra'
                } @{$config->{fields}},

                # get extra headers
                map {@{$extra_headers{$_}}}
                grep {defined $config->{$_}}
                keys %extra_headers
            );

            # plugin headers
            foreach my $plugin_header(split /\n/, get_plugin_headers($config)) {

                # only use the capture when this line really matched;
                # otherwise $1 would still hold an earlier line's key
                if($plugin_header =~ /\#\# (.+?)\t\:.+/) {
                    push @new_headers, $1;
                }
            }

            # redefine the main headers list in config
            $config->{fields} = \@new_headers;
        }

        # add the newly defined headers as a header to the VCF
        my $string = join '|', @{$config->{fields}};
        my @vcf_info_strings = ('##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: '.$string.'">');

        # add custom headers
        foreach my $custom(@{$config->{custom}}) {
            push @vcf_info_strings, '##INFO=<ID='.$custom->{name}.',Number=.,Type=String,Description="'.$custom->{file}.' ('.$custom->{type}.')">';
        }

        # if this is already a VCF file, we need to add our new headers in the right place
        if(defined($config->{headers})) {

            for my $i(0..$#{$config->{headers}}) {
                if($config->{headers}->[$i] =~ /^\#CHROM\s+POS\s+ID/) {
                    splice(@{$config->{headers}}, $i, 0, @vcf_info_strings);

                    # stop here: the array has just grown, and carrying on
                    # could meet the shifted #CHROM line and insert again
                    last;
                }
            }

            print $out_file_handle join "\n", @{$config->{headers}};
            print $out_file_handle "\n";
        }

        else {
            print $out_file_handle "##fileformat=VCFv4.0\n";
            print $out_file_handle join "\n", @vcf_info_strings;
            print $out_file_handle "\n";
            print $out_file_handle join "\t", @vcf_headers;
            print $out_file_handle "\n";
        }

        return $out_file_handle;
    }

    # make header
    my $time = get_time();

    # declared unconditionally: "my ... if ..." has undefined behaviour
    my $db_string = '';
    $db_string = $config->{mca}->dbc->dbname." on ".$config->{mca}->dbc->host if defined $config->{mca};
    $db_string .= "\n## Using cache in ".$config->{dir} if defined($config->{cache});
    my $version_string =
        "Using API version ".$config->{reg}->software_version.
        ", DB version ".(defined $config->{mca} && $config->{mca}->get_schema_version ? $config->{mca}->get_schema_version : '?');

    # add key for extra column headers based on config
    my $extra_column_keys = join "\n",
        map {'## '.$_.' : '.$extra_descs{$_}}
        sort map {@{$extra_headers{$_}}}
        grep {defined $config->{$_}}
        keys %extra_headers;

    my $header =<<HEAD;
## ENSEMBL VARIANT EFFECT PREDICTOR v$VERSION
## Output produced at $time
## Connected to $db_string
## $version_string
## Extra column keys:
$extra_column_keys
HEAD

    $header .= get_plugin_headers($config);

    # add headers
    print $out_file_handle $header;

    # add custom data defs
    if(defined($config->{custom})) {
        foreach my $custom(@{$config->{custom}}) {
            print $out_file_handle '## '.$custom->{name}."\t: ".$custom->{file}.' ('.$custom->{type}.")\n";
        }
    }

    # add column headers
    print $out_file_handle '#', (join "\t", @{$config->{fields}});
    print $out_file_handle "\n";

    return $out_file_handle;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1388
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Builds the header text contributed by the loaded plugins.
#
# Each plugin in $config->{plugins} is asked for get_header_info; when that
# returns a true value it is treated as a hashref and every entry becomes
# one line of the form "## <key>\t: <value>\n". Keys are emitted in sorted
# order so that repeated calls and repeated runs give identical text.
#
# Args   : $config - hashref of run options; reads plugins
# Returns: the concatenated header lines, or "" when there are none
sub get_plugin_headers {

    my $config = shift;

    my $header = "";

    for my $plugin (@{ $config->{plugins} }) {

        # plugins with nothing to report are skipped
        my $hdr = $plugin->get_header_info or next;

        # hash order is not stable, so sort for reproducible output
        for my $key (sort keys %$hdr) {
            $header .= "## $key\t: $hdr->{$key}\n";
        }
    }

    return $header;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1407
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# convert a variation feature to a line of output
#
# Arguments: $config - config hashref; {convert} names the target format and
#                      {out_file_handle} is the handle written to
#            $vf     - variation feature, passed through to the converter
#
# The converter is the sub named 'convert_to_' . lc($config->{convert}); it
# is called with ($config, $vf) and must return an arrayref of columns. A
# non-empty result is printed tab-joined followed by a newline; an empty
# result prints nothing. Dies with an explicit message if no converter sub
# exists for the requested format.
sub convert_vf {
    my $config = shift;
    my $vf = shift;

    my $convert_method = 'convert_to_'.lc($config->{convert});

    # look the converter up by name; fail with a readable message instead of
    # Perl's "Undefined subroutine" error when the format is not supported
    my $method_ref = __PACKAGE__->can($convert_method)
        or die "ERROR: Unsupported conversion format \"$config->{convert}\" (no $convert_method method)\n";

    my $line = $method_ref->($config, $vf);
    my $handle = $config->{out_file_handle};

    if(scalar @$line) {
        print $handle join("\t", @$line), "\n";
    }
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1424
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# converts to Ensembl format
#
# Returns an arrayref of columns: chromosome ($vf->{chr} if true, otherwise
# seq_region_name), start, end, allele string, strand and variation name.
# $config is accepted for a uniform converter signature but is not used.
sub convert_to_ensembl {
    my ($config, $vf) = @_;

    my @columns = ($vf->{chr} || $vf->seq_region_name);

    # remaining columns are plain accessor calls, in output order
    push @columns, $vf->$_ for qw(start end allele_string strand variation_name);

    return \@columns;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1439
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1440
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# converts to pileup format
#
# Arguments: $config - config hashref; {line_number} and {quiet} are read
#                      when warning about a variant that cannot be converted
#            $vf     - variation feature; allele_string, start and
#                      {chr} / seq_region_name are read
#
# Returns an arrayref of output columns:
#   balanced substitution      [chr, start, first allele, other alleles joined by "/"]
#   insertion (first allele -) [chr, start - 1, '*', "+SEQ" alleles joined by "/"]
#   deletion (other "-" case)  [chr, start - 1, '*', "-SEQ" alleles joined by "/"]
#   unbalanced without "-"     [] after a warning (warning suppressed if quiet)
sub convert_to_pileup {
    my $config = shift;
    my $vf = shift;

    my $allele_string = $vf->allele_string;
    my @raw_alleles = split /\//, $allele_string;

    # Strip "-" on copies. Stripping through the foreach loop variable would
    # rewrite the array elements themselves (the loop variable is an alias),
    # which makes the "first allele is -" insertion test below impossible to
    # satisfy and turns every insertion into a deletion.
    my @alleles = map { (my $bases = $_) =~ s/\-//g; $bases } @raw_alleles;

    # look for imbalance in the allele string
    my %allele_lengths;
    $allele_lengths{length($_)} = 1 for @alleles;

    # balanced sub
    if(keys(%allele_lengths) <= 1) {
        my $first_allele = shift @alleles;

        return [
            $vf->{chr} || $vf->seq_region_name,
            $vf->start,
            $first_allele,
            (join "/", @alleles),
        ];
    }

    # unbalanced alleles with no "-" placeholder cannot be written as pileup
    if($allele_string !~ /\-/) {
        warn "WARNING: Unable to convert variant to pileup format on line number ", $config->{line_number} unless defined($config->{quiet});
        return [];
    }

    # in/del: a leading "-" reference allele marks an insertion
    my $is_insertion = $raw_alleles[0] eq '-';
    shift @alleles if $is_insertion;

    my $sign = $is_insertion ? '+' : '-';

    # alleles that were only "-" are empty after stripping; drop them so no
    # bare "+" or "-" is emitted
    my @indels = map { $sign.$_ } grep { length($_) } @alleles;

    return [
        $vf->{chr} || $vf->seq_region_name,
        $vf->start - 1,
        '*',
        (join "/", @indels),
    ];
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1506
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# converts to HGVS (hackily returns many lines)
#
# Collects the values returned by get_all_hgvs_notations() called without
# arguments, followed by the values for every TranscriptVariation's
# transcript, first with type 'c' for all of them and then with type 'p'.
# Returns a one-element arrayref holding all notations joined by newlines,
# so the caller prints them as several lines.
sub convert_to_hgvs {
    my ($config, $vf) = @_;

    # ensure we have a slice
    $vf->{slice} = get_slice($config, $vf->{chr}) unless $vf->{slice};

    my $tvs = $vf->get_all_TranscriptVariations;

    my @notations = values %{ $vf->get_all_hgvs_notations() };

    if (defined $tvs) {
        for my $type ('c', 'p') {
            for my $tv (@$tvs) {
                push @notations, values %{ $vf->get_all_hgvs_notations($tv->transcript, $type) };
            }
        }
    }

    return [ join("\n", @notations) ];
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1526
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# prints a line of output from the hash
#
# Arguments: $config - config hashref; {fields} (arrayref of column names)
#                      and {out_file_handle} are read
#            $line   - either a hashref of column values, optionally with
#                      an {Extra} hashref of key/value pairs, or a reference
#                      to an already formatted string (gvf/vcf output).
#                      Nothing is printed when $line is undefined.
#
# For a hashref, {Extra} is flattened IN PLACE to "key=value;key=value"
# (keys sorted for reproducible output; empty string when there is no Extra
# data). Each configured field is then taken from the line itself, falling
# back to the Extra key of the same name, falling back to '-'.
sub print_line {
    my $config = shift;
    my $line = shift;
    return unless defined($line);

    my $output;

    # normal
    if(ref($line) eq 'HASH') {
        my $extra_ref = $line->{Extra};

        # a missing Extra entry is treated as empty instead of being
        # dereferenced, which is fatal under strict refs
        my %extra = ref($extra_ref) eq 'HASH' ? %$extra_ref : ();

        # flatten unless Extra is already a plain string, e.g. because this
        # same hash has been printed before
        if(!defined($extra_ref) || ref($extra_ref) eq 'HASH') {
            $line->{Extra} = join(';', map { $_.'='.$extra{$_} } sort keys %extra);
        }

        # if the fields have been redefined we need to search through in case
        # any of the defined fields are actually part of the Extra hash
        $output = join "\t", map {
            (defined $line->{$_} ? $line->{$_} : (defined $extra{$_} ? $extra{$_} : '-'))
        } @{$config->{fields}};
    }

    # gvf/vcf
    else {
        $output = $$line;
    }

    my $fh = $config->{out_file_handle};
    print $fh "$output\n";
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1556
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1557 # outputs usage message
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1558 sub usage {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1559 my $usage =<<END;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1560 #----------------------------------#
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1561 # ENSEMBL VARIANT EFFECT PREDICTOR #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1562 #----------------------------------#
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1563
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1564 version $VERSION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1565
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1566 By Will McLaren (wm2\@ebi.ac.uk)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1567
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1568 http://www.ensembl.org/info/docs/variation/vep/vep_script.html
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1569
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1570 Usage:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1571 perl variant_effect_predictor.pl [arguments]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1572
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1573 Options
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1574 =======
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1575
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1576 --help Display this message and quit
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1577 --verbose Display verbose output as the script runs [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1578 --quiet Suppress status and warning messages [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1579 --no_progress Suppress progress bars [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1580
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1581 --config Load configuration from file. Any command line options
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1582 specified overwrite those in the file [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1583
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1584 --everything Shortcut switch to turn on commonly used options. See web
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1585 documentation for details [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1586
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1587 --fork [num_forks] Use forking to improve script runtime [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1588
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1589 -i | --input_file Input file - if not specified, reads from STDIN. Files
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1590 may be gzip compressed.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1591 --format Specify input file format - one of "ensembl", "pileup",
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1592 "vcf", "hgvs", "id" or "guess" to try and work out format.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1593 -o | --output_file Output file. Write to STDOUT by specifying -o STDOUT - this
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1594 will force --quiet [default: "variant_effect_output.txt"]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1595 --force_overwrite Force overwriting of output file [default: quit if file
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1596 exists]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1597 --original Writes output as it was in input - must be used with --filter
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1598 since no consequence data is added [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1599 --vcf Write output as VCF [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1600 --gvf Write output as GVF [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1601 --fields [field list] Define a custom output format by specifying a comma-separated
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1602 list of field names. Field names normally present in the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1603 "Extra" field may also be specified, including those added by
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1604 plugin modules. Can also be used to configure VCF output
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1605 columns [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1606
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1607 --species [species] Species to use [default: "human"]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1608
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1609 -t | --terms Type of consequence terms to output - one of "SO", "ensembl"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1610 [default: SO]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1611
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1612 --sift=[p|s|b] Add SIFT [p]rediction, [s]core or [b]oth [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1613 --polyphen=[p|s|b] Add PolyPhen [p]rediction, [s]core or [b]oth [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1614
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1615 NB: SIFT and PolyPhen predictions are currently available for human only
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1616 NB: Condel support has been moved to a VEP plugin module - see documentation
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1617
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1618 --regulatory Look for overlaps with regulatory regions. The script can
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1619 also call if a variant falls in a high information position
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1620 within a transcription factor binding site. Output lines have
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1621 a Feature type of RegulatoryFeature or MotifFeature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1622 [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1623 --cell_type [types] Report only regulatory regions that are found in the given cell
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1624 type(s). Can be a single cell type or a comma-separated list.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1625 The functional type in each cell type is reported under
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1626 CELL_TYPE in the output. To retrieve a list of cell types, use
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1627 "--cell_type list" [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1628
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1629 NB: Regulatory consequences are currently available for human and mouse only
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1630
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1631 --custom [file list] Add custom annotations from tabix-indexed files. See
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1632 documentation for full details [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1633 --plugin [plugin_name] Use named plugin module [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1634 --hgnc Add HGNC gene identifiers to output [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1635 --hgvs Output HGVS identifiers (coding and protein). Requires database
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1636 connection [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1637 --ccds Output CCDS transcript identifiers [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1638 --xref_refseq Output aligned RefSeq mRNA identifier for transcript. NB: the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1639 RefSeq and Ensembl transcripts aligned in this way MAY NOT, AND
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1640 FREQUENTLY WILL NOT, match exactly in sequence, exon structure
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1641 and protein product [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1642 --protein Output Ensembl protein identifer [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1643 --canonical Indicate if the transcript for this consequence is the canonical
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1644 transcript for this gene [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1645 --domains Include details of any overlapping protein domains [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1646 --numbers Include exon & intron numbers [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1647
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1648 --no_intergenic Excludes intergenic consequences from the output [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1649 --coding_only Only return consequences that fall in the coding region of
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1650 transcripts [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1651 --most_severe Ouptut only the most severe consequence per variation.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1652 Transcript-specific columns will be left blank. [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1653 --summary Output only a comma-separated list of all consequences per
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1654 variation. Transcript-specific columns will be left blank.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1655 [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1656 --per_gene Output only the most severe consequence per gene. Where more
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1657 than one transcript has the same consequence, the transcript
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1658 chosen is arbitrary. [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1659
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1660
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1661 --check_ref If specified, checks supplied reference allele against stored
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1662 entry in Ensembl Core database [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1663 --check_existing If specified, checks for existing co-located variations in the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1664 Ensembl Variation database [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1665 --failed [0|1] Include (1) or exclude (0) variants that have been flagged as
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1666 failed by Ensembl when checking for existing variants.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1667 [default: exclude]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1668 --check_alleles If specified, the alleles of existing co-located variations
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1669 are compared to the input; an existing variation will only
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1670 be reported if no novel allele is in the input (strand is
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1671 accounted for) [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1672 --check_svs Report overlapping structural variants [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1673
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1674 --filter [filters] Filter output by consequence type. Use this to output only
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1675 variants that have at least one consequence type matching the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1676 filter. Multiple filters can be used separated by ",". By
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1677 combining this with --original it is possible to run the VEP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1678 iteratively to progressively filter a set of variants. See
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1679 documentation for full details [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1680
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1681 --check_frequency Turns on frequency filtering. Use this to include or exclude
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1682 variants based on the frequency of co-located existing
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1683 variants in the Ensembl Variation database. You must also
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1684 specify all of the following --freq flags [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1685 --freq_pop [pop] Name of the population to use e.g. hapmap_ceu for CEU HapMap,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1686 1kg_yri for YRI 1000 genomes. See documentation for more
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1687 details
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1688 --freq_freq [freq] Frequency to use in filter. Must be a number between 0 and 0.5
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1689 --freq_gt_lt [gt|lt] Specify whether the frequency should be greater than (gt) or
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1690 less than (lt) --freq_freq
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1691 --freq_filter Specify whether variants that pass the above should be included
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1692 [exclude|include] or excluded from analysis
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1693 --gmaf Include global MAF of existing variant from 1000 Genomes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1694 Phase 1 in output
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1695
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1696 --individual [id] Consider only alternate alleles present in the genotypes of the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1697 specified individual(s). May be a single individual, a comma-
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1698 separated list or "all" to assess all individuals separately.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1699 Each individual and variant combination is given on a separate
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1700 line of output. Only works with VCF files containing individual
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1701 genotype data; individual IDs are taken from column headers.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1702 --allow_non_variant Prints out non-variant lines when using VCF input
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1703 --phased Force VCF individual genotypes to be interpreted as phased.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1704 For use with plugins that depend on phased state.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1705
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1706 --chr [list] Select a subset of chromosomes to analyse from your file. Any
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1707 data not on this chromosome in the input will be skipped. The
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1708 list can be comma separated, with "-" characters representing
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1709 a range e.g. 1-5,8,15,X [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1710 --gp If specified, tries to read GRCh37 position from GP field in the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1711 INFO column of a VCF file. Only applies when VCF is the input
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1712 format and human is the species [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1713
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1714 --convert Convert the input file to the output format specified.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1715 [ensembl|vcf|pileup] Converted output is written to the file specified in
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1716 --output_file. No consequence calculation is carried out when
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1717 doing file conversion. [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1718
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1719 --refseq Use the otherfeatures database to retrieve transcripts - this
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1720 database contains RefSeq transcripts (as well as CCDS and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1721 Ensembl EST alignments) [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1722 --host Manually define database host [default: "ensembldb.ensembl.org"]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1723 -u | --user Database username [default: "anonymous"]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1724 --port Database port [default: 5306]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1725 --password Database password [default: no password]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1726 --genomes Sets DB connection params for Ensembl Genomes [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1727 --registry Registry file to use defines DB connections [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1728 Defining a registry file overrides above connection settings.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1729 --db_version=[number] Force script to load DBs from a specific Ensembl version. Not
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1730 advised due to likely incompatibilities between API and DB
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1731
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1732 --no_whole_genome Run in old-style, non-whole genome mode [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1733 --buffer_size Sets the number of variants sent in each batch [default: 5000]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1734 Increasing buffer size can retrieve results more quickly
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1735 but requires more memory. Only applies to whole genome mode.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1736
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1737 --cache Enables read-only use of cache [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1738 --dir [directory] Specify the base cache directory to use [default: "\$HOME/.vep/"]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1739 --write_cache Enable writing to cache [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1740 --build [all|list] Build a complete cache for the selected species. Build for all
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1741 chromosomes with --build all, or a list of chromosomes (see
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1742 --chr). DO NOT USE WHEN CONNECTED TO PUBLIC DB SERVERS AS THIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1743 VIOLATES OUR FAIR USAGE POLICY [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1744
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1745 --compress Specify utility to decompress cache files - may be "gzcat" or
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1746 "gzip -dc" Only use if default does not work [default: zcat]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1747
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1748 --skip_db_check ADVANCED! Force the script to use a cache built from a different
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1749 database than specified with --host. Only use this if you are
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1750 sure the hosts are compatible (e.g. ensembldb.ensembl.org and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1751 useastdb.ensembl.org) [default: off]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1752 --cache_region_size ADVANCED! The size in base-pairs of the region covered by one
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1753 file in the cache. [default: 1MB]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1754 END
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1755
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1756 print $usage;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1757 }