Mercurial > repos > dvanzessen > vep_emc
diff dir_plugins/FATHMM.pm @ 3:49397129aec0 draft
Uploaded
| author | dvanzessen |
|---|---|
| date | Mon, 15 Jul 2019 05:20:39 -0400 |
| parents | e545d0a25ffe |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dir_plugins/FATHMM.pm Mon Jul 15 05:20:39 2019 -0400 @@ -0,0 +1,149 @@ +=head1 LICENSE + +Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +Copyright [2016-2018] EMBL-European Bioinformatics Institute + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +=head1 CONTACT + + Ensembl <http://www.ensembl.org/info/about/contact/index.html> + +=cut + +=head1 NAME + + FATHMM + +=head1 SYNOPSIS + + mv FATHMM.pm ~/.vep/Plugins + ./vep -i variations.vcf --plugin FATHMM,"python /path/to/fathmm/fathmm.py" + +=head1 DESCRIPTION + + A VEP plugin that gets FATHMM scores and predictions for missense variants. + + You will need the fathmm.py script and its dependencies (Python, Python + MySQLdb). You should create a "config.ini" file in the same directory as the + fathmm.py script with the database connection options. More information about + how to set up FATHMM can be found on the FATHMM website at + https://github.com/HAShihab/fathmm. + + A typical installation could consist of: + + > wget https://raw.github.com/HAShihab/fathmm/master/cgi-bin/fathmm.py + > wget ftp://supfam2.cs.bris.ac.uk/FATHMM/database/fathmm.v2.1.SQL + > mysql -h[host] -P[port] -u[user] -p[pass] -e"CREATE DATABASE fathmm" + > mysql -h[host] -P[port] -u[user] -p[pass] -Dfathmm < fathmm.v2.1.SQL + > echo "[DATABASE]\nHOST = [host]\nPORT = [port]\nUSER = [user]\nPASSWD = [pass]\nDB = fathmm\n" > config.ini + +=cut + +package FATHMM; + +use strict; +use warnings; + +use Bio::EnsEMBL::Variation::Utils::BaseVepPlugin; + +use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin); + +sub new { + my $class = shift; + + my $self = $class->SUPER::new(@_); + + # get command + my $command = $self->params->[0]; + + die 'ERROR: No FATHMM command specified. Specify path to FATHMM with e.g. --plugin FATHMM,"python /path/to/fathmm/fathmm.py"\n' unless defined($command); + + die 'ERROR: Your FATHMM command does not look correct; it should looks something like "python /path/to/fathmm/fathmm.py"\n' unless $command =~ /python.+fathmm\.py/; + + $self->{command} = $command; + + die 'ERROR: Temporary directory '.$self->{config}->{tmpdir}.' not found - specify an existing directory with --tmpdir [dir]\n' unless -d $self->{config}->{tmpdir}; + + return $self; +} + +sub version { + return 71; +} + +sub feature_types { + return ['Transcript']; +} + +sub get_header_info { + return { + FATHMM => "FATHMM prediction (score)", + }; +} + +sub run { + my ($self, $tva) = @_; + + # only for missense variants + return {} unless grep {$_->SO_term eq 'missense_variant'} @{$tva->get_all_OverlapConsequences}; + + # configure command + my $command = $self->{command}; + $command =~ m/(\s.+)\/.+/; + my $command_dir = $1; + + # configure tmp dir and in/out files for FATHMM + my $tmp_dir = $self->{config}->{tmpdir}; + my $tmp_in_file = $tmp_dir."/fathmm_$$\.in"; + my $tmp_out_file = $tmp_dir."/fatmm_$$\.out"; + + # get required input data from TVA + my $protein = $tva->transcript->{_protein} || $tva->transcript->translation->stable_id; + my $aa_change = $tva->pep_allele_string; + my $aa_pos = $tva->transcript_variation->translation_start; + $aa_change =~ s/\//$aa_pos/; + + # check we have valid strings + return {} unless $protein && $aa_change =~ /^[A-Z]\d+[A-Z]$/; + + # write input file + open IN, ">$tmp_in_file" or die "ERROR: Could not write to file $tmp_in_file\n"; + print IN "$protein $aa_change\n"; + close IN; + + # run command + my $fathmm_err = `cd $command_dir; $command $tmp_in_file $tmp_out_file;`; + + # read output file + open OUT, $tmp_out_file or die "ERROR: Could not read from file $tmp_out_file\n"; + + my ($pred, $score); + while(<OUT>) { + next if /^\#/; + chomp; + my @data = split; + ($pred, $score) = ($data[4], $data[5]); + } + close OUT; + + # delete temporary files + unlink($tmp_in_file, $tmp_out_file); + + return $pred && $score ? { + FATHMM => "$pred($score)", + } : {}; +} + +1; +
