comparison dir_plugins/MTR.pm @ 0:e545d0a25ffe draft

Uploaded
author dvanzessen
date Mon, 15 Jul 2019 05:17:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e545d0a25ffe
1 =head1 CONTACT
2
3 Slave Petrovski <slavep@unimelb.edu.au>
4 Michael Silk <silkm@student.unimelb.edu.au>
5
6 =cut
7
8 =head1 NAME
9
10 MTR (Missense Tolerance Ratio)
11
12 =head1 SYNOPSIS
13
14 mv MTR.pm ~/.vep/Plugins
15 curl -O ftp://mtr-viewer.mdhs.unimelb.edu.au/pub/mtrflatfile_1.0.txt.gz
16 curl -O ftp://mtr-viewer.mdhs.unimelb.edu.au/pub/mtrflatfile_1.0.txt.gz.tbi
17 perl variant_effect_predictor.pl -i variations.vcf --plugin MTR,mtrflatfile_1.0.txt.gz
18
19 =head1 DESCRIPTION
20
21 A VEP plugin that retrieves Missense Tolerance Ratio (MTR) scores for
22 variants from a tabix-indexed flat file.
23
24 MTR scores quantify the amount of purifying selection acting
25 specifically on missense variants in a given window of protein-coding
26 sequence. It is estimated across a sliding window of 31 codons and uses
27 observed standing variation data from the WES component of the Exome
28 Aggregation Consortium Database (ExAC), version 2.0
29 (http://gnomad.broadinstitute.org).
30
31 Please cite the MTR publication alongside the VEP if you use this resource:
32 http://genome.cshlp.org/content/27/10/1715
33
34 The Bio::DB::HTS perl library or tabix utility must be installed in your path
35 to use this plugin. MTR flat files can be downloaded from:
36 ftp://mtr-viewer.mdhs.unimelb.edu.au/pub
37
38 NB: Data are available for GRCh37 only
39
40 =cut
41
42 package MTR;
43
44 use strict;
45 use warnings;
46
47 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
48
49 use Bio::EnsEMBL::Variation::Utils::BaseVepTabixPlugin;
50
51 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepTabixPlugin);
52
53 sub new {
54 my $class = shift;
55 my $self = $class->SUPER::new(@_);
56
57 # test tabix
58 die "ERROR: tabix does not seem to be in your path\n" unless `which tabix 2>&1` =~ /tabix$/;
59
60 # get MTR file
61 my $file = $self->params->[0];
62 $self->add_file($file);
63
64 # remote files?
65 if($file =~ /tp\:\/\//) {
66 my $remote_test = `tabix -f $file 1:1-1 2>&1`;
67 if($remote_test && $remote_test !~ /get_local_version/) {
68 die "$remote_test\nERROR: Could not find file or index file for remote annotation file $file\n";
69 }
70 }
71
72 # check files exist
73 else {
74 die "ERROR: MTR file $file not found\n" unless -e $file;
75 die "ERROR: Tabix index file $file\.tbi not found - perhaps you need to create it first?\n" unless -e $file.'.tbi';
76 }
77
78 # get headers and store on self
79 open HEAD, "tabix -fh $file 1:1-1 2>&1 | ";
80 while(<HEAD>) {
81 next unless /^\#/;
82 chomp;
83 $self->{headers} = [split];
84 }
85 close HEAD;
86
87 return $self;
88 }
89
90 sub feature_types {
91 return ['Transcript'];
92 }
93
94 sub variation_feature_types {
95 return ['VariationFeature'];
96 }
97
98 sub get_header_info {
99 my $self = shift;
100 return {
101 MTR => 'MTR score',
102 FDR => 'MTR false discovery rate adjusted binomial exact test.',
103 MTR_centile => 'MTR gene-specific percentile'
104 }
105 }
106
107 sub run {
108 my ($self, $tva) = @_;
109 my $vf = $tva->variation_feature;
110
111 # get allele, reverse comp if needed
112 my $allele = $tva->variation_feature_seq;
113 reverse_comp(\$allele) if $vf->{strand} < 0;
114
115 return {} unless $allele =~ /^[ACGT]$/;
116
117 my $tr_id = $tva->transcript->stable_id;
118
119 # data is written by pos, allele, transcript ID (feature)
120 # grep lines read in matched on position so that they also are matched on allele and transcript ID
121 my ($res) = grep {
122 $_->{Genomic_position} eq $vf->{start} &&
123 $_->{Genomic_position} eq $vf->{end} &&
124 $_->{alt} eq $allele &&
125 $_->{Feature} eq $tr_id
126 } @{$self->get_data($vf->{chr}, $vf->{start}, $vf->{end})};
127
128 # return only the keys defined by get_header_info()
129 return $res ? { map {$_ => $res->{$_}} grep {defined($res->{$_}) && $res->{$_} ne '.'} keys %{$self->get_header_info} } : {};
130 }
131
132 sub parse_data {
133 my ($self, $line) = @_;
134
135 $line =~ s/\r$//g;
136
137 my @split = split /\t/, $line;
138
139 # parse data into hash of col names and values
140 my %data = map {$self->{headers}->[$_] => $split[$_]} (0..(scalar @{$self->{headers}} - 1));
141
142 return \%data;
143 }
144
145 sub get_start {
146 return $_[1]->{'Genomic_position'};
147 }
148
149 sub get_end {
150 return $_[1]->{'Genomic_position'};
151 }
152
153 1;