|
0
|
1 =head1 CONTACT
|
|
|
2
|
|
|
3 Slave Petrovski <slavep@unimelb.edu.au>
|
|
|
4 Michael Silk <silkm@student.unimelb.edu.au>
|
|
|
5
|
|
|
6 =cut
|
|
|
7
|
|
|
8 =head1 NAME
|
|
|
9
|
|
|
10 MTR (Missense Tolerance Ratio)
|
|
|
11
|
|
|
12 =head1 SYNOPSIS
|
|
|
13
|
|
|
14 mv MTR.pm ~/.vep/Plugins
|
|
|
15 curl -O ftp://mtr-viewer.mdhs.unimelb.edu.au/pub/mtrflatfile_1.0.txt.gz
|
|
|
16 curl -O ftp://mtr-viewer.mdhs.unimelb.edu.au/pub/mtrflatfile_1.0.txt.gz.tbi
|
|
|
17 perl variant_effect_predictor.pl -i variations.vcf --plugin MTR,mtrflatfile_1.0.txt.gz
|
|
|
18
|
|
|
19 =head1 DESCRIPTION
|
|
|
20
|
|
|
21 A VEP plugin that retrieves Missense Tolerance Ratio (MTR) scores for
|
|
|
22 variants from a tabix-indexed flat file.
|
|
|
23
|
|
|
24 MTR scores quantify the amount of purifying selection acting
|
|
|
25 specifically on missense variants in a given window of protein-coding
|
|
|
26 sequence. It is estimated across a sliding window of 31 codons and uses
|
|
|
27 observed standing variation data from the WES component of the Exome
|
|
|
28 Aggregation Consortium Database (ExAC), version 2.0
|
|
|
29 (http://gnomad.broadinstitute.org).
|
|
|
30
|
|
|
31 Please cite the MTR publication alongside the VEP if you use this resource:
|
|
|
32 http://genome.cshlp.org/content/27/10/1715
|
|
|
33
|
|
|
34 The Bio::DB::HTS perl library or tabix utility must be installed in your path
|
|
|
35 to use this plugin. MTR flat files can be downloaded from:
|
|
|
36 ftp://mtr-viewer.mdhs.unimelb.edu.au/pub
|
|
|
37
|
|
|
38 NB: Data are available for GRCh37 only
|
|
|
39
|
|
|
40 =cut
|
|
|
41
|
|
|
42 package MTR;
|
|
|
43
|
|
|
44 use strict;
|
|
|
45 use warnings;
|
|
|
46
|
|
|
47 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
|
|
|
48
|
|
|
49 use Bio::EnsEMBL::Variation::Utils::BaseVepTabixPlugin;
|
|
|
50
|
|
|
51 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepTabixPlugin);
|
|
|
52
|
|
|
53 sub new {
|
|
|
54 my $class = shift;
|
|
|
55 my $self = $class->SUPER::new(@_);
|
|
|
56
|
|
|
57 # test tabix
|
|
|
58 die "ERROR: tabix does not seem to be in your path\n" unless `which tabix 2>&1` =~ /tabix$/;
|
|
|
59
|
|
|
60 # get MTR file
|
|
|
61 my $file = $self->params->[0];
|
|
|
62 $self->add_file($file);
|
|
|
63
|
|
|
64 # remote files?
|
|
|
65 if($file =~ /tp\:\/\//) {
|
|
|
66 my $remote_test = `tabix -f $file 1:1-1 2>&1`;
|
|
|
67 if($remote_test && $remote_test !~ /get_local_version/) {
|
|
|
68 die "$remote_test\nERROR: Could not find file or index file for remote annotation file $file\n";
|
|
|
69 }
|
|
|
70 }
|
|
|
71
|
|
|
72 # check files exist
|
|
|
73 else {
|
|
|
74 die "ERROR: MTR file $file not found\n" unless -e $file;
|
|
|
75 die "ERROR: Tabix index file $file\.tbi not found - perhaps you need to create it first?\n" unless -e $file.'.tbi';
|
|
|
76 }
|
|
|
77
|
|
|
78 # get headers and store on self
|
|
|
79 open HEAD, "tabix -fh $file 1:1-1 2>&1 | ";
|
|
|
80 while(<HEAD>) {
|
|
|
81 next unless /^\#/;
|
|
|
82 chomp;
|
|
|
83 $self->{headers} = [split];
|
|
|
84 }
|
|
|
85 close HEAD;
|
|
|
86
|
|
|
87 return $self;
|
|
|
88 }
|
|
|
89
|
|
|
90 sub feature_types {
|
|
|
91 return ['Transcript'];
|
|
|
92 }
|
|
|
93
|
|
|
94 sub variation_feature_types {
|
|
|
95 return ['VariationFeature'];
|
|
|
96 }
|
|
|
97
|
|
|
98 sub get_header_info {
|
|
|
99 my $self = shift;
|
|
|
100 return {
|
|
|
101 MTR => 'MTR score',
|
|
|
102 FDR => 'MTR false discovery rate adjusted binomial exact test.',
|
|
|
103 MTR_centile => 'MTR gene-specific percentile'
|
|
|
104 }
|
|
|
105 }
|
|
|
106
|
|
|
107 sub run {
|
|
|
108 my ($self, $tva) = @_;
|
|
|
109 my $vf = $tva->variation_feature;
|
|
|
110
|
|
|
111 # get allele, reverse comp if needed
|
|
|
112 my $allele = $tva->variation_feature_seq;
|
|
|
113 reverse_comp(\$allele) if $vf->{strand} < 0;
|
|
|
114
|
|
|
115 return {} unless $allele =~ /^[ACGT]$/;
|
|
|
116
|
|
|
117 my $tr_id = $tva->transcript->stable_id;
|
|
|
118
|
|
|
119 # data is written by pos, allele, transcript ID (feature)
|
|
|
120 # grep lines read in matched on position so that they also are matched on allele and transcript ID
|
|
|
121 my ($res) = grep {
|
|
|
122 $_->{Genomic_position} eq $vf->{start} &&
|
|
|
123 $_->{Genomic_position} eq $vf->{end} &&
|
|
|
124 $_->{alt} eq $allele &&
|
|
|
125 $_->{Feature} eq $tr_id
|
|
|
126 } @{$self->get_data($vf->{chr}, $vf->{start}, $vf->{end})};
|
|
|
127
|
|
|
128 # return only the keys defined by get_header_info()
|
|
|
129 return $res ? { map {$_ => $res->{$_}} grep {defined($res->{$_}) && $res->{$_} ne '.'} keys %{$self->get_header_info} } : {};
|
|
|
130 }
|
|
|
131
|
|
|
132 sub parse_data {
|
|
|
133 my ($self, $line) = @_;
|
|
|
134
|
|
|
135 $line =~ s/\r$//g;
|
|
|
136
|
|
|
137 my @split = split /\t/, $line;
|
|
|
138
|
|
|
139 # parse data into hash of col names and values
|
|
|
140 my %data = map {$self->{headers}->[$_] => $split[$_]} (0..(scalar @{$self->{headers}} - 1));
|
|
|
141
|
|
|
142 return \%data;
|
|
|
143 }
|
|
|
144
|
|
|
145 sub get_start {
|
|
|
146 return $_[1]->{'Genomic_position'};
|
|
|
147 }
|
|
|
148
|
|
|
149 sub get_end {
|
|
|
150 return $_[1]->{'Genomic_position'};
|
|
|
151 }
|
|
|
152
|
|
|
153 1;
|