annotate dir_plugins/MTR.pm @ 10:f594c6bed58f draft default tip

Uploaded
author dvanzessen
date Tue, 21 Apr 2020 11:40:19 +0000
parents e545d0a25ffe
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
1 =head1 CONTACT
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
2
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
3 Slave Petrovski <slavep@unimelb.edu.au>
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
4 Michael Silk <silkm@student.unimelb.edu.au>
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
5
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
6 =cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
7
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
8 =head1 NAME
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
9
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
10 MTR (Missense Tolerance Ratio)
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
11
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
12 =head1 SYNOPSIS
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
13
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
14 mv MTR.pm ~/.vep/Plugins
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
15 curl -O ftp://mtr-viewer.mdhs.unimelb.edu.au/pub/mtrflatfile_1.0.txt.gz
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
16 curl -O ftp://mtr-viewer.mdhs.unimelb.edu.au/pub/mtrflatfile_1.0.txt.gz.tbi
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
17 perl variant_effect_predictor.pl -i variations.vcf --plugin MTR,mtrflatfile_1.0.txt.gz
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
18
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
19 =head1 DESCRIPTION
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
20
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
21 A VEP plugin that retrieves Missense Tolerance Ratio (MTR) scores for
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
22 variants from a tabix-indexed flat file.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
23
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
24 MTR scores quantify the amount of purifying selection acting
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
25 specifically on missense variants in a given window of protein-coding
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
26 sequence. It is estimated across a sliding window of 31 codons and uses
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
27 observed standing variation data from the WES component of the Exome
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
28 Aggregation Consortium Database (ExAC), version 2.0
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
29 (http://gnomad.broadinstitute.org).
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
30
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
31 Please cite the MTR publication alongside the VEP if you use this resource:
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
32 http://genome.cshlp.org/content/27/10/1715
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
33
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
34 The Bio::DB::HTS perl library or tabix utility must be installed in your path
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
35 to use this plugin. MTR flat files can be downloaded from:
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
36 ftp://mtr-viewer.mdhs.unimelb.edu.au/pub
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
37
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
38 NB: Data are available for GRCh37 only
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
39
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
40 =cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
41
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
42 package MTR;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
43
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
44 use strict;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
45 use warnings;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
46
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
47 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
48
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
49 use Bio::EnsEMBL::Variation::Utils::BaseVepTabixPlugin;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
50
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
51 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepTabixPlugin);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
52
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
53 sub new {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
54 my $class = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
55 my $self = $class->SUPER::new(@_);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
56
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
57 # test tabix
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
58 die "ERROR: tabix does not seem to be in your path\n" unless `which tabix 2>&1` =~ /tabix$/;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
59
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
60 # get MTR file
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
61 my $file = $self->params->[0];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
62 $self->add_file($file);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
63
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
64 # remote files?
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
65 if($file =~ /tp\:\/\//) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
66 my $remote_test = `tabix -f $file 1:1-1 2>&1`;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
67 if($remote_test && $remote_test !~ /get_local_version/) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
68 die "$remote_test\nERROR: Could not find file or index file for remote annotation file $file\n";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
69 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
70 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
71
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
72 # check files exist
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
73 else {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
74 die "ERROR: MTR file $file not found\n" unless -e $file;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
75 die "ERROR: Tabix index file $file\.tbi not found - perhaps you need to create it first?\n" unless -e $file.'.tbi';
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
76 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
77
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
78 # get headers and store on self
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
79 open HEAD, "tabix -fh $file 1:1-1 2>&1 | ";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
80 while(<HEAD>) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
81 next unless /^\#/;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
82 chomp;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
83 $self->{headers} = [split];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
84 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
85 close HEAD;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
86
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
87 return $self;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
88 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
89
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
90 sub feature_types {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
91 return ['Transcript'];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
92 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
93
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
94 sub variation_feature_types {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
95 return ['VariationFeature'];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
96 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
97
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
98 sub get_header_info {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
99 my $self = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
100 return {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
101 MTR => 'MTR score',
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
102 FDR => 'MTR false discovery rate adjusted binomial exact test.',
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
103 MTR_centile => 'MTR gene-specific percentile'
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
104 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
105 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
106
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
107 sub run {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
108 my ($self, $tva) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
109 my $vf = $tva->variation_feature;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
110
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
111 # get allele, reverse comp if needed
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
112 my $allele = $tva->variation_feature_seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
113 reverse_comp(\$allele) if $vf->{strand} < 0;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
114
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
115 return {} unless $allele =~ /^[ACGT]$/;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
116
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
117 my $tr_id = $tva->transcript->stable_id;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
118
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
119 # data is written by pos, allele, transcript ID (feature)
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
120 # grep lines read in matched on position so that they also are matched on allele and transcript ID
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
121 my ($res) = grep {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
122 $_->{Genomic_position} eq $vf->{start} &&
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
123 $_->{Genomic_position} eq $vf->{end} &&
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
124 $_->{alt} eq $allele &&
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
125 $_->{Feature} eq $tr_id
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
126 } @{$self->get_data($vf->{chr}, $vf->{start}, $vf->{end})};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
127
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
128 # return only the keys defined by get_header_info()
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
129 return $res ? { map {$_ => $res->{$_}} grep {defined($res->{$_}) && $res->{$_} ne '.'} keys %{$self->get_header_info} } : {};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
130 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
131
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
132 sub parse_data {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
133 my ($self, $line) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
134
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
135 $line =~ s/\r$//g;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
136
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
137 my @split = split /\t/, $line;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
138
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
139 # parse data into hash of col names and values
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
140 my %data = map {$self->{headers}->[$_] => $split[$_]} (0..(scalar @{$self->{headers}} - 1));
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
141
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
142 return \%data;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
143 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
144
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
145 sub get_start {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
146 return $_[1]->{'Genomic_position'};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
147 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
148
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
149 sub get_end {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
150 return $_[1]->{'Genomic_position'};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
151 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
152
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
153 1;