Mercurial > repos > dvanzessen > vep_emc
comparison dir_plugins/MTR.pm @ 0:e545d0a25ffe draft
Uploaded
| author | dvanzessen |
|---|---|
| date | Mon, 15 Jul 2019 05:17:17 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e545d0a25ffe |
|---|---|
| 1 =head1 CONTACT | |
| 2 | |
| 3 Slave Petrovski <slavep@unimelb.edu.au> | |
| 4 Michael Silk <silkm@student.unimelb.edu.au> | |
| 5 | |
| 6 =cut | |
| 7 | |
| 8 =head1 NAME | |
| 9 | |
| 10 MTR (Missense Tolerance Ratio) | |
| 11 | |
| 12 =head1 SYNOPSIS | |
| 13 | |
| 14 mv MTR.pm ~/.vep/Plugins | |
| 15 curl -O ftp://mtr-viewer.mdhs.unimelb.edu.au/pub/mtrflatfile_1.0.txt.gz | |
| 16 curl -O ftp://mtr-viewer.mdhs.unimelb.edu.au/pub/mtrflatfile_1.0.txt.gz.tbi | |
| 17 perl variant_effect_predictor.pl -i variations.vcf --plugin MTR,mtrflatfile_1.0.txt.gz | |
| 18 | |
| 19 =head1 DESCRIPTION | |
| 20 | |
| 21 A VEP plugin that retrieves Missense Tolerance Ratio (MTR) scores for | |
| 22 variants from a tabix-indexed flat file. | |
| 23 | |
| 24 MTR scores quantify the amount of purifying selection acting | |
| 25 specifically on missense variants in a given window of protein-coding | |
| 26 sequence. It is estimated across a sliding window of 31 codons and uses | |
| 27 observed standing variation data from the WES component of the Exome | |
| 28 Aggregation Consortium Database (ExAC), version 2.0 | |
| 29 (http://gnomad.broadinstitute.org). | |
| 30 | |
| 31 Please cite the MTR publication alongside the VEP if you use this resource: | |
| 32 http://genome.cshlp.org/content/27/10/1715 | |
| 33 | |
| 34 The Bio::DB::HTS perl library or tabix utility must be installed in your path | |
| 35 to use this plugin. MTR flat files can be downloaded from: | |
| 36 ftp://mtr-viewer.mdhs.unimelb.edu.au/pub | |
| 37 | |
| 38 NB: Data are available for GRCh37 only | |
| 39 | |
| 40 =cut | |
| 41 | |
| 42 package MTR; | |
| 43 | |
| 44 use strict; | |
| 45 use warnings; | |
| 46 | |
| 47 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp); | |
| 48 | |
| 49 use Bio::EnsEMBL::Variation::Utils::BaseVepTabixPlugin; | |
| 50 | |
| 51 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepTabixPlugin); | |
| 52 | |
| 53 sub new { | |
| 54 my $class = shift; | |
| 55 my $self = $class->SUPER::new(@_); | |
| 56 | |
| 57 # test tabix | |
| 58 die "ERROR: tabix does not seem to be in your path\n" unless `which tabix 2>&1` =~ /tabix$/; | |
| 59 | |
| 60 # get MTR file | |
| 61 my $file = $self->params->[0]; | |
| 62 $self->add_file($file); | |
| 63 | |
| 64 # remote files? | |
| 65 if($file =~ /tp\:\/\//) { | |
| 66 my $remote_test = `tabix -f $file 1:1-1 2>&1`; | |
| 67 if($remote_test && $remote_test !~ /get_local_version/) { | |
| 68 die "$remote_test\nERROR: Could not find file or index file for remote annotation file $file\n"; | |
| 69 } | |
| 70 } | |
| 71 | |
| 72 # check files exist | |
| 73 else { | |
| 74 die "ERROR: MTR file $file not found\n" unless -e $file; | |
| 75 die "ERROR: Tabix index file $file\.tbi not found - perhaps you need to create it first?\n" unless -e $file.'.tbi'; | |
| 76 } | |
| 77 | |
| 78 # get headers and store on self | |
| 79 open HEAD, "tabix -fh $file 1:1-1 2>&1 | "; | |
| 80 while(<HEAD>) { | |
| 81 next unless /^\#/; | |
| 82 chomp; | |
| 83 $self->{headers} = [split]; | |
| 84 } | |
| 85 close HEAD; | |
| 86 | |
| 87 return $self; | |
| 88 } | |
| 89 | |
| 90 sub feature_types { | |
| 91 return ['Transcript']; | |
| 92 } | |
| 93 | |
| 94 sub variation_feature_types { | |
| 95 return ['VariationFeature']; | |
| 96 } | |
| 97 | |
| 98 sub get_header_info { | |
| 99 my $self = shift; | |
| 100 return { | |
| 101 MTR => 'MTR score', | |
| 102 FDR => 'MTR false discovery rate adjusted binomial exact test.', | |
| 103 MTR_centile => 'MTR gene-specific percentile' | |
| 104 } | |
| 105 } | |
| 106 | |
| 107 sub run { | |
| 108 my ($self, $tva) = @_; | |
| 109 my $vf = $tva->variation_feature; | |
| 110 | |
| 111 # get allele, reverse comp if needed | |
| 112 my $allele = $tva->variation_feature_seq; | |
| 113 reverse_comp(\$allele) if $vf->{strand} < 0; | |
| 114 | |
| 115 return {} unless $allele =~ /^[ACGT]$/; | |
| 116 | |
| 117 my $tr_id = $tva->transcript->stable_id; | |
| 118 | |
| 119 # data is written by pos, allele, transcript ID (feature) | |
| 120 # grep lines read in matched on position so that they also are matched on allele and transcript ID | |
| 121 my ($res) = grep { | |
| 122 $_->{Genomic_position} eq $vf->{start} && | |
| 123 $_->{Genomic_position} eq $vf->{end} && | |
| 124 $_->{alt} eq $allele && | |
| 125 $_->{Feature} eq $tr_id | |
| 126 } @{$self->get_data($vf->{chr}, $vf->{start}, $vf->{end})}; | |
| 127 | |
| 128 # return only the keys defined by get_header_info() | |
| 129 return $res ? { map {$_ => $res->{$_}} grep {defined($res->{$_}) && $res->{$_} ne '.'} keys %{$self->get_header_info} } : {}; | |
| 130 } | |
| 131 | |
| 132 sub parse_data { | |
| 133 my ($self, $line) = @_; | |
| 134 | |
| 135 $line =~ s/\r$//g; | |
| 136 | |
| 137 my @split = split /\t/, $line; | |
| 138 | |
| 139 # parse data into hash of col names and values | |
| 140 my %data = map {$self->{headers}->[$_] => $split[$_]} (0..(scalar @{$self->{headers}} - 1)); | |
| 141 | |
| 142 return \%data; | |
| 143 } | |
| 144 | |
| 145 sub get_start { | |
| 146 return $_[1]->{'Genomic_position'}; | |
| 147 } | |
| 148 | |
| 149 sub get_end { | |
| 150 return $_[1]->{'Genomic_position'}; | |
| 151 } | |
| 152 | |
| 153 1; |
