Mercurial > repos > dvanzessen > vep_emc
comparison dir_plugins/GXA.pm @ 3:49397129aec0 draft
Uploaded
| author | dvanzessen | 
|---|---|
| date | Mon, 15 Jul 2019 05:20:39 -0400 | 
| parents | e545d0a25ffe | 
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 2:17c98d091710 | 3:49397129aec0 | 
|---|---|
| 1 =head1 LICENSE | |
| 2 | |
| 3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute | |
| 4 Copyright [2016-2018] EMBL-European Bioinformatics Institute | |
| 5 | |
| 6 Licensed under the Apache License, Version 2.0 (the "License"); | |
| 7 you may not use this file except in compliance with the License. | |
| 8 You may obtain a copy of the License at | |
| 9 | |
| 10 http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 | |
| 12 Unless required by applicable law or agreed to in writing, software | |
| 13 distributed under the License is distributed on an "AS IS" BASIS, | |
| 14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 15 See the License for the specific language governing permissions and | |
| 16 limitations under the License. | |
| 17 | |
| 18 =head1 CONTACT | |
| 19 | |
| 20 Ensembl <http://www.ensembl.org/info/about/contact/index.html> | |
| 21 | |
| 22 =cut | |
| 23 | |
| 24 =head1 NAME | |
| 25 | |
| 26 GXA | |
| 27 | |
| 28 =head1 SYNOPSIS | |
| 29 | |
| 30 mv GXA.pm ~/.vep/Plugins | |
| 31 ./vep -i variations.vcf --cache --plugin GXA | |
| 32 | |
| 33 =head1 DESCRIPTION | |
| 34 | |
| 35 This is a plugin for the Ensembl Variant Effect Predictor (VEP) that | |
| 36 reports data from the Gene Expression Atlas. | |
| 37 | |
| 38 NB: no account is taken for comparing values across experiments; if values | |
| 39 exist for the same tissue in more than one experiment, the highest value | |
| 40 is reported. | |
| 41 | |
| 42 =cut | |
| 43 | |
| 44 package GXA; | |
| 45 | |
| 46 use strict; | |
| 47 use warnings; | |
| 48 | |
| 49 use Bio::EnsEMBL::Variation::Utils::BaseVepPlugin; | |
| 50 | |
| 51 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin); | |
| 52 | |
# Constructor.
#
# Delegates to the parent class constructor and then aborts with an error:
# the plugin is deliberately disabled at present. The statements after the
# die() are unreachable while it is in place; they are kept so the plugin
# can be re-enabled by removing that single line.
#
# Arguments: passed through unchanged to SUPER::new.
# Returns:   the plugin object (only once the die() below is removed).
# Dies:      always, with the "non-functional" message.
sub new {
    my $class = shift;

    my $self = $class->SUPER::new(@_);

    die("ERROR: This plugin is currently non-functional due to changes in the Gene Expression Atlas API");

    # URL-encode the species name for use as a query parameter. The /g
    # modifier is required so that every underscore is replaced, not just
    # the first one (e.g. "canis_lupus_familiaris").
    $self->{species} = $self->{config}->{species};
    $self->{species} =~ s/_/%20/g;

    # base URL; species and geneQuery parameters are appended per request
    $self->{url} = 'https://www.ebi.ac.uk/gxa/widgets/heatmap/multiExperiment.tsv?propertyType=bioentity_identifier';

    return $self;
}
| 67 | |
# Returns an array reference naming the feature types this plugin
# handles: just 'Transcript'.
sub feature_types {
    my @types = ('Transcript');
    return \@types;
}
| 71 | |
# Returns an array reference naming the variant feature types this plugin
# handles: just 'BaseVariationFeature'.
sub variant_feature_types {
    my @types = ('BaseVariationFeature');
    return \@types;
}
| 75 | |
# Builds (once) and returns the header descriptions for the plugin's
# output fields.
#
# The first non-comment line of the TSV returned for a fixed query gene
# (BRCA2) is taken as the column header row. Spaces in it are replaced by
# underscores, and each tab-separated column name becomes a key
# 'GXA_<column>' mapped to a human-readable description.
#
# The result is stored in $self->{_header_info}, so the remote request is
# made at most once per plugin object.
#
# Returns: hash reference of field name => description. The hash is empty
#          if curl could not be started or produced no header line.
sub get_header_info {
    my $self = shift;

    if(!exists($self->{_header_info})) {

        # get tissues using BRCA2
        my $url = sprintf(
            '%s&species=%s&geneQuery=%s',
            $self->{url},
            $self->{species},
            'BRCA2'
        );

        my %headers = ();

        # List-form pipe open: curl is executed directly, without a shell,
        # so characters in the URL can never be interpreted as shell syntax.
        if(open(my $in, '-|', 'curl', '-s', $url)) {

            # only the first non-comment line (the column header) is needed
            while(my $line = <$in>) {
                next if $line =~ /^#/;
                chomp $line;
                $line =~ s/ /_/g;
                %headers =
                    map {'GXA_'.$_ => "Tissue expression level in $_ from Gene Expression Atlas"}
                    split(/\t/, $line);
                last;
            }

            close $in;
        }
        else {
            # best effort: report the problem and carry on with no headers
            warn("WARNING: GXA plugin could not run curl to fetch header info: $!\n");
        }

        $self->{_header_info} = \%headers;
    }

    return $self->{_header_info};
}
| 109 | |
# Returns the expression values for the gene of the transcript that the
# given allele object refers to.
#
# Arguments:
#   $self - the plugin object; uses $self->{url}, $self->{species} and the
#           per-gene result cache $self->{_cache}.
#   $tva  - an object whose ->transcript method returns the transcript
#           (a hash-based object carrying _gene_stable_id and/or _gene).
#
# Returns: hash reference of 'GXA_<column>' => value. When several data
#          rows carry a value for the same column, the highest one is
#          kept. An empty hash reference is returned if no gene stable ID
#          can be determined for the transcript.
#
# Results are cached per gene ID, so each gene is requested at most once
# per plugin object (an empty result is cached too).
sub run {
    my ($self, $tva) = @_;

    my $tr = $tva->transcript;

    # Prefer the stable ID stored directly on the transcript and fall back
    # to the attached gene object. The fallback is guarded so that a
    # transcript without a gene yields {} instead of dying on an undefined
    # invocant.
    my $gene_id = $tr->{_gene_stable_id};
    if(!$gene_id && $tr->{_gene}) {
        $gene_id = $tr->{_gene}->stable_id;
    }
    return {} unless $gene_id;

    if(!exists($self->{_cache}) || !exists($self->{_cache}->{$gene_id})) {

        my $url = sprintf(
            '%s&species=%s&geneQuery=%s',
            $self->{url},
            $self->{species},
            $gene_id
        );

        my (@headers, %data);

        # List-form pipe open: curl is executed directly, without a shell,
        # so characters in the URL can never be interpreted as shell syntax.
        if(open(my $in, '-|', 'curl', '-s', $url)) {

            my $first = 1;

            while(my $line = <$in>) {
                next if $line =~ /^#/;
                chomp $line;

                if($first) {
                    # header row: column names, spaces turned into underscores
                    $line =~ s/ /_/g;
                    @headers = split /\t/, $line;
                    $first = 0;
                }
                else {
                    my @fields = split /\t/, $line;

                    for my $i (0 .. $#headers) {
                        my ($h, $d) = ('GXA_'.$headers[$i], $fields[$i]);

                        # keep plain non-negative decimals only; strings
                        # such as "." or "1.2.3" are not numbers
                        next unless defined($d) && $d =~ /^(?:[0-9]+\.?[0-9]*|\.[0-9]+)$/;

                        # several rows may report the same column: keep the highest
                        if(!exists($data{$h}) || $d > $data{$h}) {
                            $data{$h} = $d;
                        }
                    }
                }
            }

            close $in;
        }
        else {
            # best effort: report the problem and cache an empty result
            warn("WARNING: GXA plugin could not run curl for $gene_id: $!\n");
        }

        $self->{_cache}->{$gene_id} = \%data;
    }

    return $self->{_cache}->{$gene_id};
}
| 164 | |
| 165 1; | 
