|
0
|
1 =head1 LICENSE
|
|
|
2
|
|
|
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
|
|
|
4 Copyright [2016-2018] EMBL-European Bioinformatics Institute
|
|
|
5
|
|
|
6 Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
7 you may not use this file except in compliance with the License.
|
|
|
8 You may obtain a copy of the License at
|
|
|
9
|
|
|
10 http://www.apache.org/licenses/LICENSE-2.0
|
|
|
11
|
|
|
12 Unless required by applicable law or agreed to in writing, software
|
|
|
13 distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
15 See the License for the specific language governing permissions and
|
|
|
16 limitations under the License.
|
|
|
17
|
|
|
18 =head1 CONTACT
|
|
|
19
|
|
|
20 Ensembl <http://www.ensembl.org/info/about/contact/index.html>
|
|
|
21
|
|
|
22 =cut
|
|
|
23
|
|
|
24 =head1 NAME
|
|
|
25
|
|
|
26 GXA
|
|
|
27
|
|
|
28 =head1 SYNOPSIS
|
|
|
29
|
|
|
30 mv GXA.pm ~/.vep/Plugins
|
|
|
31 ./vep -i variations.vcf --cache --plugin GXA
|
|
|
32
|
|
|
33 =head1 DESCRIPTION
|
|
|
34
|
|
|
35 This is a plugin for the Ensembl Variant Effect Predictor (VEP) that
|
|
|
36 reports data from the Gene Expression Atlas.
|
|
|
37
|
|
|
NB: no attempt is made to make values comparable across experiments; if
values exist for the same tissue in more than one experiment, the highest
value is simply reported.
|
|
|
41
|
|
|
42 =cut
|
|
|
43
|
|
|
44 package GXA;
|
|
|
45
|
|
|
46 use strict;
|
|
|
47 use warnings;
|
|
|
48
|
|
|
49 use Bio::EnsEMBL::Variation::Utils::BaseVepPlugin;
|
|
|
50
|
|
|
51 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
|
|
|
52
|
|
|
# Constructor.
#
# Delegates to the parent class constructor and then aborts unconditionally:
# the die below marks the plugin as non-functional. Everything after the die
# is therefore unreachable for now; it only takes effect if the die is
# removed.
sub new {
  my $class = shift;

  my $self = $class->SUPER::new(@_);

  die("ERROR: This plugin is currently non-functional due to changes in the Gene Expression Atlas API");

  # --- unreachable while the die above is in place ---

  # Species name taken from the plugin config, with underscores encoded as
  # %20 for use in the query URL. /g makes sure every underscore is encoded,
  # not only the first one (e.g. three-word species names).
  $self->{species} = $self->{config}->{species};
  $self->{species} =~ s/\_/\%20/g;

  # Base URL that get_header_info() and run() append their query to.
  $self->{url} = 'https://www.ebi.ac.uk/gxa/widgets/heatmap/multiExperiment.tsv?propertyType=bioentity_identifier';

  return $self;
}
|
|
|
67
|
|
|
# Returns an arrayref naming the feature types this plugin handles:
# a single entry, 'Transcript'.
sub feature_types {
  my @wanted = ('Transcript');
  return \@wanted;
}
|
|
|
71
|
|
|
# Returns an arrayref naming the variant feature types this plugin handles:
# a single entry, 'BaseVariationFeature'.
sub variant_feature_types {
  my @wanted = ('BaseVariationFeature');
  return \@wanted;
}
|
|
|
75
|
|
|
# Returns a hashref mapping output field names ('GXA_' . column name) to a
# human-readable description of each field.
#
# The hash is built on the first call and stored in $self->{_header_info};
# later calls return the stored hashref without contacting the service.
# Column names come from the first non-comment line of the TSV returned for
# a fixed query gene (BRCA2), with spaces replaced by underscores.
#
# If curl cannot be started a warning is emitted and an empty hash is stored,
# so the caller still receives a hashref.
sub get_header_info {
  my $self = shift;

  # Already built on an earlier call.
  return $self->{_header_info} if exists($self->{_header_info});

  # get tissues using BRCA2
  my $url = sprintf(
    '%s&species=%s&geneQuery=%s',
    $self->{url},
    $self->{species},
    'BRCA2'
  );

  my %headers = ();

  # List-form pipe open: curl is executed directly with $url as a single
  # argument, so nothing in the URL is ever interpreted by a shell. A lexical
  # handle replaces the global bareword handle.
  if(open(my $fh, '-|', 'curl', '-s', $url)) {

    # Read everything before closing so curl is never cut off mid-output.
    my @lines = <$fh>;
    close $fh;

    for my $line (@lines) {
      next if $line =~ /^#/;
      chomp $line;
      $line =~ s/ /\_/g;
      %headers = map {'GXA_'.$_ => "Tissue expression level in $_ from Gene Expression Atlas"} (split /\t/, $line);

      # only the first non-comment line holds the column names
      last;
    }
  }
  else {
    warn("WARNING: GXA plugin could not run curl: $!\n");
  }

  $self->{_header_info} = \%headers;

  return $self->{_header_info};
}
|
|
|
109
|
|
|
# Returns a hashref of 'GXA_<column>' => value for the gene of the transcript
# behind the given transcript variation allele ($tva).
#
# The gene id is read from $tr->{_gene_stable_id}, falling back to
# $tr->{_gene}->stable_id when a gene object is attached. An empty hashref is
# returned when no gene id can be determined.
#
# Results are stored per gene id in $self->{_cache}, so the service is
# queried at most once per gene. For each column only purely numeric cells
# (digits and dots) are considered, and when several data rows carry a value
# for the same column the highest one is kept.
#
# If curl cannot be started a warning is emitted and an empty hashref is
# returned without being cached, so a later call can try again.
sub run {
  my ($self, $tva) = @_;

  my $tr = $tva->transcript;

  # Only call ->stable_id when a gene object is actually present; otherwise
  # fall through to the "no gene id" return below instead of dying.
  my $gene_id = $tr->{_gene_stable_id};
  $gene_id ||= $tr->{_gene}->stable_id if $tr->{_gene};
  return {} unless $gene_id;

  my $cache = ($self->{_cache} ||= {});
  return $cache->{$gene_id} if exists($cache->{$gene_id});

  my $url = sprintf(
    '%s&species=%s&geneQuery=%s',
    $self->{url},
    $self->{species},
    $gene_id
  );

  # List-form pipe open: curl is executed directly with $url as a single
  # argument, so nothing in the URL is ever interpreted by a shell.
  my $fh;
  unless(open($fh, '-|', 'curl', '-s', $url)) {
    warn("WARNING: GXA plugin could not run curl: $!\n");
    return {};
  }

  my $seen_header = 0;
  my (@headers, %data);

  # A named lexical is used instead of $_ so the caller's $_ is left alone.
  while(my $line = <$fh>) {
    next if $line =~ /^#/;
    chomp $line;

    # first non-comment line: column names
    if(!$seen_header) {
      $line =~ s/ /\_/g;
      @headers = split /\t/, $line;
      $seen_header = 1;
      next;
    }

    # data line: cells are matched to column names by position
    my @cells = split /\t/, $line;

    for my $i (0..$#headers) {
      my ($key, $value) = ('GXA_'.$headers[$i], $cells[$i]);
      next unless defined($value) && $value =~ /^[0-9\.]+$/;

      # keep the highest value seen for this column
      $data{$key} = $value if !exists($data{$key}) || $value > $data{$key};
    }
  }

  close $fh;

  $cache->{$gene_id} = \%data;

  return $cache->{$gene_id};
}
|
|
|
164
|
|
|
165 1;
|