annotate dir_plugins/GXA.pm @ 7:01748ca044e6 draft

Uploaded
author dvanzessen
date Wed, 17 Jul 2019 05:41:41 -0400
parents e545d0a25ffe
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
1 =head1 LICENSE
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
2
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
4 Copyright [2016-2018] EMBL-European Bioinformatics Institute
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
5
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
6 Licensed under the Apache License, Version 2.0 (the "License");
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
7 you may not use this file except in compliance with the License.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
8 You may obtain a copy of the License at
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
9
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
10 http://www.apache.org/licenses/LICENSE-2.0
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
11
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
12 Unless required by applicable law or agreed to in writing, software
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
13 distributed under the License is distributed on an "AS IS" BASIS,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
15 See the License for the specific language governing permissions and
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
16 limitations under the License.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
17
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
18 =head1 CONTACT
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
19
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
20 Ensembl <http://www.ensembl.org/info/about/contact/index.html>
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
21
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
22 =cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
23
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
24 =head1 NAME
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
25
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
26 GXA
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
27
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
28 =head1 SYNOPSIS
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
29
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
30 mv GXA.pm ~/.vep/Plugins
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
31 ./vep -i variations.vcf --cache --plugin GXA
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
32
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
33 =head1 DESCRIPTION
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
34
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
35 This is a plugin for the Ensembl Variant Effect Predictor (VEP) that
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
36 reports data from the Gene Expression Atlas.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
37
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
38 NB: no account is taken for comparing values across experiments; if values
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
39 exist for the same tissue in more than one experiment, the highest value
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
40 is reported.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
41
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
42 =cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
43
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
44 package GXA;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
45
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
46 use strict;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
47 use warnings;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
48
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
49 use Bio::EnsEMBL::Variation::Utils::BaseVepPlugin;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
50
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
51 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
52
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Constructor.
#
# Delegates object creation to the BaseVepPlugin constructor and then aborts:
# the plugin is deliberately disabled because the Gene Expression Atlas API it
# was written against has changed. The configuration code after the die() is
# kept so the plugin can be revived by removing that single statement.
sub new {
  my $class = shift;

  my $self = $class->SUPER::new(@_);

  # Intentional hard stop; everything below is unreachable until this is removed.
  die("ERROR: This plugin is currently non-functional due to changes in the Gene Expression Atlas API");

  # The species name goes into a URL query string, so every underscore has to
  # become an encoded space. Without /g only the first one was replaced, which
  # mangled three-part names such as canis_lupus_familiaris.
  $self->{species} = $self->{config}->{species};
  $self->{species} =~ s/\_/\%20/g;

  # Base query URL; species and gene are appended per request.
  $self->{url} = 'https://www.ebi.ac.uk/gxa/widgets/heatmap/multiExperiment.tsv?propertyType=bioentity_identifier';

  return $self;
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
67
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Feature types this plugin should be run on: transcripts only.
# A fresh array reference is returned on every call.
sub feature_types {
  my @wanted = ('Transcript');
  return \@wanted;
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
71
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Variant feature types this plugin should be run on.
# A fresh array reference is returned on every call.
sub variant_feature_types {
  my @wanted = ('BaseVariationFeature');
  return \@wanted;
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
75
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Return a hash reference mapping output column name => description.
#
# The column names are taken from the first non-comment line of the TSV that
# the service returns for a query on BRCA2; each tab-separated field, with
# spaces turned into underscores, becomes a "GXA_<field>" key. The result is
# built once and stored in $self->{_header_info}; later calls return the
# stored reference. If curl cannot be started a warning is emitted and an
# empty hash is stored, so header generation never aborts the caller.
sub get_header_info {
  my $self = shift;

  if(!exists($self->{_header_info})) {

    # get tissues using BRCA2
    my $url = sprintf(
      '%s&species=%s&geneQuery=%s',
      $self->{url},
      $self->{species},
      'BRCA2'
    );

    my %headers = ();

    # List-form pipe open: curl is exec'd directly with the URL as a single
    # argument, so nothing in the configured species name is ever interpreted
    # by a shell. The lexical handle also avoids clobbering a global IN.
    if(open(my $in, '-|', 'curl', '-s', $url)) {

      # Only the first non-comment line (the header row) is needed, so read
      # line by line instead of slurping the whole response.
      while(my $line = <$in>) {
        next if $line =~ /^#/;
        chomp $line;
        $line =~ s/ /\_/g;
        %headers = map {'GXA_'.$_ => "Tissue expression level in $_ from Gene Expression Atlas"} (split /\t/, $line);
        last;
      }

      # Closing early may make curl exit on a broken pipe; that status is
      # irrelevant here, so the return value is deliberately ignored.
      close $in;
    }
    else {
      warn("WARNING: GXA plugin could not run curl to fetch header info: $!\n");
    }

    $self->{_header_info} = \%headers;
  }

  return $self->{_header_info};
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
109
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Return a hash reference of "GXA_<column>" => expression value for the gene
# of the transcript behind the given transcript variation allele.
#
# The gene's stable ID is taken from the transcript's _gene_stable_id slot or,
# failing that, from its _gene object when one is attached; an empty hash is
# returned when neither yields an ID. Results are fetched once per gene via
# curl and kept in $self->{_cache}. The first non-comment line of the TSV is
# the header row; for every later row each numeric cell is recorded under its
# column name, keeping the highest value seen when several rows supply one.
sub run {
  my ($self, $tva) = @_;

  my $tr = $tva->transcript;

  # Only call stable_id when a gene object is actually present; calling it on
  # undef would die before the "no gene id" guard below could apply.
  my $gene_id = $tr->{_gene_stable_id};
  if(!$gene_id && $tr->{_gene}) {
    $gene_id = $tr->{_gene}->stable_id;
  }
  return {} unless $gene_id;

  if(!exists($self->{_cache}) || !exists($self->{_cache}->{$gene_id})) {

    my $url = sprintf(
      '%s&species=%s&geneQuery=%s',
      $self->{url},
      $self->{species},
      $gene_id
    );

    my (@headers, %data);

    # List-form pipe open: curl is exec'd directly with the URL as a single
    # argument, so no part of the URL is ever interpreted by a shell.
    if(open(my $in, '-|', 'curl', '-s', $url)) {
      my $first = 1;

      while(my $line = <$in>) {
        next if $line =~ /^#/;
        chomp $line;

        # header row: remember the column names, spaces become underscores
        if($first) {
          $line =~ s/ /\_/g;
          @headers = split /\t/, $line;
          $first = 0;
          next;
        }

        my @values = split /\t/, $line;

        for my $i (0..$#headers) {
          my ($h, $d) = ('GXA_'.$headers[$i], $values[$i]);

          # Accept plain non-negative decimals only. The pattern requires at
          # least one digit and at most one dot, so cells such as "." or
          # "1.2.3" are skipped instead of reaching the numeric comparison.
          next unless defined($d) && $d =~ /^(?:[0-9]+\.?[0-9]*|\.[0-9]+)$/;

          # keep the highest value seen for this column
          $data{$h} = $d if !exists($data{$h}) || $d > $data{$h};
        }
      }

      close $in;
    }
    else {
      warn("WARNING: GXA plugin could not run curl for gene $gene_id: $!\n");
    }

    # Cached even when empty so a gene is only ever queried once.
    $self->{_cache}->{$gene_id} = \%data;
  }

  return $self->{_cache}->{$gene_id};
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
164
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
165 1;