annotate variant_effect_predictor/Bio/EnsEMBL/Variation/Pipeline/SetVariationClass.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 package Bio::EnsEMBL::Variation::Pipeline::SetVariationClass;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 use base qw(Bio::EnsEMBL::Variation::Pipeline::BaseVariationProcess);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 use Bio::EnsEMBL::Variation::Utils::Sequence qw(SO_variation_class);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Assign a Sequence Ontology (SO) class attribute to every variation, and to
# each of its variation_features, whose variation_id lies in the inclusive
# range [variation_id_start, variation_id_stop]. Variations from the source
# named 'HGMD-PUBLIC' are excluded.
#
# Parameters (read from the job via $self):
#   variation_id_start  - required; lower bound of the variation_id range
#   variation_id_stop   - required; upper bound of the variation_id range
#   temp_var_table      - optional; when defined, variation classes are
#                         written with INSERT IGNORE into this table instead
#                         of updating the variation table in place
#   temp_var_feat_table - optional; same as above, for variation_feature
#
# Dies if an SO term has no matching attrib_id, or if the query fetching the
# alleles of unmapped variations fails to execute.
sub run {
    my $self = shift;

    my $var_id_start = $self->required_param('variation_id_start');
    my $var_id_stop  = $self->required_param('variation_id_stop');

    my $temp_var_table      = $self->param('temp_var_table');
    my $temp_var_feat_table = $self->param('temp_var_feat_table');

    my $var_dba = $self->get_species_adaptor('variation');
    my $aa      = $var_dba->get_AttributeAdaptor;
    my $dbc     = $var_dba->dbc;

    # fetch the failed_descriptions up front to avoid a join in the main query
    my $fds_sth = $dbc->prepare(qq{
        SELECT  failed_description_id, description
        FROM    failed_description
    });

    $fds_sth->execute;

    my %fds;

    while (my ($fd_id, $desc) = $fds_sth->fetchrow_array) {
        $fds{$fd_id} = $desc;
    }

    $fds_sth->finish();

    # One row per (variation, variation_feature, failed_variation) combination.
    # Both joins are LEFT JOINs, so vf.* and fv.* columns may be NULL.
    my $all_sth = $dbc->prepare(qq{
        SELECT  v.variation_id, vf.variation_feature_id, vf.allele_string, fv.failed_description_id
        FROM    (variation v LEFT JOIN variation_feature vf ON v.variation_id = vf.variation_id)
                LEFT JOIN failed_variation fv ON v.variation_id = fv.variation_id, source s
        WHERE   v.variation_id >= ?
        AND     v.variation_id <= ?
        AND     v.source_id = s.source_id
        AND     s.name != 'HGMD-PUBLIC'
    });

    my $vf_insert_sth;
    my $v_insert_sth;

    # Both statement flavours take the same bind order: (class_attrib_id, id)
    if (defined $temp_var_feat_table) {
        $vf_insert_sth = $dbc->prepare(qq{
            INSERT IGNORE INTO $temp_var_feat_table (class_attrib_id, variation_feature_id)
            VALUES (?,?)
        });
    }
    else {
        $vf_insert_sth = $dbc->prepare(qq{
            UPDATE variation_feature SET class_attrib_id = ? WHERE variation_feature_id = ?
        });
    }

    if (defined $temp_var_table) {
        $v_insert_sth = $dbc->prepare(qq{
            INSERT IGNORE INTO $temp_var_table (class_attrib_id, variation_id)
            VALUES (?,?)
        });
    }
    else {
        $v_insert_sth = $dbc->prepare(qq{
            UPDATE variation SET class_attrib_id = ? WHERE variation_id = ?
        });
    }

    $all_sth->execute($var_id_start, $var_id_stop);

    # Keyed by variation_id so that a variation appearing in several rows
    # (one per failed_variation entry) is only listed once.
    my %unmapped_v_id;

    while (my ($v_id, $vf_id, $allele_string, $fd_id) = $all_sth->fetchrow_array) {

        unless ($vf_id) {
            # this variation doesn't have a corresponding variation_feature
            $unmapped_v_id{$v_id} = 1;
            next;
        }

        my $ref_correct = 1;

        # check to see if this variation_feature is known not to match the
        # reference allele, as this tells us if we can call insertions or
        # deletions, or have to resort to indel
        #
        # NOTE(review): a variation with several failed_variation rows yields
        # several rows here, and whichever write takes effect decides the
        # class - confirm whether the mismatch reason should always win.
        if (defined $fd_id) {
            my $fail_reason = $fds{$fd_id};

            # guard against a failed_description_id with no description row
            if (defined $fail_reason
                && $fail_reason eq 'None of the variant alleles match the reference allele') {
                $ref_correct = 0;
            }
        }

        my $so_term = SO_variation_class($allele_string, $ref_correct);

        my $attrib_id = $aa->attrib_id_for_type_value('SO_term', $so_term);

        die "No attrib_id for $so_term" unless defined $attrib_id;

        $vf_insert_sth->execute($attrib_id, $vf_id);

        $v_insert_sth->execute($attrib_id, $v_id);
    }

    $all_sth->finish();

    # now we need to fetch the alleles for any variations that are not mapped
    # and work out their class

    my @unmapped_v_ids = keys %unmapped_v_id;

    if (@unmapped_v_ids) {

        # the ids come straight from the variation table, so interpolating
        # them into the IN list is safe
        my $id_str = join ',', @unmapped_v_ids;

        # Group on (variation_id, allele): this yields each distinct allele
        # once per variation. Grouping on the allele alone would return each
        # allele only once overall, attached to an arbitrary variation.
        my $unmapped_sth = $dbc->prepare(qq{
            SELECT  a.variation_id, ac.allele
            FROM    allele a, allele_code ac
            WHERE   a.variation_id IN ($id_str)
            AND     a.allele_code_id = ac.allele_code_id
            GROUP BY a.variation_id, ac.allele
        });

        $unmapped_sth->execute or die "Failed to fetch unmapped variation alleles for variation ids: $id_str";

        my $unmapped_alleles;

        while (my ($v_id, $allele) = $unmapped_sth->fetchrow_array) {
            push @{ $unmapped_alleles->{$v_id} ||= [] }, $allele;
        }

        $unmapped_sth->finish();

        # variations with no allele rows at all are left without a class
        for my $v_id (keys %$unmapped_alleles) {

            my $allele_string = join '/', @{ $unmapped_alleles->{$v_id} };

            # we don't know what the reference is here
            my $ref_correct = 0;

            my $so_term = SO_variation_class($allele_string, $ref_correct);

            my $attrib_id = $aa->attrib_id_for_type_value('SO_term', $so_term);

            die "No attrib_id for $so_term" unless defined $attrib_id;

            $v_insert_sth->execute($attrib_id, $v_id);
        }
    }

    $vf_insert_sth->finish();
    $v_insert_sth->finish();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169