package Bio::EnsEMBL::Variation::Pipeline::SetVariationClass;

use strict;
use warnings;

use base qw(Bio::EnsEMBL::Variation::Pipeline::BaseVariationProcess);

use Bio::EnsEMBL::Variation::Utils::Sequence qw(SO_variation_class);

# Pipeline step that works out the Sequence Ontology (SO) class of every
# variation in a range of variation_ids and stores the matching attrib id
# as class_attrib_id, both per variation_feature and per variation.
#
# Parameters read from the job:
#   variation_id_start  - (required) first variation_id of the range, inclusive
#   variation_id_stop   - (required) last variation_id of the range, inclusive
#   temp_var_table      - (optional) if defined, (class_attrib_id, variation_id)
#                         rows are written to this table with INSERT IGNORE
#                         instead of updating the variation table
#   temp_var_feat_table - (optional) same, for (class_attrib_id,
#                         variation_feature_id) rows / the variation_feature table
#
# Variations whose source is named 'HGMD-PUBLIC' are skipped.
#
# Dies if the SO term computed for an allele string has no attrib id, or if
# the query for the alleles of unmapped variations cannot be executed.
sub run {
    my $self = shift;

    my $var_id_start = $self->required_param('variation_id_start');
    my $var_id_stop  = $self->required_param('variation_id_stop');

    my $temp_var_table      = $self->param('temp_var_table');
    my $temp_var_feat_table = $self->param('temp_var_feat_table');

    my $var_dba = $self->get_species_adaptor('variation');
    my $aa      = $var_dba->get_AttributeAdaptor;
    my $dbc     = $var_dba->dbc;

    # failed_description text marking a variation whose alleles are known not
    # to match the reference; for these we cannot call insertions or deletions
    # and have to resort to indel
    my $ref_mismatch_desc = 'None of the variant alleles match the reference allele';

    # maps an allele string to the attrib id of its SO class
    my $class_attrib_id = sub {
        my ($allele_string, $ref_correct) = @_;

        my $so_term   = SO_variation_class($allele_string, $ref_correct);
        my $attrib_id = $aa->attrib_id_for_type_value('SO_term', $so_term);

        die "No attrib_id for $so_term" unless defined $attrib_id;

        return $attrib_id;
    };

    # fetch the failed_descriptions to avoid a join

    my $fds_sth = $dbc->prepare(qq{
        SELECT  failed_description_id, description
        FROM    failed_description
    });

    $fds_sth->execute;

    my %fds;

    while (my ($fd_id, $desc) = $fds_sth->fetchrow_array) {
        $fds{$fd_id} = $desc;
    }

    $fds_sth->finish();

    # Only the reference-mismatch failure influences the class, so only that
    # failure reason is joined in below. A variation that failed for several
    # reasons would otherwise come back once per reason, and the class that
    # got stored would depend on the order of those rows.
    my @ref_mismatch_fd_ids =
        sort { $a <=> $b }
        grep { defined $fds{$_} && $fds{$_} eq $ref_mismatch_desc }
        keys %fds;

    # "IN (NULL)" never matches, which is what we want when the database has
    # no such failed_description at all
    my $fd_filter = @ref_mismatch_fd_ids
        ? join(',', ('?') x @ref_mismatch_fd_ids)
        : 'NULL';

    my $all_sth = $dbc->prepare(qq{
        SELECT  v.variation_id, vf.variation_feature_id, vf.allele_string, fv.failed_description_id
        FROM    variation v
                JOIN source s ON s.source_id = v.source_id
                LEFT JOIN variation_feature vf ON vf.variation_id = v.variation_id
                LEFT JOIN failed_variation fv ON fv.variation_id = v.variation_id
                    AND fv.failed_description_id IN ($fd_filter)
        WHERE   v.variation_id >= ?
        AND     v.variation_id <= ?
        AND     s.name != 'HGMD-PUBLIC'
    });

    my $vf_insert_sth;
    my $v_insert_sth;

    if (defined $temp_var_feat_table) {
        $vf_insert_sth = $dbc->prepare(qq{
            INSERT IGNORE INTO $temp_var_feat_table (class_attrib_id, variation_feature_id)
            VALUES (?,?)
        });
    }
    else {
        $vf_insert_sth = $dbc->prepare(qq{
            UPDATE variation_feature SET class_attrib_id = ? WHERE variation_feature_id = ?
        });
    }

    if (defined $temp_var_table) {
        $v_insert_sth = $dbc->prepare(qq{
            INSERT IGNORE INTO $temp_var_table (class_attrib_id, variation_id)
            VALUES (?,?)
        });
    }
    else {
        $v_insert_sth = $dbc->prepare(qq{
            UPDATE variation SET class_attrib_id = ? WHERE variation_id = ?
        });
    }

    # placeholders are bound in order of appearance: the failure ids in the
    # ON clause come before the id range in the WHERE clause
    $all_sth->execute(@ref_mismatch_fd_ids, $var_id_start, $var_id_stop);

    # keyed by variation_id so each unmapped variation is listed only once
    my %is_unmapped;

    while (my ($v_id, $vf_id, $allele_string, $fd_id) = $all_sth->fetchrow_array) {

        unless ($vf_id) {
            # this variation doesn't have a corresponding variation_feature
            $is_unmapped{$v_id} = 1;
            next;
        }

        # the join only returns the reference-mismatch failure, so any
        # failed_description_id here means the reference allele is not trusted
        my $ref_correct = defined $fd_id ? 0 : 1;

        my $attrib_id = $class_attrib_id->($allele_string, $ref_correct);

        $vf_insert_sth->execute($attrib_id, $vf_id);
        $v_insert_sth->execute($attrib_id, $v_id);
    }

    $all_sth->finish();

    # now we need to fetch the alleles for any variations that are not mapped
    # and work out their class

    if (%is_unmapped) {

        my $id_str = join ',', sort { $a <=> $b } keys %is_unmapped;

        # group on the variation as well as the allele: grouping on the allele
        # alone hands an allele shared by several variations to only one of them
        my $unmapped_sth = $dbc->prepare(qq{
            SELECT  a.variation_id, ac.allele
            FROM    allele a
                    JOIN allele_code ac ON ac.allele_code_id = a.allele_code_id
            WHERE   a.variation_id IN ($id_str)
            GROUP BY a.variation_id, ac.allele
        });

        $unmapped_sth->execute or die "Failed to fetch unmapped variation alleles for variation ids: $id_str";

        my %alleles_for;

        while (my ($v_id, $allele) = $unmapped_sth->fetchrow_array) {
            push @{ $alleles_for{$v_id} }, $allele;
        }

        $unmapped_sth->finish();

        for my $v_id (keys %alleles_for) {

            my $allele_string = join '/', @{ $alleles_for{$v_id} };

            # we don't know what the reference is here
            my $ref_correct = 0;

            $v_insert_sth->execute($class_attrib_id->($allele_string, $ref_correct), $v_id);
        }
    }

    $vf_insert_sth->finish();
    $v_insert_sth->finish();
}

1;