annotate variant_effect_predictor/Bio/EnsEMBL/Variation/Pipeline/InitVariationClass.pm @ 2:a5976b2dce6f

changing default values for ensembl database
author mahtabm
date Thu, 11 Apr 2013 17:15:42 +1000
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 package Bio::EnsEMBL::Variation::Pipeline::InitVariationClass;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 use base qw(Bio::EnsEMBL::Variation::Pipeline::BaseVariationProcess);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 use POSIX qw(ceil);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 my $DEBUG = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Prepare a variation-class run.
#
# Reads the required 'num_chunks' parameter, which must be a number greater
# than zero (it is checked before the database is touched).
#
# Database side effects (on the species' variation database):
#   * every variation whose source is not 'HGMD-PUBLIC' gets its
#     class_attrib_id set to the attrib id of SO term 'sequence_alteration';
#   * temp_variation_class and temp_variation_feature_class are dropped if
#     present, recreated LIKE variation / variation_feature, and have their
#     keys disabled.
#
# Parameters stored for write_output():
#   chunk_output_ids - arrayref of hashes, one per chunk, each holding
#                      variation_id_start, variation_id_stop and the two
#                      temp table names;
#   finish_var_class - arrayref with a single hash holding the two temp
#                      table names.
#
# Dies if num_chunks is not greater than zero, or if no attrib id exists for
# 'sequence_alteration'.
sub fetch_input {

    my $self = shift;

    my $num_chunks = $self->required_param('num_chunks');

    # Validate up front: zero would only fail later with an opaque division
    # error (after the tables below have already been modified), and a
    # negative value would yield a non-positive chunk size and no usable
    # chunking at all.
    die "num_chunks must be a positive number (got '$num_chunks')"
        unless $num_chunks > 0;

    my $var_dba = $self->get_species_adaptor('variation');

    my $aa = $var_dba->get_AttributeAdaptor;

    my $dbc = $var_dba->dbc();

    # first set everything in variation (except HGMDs) to 'sequence_alteration'
    # by default because sometimes we miss them because there is no
    # variation_feature or any alleles (though this should become unnecessary
    # as we move to the new approach to failing for all species)

    my $default_attrib_id = $aa->attrib_id_for_type_value('SO_term', 'sequence_alteration');

    die "No attrib_id for 'sequence_alteration'" unless defined $default_attrib_id;

    $dbc->do(qq{
        UPDATE  variation v, source s
        SET     v.class_attrib_id = $default_attrib_id
        WHERE   v.source_id = s.source_id
        AND     s.name != 'HGMD-PUBLIC'
    });

    # now create some temp tables to store the class attribs

    my $temp_var_table      = 'temp_variation_class';
    my $temp_var_feat_table = 'temp_variation_feature_class';

    $dbc->do(qq{DROP TABLE IF EXISTS $temp_var_table});
    $dbc->do(qq{DROP TABLE IF EXISTS $temp_var_feat_table});

    $dbc->do(qq{CREATE TABLE $temp_var_table LIKE variation});
    $dbc->do(qq{CREATE TABLE $temp_var_feat_table LIKE variation_feature});

    # NOTE(review): presumably keys are disabled to speed up bulk loading
    # into the temp tables by the downstream jobs - confirm.
    $dbc->do(qq{ALTER TABLE $temp_var_table DISABLE KEYS});
    $dbc->do(qq{ALTER TABLE $temp_var_feat_table DISABLE KEYS});

    # now get an ordered list of all the variation_ids

    my $get_var_ids_sth = $dbc->prepare(qq{
        SELECT variation_id FROM variation ORDER BY variation_id
    });

    $get_var_ids_sth->execute;

    my @var_ids;

    while (my ($var_id) = $get_var_ids_sth->fetchrow_array) {
        push @var_ids, $var_id;
    }

    # and split them up into as many chunks as requested

    my $num_vars = scalar @var_ids;

    # With num_chunks > 0 this is at least 1 whenever there are any ids, so
    # the index loop below always advances and terminates.
    my $chunk_size = ceil($num_vars / $num_chunks);

    my @output_ids;

    for (my $first = 0; $first < $num_vars; $first += $chunk_size) {

        # the final chunk may be shorter than $chunk_size
        my $last = $first + $chunk_size - 1;
        $last = $num_vars - 1 if $last > $num_vars - 1;

        push @output_ids, {
            variation_id_start  => $var_ids[$first],
            variation_id_stop   => $var_ids[$last],
            temp_var_table      => $temp_var_table,
            temp_var_feat_table => $temp_var_feat_table,
        };
    }

    $self->param('chunk_output_ids', \@output_ids);

    $self->param(
        'finish_var_class', [{
            temp_var_table      => $temp_var_table,
            temp_var_feat_table => $temp_var_feat_table,
        }]
    );
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Emit the jobs prepared by fetch_input():
#   branch 1 receives the 'finish_var_class' parameter (temp table names),
#   branch 2 receives the 'chunk_output_ids' parameter (one entry per chunk).
# The finishing job is flowed first, then the chunk jobs.
sub write_output {
    my ($self) = @_;

    # branch 1: the single finishing job
    $self->dataflow_output_id( $self->param('finish_var_class'), 1 );

    # branch 2: one job per variation_id chunk
    $self->dataflow_output_id( $self->param('chunk_output_ids'), 2 );
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109