comparison variant_effect_predictor/Bio/EnsEMBL/Variation/Pipeline/InitVariationClass.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 package Bio::EnsEMBL::Variation::Pipeline::InitVariationClass;
2
3 use strict;
4 use warnings;
5
6 use base qw(Bio::EnsEMBL::Variation::Pipeline::BaseVariationProcess);
7
8 use POSIX qw(ceil);
9
10 my $DEBUG = 0;
11
# Prepare a variation-class run: reset the default class, (re)create the
# temporary class tables and work out the variation_id ranges to hand out.
#
# Reads the required 'num_chunks' parameter (the number of ranges the
# ordered variation_id list should be split into; must be > 0).
#
# Stores two params for write_output to flow:
#   chunk_output_ids - arrayref of hashes, one per range, with the keys
#                      variation_id_start, variation_id_stop,
#                      temp_var_table and temp_var_feat_table
#   finish_var_class - one-element arrayref holding a hash with the two
#                      temporary table names
#
# Dies if 'num_chunks' is not a positive number, or if no attrib_id is
# found for the SO term 'sequence_alteration'.
sub fetch_input {
    my $self = shift;

    my $num_chunks = $self->required_param('num_chunks');

    # Validate before touching the database: a zero value would otherwise
    # only fail (division by zero) after the bulk UPDATE and the table
    # rebuild below, and a negative value would make the chunking never
    # terminate.
    unless (defined $num_chunks && $num_chunks > 0) {
        my $got = defined $num_chunks ? "'$num_chunks'" : 'undef';
        die "Parameter 'num_chunks' must be a positive number (got $got)";
    }

    my $var_dba = $self->get_species_adaptor('variation');
    my $aa      = $var_dba->get_AttributeAdaptor;
    my $dbc     = $var_dba->dbc();

    # first set everything in variation (except HGMDs) to 'sequence_alteration' by default
    # because sometimes we miss them because there is no variation_feature
    # or any alleles (though this should become unnecessary as we move to the
    # new approach to failing for all species)

    my $default_attrib_id = $aa->attrib_id_for_type_value('SO_term', 'sequence_alteration');

    die "No attrib_id for 'sequence_alteration'" unless defined $default_attrib_id;

    $dbc->do(qq{
        UPDATE  variation v, source s
        SET     v.class_attrib_id = $default_attrib_id
        WHERE   v.source_id = s.source_id
        AND     s.name != 'HGMD-PUBLIC'
    });

    # now create some temp tables to store the class attribs; they are
    # dropped first so that a re-run starts from empty tables

    my $temp_var_table      = 'temp_variation_class';
    my $temp_var_feat_table = 'temp_variation_feature_class';

    $dbc->do(qq{DROP TABLE IF EXISTS $temp_var_table});
    $dbc->do(qq{DROP TABLE IF EXISTS $temp_var_feat_table});

    $dbc->do(qq{CREATE TABLE $temp_var_table LIKE variation});
    $dbc->do(qq{CREATE TABLE $temp_var_feat_table LIKE variation_feature});

    # NOTE(review): presumably keys are disabled to speed up the bulk
    # inserts done by later jobs - confirm against the consumers
    $dbc->do(qq{ALTER TABLE $temp_var_table DISABLE KEYS});
    $dbc->do(qq{ALTER TABLE $temp_var_feat_table DISABLE KEYS});

    # now get an ordered list of all the variation_ids

    my $get_var_ids_sth = $dbc->prepare(qq{
        SELECT variation_id FROM variation ORDER BY variation_id
    });

    $get_var_ids_sth->execute;

    my @var_ids;

    while (my ($var_id) = $get_var_ids_sth->fetchrow_array) {
        push @var_ids, $var_id;
    }

    # and split them up into as many chunks as requested; ranges are taken
    # by position in the ordered list, so gaps in the ids do not skew the
    # number of variations per chunk

    my $num_vars   = scalar @var_ids;
    my $chunk_size = ceil($num_vars / $num_chunks);
    my $last_index = $num_vars - 1;

    my @output_ids;

    # With no variations the loop body never runs (0 > -1), so a zero
    # $chunk_size cannot stall the loop.
    for (my $first = 0; $first <= $last_index; $first += $chunk_size) {

        # the final chunk may be shorter than $chunk_size
        my $last = $first + $chunk_size - 1;
        $last = $last_index if $last > $last_index;

        push @output_ids, {
            variation_id_start  => $var_ids[$first],
            variation_id_stop   => $var_ids[$last],
            temp_var_table      => $temp_var_table,
            temp_var_feat_table => $temp_var_feat_table,
        };
    }

    $self->param('chunk_output_ids', \@output_ids);

    $self->param(
        'finish_var_class', [{
            temp_var_table      => $temp_var_table,
            temp_var_feat_table => $temp_var_feat_table,
        }]
    );
}
100
# Flow the output ids prepared by fetch_input: first the single
# 'finish_var_class' id with branch argument 1, then the per-range
# 'chunk_output_ids' with branch argument 2.
sub write_output {
    my ($self) = @_;

    my $finish_ids = $self->param('finish_var_class');
    $self->dataflow_output_id($finish_ids, 1);

    my $chunk_ids = $self->param('chunk_output_ids');
    return $self->dataflow_output_id($chunk_ids, 2);
}
107
108 1;
109