Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Variation/Pipeline/InitVariationClass.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 package Bio::EnsEMBL::Variation::Pipeline::InitVariationClass; | |
2 | |
3 use strict; | |
4 use warnings; | |
5 | |
6 use base qw(Bio::EnsEMBL::Variation::Pipeline::BaseVariationProcess); | |
7 | |
8 use POSIX qw(ceil); | |
9 | |
10 my $DEBUG = 0; | |
11 | |
# Prepare a variation-class run:
#   1. reset class_attrib_id of every non-HGMD variation to the attrib id of
#      the SO term 'sequence_alteration',
#   2. (re)create two empty temp tables shaped like variation and
#      variation_feature,
#   3. split the ordered list of variation_ids into contiguous ranges and
#      stash one output id per range in the 'chunk_output_ids' param, plus a
#      single output id in 'finish_var_class' (both are flowed by
#      write_output).
#
# Params read:
#   num_chunks - requested number of ranges; must be a number greater than 0.
#                Fewer ranges are produced when there are fewer variations
#                than requested chunks.
#
# Dies if num_chunks is not a positive number, or if no attrib_id exists for
# the SO term 'sequence_alteration'.
sub fetch_input {

    my $self = shift;

    my $num_chunks = $self->required_param('num_chunks');

    # Validate before touching the database: a zero value would otherwise
    # surface later as "Illegal division by zero" (after the bulk UPDATE and
    # the temp-table DDL have already run), and a negative value would make
    # the chunking arithmetic meaningless.
    die "num_chunks must be a positive number (got '"
        . (defined $num_chunks ? $num_chunks : 'undef') . "')"
        unless defined $num_chunks && $num_chunks > 0;

    my $var_dba = $self->get_species_adaptor('variation');

    my $aa = $var_dba->get_AttributeAdaptor;

    my $dbc = $var_dba->dbc();

    # first set everything in variation (except HGMDs) to 'sequence_alteration' by default
    # because sometimes we miss them because there is no variation_feature
    # or any alleles (though this should become unnecessary as we move to the
    # new approach to failing for all species)

    my $default_attrib_id = $aa->attrib_id_for_type_value('SO_term', 'sequence_alteration');

    die "No attrib_id for 'sequence_alteration'" unless defined $default_attrib_id;

    # $default_attrib_id comes from the attribute adaptor, not from user
    # input, so it is interpolated directly into the statement.
    $dbc->do(qq{
        UPDATE  variation v, source s
        SET     v.class_attrib_id = $default_attrib_id
        WHERE   v.source_id = s.source_id
        AND     s.name != 'HGMD-PUBLIC'
    });

    # now create some temp tables to store the class attribs

    my $temp_var_table      = 'temp_variation_class';
    my $temp_var_feat_table = 'temp_variation_feature_class';

    # Drop leftovers from any earlier run so each run starts from empty tables.
    $dbc->do(qq{DROP TABLE IF EXISTS $temp_var_table});
    $dbc->do(qq{DROP TABLE IF EXISTS $temp_var_feat_table});

    $dbc->do(qq{CREATE TABLE $temp_var_table LIKE variation});
    $dbc->do(qq{CREATE TABLE $temp_var_feat_table LIKE variation_feature});

    # NOTE(review): presumably keys are disabled to speed up the bulk inserts
    # done by the downstream jobs, and re-enabled in the finish step - confirm.
    $dbc->do(qq{ALTER TABLE $temp_var_table DISABLE KEYS});
    $dbc->do(qq{ALTER TABLE $temp_var_feat_table DISABLE KEYS});

    # now get an ordered list of all the variation_ids

    my $get_var_ids_sth = $dbc->prepare(qq{
        SELECT variation_id FROM variation ORDER BY variation_id
    });

    $get_var_ids_sth->execute;

    my @var_ids;

    while (my ($var_id) = $get_var_ids_sth->fetchrow_array) {
        push @var_ids, $var_id;
    }

    # and split them up into as many chunks as requested

    my $chunk_size = ceil(scalar(@var_ids) / $num_chunks);

    my @output_ids = map {
        {
            variation_id_start  => $_->[0],
            variation_id_stop   => $_->[1],
            temp_var_table      => $temp_var_table,
            temp_var_feat_table => $temp_var_feat_table,
        }
    } _variation_id_ranges(\@var_ids, $chunk_size);

    $self->param('chunk_output_ids', \@output_ids);

    $self->param(
        'finish_var_class', [{
            temp_var_table      => $temp_var_table,
            temp_var_feat_table => $temp_var_feat_table,
        }]
    );
}

# Split an ordered list of ids into consecutive runs of at most $chunk_size
# elements and return one [first_id, last_id] pair per run, in order.
# An empty id list yields an empty list. The input array is not modified.
sub _variation_id_ranges {
    my ($var_ids, $chunk_size) = @_;

    my $last_index = $#{$var_ids};
    my @ranges;

    for (my $first = 0; $first <= $last_index; $first += $chunk_size) {

        # The final run may be shorter than $chunk_size.
        my $last = $first + $chunk_size - 1;
        $last = $last_index if $last > $last_index;

        push @ranges, [ $var_ids->[$first], $var_ids->[$last] ];
    }

    return @ranges;
}
100 | |
# Flow the output ids stored in this job's params:
# 'finish_var_class' goes to branch 1, then 'chunk_output_ids' to branch 2.
sub write_output {
    my ($self) = @_;

    my $finish_output_ids = $self->param('finish_var_class');
    $self->dataflow_output_id($finish_output_ids, 1);

    my $chunk_output_ids = $self->param('chunk_output_ids');
    return $self->dataflow_output_id($chunk_output_ids, 2);
}
107 | |
108 1; | |
109 |