Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/Variation/Pipeline/SetVariationClass.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/Variation/Pipeline/SetVariationClass.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,169 @@ +package Bio::EnsEMBL::Variation::Pipeline::SetVariationClass; + +use strict; +use warnings; + +use base qw(Bio::EnsEMBL::Variation::Pipeline::BaseVariationProcess); + +use Bio::EnsEMBL::Variation::Utils::Sequence qw(SO_variation_class); + + +sub run { + + my $self = shift; + + my $var_id_start = $self->required_param('variation_id_start'); + + my $var_id_stop = $self->required_param('variation_id_stop'); + + my $temp_var_table = $self->param('temp_var_table'); + + my $temp_var_feat_table = $self->param('temp_var_feat_table'); + + my $var_dba = $self->get_species_adaptor('variation'); + + my $aa = $var_dba->get_AttributeAdaptor; + + my $dbc = $var_dba->dbc; + + # fetch the failed_descriptions to avoid a join + + my $fds_sth = $dbc->prepare(qq{ + SELECT failed_description_id, description + FROM failed_description + }); + + $fds_sth->execute; + + my %fds; + + while (my ($fd_id, $desc) = $fds_sth->fetchrow_array) { + $fds{$fd_id} = $desc; + } + + $fds_sth->finish(); + + my $all_sth = $dbc->prepare(qq{ + SELECT v.variation_id, vf.variation_feature_id, vf.allele_string, fv.failed_description_id + FROM (variation v LEFT JOIN variation_feature vf ON v.variation_id = vf.variation_id) + LEFT JOIN failed_variation fv ON v.variation_id = fv.variation_id, source s + WHERE v.variation_id >= ? + AND v.variation_id <= ? + AND v.source_id = s.source_id + AND s.name != 'HGMD-PUBLIC' + }); + + my $vf_insert_sth; + my $v_insert_sth; + + if (defined $temp_var_feat_table) { + $vf_insert_sth = $dbc->prepare(qq{ + INSERT IGNORE INTO $temp_var_feat_table (class_attrib_id, variation_feature_id) + VALUES (?,?) + }); + } + else { + $vf_insert_sth = $dbc->prepare(qq{ + UPDATE variation_feature SET class_attrib_id = ? WHERE variation_feature_id = ? + }); + } + + if (defined $temp_var_table) { + $v_insert_sth = $dbc->prepare(qq{ + INSERT IGNORE INTO $temp_var_table (class_attrib_id, variation_id) + VALUES (?,?) + }); + } + else { + $v_insert_sth = $dbc->prepare(qq{ + UPDATE variation SET class_attrib_id = ? WHERE variation_id = ? + }); + } + + $all_sth->execute($var_id_start, $var_id_stop); + + my @unmapped_v_ids; + + while (my ($v_id, $vf_id, $allele_string, $fd_id) = $all_sth->fetchrow_array) { + + unless ($vf_id) { + # this variation doesn't have a corresponding variation_feature + push @unmapped_v_ids, $v_id; + next; + } + + my $ref_correct = 1; + + # check to see if this variation_feature is known not to match the reference allele, + # as this tells us if we can call insertions or deletions, or have to resort to indel + + if (defined $fd_id) { + my $fail_reason = $fds{$fd_id}; + + if ($fail_reason eq 'None of the variant alleles match the reference allele') { + $ref_correct = 0; + } + } + + my $so_term = SO_variation_class($allele_string, $ref_correct); + + my $attrib_id = $aa->attrib_id_for_type_value('SO_term', $so_term); + + die "No attrib_id for $so_term" unless defined $attrib_id; + + $vf_insert_sth->execute($attrib_id, $vf_id); + + $v_insert_sth->execute($attrib_id, $v_id); + } + + $all_sth->finish(); + + # now we need to fetch the alleles for any variations that are not mapped + # and work out their class + + if (@unmapped_v_ids) { + + my $id_str = join ',', @unmapped_v_ids; + + my $unmapped_sth = $dbc->prepare(qq{ + SELECT a.variation_id, ac.allele + FROM allele a, allele_code ac + WHERE a.variation_id IN ($id_str) + AND a.allele_code_id = ac.allele_code_id + GROUP BY ac.allele + }); + + $unmapped_sth->execute or die "Failed to fetch unmapped variation alleles for variation ids: $id_str"; + + my $unmapped_alleles; + + while (my ($v_id, $allele) = $unmapped_sth->fetchrow_array) { + push @{ $unmapped_alleles->{$v_id} ||= [] }, $allele; + } + + $unmapped_sth->finish(); + + for my $v_id (keys %$unmapped_alleles) { + + my $allele_string = join '/', @{ $unmapped_alleles->{$v_id} }; + + # we don't know what the reference is here + + my $ref_correct = 0; + + my $so_term = SO_variation_class($allele_string, $ref_correct); + + my $attrib_id = $aa->attrib_id_for_type_value('SO_term', $so_term); + + die "No attrib_id for $so_term" unless defined $attrib_id; + + $v_insert_sth->execute($attrib_id, $v_id); + } + } + + $vf_insert_sth->finish(); + $v_insert_sth->finish(); +} + +1; +