diff variant_effect_predictor/Bio/EnsEMBL/Variation/Pipeline/SetVariationClass.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Variation/Pipeline/SetVariationClass.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,169 @@
+package Bio::EnsEMBL::Variation::Pipeline::SetVariationClass;
+
+use strict;
+use warnings;
+
+use base qw(Bio::EnsEMBL::Variation::Pipeline::BaseVariationProcess);
+
+use Bio::EnsEMBL::Variation::Utils::Sequence qw(SO_variation_class);
+
+
+sub run {
+
+    my $self = shift;
+    
+    my $var_id_start = $self->required_param('variation_id_start');
+
+    my $var_id_stop  = $self->required_param('variation_id_stop');
+
+    my $temp_var_table = $self->param('temp_var_table');
+    
+    my $temp_var_feat_table = $self->param('temp_var_feat_table');
+  
+    my $var_dba = $self->get_species_adaptor('variation');
+
+    my $aa = $var_dba->get_AttributeAdaptor;
+
+    my $dbc = $var_dba->dbc;
+
+    # fetch the failed_descriptions to avoid a join
+
+    my $fds_sth = $dbc->prepare(qq{
+        SELECT  failed_description_id, description
+        FROM    failed_description
+    });
+
+    $fds_sth->execute;
+
+    my %fds;
+
+    while (my ($fd_id, $desc) = $fds_sth->fetchrow_array) {
+        $fds{$fd_id} = $desc;
+    }
+    
+    $fds_sth->finish();
+
+    my $all_sth = $dbc->prepare(qq{
+        SELECT  v.variation_id, vf.variation_feature_id, vf.allele_string, fv.failed_description_id
+        FROM    (variation v LEFT JOIN variation_feature vf ON v.variation_id = vf.variation_id) 
+                LEFT JOIN failed_variation fv ON v.variation_id = fv.variation_id, source s
+        WHERE   v.variation_id >= ?
+        AND     v.variation_id <= ?
+        AND     v.source_id = s.source_id
+        AND     s.name != 'HGMD-PUBLIC'
+    });
+
+    my $vf_insert_sth;
+    my $v_insert_sth;
+
+    if (defined $temp_var_feat_table) {
+        $vf_insert_sth = $dbc->prepare(qq{
+            INSERT IGNORE INTO $temp_var_feat_table (class_attrib_id, variation_feature_id)
+            VALUES (?,?)
+        });
+    }
+    else {
+        $vf_insert_sth = $dbc->prepare(qq{
+            UPDATE variation_feature SET class_attrib_id = ? WHERE variation_feature_id = ?
+        });
+    }
+
+    if (defined $temp_var_table) {
+        $v_insert_sth = $dbc->prepare(qq{
+            INSERT IGNORE INTO $temp_var_table (class_attrib_id, variation_id)
+            VALUES (?,?)
+        });
+    }
+    else {
+        $v_insert_sth = $dbc->prepare(qq{
+            UPDATE variation SET class_attrib_id = ? WHERE variation_id = ?
+        });
+    }
+    
+    $all_sth->execute($var_id_start, $var_id_stop);
+
+    my @unmapped_v_ids;
+
+    while (my ($v_id, $vf_id, $allele_string, $fd_id) = $all_sth->fetchrow_array) {
+
+        unless ($vf_id) {
+            # this variation doesn't have a corresponding variation_feature
+            push @unmapped_v_ids, $v_id;
+            next;
+        }
+
+        my $ref_correct = 1;
+        
+        # check to see if this variation_feature is known not to match the reference allele,
+        # as this tells us if we can call insertions or deletions, or have to resort to indel
+
+        if (defined $fd_id) {
+            my $fail_reason = $fds{$fd_id};
+
+            if ($fail_reason eq 'None of the variant alleles match the reference allele') {
+                $ref_correct = 0;
+            }
+        }
+
+        my $so_term = SO_variation_class($allele_string, $ref_correct);
+
+        my $attrib_id = $aa->attrib_id_for_type_value('SO_term', $so_term);
+
+        die "No attrib_id for $so_term" unless defined $attrib_id;
+
+        $vf_insert_sth->execute($attrib_id, $vf_id);
+        
+        $v_insert_sth->execute($attrib_id, $v_id);
+    }
+    
+    $all_sth->finish();
+
+    # now we need to fetch the alleles for any variations that are not mapped
+    # and work out their class
+    
+    if (@unmapped_v_ids) {
+        
+        my $id_str = join ',', @unmapped_v_ids;
+
+        my $unmapped_sth = $dbc->prepare(qq{
+            SELECT  a.variation_id, ac.allele
+            FROM    allele a, allele_code ac
+            WHERE   a.variation_id IN ($id_str)
+            AND     a.allele_code_id = ac.allele_code_id
+            GROUP BY ac.allele
+        });
+
+        $unmapped_sth->execute or die "Failed to fetch unmapped variation alleles for variation ids: $id_str";
+
+        my $unmapped_alleles;
+
+        while (my ($v_id, $allele) = $unmapped_sth->fetchrow_array) {
+            push @{ $unmapped_alleles->{$v_id} ||= [] }, $allele; 
+        }
+        
+        $unmapped_sth->finish();
+
+        for my $v_id (keys %$unmapped_alleles) {
+
+            my $allele_string = join '/', @{ $unmapped_alleles->{$v_id} };
+
+            # we don't know what the reference is here
+
+            my $ref_correct = 0;
+
+            my $so_term = SO_variation_class($allele_string, $ref_correct);
+
+            my $attrib_id = $aa->attrib_id_for_type_value('SO_term', $so_term);
+
+            die "No attrib_id for $so_term" unless defined $attrib_id;
+
+            $v_insert_sth->execute($attrib_id, $v_id);
+        }
+    }
+    
+    $vf_insert_sth->finish();
+    $v_insert_sth->finish();
+}
+
+1;
+