view variant_effect_predictor/Bio/EnsEMBL/Variation/Pipeline/SetVariationClass.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
line wrap: on
line source

package Bio::EnsEMBL::Variation::Pipeline::SetVariationClass;

use strict;
use warnings;

use base qw(Bio::EnsEMBL::Variation::Pipeline::BaseVariationProcess);

use Bio::EnsEMBL::Variation::Utils::Sequence qw(SO_variation_class);


sub run {

    my $self = shift;
    
    my $var_id_start = $self->required_param('variation_id_start');

    my $var_id_stop  = $self->required_param('variation_id_stop');

    my $temp_var_table = $self->param('temp_var_table');
    
    my $temp_var_feat_table = $self->param('temp_var_feat_table');
  
    my $var_dba = $self->get_species_adaptor('variation');

    my $aa = $var_dba->get_AttributeAdaptor;

    my $dbc = $var_dba->dbc;

    # fetch the failed_descriptions to avoid a join

    my $fds_sth = $dbc->prepare(qq{
        SELECT  failed_description_id, description
        FROM    failed_description
    });

    $fds_sth->execute;

    my %fds;

    while (my ($fd_id, $desc) = $fds_sth->fetchrow_array) {
        $fds{$fd_id} = $desc;
    }
    
    $fds_sth->finish();

    my $all_sth = $dbc->prepare(qq{
        SELECT  v.variation_id, vf.variation_feature_id, vf.allele_string, fv.failed_description_id
        FROM    (variation v LEFT JOIN variation_feature vf ON v.variation_id = vf.variation_id) 
                LEFT JOIN failed_variation fv ON v.variation_id = fv.variation_id, source s
        WHERE   v.variation_id >= ?
        AND     v.variation_id <= ?
        AND     v.source_id = s.source_id
        AND     s.name != 'HGMD-PUBLIC'
    });

    my $vf_insert_sth;
    my $v_insert_sth;

    if (defined $temp_var_feat_table) {
        $vf_insert_sth = $dbc->prepare(qq{
            INSERT IGNORE INTO $temp_var_feat_table (class_attrib_id, variation_feature_id)
            VALUES (?,?)
        });
    }
    else {
        $vf_insert_sth = $dbc->prepare(qq{
            UPDATE variation_feature SET class_attrib_id = ? WHERE variation_feature_id = ?
        });
    }

    if (defined $temp_var_table) {
        $v_insert_sth = $dbc->prepare(qq{
            INSERT IGNORE INTO $temp_var_table (class_attrib_id, variation_id)
            VALUES (?,?)
        });
    }
    else {
        $v_insert_sth = $dbc->prepare(qq{
            UPDATE variation SET class_attrib_id = ? WHERE variation_id = ?
        });
    }
    
    $all_sth->execute($var_id_start, $var_id_stop);

    my @unmapped_v_ids;

    while (my ($v_id, $vf_id, $allele_string, $fd_id) = $all_sth->fetchrow_array) {

        unless ($vf_id) {
            # this variation doesn't have a corresponding variation_feature
            push @unmapped_v_ids, $v_id;
            next;
        }

        my $ref_correct = 1;
        
        # check to see if this variation_feature is known not to match the reference allele,
        # as this tells us if we can call insertions or deletions, or have to resort to indel

        if (defined $fd_id) {
            my $fail_reason = $fds{$fd_id};

            if ($fail_reason eq 'None of the variant alleles match the reference allele') {
                $ref_correct = 0;
            }
        }

        my $so_term = SO_variation_class($allele_string, $ref_correct);

        my $attrib_id = $aa->attrib_id_for_type_value('SO_term', $so_term);

        die "No attrib_id for $so_term" unless defined $attrib_id;

        $vf_insert_sth->execute($attrib_id, $vf_id);
        
        $v_insert_sth->execute($attrib_id, $v_id);
    }
    
    $all_sth->finish();

    # now we need to fetch the alleles for any variations that are not mapped
    # and work out their class
    
    if (@unmapped_v_ids) {
        
        my $id_str = join ',', @unmapped_v_ids;

        my $unmapped_sth = $dbc->prepare(qq{
            SELECT  a.variation_id, ac.allele
            FROM    allele a, allele_code ac
            WHERE   a.variation_id IN ($id_str)
            AND     a.allele_code_id = ac.allele_code_id
            GROUP BY ac.allele
        });

        $unmapped_sth->execute or die "Failed to fetch unmapped variation alleles for variation ids: $id_str";

        my $unmapped_alleles;

        while (my ($v_id, $allele) = $unmapped_sth->fetchrow_array) {
            push @{ $unmapped_alleles->{$v_id} ||= [] }, $allele; 
        }
        
        $unmapped_sth->finish();

        for my $v_id (keys %$unmapped_alleles) {

            my $allele_string = join '/', @{ $unmapped_alleles->{$v_id} };

            # we don't know what the reference is here

            my $ref_correct = 0;

            my $so_term = SO_variation_class($allele_string, $ref_correct);

            my $attrib_id = $aa->attrib_id_for_type_value('SO_term', $so_term);

            die "No attrib_id for $so_term" unless defined $attrib_id;

            $v_insert_sth->execute($attrib_id, $v_id);
        }
    }
    
    $vf_insert_sth->finish();
    $v_insert_sth->finish();
}

1;