Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/Variation/Pipeline/InitTranscriptEffect.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/Variation/Pipeline/InitTranscriptEffect.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,113 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=cut + +package Bio::EnsEMBL::Variation::Pipeline::InitTranscriptEffect; + +use strict; +use warnings; + +use base qw(Bio::EnsEMBL::Variation::Pipeline::BaseVariationProcess); + +my $DEBUG = 0; + +sub fetch_input { + + my $self = shift; + + my $include_lrg = $self->param('include_lrg'); + + my $core_dba = $self->get_species_adaptor('core'); + my $var_dba = $self->get_species_adaptor('variation'); + + my $dbc = $var_dba->dbc(); + + my $ga = $core_dba->get_GeneAdaptor or die "Failed to get gene adaptor"; + + my @transcript_output_ids; + + my $gene_count = 0; + + # fetch all the regular genes + + my @genes = @{ $ga->fetch_all }; + + if ($include_lrg) { + # fetch the LRG genes as well + + push @genes, @{ $ga->fetch_all_by_biotype('LRG_gene') } + } + + for my $gene (@genes) { + $gene_count++; + + for my $transcript (@{ $gene->get_all_Transcripts }) { + + push @transcript_output_ids, { + transcript_stable_id => $transcript->stable_id, + }; + } + if ($DEBUG) { + last if $gene_count >= 100; + } + } + + if (@transcript_output_ids) { + + # check we actually found some transcripts + + # truncate the table because we don't want duplicates + + $dbc->do("TRUNCATE TABLE transcript_variation"); + + # disable the indexes on the table we're going to insert into as + # this significantly speeds up the TranscriptEffect process + + $dbc->do("ALTER TABLE transcript_variation DISABLE KEYS"); + + $self->param('transcript_output_ids', \@transcript_output_ids); + + $self->param( + 'rebuild_indexes', [{ + tables => ['transcript_variation'], + }] + ); + + # we need to kick off the update_vf analysis as well, + # but it doesn't have any parameters we need to set here + + $self->param( + 'update_vf', [{}] + ); + } +} + +sub write_output { + my $self = shift; + + if (my $transcript_output_ids = $self->param('transcript_output_ids')) { + $self->dataflow_output_id($self->param('rebuild_indexes'), 2); + $self->dataflow_output_id($self->param('update_vf'), 3); + $self->dataflow_output_id($transcript_output_ids, 4); + } + + return; +} + +1;