Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/DBSQL/PredictionTranscriptAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/DBSQL/PredictionTranscriptAdaptor.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,587 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=cut + +=head1 NAME + +Bio::EnsEMBL::DBSQL::PredictionTranscriptAdaptor - +Performs database interaction related to PredictionTranscripts + +=head1 SYNOPSIS + + # get a prediction transcript adaptor from the database + $pta = $database_adaptor->get_PredictionTranscriptAdaptor(); + + # get a slice on a region of chromosome 1 + $sa = $database_adaptor->get_SliceAdaptor(); + + $slice = $sa->fetch_by_region( 'chromosome', 'x', 100000, 200000 ); + + # get all the prediction transcripts from the slice region + $prediction_transcripts = @{ $pta->fetch_all_by_Slice($slice) }; + +=head1 METHODS + +=cut + +package Bio::EnsEMBL::DBSQL::PredictionTranscriptAdaptor; + +use vars qw( @ISA ); +use strict; + +use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor; +use Bio::EnsEMBL::DBSQL::AnalysisAdaptor; +use Bio::EnsEMBL::PredictionTranscript; +use Bio::EnsEMBL::Utils::Exception qw(deprecate throw warning); + +@ISA = qw( Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor ); + + +# _tables +# +# Arg [1] : none +# Example : none +# Description: Implements abstract superclass method to define the table used +# to retrieve prediction transcripts from the database +# Returntype : string +# Exceptions : none +# Caller : generic_fetch + +sub _tables { + my $self = shift; + + return ['prediction_transcript', 'pt']; +} + + +# _columns + +# Arg [1] : none +# Example : none +# Description: Implements abstract superclass method to define the columns +# retrieved in database queries used to create prediction +# transcripts. +# Returntype : list of strings +# Exceptions : none +# Caller : generic_fetch +# + +sub _columns { + my $self = shift; + + return qw( pt.prediction_transcript_id + pt.seq_region_id + pt.seq_region_start + pt.seq_region_end + pt.seq_region_strand + pt.analysis_id + pt.display_label); +} + + +=head2 fetch_by_stable_id + + Arg [1] : string $stable_id + The stable id of the transcript to retrieve + Example : $trans = $trans_adptr->fetch_by_stable_id('GENSCAN00000001234'); + Description: Retrieves a prediction transcript via its display_label. + This method is called fetch_by_stable_id for polymorphism with + the TranscriptAdaptor. Prediction transcript display_labels are + not necessarily stable in that the same identifier may be reused + for a completely different prediction transcript in a subsequent + database release. + Returntype : Bio::EnsEMBL::PredictionTranscript + Caller : general + Status : Stable + +=cut + +sub fetch_by_stable_id { + my $self = shift; + my $stable_id = shift; + + throw('Stable_id argument expected') if(!$stable_id); + + my $syn = $self->_tables()->[1]; + + $self->bind_param_generic_fetch($stable_id,SQL_VARCHAR); + my $pts = $self->generic_fetch("$syn.display_label = ?"); + + return (@$pts) ? $pts->[0] : undef; +} + + + +=head2 fetch_all_by_Slice + + Arg [1] : Bio::EnsEMBL::Slice $slice + The slice to fetch transcripts on. + Arg [3] : (optional) boolean $load_exons + if true, exons will be loaded immediately rather than + lazy loaded later. + Example : $transcripts = $ + Description: Overrides superclass method to optionally load exons + immediately rather than lazy-loading them later. This + is more efficient when there are a lot of transcripts whose + exons are going to be used. + Returntype : reference to list of transcripts + Exceptions : thrown if exon cannot be placed on transcript slice + Caller : Slice::get_all_Transcripts + Status : Stable + +=cut + +sub fetch_all_by_Slice { + my $self = shift; + my $slice = shift; + my $logic_name = shift; + my $load_exons = shift; + + my $transcripts = $self->SUPER::fetch_all_by_Slice($slice,$logic_name); + + # if there are 0 or 1 transcripts still do lazy-loading + if(!$load_exons || @$transcripts < 2) { + return $transcripts; + } + + # preload all of the exons now, instead of lazy loading later + # faster than 1 query per transcript + + # get extent of region spanned by transcripts + my ($min_start, $max_end); + foreach my $tr (@$transcripts) { + if(!defined($min_start) || $tr->seq_region_start() < $min_start) { + $min_start = $tr->seq_region_start(); + } + if(!defined($max_end) || $tr->seq_region_end() > $max_end) { + $max_end = $tr->seq_region_end(); + } + } + +# mades no sense, the limit for the slice will be defined by the transcripts +# $min_start += $slice->start() - 1; +# $max_end += $slice->start() - 1; + + my $ext_slice; + + if($min_start >= $slice->start() && $max_end <= $slice->end()) { + $ext_slice = $slice; + } else { + my $sa = $self->db()->get_SliceAdaptor(); + $ext_slice = $sa->fetch_by_region + ($slice->coord_system->name(), $slice->seq_region_name(), + $min_start,$max_end, $slice->strand(), $slice->coord_system->version()); + } + + # associate exon identifiers with transcripts + + my %tr_hash = map {$_->dbID => $_} @$transcripts; + + my $tr_id_str = '(' . join(',', keys %tr_hash) . ')'; + + my $sth = $self->prepare + ("SELECT prediction_transcript_id, prediction_exon_id, exon_rank " . + "FROM prediction_exon " . + "WHERE prediction_transcript_id IN $tr_id_str"); + + $sth->execute(); + + my ($ex_id, $tr_id, $rank); + $sth->bind_columns(\$tr_id, \$ex_id, \$rank); + + my %ex_tr_hash; + + while($sth->fetch()) { + $ex_tr_hash{$ex_id} ||= []; + push @{$ex_tr_hash{$ex_id}}, [$tr_hash{$tr_id}, $rank]; + } + + $sth->finish(); + + my $ea = $self->db()->get_PredictionExonAdaptor(); + my $exons = $ea->fetch_all_by_Slice($ext_slice); + + # move exons onto transcript slice, and add them to transcripts + foreach my $ex (@$exons) { + $ex = $ex->transfer($slice) if($slice != $ext_slice); + + if(!$ex) { + throw("Unexpected. PredictionExon could not be transfered onto " . + "PredictionTranscript slice."); + } + + foreach my $row (@{$ex_tr_hash{$ex->dbID()}}) { + my ($tr, $rank) = @$row; + $tr->add_Exon($ex, $rank); + } + } + + return $transcripts; +} + + + + + +=head2 _objs_from_sth + + Arg [1] : DBI:st $sth + An executed DBI statement handle + Arg [2] : (optional) Bio::EnsEMBL::Mapper $mapper + An mapper to be used to convert contig coordinates + to assembly coordinates. + Arg [3] : (optional) Bio::EnsEMBL::Slice $slice + A slice to map the prediction transcript to. + Example : $p_transcripts = $self->_objs_from_sth($sth); + Description: Creates a list of Prediction transcripts from an executed DBI + statement handle. The columns retrieved via the statement + handle must be in the same order as the columns defined by the + _columns method. If the slice argument is provided then the + the prediction transcripts will be in returned in the coordinate + system of the $slice argument. Otherwise the prediction + transcripts will be returned in the RawContig coordinate system. + Returntype : reference to a list of Bio::EnsEMBL::PredictionTranscripts + Exceptions : none + Caller : superclass generic_fetch + Status : Stable + +=cut + +sub _objs_from_sth { + my ($self, $sth, $mapper, $dest_slice) = @_; + + # + # This code is ugly because an attempt has been made to remove as many + # function calls as possible for speed purposes. Thus many caches and + # a fair bit of gymnastics is used. + # + + my $sa = $self->db()->get_SliceAdaptor(); + my $aa = $self->db()->get_AnalysisAdaptor(); + + my @ptranscripts; + my %analysis_hash; + my %slice_hash; + my %sr_name_hash; + my %sr_cs_hash; + + my ($prediction_transcript_id, + $seq_region_id, + $seq_region_start, + $seq_region_end, + $seq_region_strand, + $analysis_id, + $display_label); + + $sth->bind_columns(\$prediction_transcript_id, + \$seq_region_id, + \$seq_region_start, + \$seq_region_end, + \$seq_region_strand, + \$analysis_id, + \$display_label); + + my $asm_cs; + my $cmp_cs; + my $asm_cs_vers; + my $asm_cs_name; + my $cmp_cs_vers; + my $cmp_cs_name; + if($mapper) { + $asm_cs = $mapper->assembled_CoordSystem(); + $cmp_cs = $mapper->component_CoordSystem(); + $asm_cs_name = $asm_cs->name(); + $asm_cs_vers = $asm_cs->version(); + $cmp_cs_name = $cmp_cs->name(); + $cmp_cs_vers = $cmp_cs->version(); + } + + my $dest_slice_start; + my $dest_slice_end; + my $dest_slice_strand; + my $dest_slice_length; + my $dest_slice_sr_name; + my $dest_slice_sr_id; + if($dest_slice) { + $dest_slice_start = $dest_slice->start(); + $dest_slice_end = $dest_slice->end(); + $dest_slice_strand = $dest_slice->strand(); + $dest_slice_length = $dest_slice->length(); + $dest_slice_sr_name = $dest_slice->seq_region_name(); + $dest_slice_sr_id = $dest_slice->get_seq_region_id(); + } + + FEATURE: while($sth->fetch()) { + + #get the analysis object + my $analysis = $analysis_hash{$analysis_id} ||= + $aa->fetch_by_dbID($analysis_id); + #need to get the internal_seq_region, if present + $seq_region_id = $self->get_seq_region_id_internal($seq_region_id); + my $slice = $slice_hash{"ID:".$seq_region_id}; + + if(!$slice) { + $slice = $sa->fetch_by_seq_region_id($seq_region_id); + $slice_hash{"ID:".$seq_region_id} = $slice; + $sr_name_hash{$seq_region_id} = $slice->seq_region_name(); + $sr_cs_hash{$seq_region_id} = $slice->coord_system(); + } + + my $sr_name = $sr_name_hash{$seq_region_id}; + my $sr_cs = $sr_cs_hash{$seq_region_id}; + # + # remap the feature coordinates to another coord system + # if a mapper was provided + # + if($mapper) { + + if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) { + ( $seq_region_id, $seq_region_start, + $seq_region_end, $seq_region_strand ) + = + $mapper->map( $sr_name, $seq_region_start, $seq_region_end, + $seq_region_strand, $sr_cs, 1, $dest_slice); + + } else { + + ( $seq_region_id, $seq_region_start, + $seq_region_end, $seq_region_strand ) + = + $mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end, + $seq_region_strand, $sr_cs ); + } + + #skip features that map to gaps or coord system boundaries + next FEATURE if(!defined($seq_region_id)); + + #get a slice in the coord system we just mapped to +# if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) { + $slice = $slice_hash{"ID:".$seq_region_id} ||= + $sa->fetch_by_seq_region_id($seq_region_id); +# } else { +# $slice = $slice_hash{"NAME:$sr_name:$asm_cs_name:$asm_cs_vers"} ||= +# $sa->fetch_by_region($asm_cs_name, $sr_name, undef, undef, undef, +# $asm_cs_vers); +# } + } + + # + # If a destination slice was provided convert the coords + # If the dest_slice starts at 1 and is foward strand, nothing needs doing + # + if($dest_slice) { + if($dest_slice_start != 1 || $dest_slice_strand != 1) { + if($dest_slice_strand == 1) { + $seq_region_start = $seq_region_start - $dest_slice_start + 1; + $seq_region_end = $seq_region_end - $dest_slice_start + 1; + } else { + my $tmp_seq_region_start = $seq_region_start; + $seq_region_start = $dest_slice_end - $seq_region_end + 1; + $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1; + $seq_region_strand *= -1; + } + } + + #throw away features off the end of the requested slice + if($seq_region_end < 1 || $seq_region_start > $dest_slice_length || + ( $dest_slice_sr_id ne $seq_region_id )) { + next FEATURE; + } + + + $slice = $dest_slice; + } + + # Finally, create the new PredictionTranscript. + push( @ptranscripts, + $self->_create_feature('Bio::EnsEMBL::PredictionTranscript', { + '-start' => $seq_region_start, + '-end' => $seq_region_end, + '-strand' => $seq_region_strand, + '-adaptor' => $self, + '-slice' => $slice, + '-analysis' => $analysis, + '-dbID' => $prediction_transcript_id, + '-display_label' => $display_label + } ) ); + + } + + return \@ptranscripts; +} + + + +=head2 store + + Arg [1] : list of Bio::EnsEMBL::PredictionTranscript @pre_transcripts + Example : $prediction_transcript_adaptor->store(@pre_transcripts); + Description: Stores a list of given prediction transcripts in database. + Puts dbID and Adaptor into each object stored object. + Returntype : none + Exceptions : on wrong argument type + Caller : general + Status : Stable + +=cut + +sub store { + my ( $self, @pre_transcripts ) = @_; + + my $ptstore_sth = $self->prepare + (qq{INSERT INTO prediction_transcript (seq_region_id, seq_region_start, + seq_region_end, seq_region_strand, + analysis_id, display_label) + VALUES( ?, ?, ?, ?, ?, ?)}); + + my $ptupdate_sth = $self->prepare + (qq{UPDATE prediction_transcript SET display_label = ? + WHERE prediction_transcript_id = ?}); + + my $db = $self->db(); + my $analysis_adaptor = $db->get_AnalysisAdaptor(); + my $pexon_adaptor = $db->get_PredictionExonAdaptor(); + + FEATURE: foreach my $pt (@pre_transcripts) { + if(!ref($pt) || !$pt->isa('Bio::EnsEMBL::PredictionTranscript')) { + throw('Expected PredictionTranscript argument not [' . ref($pt).']'); + } + + #skip prediction transcripts that have already been stored + if($pt->is_stored($db)) { + warning('Not storing already stored prediction transcript '. $pt->dbID); + next FEATURE; + } + + #get analysis and store it if it is not in the db + my $analysis = $pt->analysis(); + if(!$analysis) { + throw('Prediction transcript must have analysis to be stored.'); + } + if(!$analysis->is_stored($db)) { + $analysis_adaptor->store($analysis); + } + + #ensure that the transcript coordinates are correct, they may not be, + #if somebody has done some exon coordinate juggling and not recalculated + #the transcript coords. + $pt->recalculate_coordinates(); + + my $original = $pt; + my $seq_region_id; + ($pt, $seq_region_id) = $self->_pre_store($pt); + + #store the prediction transcript + $ptstore_sth->bind_param(1,$seq_region_id,SQL_INTEGER); + $ptstore_sth->bind_param(2,$pt->start,SQL_INTEGER); + $ptstore_sth->bind_param(3,$pt->end,SQL_INTEGER); + $ptstore_sth->bind_param(4,$pt->strand,SQL_TINYINT); + $ptstore_sth->bind_param(5,$analysis->dbID,SQL_INTEGER); + $ptstore_sth->bind_param(6,$pt->display_label,SQL_VARCHAR); + + $ptstore_sth->execute(); + + my $pt_id = $ptstore_sth->{'mysql_insertid'}; + $original->dbID($pt_id); + $original->adaptor($self); + + #store the exons + my $rank = 1; + foreach my $pexon (@{$original->get_all_Exons}) { + $pexon_adaptor->store($pexon, $pt_id, $rank++); + } + + # if a display label was not defined autogenerate one + if(!defined($pt->display_label())) { + my $zeros = '0' x (11 - length($pt_id)); + my $display_label = uc($analysis->logic_name()) . $zeros . $pt_id; + $ptupdate_sth->bind_param(1,$display_label,SQL_VARCHAR); + $ptupdate_sth->bind_param(2,$pt_id,SQL_INTEGER); + $ptupdate_sth->execute(); + $original->display_label($display_label); + } + } +} + + + +=head2 remove + + Arg [1] : Bio::EnsEMBL::PredictionTranscript $pt + Example : $prediction_transcript_adaptor->remove($pt); + Description: removes given prediction transcript $pt from database. + Returntype : none + Exceptions : throws if argument not a Bio::EnsEMBL::PredictionTranscript + Caller : general + Status : Stable + +=cut + +sub remove { + my $self = shift; + my $pre_trans = shift; + + if(!ref($pre_trans)||!$pre_trans->isa('Bio::EnsEMBL::PredictionTranscript')){ + throw('Expected PredictionTranscript argument.'); + } + + if(!$pre_trans->is_stored($self->db())) { + warning('PredictionTranscript is not stored in this DB - not removing.'); + return; + } + + #remove all associated prediction exons + my $pexon_adaptor = $self->db()->get_PredictionExonAdaptor(); + foreach my $pexon (@{$pre_trans->get_all_Exons}) { + $pexon_adaptor->remove($pexon); + } + + #remove the prediction transcript + my $sth = $self->prepare( "DELETE FROM prediction_transcript + WHERE prediction_transcript_id = ?" ); + $sth->bind_param(1,$pre_trans->dbID,SQL_INTEGER); + $sth->execute(); + + #unset the adaptor and internal id + $pre_trans->dbID(undef); + $pre_trans->adaptor(undef); +} + + +=head2 list_dbIDs + + Arg [1] : none + Example : @feature_ids = @{$prediction_transcript_adaptor->list_dbIDs()}; + Description: Gets an array of internal ids for all prediction transcript + features in the current db + Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region. + Returntype : list of ints + Exceptions : none + Caller : ? + Status : Stable + +=cut + +sub list_dbIDs { + my ($self, $ordered) = @_; + + return $self->_list_dbIDs("prediction_transcript", undef, $ordered); +} + +1;
