Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/DBSQL/PredictionExonAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/DBSQL/PredictionExonAdaptor.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,468 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=cut + +=head1 NAME + +Bio::EnsEMBL::DBSQL::PredictionExonAdaptor - Performs database interaction for +PredictionExons. + +=head1 SYNOPSIS + + $pea = $database_adaptor->get_PredictionExonAdaptor(); + $pexon = $pea->fetch_by_dbID(); + + my $slice = + $database_adaptor->get_SliceAdaptor->fetch_by_region( 'X', 1, 1e6 ); + + my @pexons = @{ $pea->fetch_all_by_Slice($slice) }; + +=head1 METHODS + +=cut + +package Bio::EnsEMBL::DBSQL::PredictionExonAdaptor; + +use vars qw( @ISA ); +use strict; + + +use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor; +use Bio::EnsEMBL::PredictionExon; +use Bio::EnsEMBL::Utils::Exception qw( warning throw deprecate ); + + +@ISA = qw( Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor ); + + +#_tables +# +# Arg [1] : none +# Example : none +# Description: PROTECTED implementation of superclass abstract method +# returns the names, aliases of the tables to use for queries +# Returntype : list of listrefs of strings +# Exceptions : none +# Caller : internal +# + +sub _tables { + return ([ 'prediction_exon', 'pe' ] ); +} + + + +#_columns +# +# Arg [1] : none +# Example : none +# Description: PROTECTED implementation of superclass abstract method +# returns a list of columns to use for queries +# Returntype : list of strings +# Exceptions : none +# Caller : internal + +sub _columns { + my $self = shift; + + return qw( pe.prediction_exon_id + pe.seq_region_id + pe.seq_region_start + pe.seq_region_end + pe.seq_region_strand + pe.start_phase + pe.score + pe.p_value ); +} + + +# _final_clause +# +# Arg [1] : none +# Example : none +# Description: PROTECTED implementation of superclass abstract method +# returns a default end for the SQL-query (ORDER BY) +# Returntype : string +# Exceptions : none +# Caller : internal + +sub _final_clause { + return "ORDER BY pe.prediction_transcript_id, pe.exon_rank"; +} + + +=head2 fetch_all_by_PredictionTranscript + + Arg [1] : Bio::EnsEMBL::PredcitionTranscript $transcript + Example : none + Description: Retrieves all Exons for the Transcript in 5-3 order + Returntype : listref Bio::EnsEMBL::Exon on Transcript slice + Exceptions : throws if transcript does not have a slice + Caller : Transcript->get_all_Exons() + Status : Stable + +=cut + +sub fetch_all_by_PredictionTranscript { + my ( $self, $transcript ) = @_; + my $constraint = "pe.prediction_transcript_id = ".$transcript->dbID(); + + # use 'keep_all' option to keep exons that are off end of slice + + my $tslice = $transcript->slice(); + my $slice; + + if(!$tslice) { + throw("Transcript must have attached slice to retrieve exons."); + } + + # use a small slice the same size as the prediction transcript + $slice = $self->db->get_SliceAdaptor->fetch_by_Feature($transcript); + + my $exons = $self->fetch_all_by_Slice_constraint($slice, $constraint); + + # remap exon coordinates if necessary + if($slice->name() ne $tslice->name()) { + my @out; + foreach my $ex (@$exons) { + push @out, $ex->transfer($tslice); + } + $exons = \@out; + } + + return $exons; +} + + + +=head2 store + + Arg [1] : Bio::EnsEMBL::PredictionExon $exon + The exon to store in this database + Arg [2] : int $prediction_transcript_id + The internal identifier of the prediction exon that that this + exon is associated with. + Arg [3] : int $rank + The rank of the exon in the transcript (starting at 1) + Example : $pexon_adaptor->store($pexon, 1211, 2); + Description: Stores a PredictionExon in the database + Returntype : none + Exceptions : thrown if exon does not have a slice attached + or if $exon->start, $exon->end, $exon->strand, or $exon->phase + are not defined or if $exon is not a Bio::EnsEMBL::PredictionExon + Caller : general + Status : Stable + +=cut + +sub store { + my ( $self, $pexon, $pt_id, $rank ) = @_; + + if(!ref($pexon) || !$pexon->isa('Bio::EnsEMBL::PredictionExon') ) { + throw("Expected PredictionExon argument"); + } + + throw("Expected PredictionTranscript id argument.") if(!$pt_id); + throw("Expected rank argument.") if(!$rank); + + my $db = $self->db(); + + if($pexon->is_stored($db)) { + warning('PredictionExon is already stored in this DB.'); + return $pexon->dbID(); + } + + if( ! $pexon->start || ! $pexon->end || + ! $pexon->strand || ! defined $pexon->phase ) { + throw("PredictionExon does not have all attributes to store.\n" . + "start, end, strand and phase attributes must be set."); + } + + #maintain reference to original passed-in prediction exon + my $original = $pexon; + my $seq_region_id; + ($pexon, $seq_region_id) = $self->_pre_store($pexon); + + my $sth = $db->dbc->prepare + ("INSERT into prediction_exon (prediction_transcript_id, exon_rank, " . + "seq_region_id, seq_region_start, seq_region_end, " . + "seq_region_strand, start_phase, score, p_value) " . + "VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ? )"); + + $sth->bind_param(1,$pt_id,SQL_INTEGER); + $sth->bind_param(2,$rank,SQL_SMALLINT); + $sth->bind_param(3,$seq_region_id,SQL_INTEGER); + $sth->bind_param(4,$pexon->start,SQL_INTEGER); + $sth->bind_param(5,$pexon->end,SQL_INTEGER); + $sth->bind_param(6,$pexon->strand,SQL_TINYINT); + $sth->bind_param(7,$pexon->phase,SQL_TINYINT); + $sth->bind_param(8,$pexon->score,SQL_DOUBLE); + $sth->bind_param(9,$pexon->p_value,SQL_DOUBLE); + + $sth->execute(); + + my $dbID = $sth->{'mysql_insertid'}; + + #set the adaptor and dbID of the object they passed in + $original->dbID($dbID); + $original->adaptor($self); + + return $dbID; +} + + + +=head2 remove + + Arg [1] : Bio::EnsEMBL::PredictionExon $exon + the exon to remove from the database + Example : $exon_adaptor->remove($exon); + Description: Removes an exon from the database + Returntype : none + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub remove { + my $self = shift; + my $pexon = shift; + + my $db = $self->db(); + + if(!$pexon->is_stored($db)) { + warning('PredictionExon is not in this DB - not removing'); + return undef; + } + + my $sth = $self->prepare( + "DELETE FROM prediction_exon WHERE prediction_exon_id = ?"); + $sth->bind_param( 1, $pexon->dbID, SQL_INTEGER ); + $sth->execute(); + + $pexon->dbID(undef); + $pexon->adaptor(undef); +} + + + +=head2 list_dbIDs + + Arg [1] : none + Example : @exon_ids = @{$exon_adaptor->list_dbIDs()}; + Description: Gets an array of internal ids for all exons in the current db + Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region. + Returntype : list of ints + Exceptions : none + Caller : ? + Status : Stable + +=cut + +sub list_dbIDs { + my ($self,$ordered) = @_; + + return $self->_list_dbIDs("prediction_exon",undef, $ordered); +} + + + +#_objs_from_sth + +# Arg [1] : Hashreference $hashref +# Example : none +# Description: PROTECTED implementation of abstract superclass method. +# responsible for the creation of Genes +# Returntype : listref of Bio::EnsEMBL::Genes in target coordinate system +# Exceptions : none +# Caller : internal +# + +sub _objs_from_sth { + my ($self, $sth, $mapper, $dest_slice) = @_; + + # + # This code is ugly because an attempt has been made to remove as many + # function calls as possible for speed purposes. Thus many caches and + # a fair bit of gymnastics is used. + # + my $sa = $self->db()->get_SliceAdaptor(); + + my @exons; + my %slice_hash; + my %sr_name_hash; + my %sr_cs_hash; + + my($prediction_exon_id,$seq_region_id, + $seq_region_start, $seq_region_end, $seq_region_strand, + $start_phase, $score, $p_value); + + $sth->bind_columns(\$prediction_exon_id,\$seq_region_id, + \$seq_region_start, \$seq_region_end, \$seq_region_strand, + \$start_phase, \$score, \$p_value); + + my $asm_cs; + my $cmp_cs; + my $asm_cs_vers; + my $asm_cs_name; + my $cmp_cs_vers; + my $cmp_cs_name; + if($mapper) { + $asm_cs = $mapper->assembled_CoordSystem(); + $cmp_cs = $mapper->component_CoordSystem(); + $asm_cs_name = $asm_cs->name(); + $asm_cs_vers = $asm_cs->version(); + $cmp_cs_name = $cmp_cs->name(); + $cmp_cs_vers = $cmp_cs->version(); + } + + my $dest_slice_start; + my $dest_slice_end; + my $dest_slice_strand; + my $dest_slice_length; + my $dest_slice_cs; + my $asma; + my $dest_slice_sr_name; + my $dest_slice_sr_id; + + if($dest_slice) { + $dest_slice_start = $dest_slice->start(); + $dest_slice_end = $dest_slice->end(); + $dest_slice_strand = $dest_slice->strand(); + $dest_slice_length = $dest_slice->length(); + $dest_slice_cs = $dest_slice->coord_system; + $dest_slice_sr_name = $dest_slice->seq_region_name(); + $dest_slice_sr_id = $dest_slice->get_seq_region_id(); + $asma = $self->db->get_AssemblyMapperAdaptor(); + } + + FEATURE: while($sth->fetch()) { + #need to get the internal_seq_region, if present + $seq_region_id = $self->get_seq_region_id_internal($seq_region_id); + my $slice = $slice_hash{"ID:".$seq_region_id}; + my $dest_mapper = $mapper; + + + if(!$slice) { + $slice = $sa->fetch_by_seq_region_id($seq_region_id); + $slice_hash{"ID:".$seq_region_id} = $slice; + $sr_name_hash{$seq_region_id} = $slice->seq_region_name(); + $sr_cs_hash{$seq_region_id} = $slice->coord_system(); + } + + #obtain a mapper if none was defined, but a dest_seq_region was + if(!$dest_mapper && $dest_slice && + !$dest_slice_cs->equals($slice->coord_system)) { + $dest_mapper = $asma->fetch_by_CoordSystems($dest_slice_cs, + $slice->coord_system); + $asm_cs = $dest_mapper->assembled_CoordSystem(); + $cmp_cs = $dest_mapper->component_CoordSystem(); + $asm_cs_name = $asm_cs->name(); + $asm_cs_vers = $asm_cs->version(); + $cmp_cs_name = $cmp_cs->name(); + $cmp_cs_vers = $cmp_cs->version(); + } + + my $sr_name = $sr_name_hash{$seq_region_id}; + my $sr_cs = $sr_cs_hash{$seq_region_id}; + + # + # remap the feature coordinates to another coord system + # if a mapper was provided + # + if($dest_mapper) { + + if (defined $dest_slice && $dest_mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) { + ( $seq_region_id, $seq_region_start, + $seq_region_end, $seq_region_strand ) + = + $dest_mapper->map( $sr_name, $seq_region_start, $seq_region_end, + $seq_region_strand, $sr_cs, 1, $dest_slice); + + } else { + + ( $seq_region_id, $seq_region_start, + $seq_region_end, $seq_region_strand ) + = $dest_mapper->fastmap( $sr_name, $seq_region_start, + $seq_region_end, $seq_region_strand, + $sr_cs ); + } + + #skip features that map to gaps or coord system boundaries + next FEATURE if(!defined($seq_region_id)); + + #get a slice in the coord system we just mapped to +# if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) { + $slice = $slice_hash{"ID:".$seq_region_id} ||= + $sa->fetch_by_seq_region_id($seq_region_id); +# } else { +# $slice = $slice_hash{"NAME:$sr_name:$asm_cs_name:$asm_cs_vers"} ||= +# $sa->fetch_by_region($asm_cs_name, $sr_name, undef, undef, undef, +# $asm_cs_vers); +# } + } + + # + # If a destination slice was provided convert the coords + # If the dest_slice starts at 1 and is foward strand, nothing needs doing + # + if($dest_slice) { + if($dest_slice_start != 1 || $dest_slice_strand != 1) { + if($dest_slice_strand == 1) { + $seq_region_start = $seq_region_start - $dest_slice_start + 1; + $seq_region_end = $seq_region_end - $dest_slice_start + 1; + } else { + my $tmp_seq_region_start = $seq_region_start; + $seq_region_start = $dest_slice_end - $seq_region_end + 1; + $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1; + $seq_region_strand *= -1; + } + } + + #throw away features off the end of the requested slice + if($seq_region_end < 1 || $seq_region_start > $dest_slice_length || + ( $dest_slice_sr_id ne $seq_region_id )) { + next FEATURE; + } + + $slice = $dest_slice; + } + + # Finally, create the new PredictionExon. + push( @exons, + $self->_create_feature( 'Bio::EnsEMBL::PredictionExon', { + '-start' => $seq_region_start, + '-end' => $seq_region_end, + '-strand' => $seq_region_strand, + '-adaptor' => $self, + '-slice' => $slice, + '-dbID' => $prediction_exon_id, + '-phase' => $start_phase, + '-score' => $score, + '-p_value' => $p_value + } ) ); + + } + + return \@exons; +} + + +1;