Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/DBSQL/DnaAlignFeatureAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/DBSQL/DnaAlignFeatureAdaptor.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,583 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=cut + +=head1 NAME + +Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor - Adaptor for DnaAlignFeatures + +=head1 SYNOPSIS + + $dafa = $registry->get_adaptor( 'Human', 'Core', 'DnaAlignFeature' ); + + my @features = @{ $dafa->fetch_all_by_Slice($slice) }; + + $dafa->store(@features); + +=head1 DESCRIPTION + +This is an adaptor responsible for the retrieval and storage of +DnaDnaAlignFeatures from the database. This adaptor inherits most of its +functionality from the BaseAlignFeatureAdaptor and BaseFeatureAdaptor +superclasses. + +=head1 METHODS + +=cut + + +package Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor; +use vars qw(@ISA); +use strict; +use Bio::EnsEMBL::DnaDnaAlignFeature; +use Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor; +use Bio::EnsEMBL::Utils::Exception qw(throw warning); + +@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor); + + +=head2 _tables + + Args : none + Example : @tabs = $self->_tables + Description: PROTECTED implementation of the abstract method inherited from + BaseFeatureAdaptor. Returns list of [tablename, alias] pairs + Returntype : list of listrefs of strings + Exceptions : none + Caller : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::generic_fetch + Status : Stable + +=cut + +sub _tables { + my $self = shift; + + return (['dna_align_feature', 'daf'],['external_db','exdb']); +} + + +sub _left_join{ + return (['external_db',"exdb.external_db_id = daf.external_db_id"]); +} + +=head2 _columns + + Args : none + Example : @columns = $self->_columns + Description: PROTECTED implementation of abstract superclass method. + Returns a list of columns that are needed for object creation. + Returntype : list of strings + Exceptions : none + Caller : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::generic_fetch + Status : Stable + +=cut + +sub _columns { + my $self = shift; + + #warning, implementation of _objs_from_sth method depends on order of list + return qw(daf.dna_align_feature_id + daf.seq_region_id + daf.analysis_id + daf.seq_region_start + daf.seq_region_end + daf.seq_region_strand + daf.hit_start + daf.hit_end + daf.hit_name + daf.hit_strand + daf.cigar_line + daf.evalue + daf.perc_ident + daf.score + daf.external_db_id + daf.hcoverage + daf.external_data + daf.pair_dna_align_feature_id + exdb.db_name + exdb.db_display_name); +} + + +=head2 store + + Arg [1] : list of Bio::EnsEMBL::DnaAlignFeatures @feats + the features to store in the database + Example : $dna_align_feature_adaptor->store(@features); + Description: Stores a list of DnaAlignFeatures in the database + Returntype : none + Exceptions : throw if any of the provided features cannot be stored + which may occur if: + * The feature does not have an associate Slice + * The feature does not have an associated analysis + * The Slice the feature is associated with is on a seq_region + unknown to this database + A warning is given if: + * The feature has already been stored in this db + Caller : Pipeline + Status : Stable + +=cut + +sub store { + my ( $self, @feats ) = @_; + + throw("Must call store with features") if ( scalar(@feats) == 0 ); + + my @tabs = $self->_tables; + my ($tablename) = @{ $tabs[0] }; + + my $db = $self->db(); + my $analysis_adaptor = $db->get_AnalysisAdaptor(); + + my $sth = $self->prepare( + "INSERT INTO $tablename (seq_region_id, seq_region_start, + seq_region_end, seq_region_strand, + hit_start, hit_end, hit_strand, hit_name, + cigar_line, analysis_id, score, evalue, + perc_ident, external_db_id, hcoverage, + pair_dna_align_feature_id) + VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)" # 16 arguments + ); + +FEATURE: + foreach my $feat (@feats) { + if ( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaDnaAlignFeature") ) + { + throw("feature must be a Bio::EnsEMBL::DnaDnaAlignFeature," + . " not a [" + . ref($feat) + . "]." ); + } + + if ( $feat->is_stored($db) ) { + warning( "DnaDnaAlignFeature [" + . $feat->dbID() + . "] is already stored in this database." ); + next FEATURE; + } + + my $hstart = $feat->hstart(); + my $hend = $feat->hend(); + my $hstrand = $feat->hstrand(); + $self->_check_start_end_strand( $hstart, $hend, $hstrand ); + + my $cigar_string = $feat->cigar_string(); + if ( !$cigar_string ) { + $cigar_string = $feat->length() . 'M'; + warning( "DnaDnaAlignFeature does not define a cigar_string.\n" + . "Assuming ungapped block with cigar_line=$cigar_string ." ); + } + + my $hseqname = $feat->hseqname(); + if ( !$hseqname ) { + throw("DnaDnaAlignFeature must define an hseqname."); + } + + if ( !defined( $feat->analysis ) ) { + throw( + "An analysis must be attached to the features to be stored."); + } + + #store the analysis if it has not been stored yet + if ( !$feat->analysis->is_stored($db) ) { + $analysis_adaptor->store( $feat->analysis() ); + } + + my $original = $feat; + my $seq_region_id; + ( $feat, $seq_region_id ) = $self->_pre_store($feat); + + $sth->bind_param( 1, $seq_region_id, SQL_INTEGER ); + $sth->bind_param( 2, $feat->start, SQL_INTEGER ); + $sth->bind_param( 3, $feat->end, SQL_INTEGER ); + $sth->bind_param( 4, $feat->strand, SQL_TINYINT ); + $sth->bind_param( 5, $hstart, SQL_INTEGER ); + $sth->bind_param( 6, $hend, SQL_INTEGER ); + $sth->bind_param( 7, $hstrand, SQL_TINYINT ); + $sth->bind_param( 8, $hseqname, SQL_VARCHAR ); + $sth->bind_param( 9, $cigar_string, SQL_LONGVARCHAR ); + $sth->bind_param( 10, $feat->analysis->dbID, SQL_INTEGER ); + $sth->bind_param( 11, $feat->score, SQL_DOUBLE ); + $sth->bind_param( 12, $feat->p_value, SQL_DOUBLE ); + $sth->bind_param( 13, $feat->percent_id, SQL_FLOAT ); + $sth->bind_param( 14, $feat->external_db_id, SQL_INTEGER ); + $sth->bind_param( 15, $feat->hcoverage, SQL_DOUBLE ); + $sth->bind_param( 16, $feat->pair_dna_align_feature_id, + SQL_INTEGER ); + + $sth->execute(); + + $original->dbID( $sth->{'mysql_insertid'} ); + $original->adaptor($self); + } ## end foreach my $feat (@feats) + + $sth->finish(); +} ## end sub store + + +sub save { + my ($self, $features) = @_; + + my @feats = @$features; + throw("Must call store with features") if( scalar(@feats) == 0 ); + + my @tabs = $self->_tables; + my ($tablename) = @{$tabs[0]}; + + my $db = $self->db(); + my $analysis_adaptor = $db->get_AnalysisAdaptor(); + + my $sql = qq{INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end, seq_region_strand, hit_start, hit_end, hit_strand, hit_name, cigar_line, analysis_id, score, evalue, perc_ident, external_db_id, hcoverage, pair_dna_align_feature_id, external_data) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)}; + + my %analyses = (); + + my $sth = $self->prepare($sql); + + FEATURE: foreach my $feat ( @feats ) { + if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaDnaAlignFeature") ) { + throw("feature must be a Bio::EnsEMBL::DnaDnaAlignFeature," + . " not a [".ref($feat)."]."); + } + + if($feat->is_stored($db)) { + warning("DnaDnaAlignFeature [".$feat->dbID."] is already stored" . + " in this database."); + next FEATURE; + } + + my $hstart = $feat->hstart || 0; # defined $feat->hstart ? $feat->hstart : $feat->start ; + my $hend = $feat->hend || 0; # defined $feat->hend ? $feat->hend : $feat->end; + my $hstrand = $feat->hstrand|| 0; # defined $feat->hstrand ? $feat->hstrand : $feat->strand; + if( $hstart && $hend ) { + if($hend < $hstart) { + throw("Invalid Feature start/end [$hstart/$hend]. Start must be less than or equal to end."); + } + } + my $cigar_string = $feat->cigar_string(); + if(!$cigar_string) { + $cigar_string = $feat->length() . 'M'; + warning("DnaDnaAlignFeature does not define a cigar_string.\n" . + "Assuming ungapped block with cigar_line=$cigar_string ."); + } + + my $hseqname = $feat->hseqname(); + if(!$hseqname) { + throw("DnaDnaAlignFeature must define an hseqname."); + } + + if(!defined($feat->analysis)) { + throw("An analysis must be attached to the features to be stored."); + } + + #store the analysis if it has not been stored yet + if(!$feat->analysis->is_stored($db)) { + $analysis_adaptor->store($feat->analysis()); + } + + $analyses{ $feat->analysis->dbID }++; + + my $original = $feat; + my $seq_region_id; + ($feat, $seq_region_id) = $self->_pre_store_userdata($feat); + + my $extra_data = $feat->extra_data ? $self->dump_data($feat->extra_data) : ''; + + $sth->bind_param(1,$seq_region_id,SQL_INTEGER); + $sth->bind_param(2,$feat->start,SQL_INTEGER); + $sth->bind_param(3,$feat->end,SQL_INTEGER); + $sth->bind_param(4,$feat->strand,SQL_TINYINT); + $sth->bind_param(5,$hstart,SQL_INTEGER); + $sth->bind_param(6,$hend,SQL_INTEGER); + $sth->bind_param(7,$hstrand,SQL_TINYINT); + $sth->bind_param(8,$hseqname,SQL_VARCHAR); + $sth->bind_param(9,$cigar_string,SQL_LONGVARCHAR); + $sth->bind_param(10,$feat->analysis->dbID,SQL_INTEGER); + $sth->bind_param(11,$feat->score,SQL_DOUBLE); +# $sth->bind_param(11,$feat->score); # if the above statement does not work it means you need to upgrade DBD::mysql, meantime you can replace it with this line + $sth->bind_param(12,$feat->p_value,SQL_DOUBLE); + $sth->bind_param(13,$feat->percent_id,SQL_FLOAT); + $sth->bind_param(14,$feat->external_db_id,SQL_INTEGER); + $sth->bind_param(15,$feat->hcoverage,SQL_DOUBLE); + $sth->bind_param(16,$feat->pair_dna_align_feature_id,SQL_INTEGER); + $sth->bind_param(17,$extra_data,SQL_LONGVARCHAR); + + + $sth->execute(); + $original->dbID($sth->{'mysql_insertid'}); + $original->adaptor($self); + } + + $sth->finish(); + +## js5 hack to update meta_coord table... + if( keys %analyses ) { + + my $sth = $self->prepare( 'select sr.coord_system_id, max(daf.seq_region_end-daf.seq_region_start) from seq_region as sr, dna_align_feature as daf where daf.seq_region_id=sr.seq_region_id and analysis_id in ('.join(',',keys %analyses).') group by coord_system_id' ); + $sth->execute; + + foreach( @{ $sth->fetchall_arrayref } ) { + my $sth2 = $self->prepare( qq(insert ignore into meta_coord values("dna_align_feature",$_->[0],$_->[1])) ); + $sth2->execute; + $sth2->finish; + + $sth2 = $self->prepare( qq(update meta_coord set max_length = $_->[1] where coord_system_id = $_->[0] and table_name="dna_align_feature" and max_length < $_->[1]) ); + $sth2->execute; + $sth2->finish; + } + + $sth->finish; + } + +} + + +=head2 _objs_from_sth + + Arg [1] : DBI statement handle $sth + an exectuted DBI statement handle generated by selecting + the columns specified by _columns() from the table specified + by _table() + Example : @dna_dna_align_feats = $self->_obj_from_hashref + Description: PROTECTED implementation of superclass abstract method. + Creates DnaDnaAlignFeature objects from a DBI hashref + Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures + Exceptions : none + Caller : Bio::EnsEMBL::BaseFeatureAdaptor::generic_fetch + Status : Stable + +=cut + +sub _objs_from_sth { + my ( $self, $sth, $mapper, $dest_slice ) = @_; + + # + # This code is ugly because an attempt has been made to remove as many + # function calls as possible for speed purposes. Thus many caches and + # a fair bit of gymnastics is used. + # + + # In case of userdata we need the features on the dest_slice. In case + # of get_all_supporting_features dest_slice is not provided. + my $sa = ( $dest_slice + ? $dest_slice->adaptor() + : $self->db()->get_SliceAdaptor() ); + my $aa = $self->db->get_AnalysisAdaptor(); + + my @features; + my %analysis_hash; + my %slice_hash; + my %sr_name_hash; + my %sr_cs_hash; + + my ( $dna_align_feature_id, $seq_region_id, + $analysis_id, $seq_region_start, + $seq_region_end, $seq_region_strand, + $hit_start, $hit_end, + $hit_name, $hit_strand, + $cigar_line, $evalue, + $perc_ident, $score, + $external_db_id, $hcoverage, + $extra_data, $pair_dna_align_feature_id, + $external_db_name, $external_display_db_name ); + + $sth->bind_columns( \( $dna_align_feature_id, $seq_region_id, + $analysis_id, $seq_region_start, + $seq_region_end, $seq_region_strand, + $hit_start, $hit_end, + $hit_name, $hit_strand, + $cigar_line, $evalue, + $perc_ident, $score, + $external_db_id, $hcoverage, + $extra_data, $pair_dna_align_feature_id, + $external_db_name, $external_display_db_name ) + ); + + my $asm_cs; + my $cmp_cs; + my $asm_cs_vers; + my $asm_cs_name; + my $cmp_cs_vers; + my $cmp_cs_name; + + if ( defined($mapper) ) { + $asm_cs = $mapper->assembled_CoordSystem(); + $cmp_cs = $mapper->component_CoordSystem(); + $asm_cs_name = $asm_cs->name(); + $asm_cs_vers = $asm_cs->version(); + $cmp_cs_name = $cmp_cs->name(); + $cmp_cs_vers = $cmp_cs->version(); + } + + my $dest_slice_start; + my $dest_slice_end; + my $dest_slice_strand; + my $dest_slice_length; + my $dest_slice_sr_name; + my $dest_slice_seq_region_id; + + if ( defined($dest_slice) ) { + $dest_slice_start = $dest_slice->start(); + $dest_slice_end = $dest_slice->end(); + $dest_slice_strand = $dest_slice->strand(); + $dest_slice_length = $dest_slice->length(); + $dest_slice_sr_name = $dest_slice->seq_region_name(); + $dest_slice_seq_region_id = $dest_slice->get_seq_region_id(); + } + +FEATURE: + while ( $sth->fetch() ) { + # Get the analysis object. + my $analysis = $analysis_hash{$analysis_id} ||= + $aa->fetch_by_dbID($analysis_id); + + # Get the slice object. + my $slice = $slice_hash{ "ID:" . $seq_region_id }; + + if ( !defined($slice) ) { + $slice = $sa->fetch_by_seq_region_id($seq_region_id); + if ( defined($slice) ) { + $slice_hash{ "ID:" . $seq_region_id } = $slice; + $sr_name_hash{$seq_region_id} = $slice->seq_region_name(); + $sr_cs_hash{$seq_region_id} = $slice->coord_system(); + } + } + + my $sr_name = $sr_name_hash{$seq_region_id}; + my $sr_cs = $sr_cs_hash{$seq_region_id}; + + # Remap the feature coordinates to another coord system + # if a mapper was provided. + if ( defined($mapper) ) { + + if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) { + ( $seq_region_id, $seq_region_start, + $seq_region_end, $seq_region_strand ) + = + $mapper->map( $sr_name, $seq_region_start, $seq_region_end, + $seq_region_strand, $sr_cs, 1, $dest_slice); + + } else { + + ( $seq_region_id, $seq_region_start, + $seq_region_end, $seq_region_strand ) + = + $mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end, + $seq_region_strand, $sr_cs ); + } + + # Skip features that map to gaps or coord system boundaries. + if ( !defined($seq_region_id) ) { next FEATURE } + + # Get a slice in the coord system we just mapped to. + if ( $asm_cs == $sr_cs + || ( $cmp_cs != $sr_cs && $asm_cs->equals($sr_cs) ) ) + { + $slice = $slice_hash{ "ID:" . $seq_region_id } ||= + $sa->fetch_by_seq_region_id($seq_region_id); + } else { + $slice = $slice_hash{ "ID:" . $seq_region_id } ||= + $sa->fetch_by_seq_region_id($seq_region_id); + } + } + + # If a destination slice was provided, convert the coords. If the + # dest_slice starts at 1 and is forward strand, nothing needs doing. + if ( defined($dest_slice) ) { + if ( $dest_slice_start != 1 || $dest_slice_strand != 1 ) { + if ( $dest_slice_strand == 1 ) { + $seq_region_start = $seq_region_start - $dest_slice_start + 1; + $seq_region_end = $seq_region_end - $dest_slice_start + 1; + } else { + my $tmp_seq_region_start = $seq_region_start; + $seq_region_start = $dest_slice_end - $seq_region_end + 1; + $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1; + $seq_region_strand = -$seq_region_strand; + } + + # Throw away features off the end of the requested slice. + if ( $seq_region_end < 1 + || $seq_region_start > $dest_slice_length + || ( $dest_slice_seq_region_id ne $seq_region_id ) ) + { + next FEATURE; + } + } + $slice = $dest_slice; + } + + # Finally, create the new DnaAlignFeature. + push( @features, + $self->_create_feature_fast( + 'Bio::EnsEMBL::DnaDnaAlignFeature', { + 'slice' => $slice, + 'start' => $seq_region_start, + 'end' => $seq_region_end, + 'strand' => $seq_region_strand, + 'hseqname' => $hit_name, + 'hstart' => $hit_start, + 'hend' => $hit_end, + 'hstrand' => $hit_strand, + 'score' => $score, + 'p_value' => $evalue, + 'percent_id' => $perc_ident, + 'cigar_string' => $cigar_line, + 'analysis' => $analysis, + 'adaptor' => $self, + 'dbID' => $dna_align_feature_id, + 'external_db_id' => $external_db_id, + 'hcoverage' => $hcoverage, + 'extra_data' => ( + $extra_data + ? $self->get_dumped_data($extra_data) + : '' ), + 'dbname' => $external_db_name, + 'db_display_name' => $external_display_db_name, + 'pair_dna_align_feature_id' => $pair_dna_align_feature_id + } ) ); + + } ## end while ( $sth->fetch() ) + + return \@features; +} ## end sub _objs_from_sth + +=head2 list_dbIDs + + Arg [1] : none + Example : @feature_ids = @{$dna_align_feature_adaptor->list_dbIDs()}; + Description: Gets an array of internal ids for all dna align features in + the current db + Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region. + Returntype : list of ints + Exceptions : none + Caller : ? + Status : Stable + +=cut + +sub list_dbIDs { + my ($self, $ordered) = @_; + + return $self->_list_dbIDs("dna_align_feature",undef, $ordered); +} + +1; + +