Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/DBSQL/SimpleFeatureAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/DBSQL/SimpleFeatureAdaptor.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,366 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=cut + +=head1 NAME + +Bio::EnsEMBL::DBSQL::SimpleFeatureAdaptor + +=head1 SYNOPSIS + + my $reg = 'Bio::EnsEMBL::Registry'; + + $reg-> + load_registry_from_db( ... + + my $sfa = + $reg->get_adaptor('homo sapiens', 'core', 'SimpleFeature'); + + print ref($sfa), "\n"; + + my $sf_aref = + $sfa->fetch_all; + + print scalar @$sf_aref, "\n"; + +=head1 DESCRIPTION + +Simple Feature Adaptor - database access for simple features + +=head1 METHODS + +=cut + +package Bio::EnsEMBL::DBSQL::SimpleFeatureAdaptor; +use vars qw(@ISA); +use strict; + +use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor; +use Bio::EnsEMBL::SimpleFeature; +use Bio::EnsEMBL::Utils::Exception qw(throw warning); + +@ISA = qw(Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor); + + +=head2 store + + Arg [1] : list of Bio::EnsEMBL::SimpleFeatures @sf + the simple features to store in the database + Example : $simple_feature_adaptor->store(@simple_feats); + Description: Stores a list of simple feature objects in the database + Returntype : none + Exceptions : thrown if @sf is not defined, if any of the features do not + have an attached slice. + or if any elements of @sf are not Bio::EnsEMBL::SimpleFeatures + Caller : general + Status : Stable + +=cut + +sub store{ + my ($self,@sf) = @_; + + if( scalar(@sf) == 0 ) { + throw("Must call store with list of SimpleFeatures"); + } + + my $sth = $self->prepare + ("INSERT INTO simple_feature (seq_region_id, seq_region_start, " . + "seq_region_end, seq_region_strand, " . + "display_label, analysis_id, score) " . + "VALUES (?,?,?,?,?,?,?)"); + + my $db = $self->db(); + my $analysis_adaptor = $db->get_AnalysisAdaptor(); + + FEATURE: foreach my $sf ( @sf ) { + + if( !ref $sf || !$sf->isa("Bio::EnsEMBL::SimpleFeature") ) { + throw("SimpleFeature must be an Ensembl SimpleFeature, " . + "not a [".ref($sf)."]"); + } + + if($sf->is_stored($db)) { + warning("SimpleFeature [".$sf->dbID."] is already stored" . + " in this database."); + next FEATURE; + } + + if(!defined($sf->analysis)) { + throw("An analysis must be attached to the features to be stored."); + } + + #store the analysis if it has not been stored yet + if(!$sf->analysis->is_stored($db)) { + $analysis_adaptor->store($sf->analysis()); + } + + my $original = $sf; + my $seq_region_id; + ($sf, $seq_region_id) = $self->_pre_store($sf); + + $sth->bind_param(1,$seq_region_id,SQL_INTEGER); + $sth->bind_param(2,$sf->start,SQL_INTEGER); + $sth->bind_param(3,$sf->end,SQL_INTEGER); + $sth->bind_param(4,$sf->strand,SQL_TINYINT); + $sth->bind_param(5,$sf->display_label,SQL_VARCHAR); + $sth->bind_param(6,$sf->analysis->dbID,SQL_INTEGER); + $sth->bind_param(7,$sf->score,SQL_DOUBLE); + + $sth->execute(); + + $original->dbID($sth->{'mysql_insertid'}); + $original->adaptor($self); + } +} + + +=head2 _tables + + Arg [1] : none + Example : none + Description: PROTECTED implementation of superclass abstract method + returns the names, aliases of the tables to use for queries + Returntype : list of listrefs of strings + Exceptions : none + Caller : internal + Status : Stable + +=cut + +sub _tables { + my $self = shift; + + return ['simple_feature', 'sf']; +} + + +=head2 _columns + + Arg [1] : none + Example : none + Description: PROTECTED implementation of superclass abstract method + returns a list of columns to use for queries + Returntype : list of strings + Exceptions : none + Caller : internal + Status : Stable + +=cut + +sub _columns { + my $self = shift; + + return qw( sf.simple_feature_id + sf.seq_region_id sf.seq_region_start sf.seq_region_end + sf.seq_region_strand sf.display_label sf.analysis_id sf.score ); +} + + +=head2 _objs_from_sth + + Arg [1] : hash reference $hashref + Example : none + Description: PROTECTED implementation of superclass abstract method. + creates SimpleFeatures from an executed DBI statement handle. + Returntype : list reference to Bio::EnsEMBL::SimpleFeature objects + Exceptions : none + Caller : internal + Status : Stable + +=cut + +sub _objs_from_sth { + my ($self, $sth, $mapper, $dest_slice) = @_; + + # + # This code is ugly because an attempt has been made to remove as many + # function calls as possible for speed purposes. Thus many caches and + # a fair bit of gymnastics is used. + # + + my $sa = $self->db()->get_SliceAdaptor(); + my $aa = $self->db->get_AnalysisAdaptor(); + + my @features; + my %analysis_hash; + my %slice_hash; + my %sr_name_hash; + my %sr_cs_hash; + + + my($simple_feature_id,$seq_region_id, $seq_region_start, $seq_region_end, + $seq_region_strand, $display_label, $analysis_id, $score); + + $sth->bind_columns(\$simple_feature_id,\$seq_region_id, \$seq_region_start, + \$seq_region_end, \$seq_region_strand, \$display_label, + \$analysis_id, \$score); + + my $asm_cs; + my $cmp_cs; + my $asm_cs_vers; + my $asm_cs_name; + my $cmp_cs_vers; + my $cmp_cs_name; + if($mapper) { + $asm_cs = $mapper->assembled_CoordSystem(); + $cmp_cs = $mapper->component_CoordSystem(); + $asm_cs_name = $asm_cs->name(); + $asm_cs_vers = $asm_cs->version(); + $cmp_cs_name = $cmp_cs->name(); + $cmp_cs_vers = $cmp_cs->version(); + } + + my $dest_slice_start; + my $dest_slice_end; + my $dest_slice_strand; + my $dest_slice_length; + my $dest_slice_sr_name; + my $dest_slice_seq_region_id; + if($dest_slice) { + $dest_slice_start = $dest_slice->start(); + $dest_slice_end = $dest_slice->end(); + $dest_slice_strand = $dest_slice->strand(); + $dest_slice_length = $dest_slice->length(); + $dest_slice_sr_name = $dest_slice->seq_region_name(); + $dest_slice_seq_region_id =$dest_slice->get_seq_region_id(); + } + + my $count =0; + FEATURE: while($sth->fetch()) { + $count++; + #get the analysis object + my $analysis = $analysis_hash{$analysis_id} ||= + $aa->fetch_by_dbID($analysis_id); + + #need to get the internal_seq_region, if present + $seq_region_id = $self->get_seq_region_id_internal($seq_region_id); + #get the slice object + my $slice = $slice_hash{"ID:".$seq_region_id}; + + if(!$slice) { + $slice = $sa->fetch_by_seq_region_id($seq_region_id); + $slice_hash{"ID:".$seq_region_id} = $slice; + $sr_name_hash{$seq_region_id} = $slice->seq_region_name(); + $sr_cs_hash{$seq_region_id} = $slice->coord_system(); + } + + my $sr_name = $sr_name_hash{$seq_region_id}; + my $sr_cs = $sr_cs_hash{$seq_region_id}; + # + # remap the feature coordinates to another coord system + # if a mapper was provided + # + if($mapper) { + + if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) { + ( $seq_region_id, $seq_region_start, + $seq_region_end, $seq_region_strand ) + = + $mapper->map( $sr_name, $seq_region_start, $seq_region_end, + $seq_region_strand, $sr_cs, 1, $dest_slice); + + } else { + + ( $seq_region_id, $seq_region_start, + $seq_region_end, $seq_region_strand ) + = + $mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end, + $seq_region_strand, $sr_cs ); + } + + #skip features that map to gaps or coord system boundaries + next FEATURE if(!defined($seq_region_id)); + + #get a slice in the coord system we just mapped to + if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) { + $slice = $slice_hash{"ID:".$seq_region_id} ||= + $sa->fetch_by_seq_region_id($seq_region_id); + } else { + $slice = $slice_hash{"ID:".$seq_region_id} ||= + $sa->fetch_by_seq_region_id($seq_region_id); + } + } + + # + # If a destination slice was provided convert the coords + # If the dest_slice starts at 1 and is foward strand, nothing needs doing + # + if($dest_slice) { + if($dest_slice_start != 1 || $dest_slice_strand != 1) { + if($dest_slice_strand == 1) { + $seq_region_start = $seq_region_start - $dest_slice_start + 1; + $seq_region_end = $seq_region_end - $dest_slice_start + 1; + } else { + my $tmp_seq_region_start = $seq_region_start; + $seq_region_start = $dest_slice_end - $seq_region_end + 1; + $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1; + $seq_region_strand *= -1; + } + } + + #throw away features off the end of the requested slice + if($seq_region_end < 1 || $seq_region_start > $dest_slice_length || + ( $dest_slice_seq_region_id != $seq_region_id )) { +# print STDERR "IGNORED DUE TO CUTOFF $dest_slice_seq_region_id ne $seq_region_id . $sr_name\n"; + next FEATURE; + } + $slice = $dest_slice; + } + + push( @features, + $self->_create_feature_fast( + 'Bio::EnsEMBL::SimpleFeature', { + 'start' => $seq_region_start, + 'end' => $seq_region_end, + 'strand' => $seq_region_strand, + 'slice' => $slice, + 'analysis' => $analysis, + 'adaptor' => $self, + 'dbID' => $simple_feature_id, + 'display_label' => $display_label, + 'score' => $score + } ) ); + + } + + return \@features; +} + + +=head2 list_dbIDs + + Arg [1] : none + Example : @feature_ids = @{$simple_feature_adaptor->list_dbIDs()}; + Description: Gets an array of internal ids for all simple features in the current db + Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region. + Returntype : list of ints + Exceptions : none + Caller : ? + Status : Stable + +=cut + +sub list_dbIDs { + my ($self, $ordered) = @_; + + return $self->_list_dbIDs("simple_feature", undef, $ordered); +} + +1;
