Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 =head1 LICENSE | |
| 2 | |
| 3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
| 4 Genome Research Limited. All rights reserved. | |
| 5 | |
| 6 This software is distributed under a modified Apache license. | |
| 7 For license details, please see | |
| 8 | |
| 9 http://www.ensembl.org/info/about/code_licence.html | |
| 10 | |
| 11 =head1 CONTACT | |
| 12 | |
| 13 Please email comments or questions to the public Ensembl | |
| 14 developers list at <dev@ensembl.org>. | |
| 15 | |
| 16 Questions may also be sent to the Ensembl help desk at | |
| 17 <helpdesk@ensembl.org>. | |
| 18 | |
| 19 =cut | |
| 20 | |
| 21 =head1 NAME | |
| 22 | |
| 23 Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor - An Abstract Base class for all | |
| 24 FeatureAdaptors | |
| 25 | |
| 26 =head1 SYNOPSIS | |
| 27 | |
| 28 Abstract class - should not be instantiated. Implementation of | |
| 29 abstract methods must be performed by subclasses. | |
| 30 | |
| 31 =head1 DESCRIPTION | |
| 32 | |
| 33 This is a base adaptor for feature adaptors. This base class is simply a way | |
| 34 of eliminating code duplication through the implementation of methods | |
| 35 common to all feature adaptors. | |
| 36 | |
| 37 =head1 METHODS | |
| 38 | |
| 39 =cut | |
| 40 | |
| 41 package Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor; | |
| 42 use vars qw(@ISA @EXPORT); | |
| 43 use strict; | |
| 44 | |
| 45 use Bio::EnsEMBL::DBSQL::BaseAdaptor; | |
| 46 use Bio::EnsEMBL::Utils::Cache; | |
| 47 use Bio::EnsEMBL::Utils::Exception qw(warning throw deprecate stack_trace_dump); | |
| 48 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
| 49 use Bio::EnsEMBL::Utils::Iterator; | |
| 50 | |
| 51 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor); | |
| 52 | |
| 53 @EXPORT = (@{$DBI::EXPORT_TAGS{'sql_types'}}); | |
| 54 | |
| 55 our $SLICE_FEATURE_CACHE_SIZE = 4; | |
| 56 our $MAX_SPLIT_QUERY_SEQ_REGIONS = 3; | |
| 57 our $SILENCE_CACHE_WARNINGS = 0; | |
| 58 | |
| 59 =head2 new | |
| 60 | |
| 61 Arg [1] : list of args @args | |
| 62 Superclass constructor arguments | |
| 63 Example : none | |
| 64 Description: Constructor which warns if caching has been switched off | |
| 65 Returntype : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor | |
| 66 Exceptions : none | |
| 67 Caller : implementing subclass constructors | |
| 68 Status : Stable | |
| 69 | |
| 70 =cut | |
| 71 | |
# Constructor. Builds the object through the superclass constructor and
# then warns when the attached database has feature caching switched off,
# unless $SILENCE_CACHE_WARNINGS is set.
sub new {
  my ( $class, @args ) = @_;

  my $self = $class->SUPER::new(@args);

  my $caching_disabled = $self->db->no_cache();
  if ( $caching_disabled && !$SILENCE_CACHE_WARNINGS ) {
    warning( "You are using the API without caching most recent features. "
        . "Performance might be affected." );
  }

  return $self;
}
| 81 | |
| 82 =head2 start_equals_end | |
| 83 | |
| 84 Arg [1] : (optional) boolean $newval | |
| 85 Example : $bfa->start_equals_end(1); | |
| 86 Description: Getter/Setter for the start_equals_end flag. If set | |
| 87 to true sub _slice_fetch will use a simplified sql to retrieve 1bp slices. | |
| 88 Returntype : boolean | |
| 89 Exceptions : none | |
| 90 Caller : EnsemblGenomes variation DB build | |
| 91 Status : Stable | |
| 92 | |
| 93 =cut | |
| 94 | |
# Getter/setter for the 'start_equals_end' flag. The stored value is only
# replaced when a defined value is passed; the current value is always
# returned.
sub start_equals_end {
  my ( $self, $value ) = @_;

  $self->{'start_equals_end'} = $value if defined $value;

  return $self->{'start_equals_end'};
}
| 103 | |
| 104 | |
| 105 =head2 clear_cache | |
| 106 | |
| 107 Args : None | |
| 108 Example : my $sa = | |
| 109 $registry->get_adaptor( 'Mus musculus', 'Core', | |
| 110 'Slice' ); | |
| 111 my $ga = | |
| 112 $registry->get_adaptor( 'Mus musculus', 'Core', | |
| 113 'Gene' ); | |
| 114 | |
| 115 my $slice = | |
| 116 $sa->fetch_by_region( 'Chromosome', '1', 1e8, | |
| 117 1.05e8 ); | |
| 118 | |
| 119 my $genes = $ga->fetch_all_by_Slice($slice); | |
| 120 | |
| 121 $ga->clear_cache(); | |
| 122 | |
| 123 Description : Empties the feature cache associated with this | |
| 124 feature adaptor. | |
| 125 Return type : None | |
| 126 Exceptions : None | |
| 127 Caller : General | |
| 128 Status : At risk (under development) | |
| 129 | |
| 130 =cut | |
| 131 | |
# Empties the slice feature cache of this adaptor, if one has been built.
# Returns nothing.
sub clear_cache {
  my ($self) = @_;

  # Only touch a cache that already exists. Dereferencing a missing slot
  # as a hash would autovivify a plain, untied hash under the key, and
  # _slice_feature_cache() only builds the size-limited tied cache when the
  # key does not exist yet.
  if ( my $cache = $self->{_slice_feature_cache} ) {
    %{$cache} = ();
  }

  return;
}
| 137 | |
| 138 =head2 _slice_feature_cache | |
| 139 | |
| 140 Description : Returns the feature cache if we are allowed to cache and | |
| 141 will build it if we need to. We will never return a reference | |
| 142 to the hash to avoid unintentional auto-vivifying caching | |
| 143 Returntype : Bio::EnsEMBL::Utils::Cache | |
| 144 Exceptions : None | |
| 145 Caller : Internal | |
| 146 | |
| 147 =cut | |
| 148 | |
# Returns the slice feature cache (a reference to a hash tied to
# Bio::EnsEMBL::Utils::Cache), creating it on first use with
# $SLICE_FEATURE_CACHE_SIZE as the tie argument. Returns nothing when the
# database has no_cache set.
sub _slice_feature_cache {
  my ($self) = @_;

  return if $self->db()->no_cache();

  unless ( exists $self->{_slice_feature_cache} ) {
    my %tied_cache;
    tie %tied_cache, 'Bio::EnsEMBL::Utils::Cache', $SLICE_FEATURE_CACHE_SIZE;
    $self->{_slice_feature_cache} = \%tied_cache;
  }

  return $self->{_slice_feature_cache};
}
| 158 | |
| 159 =head2 fetch_all_by_Slice | |
| 160 | |
| 161 Arg [1] : Bio::EnsEMBL::Slice $slice | |
| 162 the slice from which to obtain features | |
| 163 Arg [2] : (optional) string $logic_name | |
| 164 the logic name of the type of features to obtain | |
| 165 Example : $fts = $a->fetch_all_by_Slice($slice, 'Swall'); | |
| 166 Description: Returns a listref of features created from the database | |
| 167 which are on the Slice defined by $slice. If $logic_name is | |
| 168 defined only features with an analysis of type $logic_name | |
| 169 will be returned. | |
| 170 NOTE: only features that are entirely on the slice's seq_region | |
| 171 will be returned (i.e. if they hang off the start/end of a | |
| 172 seq_region they will be discarded). Features can extend over the | |
| 173 slice boundaries though (in cases where you have a slice that | |
| 174 doesn't span the whole seq_region). | |
| 175 Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates | |
| 176 Exceptions : none | |
| 177 Caller : Bio::EnsEMBL::Slice | |
| 178 Status : Stable | |
| 179 | |
| 180 =cut | |
| 181 | |
# Fetches all features on a slice, optionally restricted to a logic name,
# by delegating to fetch_all_by_Slice_constraint() with an empty constraint.
sub fetch_all_by_Slice {
  my ( $self, $slice, $logic_name ) = @_;

  my $no_constraint = '';

  return $self->fetch_all_by_Slice_constraint( $slice, $no_constraint,
    $logic_name );
}
| 187 | |
| 188 | |
| 189 | |
| 190 =head2 fetch_Iterator_by_Slice_method | |
| 191 | |
| 192 Arg [1] : CODE ref of Slice fetch method | |
| 193 Arg [2] : ARRAY ref of parameters for Slice fetch method | |
| 194 Arg [3] : Optional int: Slice index in parameters array | |
| 195 Arg [4] : Optional int: Slice chunk size. Default=1000000 | |
| 196 Example : my $slice_iter = $feature_adaptor->fetch_Iterator_by_Slice_method | |
| 197 ($feature_adaptor->can('fetch_all_by_Slice_Arrays'), | |
| 198 \@fetch_method_params, | |
| 199 0,#Slice idx | |
| 200 ); | |
| 201 | |
| 202 while(defined(my $feature = $slice_iter->next)){ | |
| 203 #Do something here | |
| 204 } | |
| 205 | |
| 206 Description: Creates an Iterator which chunks the query Slice to facilitate | |
| 207 large Slice queries which would have previously run out of memory | |
| 208 Returntype : Bio::EnsEMBL::Utils::Iterator | |
| 209 Exceptions : Throws if mandatory params not valid | |
| 210 Caller : general | |
| 211 Status : at risk | |
| 212 | |
| 213 =cut | |
| 214 | |
| 215 #Does not support Collections. See Funcgen ResultFeatureAdaptor::fetch_collection_Iterator_by_Slice_method | |
| 216 | |
# Builds an iterator that walks a large slice in chunks.
#
# Arguments:
#   $slice_method_ref - CODE ref of the slice fetch method; it is invoked as
#                       $slice_method_ref->($self, @params) and must return
#                       an array reference of features.
#   $params_ref       - ARRAY ref of parameters for the fetch method; the
#                       element at $slice_idx must be the slice to chunk.
#   $slice_idx        - optional index of the slice in the parameter list
#                       (default 0).
#   $chunk_size       - optional chunk length (default 1000000).
#
# Returns a Bio::EnsEMBL::Utils::Iterator wrapping a closure that hands out
# one feature per call and undef once all chunks are exhausted.
# Throws if the CODE ref, the parameter list or the slice is not valid.
sub fetch_Iterator_by_Slice_method {
  my ( $self, $slice_method_ref, $params_ref, $slice_idx, $chunk_size ) = @_;

  if ( !( defined $slice_method_ref && ref($slice_method_ref) eq 'CODE' ) ) {
    throw('Must pass a valid Slice fetch method CODE ref');
  }

  if ( !( $params_ref && ref($params_ref) eq 'ARRAY' ) ) {
    throw('You must pass a method params ARRAYREF');
  }

  $slice_idx = 0 if ( !defined $slice_idx );
  my $slice = $params_ref->[$slice_idx];

  # Fail with a clear message rather than an obscure method-call error when
  # the parameter at $slice_idx is not an object that can be sub-sliced.
  if ( !( ref($slice) && UNIVERSAL::can( $slice, 'sub_Slice' ) ) ) {
    throw('Method params must contain a Slice at the given Slice index');
  }

  $chunk_size ||= 1000000;

  my @feat_cache;
  my $finished     = 0;
  my $start        = 1;                 # local coord for sub slice
  my $end          = $slice->length;
  my $num_overlaps = 0;

  my $coderef = sub {

    while ( scalar(@feat_cache) == 0 && !$finished ) {

      my $new_end = ( $start + $chunk_size - 1 );

      if ( $new_end >= $end ) {
        # this is our last chunk
        $new_end  = $end;
        $finished = 1;
      }

      # Chunk by sub slicing. The fetch parameters are copied for each
      # chunk so the caller's array is never modified.
      my $sub_slice = $slice->sub_Slice( $start, $new_end );
      my @params    = @{$params_ref};
      $params[$slice_idx] = $sub_slice;
      @feat_cache = @{ $slice_method_ref->( $self, @params ) };

      # Drop the features already returned by the previous chunk.
      # NOTE(review): this presumably relies on the fetch method returning
      # features ordered by start, so that overlapping features come first
      # in the next chunk - confirm against the fetch methods used.
      splice( @feat_cache, 0, $num_overlaps ) if ($num_overlaps);

      if ( scalar(@feat_cache) > 0 ) {
        my $slice_end = $sub_slice->end;
        $num_overlaps = 0;

        # Count the trailing features that run past the end of this chunk.
        # Each feature is tested on its own seq_region_end, which is in the
        # same coordinate space as $sub_slice->end.
        for ( my $i = $#feat_cache; $i >= 0; $i-- ) {
          last if ( $feat_cache[$i]->seq_region_end <= $slice_end );
          $num_overlaps++;
        }
      }

      # update the start coordinate
      $start = $new_end + 1;
    }

    # Yields undef once the cache is empty and every chunk has been read.
    return shift @feat_cache;
  };

  return Bio::EnsEMBL::Utils::Iterator->new($coderef);
}
| 294 | |
| 295 | |
| 296 =head2 fetch_Iterator_by_Slice | |
| 297 | |
| 298 Arg [1] : Bio::EnsEMBL::Slice | |
| 299 Arg [2] : Optional string: logic name of analysis | |
| 300 Arg [3] : Optional int: Chunk size to iterate over. Default is 1000000 | |
| 301 Example : my $slice_iter = $feature_adaptor->fetch_Iterator_by_Slice($slice); | |
| 302 | |
| 303 while(defined(my $feature = $slice_iter->next)){ | |
| 304 #Do something here | |
| 305 } | |
| 306 | |
| 307 Description: Creates an Iterator which chunks the query Slice to facilitate | |
| 308 large Slice queries which would have previously run out of memory | |
| 309 Returntype : Bio::EnsEMBL::Utils::Iterator | |
| 310 Exceptions : None | |
| 311 Caller : general | |
| 312 Status : at risk | |
| 313 | |
| 314 =cut | |
| 315 | |
# Convenience wrapper: builds a chunking iterator over $slice that uses this
# adaptor's fetch_all_by_Slice method, with the slice as the first fetch
# parameter and the optional logic name as the second.
sub fetch_Iterator_by_Slice {
  my ( $self, $slice, $logic_name, $chunk_size ) = @_;

  my $fetcher    = $self->can('fetch_all_by_Slice');
  my @fetch_args = ( $slice, $logic_name );

  return $self->fetch_Iterator_by_Slice_method( $fetcher, \@fetch_args, 0,
    $chunk_size );
}
| 323 | |
| 324 | |
| 325 =head2 fetch_all_by_Slice_and_score | |
| 326 | |
| 327 Arg [1] : Bio::EnsEMBL::Slice $slice | |
| 328 the slice from which to obtain features | |
| 329 Arg [2] : (optional) float $score | |
| 330 lower bound of the score of the features retrieved | |
| 331 Arg [3] : (optional) string $logic_name | |
| 332 the logic name of the type of features to obtain | |
| 333 Example : $fts = $a->fetch_all_by_Slice_and_score($slice,90,'Swall'); | |
| 334 Description: Returns a list of features created from the database which | |
| 335 are on the Slice defined by $slice and which have a score | |
| 336 greater than $score. If $logic_name is defined, | |
| 337 only features with an analysis of type $logic_name will be | |
| 338 returned. | |
| 339 Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates | |
| 340 Exceptions : none | |
| 341 Caller : Bio::EnsEMBL::Slice | |
| 342 Status : Stable | |
| 343 | |
| 344 =cut | |
| 345 | |
# Fetches the features on a slice whose score is greater than $score,
# optionally restricted to a logic name. When $score is undefined no score
# constraint is built and an undefined constraint is passed on.
sub fetch_all_by_Slice_and_score {
  my ( $self, $slice, $score, $logic_name ) = @_;

  my $constraint;

  if ( defined $score ) {
    # The second element of the first table entry is the table synonym.
    my ($primary_table) = $self->_tables();
    my $synonym         = $primary_table->[1];
    my $quoted_score =
      $self->dbc()->db_handle()->quote( $score, SQL_FLOAT );

    $constraint = "${synonym}.score > ${quoted_score}";
  }

  return $self->fetch_all_by_Slice_constraint( $slice, $constraint,
    $logic_name );
}
| 364 | |
| 365 | |
| 366 =head2 fetch_all_by_Slice_constraint | |
| 367 | |
| 368 Arg [1] : Bio::EnsEMBL::Slice $slice | |
| 369 the slice from which to obtain features | |
| 370 Arg [2] : (optional) string $constraint | |
| 371 An SQL query constraint (i.e. part of the WHERE clause) | |
| 372 Arg [3] : (optional) string $logic_name | |
| 373 the logic name of the type of features to obtain | |
| 374 Example : $fs = $a->fetch_all_by_Slice_constraint($slc, 'perc_ident > 5'); | |
| 375 Description: Returns a listref of features created from the database which | |
| 376 are on the Slice defined by $slice and fulfill the SQL | |
| 377 constraint defined by $constraint. If logic name is defined, | |
| 378 only features with an analysis of type $logic_name will be | |
| 379 returned. | |
| 380 Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates | |
| 381 Exceptions : thrown if $slice is not defined | |
| 382 Caller : Bio::EnsEMBL::Slice | |
| 383 Status : Stable | |
| 384 | |
| 385 =cut | |
| 386 | |
# Fetches the features on $slice that satisfy the SQL $constraint and,
# optionally, belong to the analysis named $logic_name.
#
# Features are fetched for the slice itself and for every segment of the
# slice adaptor's normalized projection that lies on a different seq_region
# (the "symlinked" Hap/PAR regions). Unless the database has no_cache set,
# results are served from and stored in the slice feature cache.
#
# Returns an array reference of features. Throws if $slice is not a
# Bio::EnsEMBL::Slice (or LRGSlice) or if no normalized projection can be
# retrieved.
sub fetch_all_by_Slice_constraint {
  my ( $self, $slice, $constraint, $logic_name ) = @_;


  my @result = ();

  if ( !ref($slice)
    || !( $slice->isa('Bio::EnsEMBL::Slice')
      or $slice->isa('Bio::EnsEMBL::LRGSlice') ) )
  {
    throw("Bio::EnsEMBL::Slice argument expected.");
  }

  # Fold the logic name (if any) into the SQL constraint.
  $constraint ||= '';
  $constraint =
    $self->_logic_name_to_constraint( $constraint, $logic_name );

  # If the logic name was invalid, undef was returned
  if ( !defined($constraint) ) { return [] }

  # $key stays undefined when caching is disabled; that is what the store
  # step at the end of this method tests for.
  my $key;
  my $cache;

  # The feature cache is only consulted when the database does not have the
  # no_cache attribute set.
  if (
    !( defined( $self->db()->no_cache() ) && $self->db()->no_cache() ) )
  {

    # For a StrainSlice, append a tautology built from the quoted strain
    # name. The constraint is part of the cache key below, so this makes
    # the key specific to the strain.
    if ( $slice->isa('Bio::EnsEMBL::StrainSlice') ) {
      my $string =
        $self->dbc()->db_handle()->quote( $slice->strain_name() );

      if ( $constraint ne "" ) {
        $constraint .= " AND $string = $string ";
      } else {
        $constraint .= " $string = $string ";
      }
    }

    # Check the cache and return the cached results if we have already
    # done this query. The cache key is made up from the slice name, the
    # constraint, and the bound parameters (if there are any).
    $key = uc( join( ':', $slice->name(), $constraint ) );

    my $bind_params = $self->bind_param_generic_fetch();

    if ( defined($bind_params) ) {
      # Each bound parameter contributes its first two elements to the key.
      $key .= ':'
        . join( ':', map { $_->[0] . '/' . $_->[1] } @{$bind_params} );
    }

    $cache = $self->_slice_feature_cache();
    if ( exists( $cache->{$key} ) ) {
      # Clear the bound parameters and return the cached data. Note that
      # the cached array reference itself is returned, not a copy.
      $self->{'_bind_param_generic_fetch'} = ();
      return $cache->{$key};
    }
  } ## end if ( !( defined( $self...)))

  my $sa = $slice->adaptor();

  # Hap/PAR support: retrieve normalized 'non-symlinked' slices.
  my @proj = @{ $sa->fetch_normalized_slice_projection($slice) };



  if ( !@proj ) {
    throw( 'Could not retrieve normalized Slices. '
        . 'Database contains incorrect assembly_exception information.'
    );
  }

  # Want to get features on the FULL original slice as well as any
  # symlinked slices.

  # Filter out partial slices from projection that are on same
  # seq_region as original slice.

  my $sr_id = $slice->get_seq_region_id();

  @proj = grep { $_->to_Slice->get_seq_region_id() != $sr_id } @proj;

  # Add a segment covering the whole original slice (from 1 to its length).
  my $segment = bless( [ 1, $slice->length(), $slice ],
    'Bio::EnsEMBL::ProjectionSegment' );
  push( @proj, $segment );

  # construct list of Hap/PAR boundaries for entire seq region
  my @bounds;

  # Slice for the seq_region id, inverted when the query slice is on the
  # reverse strand.
  my $ent_slice = $sa->fetch_by_seq_region_id($sr_id);
  if ( $slice->strand() == -1 ) {
    $ent_slice = $ent_slice->invert();
  }

  my @ent_proj =
    @{ $sa->fetch_normalized_slice_projection($ent_slice) };
  shift(@ent_proj);    # skip first

  # Boundaries are the segment starts, shifted by the query slice's start.
  @bounds = map { $_->from_start() - $slice->start() + 1 } @ent_proj;


  # fetch features for the primary slice AND all symlinked slices
  foreach my $seg (@proj) {


    my $offset    = $seg->from_start();
    my $seg_slice = $seg->to_Slice();
    my $features =
      $self->_slice_fetch( $seg_slice, $constraint );

    # If this was a symlinked slice offset the feature coordinates as
    # needed.
    if ( $seg_slice->name() ne $slice->name() ) {

    FEATURE:
      foreach my $f ( @{$features} ) {
        if ( $offset != 1 ) {
          $f->{'start'} += $offset - 1;
          $f->{'end'}   += $offset - 1;
        }

        # discard boundary crossing features from symlinked regions
        foreach my $bound (@bounds) {
          if ( $f->{'start'} < $bound && $f->{'end'} >= $bound ) {
            next FEATURE;
          }
        }

        # Re-attach the feature to the slice that was originally queried.
        $f->{'slice'} = $slice;
        push( @result, $f );
      }
    } else {
      push( @result, @{$features} );
    }
  } ## end foreach my $seg (@proj)

  # $key is only defined when the cache branch above was taken, i.e. when
  # caching is enabled; store the result for later calls.
  if ( defined($key) ) {
    $cache->{$key} = \@result;
  }

  return \@result;
} ## end sub fetch_all_by_Slice_constraint
| 531 | |
| 532 | |
| 533 =head2 fetch_all_by_logic_name | |
| 534 | |
| 535 Arg [1] : string $logic_name | |
| 536 the logic name of the type of features to obtain | |
| 537 Example : $fs = $a->fetch_all_by_logic_name('foobar'); | |
| 538 Description: Returns a listref of features created from the database. | |
| 539 only features with an analysis of type $logic_name will | |
| 540 be returned. If the logic name is invalid (not in the | |
| 541 analysis table), a reference to an empty list will be | |
| 542 returned. | |
| 543 Returntype : listref of Bio::EnsEMBL::SeqFeatures | |
| 544 Exceptions : thrown if no $logic_name | |
| 545 Caller : General | |
| 546 Status : Stable | |
| 547 | |
| 548 =cut | |
| 549 | |
# Fetches all features whose analysis has the given logic name.
# Throws when no logic name is supplied. When the logic name cannot be
# turned into a constraint, a warning is issued and a reference to an empty
# list is returned.
sub fetch_all_by_logic_name {
  my ( $self, $logic_name ) = @_;

  throw("Need a logic_name") unless defined $logic_name;

  my $constraint = $self->_logic_name_to_constraint( '', $logic_name );

  if ( defined $constraint ) {
    return $self->generic_fetch($constraint);
  }

  warning("Invalid logic name: $logic_name");
  return [];
}
| 566 | |
# Creates a feature object by calling the new() constructor of
# $feature_type with the key/value pairs held in the $args hash reference.
# Called by the _objs_from_sth() method in the sub-classes (the various
# feature adaptors). Overridden by the feature collection classes.

sub _create_feature {
  my $self = shift;
  my ( $feature_type, $args ) = @_;

  my @constructor_args = %{$args};

  return $feature_type->new(@constructor_args);
}
| 575 | |
# Same purpose as _create_feature(), but hands the $args reference itself
# to the new_fast() constructor of the feature type.

sub _create_feature_fast {
  my $self = shift;
  my ( $feature_type, $args ) = @_;

  return $feature_type->new_fast($args);
}
| 583 | |
#
# Helper function used by the fetch_all_by_Slice_constraint method.
#
# Fetches the features overlapping $slice from every coordinate system the
# primary table's features are stored in, combining $orig_constraint with
# the seq_region and position restrictions built here. Every batch is passed
# through _remap() before being collected.
#
# Arguments:
#   $slice           - the slice to fetch features for
#   $orig_constraint - SQL constraint to combine with the location
#                      constraint (may be empty)
#
# Returns an array reference of features.
#
sub _slice_fetch {
  my ( $self, $slice, $orig_constraint ) = @_;

  my $slice_start         = $slice->start();
  my $slice_end           = $slice->end();
  my $slice_strand        = $slice->strand();
  my $slice_cs            = $slice->coord_system();
  my $slice_seq_region    = $slice->seq_region_name();
  my $slice_seq_region_id = $slice->get_seq_region_id();

  # get the synonym and name of the primary_table
  my @tabs = $self->_tables;
  my ( $tab_name, $tab_syn ) = @{ $tabs[0] };

  # find out what coordinate systems the features are in
  my $mcc      = $self->db->get_MetaCoordContainer();
  my @feat_css = ();

  # When the meta table has a "<table>build.level" entry and the slice is
  # top level, only the slice's own coordinate system is queried.
  my $mca        = $self->db->get_MetaContainer();
  my $value_list = $mca->list_value_by_key( $tab_name . "build.level" );
  if ( @$value_list and $slice->is_toplevel() ) {
    push @feat_css, $slice_cs;
  } else {
    @feat_css =
      @{ $mcc->fetch_all_CoordSystems_by_feature_type($tab_name) };
  }

  my $asma = $self->db->get_AssemblyMapperAdaptor();
  my @features;

  # fetch the features from each coordinate system they are stored in
COORD_SYSTEM: foreach my $feat_cs (@feat_css) {
    my $mapper;
    my @coords;
    my @ids;

    if ( $feat_cs->equals($slice_cs) ) {
      # no mapping is required if this is the same coord system

      my $max_len = $self->_max_feature_length()
        || $mcc->fetch_max_length_by_CoordSystem_feature_type( $feat_cs,
        $tab_name );

      my $constraint = $orig_constraint;

      my $sr_id;
      if ( $slice->adaptor() ) {
        $sr_id = $slice->adaptor()->get_seq_region_id($slice);
      } else {
        $sr_id =
          $self->db()->get_SliceAdaptor()->get_seq_region_id($slice);
      }

      # If there is mapping information, use the external_seq_region_id
      # to get features. The chain of ids is followed until an id maps to
      # itself.

      my @sr_ids = ($sr_id);

      while (1) {
        my $ext_sr_id = $self->get_seq_region_id_external($sr_id);

        if ( $ext_sr_id == $sr_id ) { last }

        push( @sr_ids, $ext_sr_id );
        $sr_id = $ext_sr_id;
      }

      $constraint .= " AND " if ($constraint);

      # The trailing "AND" is completed by exactly one of the position
      # clauses below; none of them may start with another "AND".
      $constraint .= "${tab_syn}.seq_region_id IN ("
        . join( ',', @sr_ids ) . ") AND";

      # faster query for 1bp slices where SNP data is not compressed
      if ( $self->start_equals_end && $slice_start == $slice_end ) {
        $constraint .=
            " ${tab_syn}.seq_region_start = $slice_end"
          . " AND ${tab_syn}.seq_region_end = $slice_start";

      } else {

        if ( !$slice->is_circular() ) {
          # Deal with the default case of a non-circular chromosome.
          $constraint .= " ${tab_syn}.seq_region_start <= $slice_end AND "
            . "${tab_syn}.seq_region_end >= $slice_start";

          # Bound the start from below using the maximum feature length.
          if ($max_len) {
            my $min_start = $slice_start - $max_len;
            $constraint .= " AND ${tab_syn}.seq_region_start >= $min_start";
          }

        } else {
          # Deal with the case of a circular chromosome.
          if ( $slice_start > $slice_end ) {
            $constraint .= " ( ${tab_syn}.seq_region_start >= $slice_start "
              . "OR ${tab_syn}.seq_region_start <= $slice_end "
              . "OR ${tab_syn}.seq_region_end >= $slice_start "
              . "OR ${tab_syn}.seq_region_end <= $slice_end "
              . "OR ${tab_syn}.seq_region_start > ${tab_syn}.seq_region_end)";

          } else {
            $constraint .= " ((${tab_syn}.seq_region_start <= $slice_end "
              . "AND ${tab_syn}.seq_region_end >= $slice_start) "
              . "OR (${tab_syn}.seq_region_start > ${tab_syn}.seq_region_end "
              . "AND (${tab_syn}.seq_region_start <= $slice_end "
              . "OR ${tab_syn}.seq_region_end >= $slice_start)))";
          }
        }

      }

      my $fs = $self->generic_fetch( $constraint, undef, $slice );

      # features may still have to have coordinates made relative to slice
      # start ($mapper is undefined in this branch)
      $fs = $self->_remap( $fs, $mapper, $slice );

      push @features, @$fs;
    } else {
      $mapper = $asma->fetch_by_CoordSystems( $slice_cs, $feat_cs );

      next unless defined $mapper;

      # Get list of coordinates and corresponding internal ids for
      # regions the slice spans
      @coords =
        $mapper->map( $slice_seq_region, $slice_start, $slice_end,
        $slice_strand, $slice_cs );

      @coords = grep { !$_->isa('Bio::EnsEMBL::Mapper::Gap') } @coords;

      next COORD_SYSTEM if ( !@coords );

      @ids = map { $_->id() } @coords;
      # coords are now id rather than name
      # @ids = @{$asma->seq_regions_to_ids($feat_cs, \@ids)};

      # When regions are large and only partially spanned
      # by slice it is faster to limit the query with
      # start and end constraints. Take simple approach:
      # use regional constraints if there are less than a
      # specific number of regions covered.

      if ( @coords > $MAX_SPLIT_QUERY_SEQ_REGIONS ) {
        # Many regions: one query restricted by seq_region_id only.
        my $constraint = $orig_constraint;
        my $id_str     = join( ',', @ids );
        $constraint .= " AND " if ($constraint);
        $constraint .= "${tab_syn}.seq_region_id IN ($id_str)";
        my $fs = $self->generic_fetch( $constraint, $mapper, $slice );

        $fs = $self->_remap( $fs, $mapper, $slice );

        push @features, @$fs;

      } else {
        # do multiple split queries using start / end constraints

        my $max_len = (
          $self->_max_feature_length()
            || $mcc->fetch_max_length_by_CoordSystem_feature_type(
            $feat_cs, $tab_name
            ) );

        my $len = @coords;
        for ( my $i = 0; $i < $len; $i++ ) {
          my $constraint = $orig_constraint;
          $constraint .= " AND " if ($constraint);
          $constraint .=
              "${tab_syn}.seq_region_id = "
            . $ids[$i] . " AND "
            . "${tab_syn}.seq_region_start <= "
            . $coords[$i]->end() . " AND "
            . "${tab_syn}.seq_region_end >= "
            . $coords[$i]->start();

          if ($max_len) {
            my $min_start = $coords[$i]->start() - $max_len;
            $constraint .=
              " AND ${tab_syn}.seq_region_start >= $min_start";
          }
          my $fs = $self->generic_fetch( $constraint, $mapper, $slice );

          $fs = $self->_remap( $fs, $mapper, $slice );

          push @features, @$fs;
        }
      } ## end else [ if ( @coords > $MAX_SPLIT_QUERY_SEQ_REGIONS)]
    } ## end else [ if ( $feat_cs->equals(...))]
  } ## end foreach my $feat_cs (@feat_css)

  return \@features;
} ## end sub _slice_fetch
| 779 | |
| 780 | |
# Looks $sr_id up in the CoordSystemAdaptor's '_internal_seq_region_mapping'
# table and returns the mapped (external database) id when there is an
# entry; otherwise the id is returned unchanged.
sub get_seq_region_id_external {
  my ( $self, $sr_id ) = @_;

  my $cs_adaptor = $self->db()->get_CoordSystemAdaptor();

  if ( exists $cs_adaptor->{'_internal_seq_region_mapping'}->{$sr_id} ) {
    return $cs_adaptor->{'_internal_seq_region_mapping'}->{$sr_id};
  }

  return $sr_id;
}
| 789 | |
# Looks $sr_id up in the CoordSystemAdaptor's '_external_seq_region_mapping'
# table and returns the mapped (internal, core database) id when there is
# an entry; otherwise the id is returned unchanged.
sub get_seq_region_id_internal {
  my ( $self, $sr_id ) = @_;

  my $cs_adaptor = $self->db()->get_CoordSystemAdaptor();

  if ( exists $cs_adaptor->{'_external_seq_region_mapping'}->{$sr_id} ) {
    return $cs_adaptor->{'_external_seq_region_mapping'}->{$sr_id};
  }

  return $sr_id;
}
| 798 | |
#
# Helper function containing some common feature storing functionality
#
# Given a Feature this will return a copy (or the same feature if no changes
# to the feature are needed) of the feature which is relative to the start
# of the seq_region it is on. The seq_region_id of the seq_region it is on
# is also returned.
#
# This method will also ensure that the database knows which coordinate
# systems that this feature is stored in.
#
# Returns the list ($feature, $seq_region_id).
# Throws if the argument is not a Feature, if the feature is not attached to
# a Slice, if it cannot be transferred to a slice of the entire seq_region,
# or if no seq_region_id is found for the slice.
#

sub _pre_store {
  my $self    = shift;
  my $feature = shift;

  if ( !ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature') ) {
    throw('Expected Feature argument.');
  }

  my $slice = $feature->slice();

  # Validate the slice before anything is done with it, so that a missing
  # or wrong slice is always reported with this message.
  if ( !ref($slice)
    || !( $slice->isa('Bio::EnsEMBL::Slice')
      or $slice->isa('Bio::EnsEMBL::LRGSlice') ) )
  {
    throw('Feature must be attached to Slice to be stored.');
  }

  $self->_check_start_end_strand( $feature->start(), $feature->end(),
    $feature->strand(), $slice );

  my $db = $self->db();

  my $slice_adaptor = $db->get_SliceAdaptor();

  # make sure feature coords are relative to start of entire seq_region

  if ( $slice->start != 1 || $slice->strand != 1 ) {
    # move feature onto a slice of the entire seq_region
    $slice = $slice_adaptor->fetch_by_region(
      $slice->coord_system->name(),
      $slice->seq_region_name(),
      undef,    #start
      undef,    #end
      undef,    #strand
      $slice->coord_system->version() );

    $feature = $feature->transfer($slice);

    if ( !$feature ) {
      throw( 'Could not transfer Feature to slice of '
          . 'entire seq_region prior to storing' );
    }
  }

  # Ensure this type of feature is known to be stored in this coord system.
  my $cs = $slice->coord_system;

  my ($tab) = $self->_tables();
  my $tabname = $tab->[0];

  my $mcc = $db->get_MetaCoordContainer();

  $mcc->add_feature_type( $cs, $tabname, $feature->length );

  my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);

  if ( !$seq_region_id ) {
    throw('Feature is associated with seq_region which is not in this DB.');
  }

  return ( $feature, $seq_region_id );
}
| 869 | |
| 870 | |
# The same function as _pre_store, except that the slice adaptor is taken
# from the feature's own slice rather than from this adaptor's database.
# This one is used to store user uploaded features in XXX_userdata db
#
# Returns the list ($feature, $seq_region_id).
# Throws if the argument is not a Feature, if the feature is not attached to
# a Slice, if it cannot be transferred to a slice of the entire seq_region,
# or if no seq_region_id is found for the slice.

sub _pre_store_userdata {
  my $self    = shift;
  my $feature = shift;

  if ( !ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature') ) {
    throw('Expected Feature argument.');
  }

  my $slice = $feature->slice();

  # Validate the slice before calling any method on it; otherwise a feature
  # without a slice dies on the adaptor() call instead of raising the
  # exception below.
  if ( !ref($slice)
    || !( $slice->isa('Bio::EnsEMBL::Slice')
      or $slice->isa('Bio::EnsEMBL::LRGSlice') ) )
  {
    throw('Feature must be attached to Slice to be stored.');
  }

  my $slice_adaptor = $slice->adaptor;

  $self->_check_start_end_strand( $feature->start(), $feature->end(),
    $feature->strand(), $slice );

  # make sure feature coords are relative to start of entire seq_region

  if ( $slice->start != 1 || $slice->strand != 1 ) {
    # move feature onto a slice of the entire seq_region
    $slice = $slice_adaptor->fetch_by_region(
      $slice->coord_system->name(),
      $slice->seq_region_name(),
      undef,    #start
      undef,    #end
      undef,    #strand
      $slice->coord_system->version() );

    $feature = $feature->transfer($slice);

    if ( !$feature ) {
      throw( 'Could not transfer Feature to slice of '
          . 'entire seq_region prior to storing' );
    }
  }

  # Ensure this type of feature is known to be stored in this coord system.
  my $cs = $slice->coord_system;

  my ($tab) = $self->_tables();
  my $tabname = $tab->[0];

  my $db  = $self->db;
  my $mcc = $db->get_MetaCoordContainer();

  $mcc->add_feature_type( $cs, $tabname, $feature->length );

  my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);

  if ( !$seq_region_id ) {
    throw('Feature is associated with seq_region which is not in this DB.');
  }

  return ( $feature, $seq_region_id );
}
| 931 | |
| 932 | |
#
# helper function used to validate start/end/strand and
# hstart/hend/hstrand etc.
#
# Arg [1]    : int $start
# Arg [2]    : int $end
# Arg [3]    : int $strand (-1, 0 or 1)
# Arg [4]    : (optional) slice object providing is_circular().  It is
#              only consulted when $end < $start; without a slice such a
#              range is always rejected.
# Returntype : 1 when all values are valid
# Exceptions : thrown if start, end or strand is not an integer, if the
#              strand is outside -1..1, or if end < start and the slice
#              is absent or not circular.
#
sub _check_start_end_strand {
  my $self   = shift;
  my $start  = shift;
  my $end    = shift;
  my $strand = shift;
  my $slice  = shift;

  #
  # Make sure that the start, end, strand are valid
  #
  if(int($start) != $start) {
    throw("Invalid Feature start [$start].  Must be integer.");
  }
  if(int($end) != $end) {
    throw("Invalid Feature end [$end].  Must be integer.");
  }
  if(int($strand) != $strand || $strand < -1 || $strand > 1) {
    throw("Invalid Feature strand [$strand].  Must be -1, 0 or 1.");
  }
  if($end < $start) {
    # end < start is only legal on a circular slice.  The slice may be
    # missing (the _pre_store helpers call this before validating it), so
    # guard the method call to report the coordinate problem rather than
    # dying on an undefined value.
    my $is_circular = ref($slice) && $slice->is_circular();
    if(!$is_circular) {
      throw("Invalid Feature start/end [$start/$end]. Start must be less " .
            "than or equal to end.");
    }
  }

  return 1;
}
| 963 | |
| 964 | |
#
# Places a list of features on the requested slice.
#
# If the list is non-empty and its first element is either not a
# Bio::EnsEMBL::Feature or already attached to $slice, the list is
# returned as is.  Otherwise each feature is converted to the coordinate
# system of $slice (through $mapper->fastmap when the coordinate systems
# differ), features that map to a gap, fall outside the slice or lie on a
# different seq_region are dropped, and the remaining features are moved
# to slice-relative coordinates and attached to $slice.
#
# Returns a listref of features.
#
sub _remap {
  my ( $self, $features, $mapper, $slice ) = @_;

  # check if any remapping is actually needed
  if(@$features) {
    my $first = $features->[0];
    return $features if(!$first->isa('Bio::EnsEMBL::Feature'));
    return $features if($first->slice == $slice);
  }

  # remapping has not been done, we have to do our own conversion
  # to slice coords

  my $target_start  = $slice->start();
  my $target_end    = $slice->end();
  my $target_strand = $slice->strand();
  my $target_cs     = $slice->coord_system();

  # not used further down; the lookup is kept so the sequence of calls
  # made on the slice stays the same
  my $target_region_id = $slice->get_seq_region_id();
  my $target_region    = $slice->seq_region_name();

  my @remapped;

 FEATURE:
  foreach my $feat (@$features) {
    # since feats were obtained in contig coords, attached seq is a contig
    my $feat_slice = $feat->slice();
    throw("Feature does not have attached slice.\n") if(!$feat_slice);

    my $feat_region    = $feat_slice->seq_region_name();
    my $feat_region_id = $feat_slice->get_seq_region_id();
    my $feat_cs        = $feat_slice->coord_system();

    my ($region, $start, $end, $strand);

    if($target_cs->equals($feat_cs)) {
      # same coord system, coordinates can be taken over directly
      $start  = $feat->start();
      $end    = $feat->end();
      $strand = $feat->strand();
      $region = $feat->slice->seq_region_name();
    } else {
      # slice of feature in different coord system, mapping required
      ($region, $start, $end, $strand) =
        $mapper->fastmap($feat_region_id, $feat->start(), $feat->end(),
                         $feat->strand(), $feat_cs);

      # undefined start means gap
      next FEATURE if(!defined $start);
    }

    # skip anything that maps to a region outside the desired area
    next FEATURE
      unless($start <= $target_end
             && $end >= $target_start
             && $target_region eq $region);

    # shift the feature start, end and strand in one call
    my @placement =
      ($target_strand == -1)
      ? ( $target_end - $end + 1, $target_end - $start + 1, $strand * -1 )
      : ( $start - $target_start + 1, $end - $target_start + 1, $strand );

    $feat->move(@placement);
    $feat->slice($slice);

    push @remapped, $feat;
  }

  return \@remapped;
}
| 1037 | |
| 1038 | |
#
# Extends an SQL constraint so that it is restricted to the analysis
# with the given logic name.
#
#  - no logic name: the constraint is returned unchanged
#  - the primary table has no analysis_id column: a warning is issued and
#    the constraint is returned unchanged
#  - the logic name is not known to the AnalysisAdaptor: undef is returned
#  - otherwise: the constraint with "<synonym>.analysis_id = <dbID>"
#    appended (joined with AND when the constraint was not empty)
#
sub _logic_name_to_constraint {
  my ($self, $constraint, $logic_name) = @_;

  return $constraint unless($logic_name);

  # make sure that an analysis_id exists in the primary table
  my ($primary_table) = $self->_tables();
  my $synonym = $primary_table->[1];

  my $has_analysis_id = 0;

 COLUMN:
  foreach my $column ($self->_columns) {
    my ($table_syn, $column_name) = split(/\./, $column);
    if($table_syn eq $synonym && $column_name eq 'analysis_id') {
      $has_analysis_id = 1;
      last COLUMN;
    }
  }

  unless($has_analysis_id) {
    warning("This feature is not associated with an analysis.\n" .
            "Ignoring logic_name argument = [$logic_name].\n");
    return $constraint;
  }

  my $analysis =
    $self->db->get_AnalysisAdaptor()->fetch_by_logic_name($logic_name);

  # callers receive an explicit undef for an unknown logic name
  return undef if(!defined($analysis));

  my $analysis_id = $analysis->dbID();

  if($constraint) {
    $constraint .= ' AND';
  }
  $constraint .= " ${synonym}.analysis_id = $analysis_id";

  return $constraint;
}
| 1085 | |
| 1086 | |
=head2 store

  Arg [1]    : list of features (Bio::EnsEMBL::Feature objects)
  Example    : $adaptor->store(@feats);
  Description: ABSTRACT  Subclasses are responsible for implementing this
               method.  It should take a list of features and store them in
               the database.
  Returntype : none
  Exceptions : thrown if the method is not implemented by the subclass
  Caller     : general
  Status     : At Risk
             : throws if called.

=cut

sub store{
  # list assignment, so that $self is the invocant and not the number of
  # arguments
  my ($self) = @_;

  throw("Abstract method store not defined by implementing subclass\n");
}
| 1107 | |
| 1108 | |
=head2 remove

  Arg [1]    : Bio::EnsEMBL::Feature $feature
  Example    : $feature_adaptor->remove($feature);
  Description: This removes a feature from the database.  The row is
               deleted from the first table returned by the _tables
               method, and the primary key of that table is assumed to be
               the table name followed by '_id'.  After the deletion the
               dbID and adaptor of the feature object are unset.
  Returntype : none
  Exceptions : thrown if $feature is not a Bio::EnsEMBL::Feature, or if
               the feature is not stored in this database
  Caller     : general
  Status     : Stable

=cut


sub remove {
  my ($self, $feature) = @_;

  if(!$feature || !ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
    throw('Feature argument is required');
  }

  if(!$feature->is_stored($self->db)) {
    throw("This feature is not stored in this database");
  }

  my @tabs = $self->_tables;
  my ($table) = @{$tabs[0]};

  my $sth = $self->prepare("DELETE FROM $table WHERE ${table}_id = ?");
  $sth->bind_param(1,$feature->dbID,SQL_INTEGER);
  $sth->execute();
  # release the statement handle, as the other remove_* methods do
  $sth->finish();

  #unset the feature dbID and adaptor
  $feature->dbID(undef);
  $feature->adaptor(undef);

  return;
}
| 1153 | |
| 1154 | |
=head2 remove_by_Slice

  Arg [1]    : Bio::EnsEMBL::Slice $slice
  Example    : $feature_adaptor->remove_by_Slice($slice);
  Description: This removes features from the database which lie on a region
               represented by the passed in slice.  Only features which are
               fully contained by the slice are deleted; features which overlap
               the edge of the slice are not removed.
               The table the features are removed from is the first table
               returned by the _tables method.
  Returntype : none
  Exceptions : thrown if no slice is supplied
  Caller     : general
  Status     : Stable

=cut

sub remove_by_Slice {
  my ($self, $slice) = @_;

  unless($slice && ref($slice)
         && ($slice->isa('Bio::EnsEMBL::Slice') || $slice->isa('Bio::EnsEMBL::LRGSlice'))) {
    throw("Slice argument is required");
  }

  my ($primary) = $self->_tables;
  my $table_name = $primary->[0];

  # values for the three placeholders, in statement order
  my @bind_values = (
    $self->db->get_SliceAdaptor->get_seq_region_id($slice),
    $slice->start(),
    $slice->end(),
  );

  #
  # Delete only features fully on the slice, not overlapping ones
  #
  my $sql = join(' ',
                 "DELETE FROM $table_name",
                 "WHERE seq_region_id = ?",
                 "AND seq_region_start >= ?",
                 "AND seq_region_end <= ?");

  my $sth = $self->prepare($sql);

  foreach my $index (0 .. $#bind_values) {
    $sth->bind_param($index + 1, $bind_values[$index], SQL_INTEGER);
  }

  $sth->execute();
  $sth->finish();
}
| 1200 | |
| 1201 | |
#
# Internal accessor for an override of the maximum feature length, which
# is normally retrieved from the meta_coord table.  Setting it allows
# significant optimisations when it is known that the requested features
# will not be over a certain size.
#
# Called with an argument the value is stored and returned; called
# without one the stored value is returned.
#
sub _max_feature_length {
  my ($self, @new_value) = @_;

  if(@new_value) {
    $self->{'_max_feature_length'} = $new_value[0];
  }

  return $self->{'_max_feature_length'};
}
| 1213 | |
| 1214 | |
#
# Lists all seq_region_ids that a particular feature type is found on.
# Useful e.g. for finding out which seq_regions have genes.
#
# Arg [1] is the name of the feature table to look in; the result is
# restricted to coordinate systems of this adaptor's species_id.
# Returns a listref of seq_region_ids.
#
sub _list_seq_region_ids {
  my ($self, $table) = @_;

  my $sql = qq(
  SELECT DISTINCT
            sr.seq_region_id
  FROM      seq_region sr,
            $table a,
            coord_system cs
  WHERE     sr.seq_region_id = a.seq_region_id
    AND     sr.coord_system_id = cs.coord_system_id
    AND     cs.species_id = ?);

  my $sth = $self->prepare($sql);
  $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
  $sth->execute();

  # one id per row
  my @seq_region_ids;
  while (my @row = $sth->fetchrow) {
    push(@seq_region_ids, $row[0]);
  }

  $sth->finish;

  return \@seq_region_ids;
}
| 1249 | |
| 1250 | |
| 1251 =head1 DEPRECATED METHODS | |
| 1252 | |
| 1253 =cut | |
| 1254 | |
| 1255 | |
=head2 fetch_all_by_RawContig_constraint

  Description: DEPRECATED use fetch_all_by_Slice_constraint instead

=cut

sub fetch_all_by_RawContig_constraint {
  my $self = shift;
  deprecate('Use fetch_all_by_Slice_constraint() instead.');
  # delegate to the method named in the deprecation message (capital "S")
  return $self->fetch_all_by_Slice_constraint(@_);
}
| 1267 | |
=head2 fetch_all_by_RawContig

  Description: DEPRECATED use fetch_all_by_Slice instead

=cut

sub fetch_all_by_RawContig {
  my ($self, @args) = @_;

  # warn about the old name, then hand all arguments to the replacement
  deprecate('Use fetch_all_by_Slice() instead.');

  return $self->fetch_all_by_Slice(@args);
}
| 1279 | |
=head2 fetch_all_by_RawContig_and_score

  Description: DEPRECATED use fetch_all_by_Slice_and_score instead

=cut

sub fetch_all_by_RawContig_and_score {
  my ($self, @args) = @_;

  # warn about the old name, then hand all arguments to the replacement
  deprecate('Use fetch_all_by_Slice_and_score() instead.');

  return $self->fetch_all_by_Slice_and_score(@args);
}
| 1291 | |
=head2 remove_by_RawContig

  Description: DEPRECATED use remove_by_Slice instead

=cut

sub remove_by_RawContig {
  my ($self, @args) = @_;

  # warn about the old name, then hand all arguments to the replacement
  deprecate("Use remove_by_Slice instead");

  return $self->remove_by_Slice(@args);
}
| 1303 | |
| 1304 | |
#
# Deletes every row with the given analysis_id from the first table
# returned by _tables().
#
# Arg [1]    : int $analysis_id
# Exceptions : thrown if no (true) analysis id is supplied
#
sub remove_by_analysis_id {
  my ($self, $analysis_id) = @_;

  $analysis_id or throw("Must call with analysis id");

  my @tabs = $self->_tables;
  my ($tablename) = @{$tabs[0]};

  # The id is passed as a bind value rather than interpolated into the
  # statement, so it can never be interpreted as SQL.
  my $sql = "DELETE FROM $tablename WHERE analysis_id = ?";

  my $sth = $self->prepare($sql);
  $sth->execute($analysis_id);
  $sth->finish();
}
| 1320 | |
#
# Deletes the rows with the given primary key values from the first table
# returned by _tables().  The primary key column is assumed to be the
# table name followed by '_id'.
#
# Arg [1]    : listref of primary key values (ids, not feature objects)
# Exceptions : thrown if the argument is not a non-empty array reference
#
sub remove_by_feature_id {
  my ($self, $features_list) = @_;

  # Accept array references only; anything else (including undef) is
  # treated like an empty list and reported through throw().
  my @feats = (ref($features_list) eq 'ARRAY') ? @$features_list : ();
  @feats or throw("Must call remove_by_feature_id with a list of feature ids");

  my @tabs = $self->_tables;
  my ($tablename) = @{$tabs[0]};

  # One placeholder per id; the ids are passed as bind values rather
  # than interpolated into the statement.
  my $placeholders = join(', ', ('?') x scalar(@feats));
  my $sql = "DELETE FROM $tablename WHERE ${tablename}_id IN ($placeholders)";

  my $sth = $self->prepare($sql);
  $sth->execute(@feats);
  $sth->finish();
}
| 1336 | |
| 1337 | |
| 1338 1; |
