0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor - An Abstract Base class for all
|
|
24 FeatureAdaptors
|
|
25
|
|
26 =head1 SYNOPSIS
|
|
27
|
|
28 Abstract class - should not be instantiated. Implementation of
|
|
29 abstract methods must be performed by subclasses.
|
|
30
|
|
31 =head1 DESCRIPTION
|
|
32
|
|
33 This is a base adaptor for feature adaptors. This base class is simply a way
|
|
34 of eliminating code duplication through the implementation of methods
|
|
35 common to all feature adaptors.
|
|
36
|
|
37 =head1 METHODS
|
|
38
|
|
39 =cut
|
|
40
|
|
41 package Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
|
|
42 use vars qw(@ISA @EXPORT);
|
|
43 use strict;
|
|
44
|
|
45 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
|
|
46 use Bio::EnsEMBL::Utils::Cache;
|
|
47 use Bio::EnsEMBL::Utils::Exception qw(warning throw deprecate stack_trace_dump);
|
|
48 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
49 use Bio::EnsEMBL::Utils::Iterator;
|
|
50
|
|
# This adaptor specialises the generic BaseAdaptor.
@ISA = ('Bio::EnsEMBL::DBSQL::BaseAdaptor');

# Export list is taken from DBI's 'sql_types' export tag.
@EXPORT = @{ $DBI::EXPORT_TAGS{'sql_types'} };

# Size argument handed to the tied Bio::EnsEMBL::Utils::Cache that backs
# the per-adaptor slice feature cache.
our $SLICE_FEATURE_CACHE_SIZE = 4;

# When a slice maps onto more than this many seq_regions, _slice_fetch()
# issues one "IN (...)" query instead of one bounded query per region.
our $MAX_SPLIT_QUERY_SEQ_REGIONS = 3;

# Set to a true value to stop new() warning when caching is disabled.
our $SILENCE_CACHE_WARNINGS = 0;
|
|
58
|
|
59 =head2 new
|
|
60
|
|
61 Arg [1] : list of args @args
|
|
62 Superclass constructor arguments
|
|
63 Example : none
|
|
64 Description: Constructor which warns if caching has been switched off
|
|
65 Returntype : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor
|
|
66 Exceptions : none
|
|
67 Caller : implementing subclass constructors
|
|
68 Status : Stable
|
|
69
|
|
70 =cut
|
|
71
|
|
# Constructor. Builds the object through the superclass constructor and
# then warns when the attached database adaptor reports that caching is
# switched off, unless $SILENCE_CACHE_WARNINGS is true.
sub new {
  my ( $class, @args ) = @_;

  my $self = $class->SUPER::new(@args);

  my $caching_disabled =
    defined $self->db->no_cache() && $self->db->no_cache();

  if ( $caching_disabled && !$SILENCE_CACHE_WARNINGS ) {
    warning( "You are using the API without caching most recent features. "
        . "Performance might be affected." );
  }

  return $self;
}
|
|
81
|
|
82 =head2 start_equals_end
|
|
83
|
|
84 Arg [1] : (optional) boolean $newval
|
|
85 Example : $bfa->start_equals_end(1);
|
|
86 Description: Getter/Setter for the start_equals_end flag. If set
|
|
87 to true sub _slice_fetch will use a simplified sql to retrieve 1bp slices.
|
|
88 Returntype : boolean
|
|
89 Exceptions : none
|
|
90 Caller : EnsemblGenomes variation DB build
|
|
91 Status : Stable
|
|
92
|
|
93 =cut
|
|
94
|
|
# Combined getter/setter for the 'start_equals_end' flag. A defined
# argument replaces the stored value; the (possibly updated) stored value
# is always returned.
sub start_equals_end {
  my ( $self, $value ) = @_;

  $self->{'start_equals_end'} = $value if defined $value;

  return $self->{'start_equals_end'};
}
|
|
103
|
|
104
|
|
105 =head2 clear_cache
|
|
106
|
|
107 Args : None
|
|
108 Example : my $sa =
|
|
109 $registry->get_adaptor( 'Mus musculus', 'Core',
|
|
110 'Slice' );
|
|
111 my $ga =
|
|
112 $registry->get_adaptor( 'Mus musculus', 'Core',
|
|
113 'Gene' );
|
|
114
|
|
115 my $slice =
|
|
116 $sa->fetch_by_region( 'Chromosome', '1', 1e8,
|
|
117 1.05e8 );
|
|
118
|
|
119 my $genes = $ga->fetch_all_by_Slice($slice);
|
|
120
|
|
121 $ga->clear_cache();
|
|
122
|
|
123 Description : Empties the feature cache associated with this
|
|
124 feature adaptor.
|
|
125 Return type : None
|
|
126 Exceptions : None
|
|
127 Caller : General
|
|
128 Status : At risk (under development)
|
|
129
|
|
130 =cut
|
|
131
|
|
# Empties the slice feature cache of this adaptor, in place.
#
# Returns nothing.
sub clear_cache {
  my ($self) = @_;

  # Only empty a cache that has already been created. Dereferencing a
  # missing slot as a hash would autovivify a plain, untied hash there;
  # _slice_feature_cache() only builds the tied size-limited cache when
  # the slot does not exist, so it would then return the plain hash.
  if ( exists $self->{_slice_feature_cache} ) {
    %{ $self->{_slice_feature_cache} } = ();
  }

  return;
}
|
|
137
|
|
138 =head2 _slice_feature_cache
|
|
139
|
|
140 Description : Returns the feature cache if we are allowed to cache and
|
|
141 will build it if we need to. We will never return a reference
|
|
142 to the hash to avoid unintentional auto-vivifying caching
|
|
143 Returntype : Bio::EnsEMBL::Utils::Cache
|
|
144 Exceptions : None
|
|
145 Caller : Internal
|
|
146
|
|
147 =cut
|
|
148
|
|
# Returns the slice feature cache (a hash reference tied to
# Bio::EnsEMBL::Utils::Cache), creating it on first use. Returns nothing
# when the database adaptor has caching disabled.
sub _slice_feature_cache {
  my ($self) = @_;

  if ( $self->db()->no_cache() ) {
    return;
  }

  # Lazily build the tied cache the first time it is requested.
  unless ( exists $self->{_slice_feature_cache} ) {
    my %lru;
    tie %lru, 'Bio::EnsEMBL::Utils::Cache', $SLICE_FEATURE_CACHE_SIZE;
    $self->{_slice_feature_cache} = \%lru;
  }

  return $self->{_slice_feature_cache};
}
|
|
158
|
|
159 =head2 fetch_all_by_Slice
|
|
160
|
|
161 Arg [1] : Bio::EnsEMBL::Slice $slice
|
|
162 the slice from which to obtain features
|
|
163 Arg [2] : (optional) string $logic_name
|
|
164 the logic name of the type of features to obtain
|
|
165 Example : $fts = $a->fetch_all_by_Slice($slice, 'Swall');
|
|
166 Description: Returns a listref of features created from the database
|
|
167 which are on the Slice defined by $slice. If $logic_name is
|
|
168 defined only features with an analysis of type $logic_name
|
|
169 will be returned.
|
|
170 NOTE: only features that are entirely on the slice's seq_region
|
|
171 will be returned (i.e. if they hang off the start/end of a
|
|
172 seq_region they will be discarded). Features can extend over the
|
|
173 slice boundaries though (in cases where you have a slice that
|
|
174 doesn't span the whole seq_region).
|
|
175 Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
|
|
176 Exceptions : none
|
|
177 Caller : Bio::EnsEMBL::Slice
|
|
178 Status : Stable
|
|
179
|
|
180 =cut
|
|
181
|
|
# Fetches all features on $slice, optionally restricted to $logic_name,
# by delegating to fetch_all_by_Slice_constraint() with no SQL constraint.
sub fetch_all_by_Slice {
  my ( $self, $slice, $logic_name ) = @_;

  my $no_constraint = '';

  return $self->fetch_all_by_Slice_constraint( $slice, $no_constraint,
                                               $logic_name );
}
|
|
187
|
|
188
|
|
189
|
|
190 =head2 fetch_Iterator_by_Slice_method
|
|
191
|
|
192 Arg [1] : CODE ref of Slice fetch method
|
|
193 Arg [2] : ARRAY ref of parameters for Slice fetch method
|
|
194 Arg [3] : Optional int: Slice index in parameters array
|
|
195 Arg [4] : Optional int: Slice chunk size. Default=1000000
|
|
196 Example : my $slice_iter = $feature_adaptor->fetch_Iterator_by_Slice_method
|
|
197 ($feature_adaptor->can('fetch_all_by_Slice_Arrays'),
|
|
198 \@fetch_method_params,
|
|
199 0,#Slice idx
|
|
200 );
|
|
201
|
|
202 while(defined(my $feature = $slice_iter->next)){
|
|
203 #Do something here
|
|
204 }
|
|
205
|
|
206 Description: Creates an Iterator which chunks the query Slice to facilitate
|
|
207 large Slice queries which would have previously run out of memory
|
|
208 Returntype : Bio::EnsEMBL::Utils::Iterator
|
|
209 Exceptions : Throws if mandatory params not valid
|
|
210 Caller : general
|
|
211 Status : at risk
|
|
212
|
|
213 =cut
|
|
214
|
|
215 #Does not support Collections. See Funcgen ResultFeatureAdaptor::fetch_collection_Iterator_by_Slice_method
|
|
216
|
|
# Builds a Bio::EnsEMBL::Utils::Iterator that walks a Slice in fixed-size
# chunks, calling a slice fetch method once per chunk.
#
#   $slice_method_ref - CODE ref, called as $ref->($self, @params)
#                       and expected to return an ARRAY ref of features
#   $params_ref       - ARRAY ref of parameters for that method; one of
#                       them is the Slice to be chunked
#   $slice_idx        - index of the Slice within $params_ref (default 0)
#   $chunk_size       - chunk length in bp (default 1000000)
#
# Throws if the CODE ref or the params ARRAY ref is missing or invalid.
#
# NOTE(review): duplicate suppression between chunks presumes the fetch
# method returns features ordered so that those overhanging the end of a
# chunk come last and are returned first for the following chunk - confirm
# for any custom fetch method.
sub fetch_Iterator_by_Slice_method {
  my ( $self, $slice_method_ref, $params_ref, $slice_idx, $chunk_size ) = @_;

  if ( !( defined $slice_method_ref && ref($slice_method_ref) eq 'CODE' ) ) {
    throw('Must pass a valid Slice fetch method CODE ref');
  }

  if ( !( $params_ref && ref($params_ref) eq 'ARRAY' ) ) {
    throw('You must pass a method params ARRAYREF');
  }

  $slice_idx = 0 if !defined $slice_idx;
  my $slice = $params_ref->[$slice_idx];
  $chunk_size ||= 1000000;

  my @feat_cache;
  my $finished     = 0;
  my $start        = 1;                # local coord for sub slice
  my $end          = $slice->length;
  my $num_overlaps = 0;

  my $coderef = sub {

    # Refill the buffer, one chunk at a time, until it holds at least one
    # feature or the whole slice has been covered.
    while ( !@feat_cache && !$finished ) {

      my $new_end = $start + $chunk_size - 1;

      if ( $new_end >= $end ) {
        # this is our last chunk
        $new_end  = $end;
        $finished = 1;
      }

      # Chunk by sub slicing. The sub slice is substituted into a copy of
      # the parameters so the caller's array is left untouched.
      my $sub_slice    = $slice->sub_Slice( $start, $new_end );
      my @chunk_params = @{$params_ref};
      $chunk_params[$slice_idx] = $sub_slice;
      @feat_cache = @{ $slice_method_ref->( $self, @chunk_params ) };

      # Drop the leading features already returned with the previous
      # chunk.
      splice( @feat_cache, 0, $num_overlaps ) if $num_overlaps;

      if (@feat_cache) {
        my $slice_end = $sub_slice->end;
        $num_overlaps = 0;

        # Count the trailing features that run past the end of this chunk.
        # Each feature is judged on its own seq_region_end, the same
        # coordinate space as $sub_slice->end.
        for ( my $i = $#feat_cache; $i >= 0; $i-- ) {
          last if $feat_cache[$i]->seq_region_end <= $slice_end;
          $num_overlaps++;
        }
      }

      # update the start coordinate
      $start = $new_end + 1;
    }

    # Yields undef once the buffer is empty and every chunk has been read.
    return shift @feat_cache;
  };

  return Bio::EnsEMBL::Utils::Iterator->new($coderef);
}
|
|
294
|
|
295
|
|
296 =head2 fetch_Iterator_by_Slice
|
|
297
|
|
298 Arg [1] : Bio::EnsEMBL::Slice
|
|
299 Arg [2] : Optional string: logic name of analysis
|
|
300 Arg [3] : Optional int: Chunk size to iterate over. Default is 1000000
|
|
301 Example : my $slice_iter = $feature_adaptor->fetch_Iterator_by_Slice($slice);
|
|
302
|
|
303 while(defined(my $feature = $slice_iter->next)){
|
|
304 #Do something here
|
|
305 }
|
|
306
|
|
307 Description: Creates an Iterator which chunks the query Slice to facilitate
|
|
308 large Slice queries which would have previously run out of memory
|
|
309 Returntype : Bio::EnsEMBL::Utils::Iterator
|
|
310 Exceptions : None
|
|
311 Caller : general
|
|
312 Status : at risk
|
|
313
|
|
314 =cut
|
|
315
|
|
# Returns an iterator over the features on $slice, fetched chunk by chunk
# with fetch_all_by_Slice(). $logic_name and $chunk_size are optional and
# are passed through to fetch_Iterator_by_Slice_method().
sub fetch_Iterator_by_Slice {
  my ( $self, $slice, $logic_name, $chunk_size ) = @_;

  my $fetch_method = $self->can('fetch_all_by_Slice');
  my @fetch_args   = ( $slice, $logic_name );

  # The slice sits at index 0 of the argument list.
  return $self->fetch_Iterator_by_Slice_method( $fetch_method, \@fetch_args,
                                                0, $chunk_size );
}
|
|
323
|
|
324
|
|
325 =head2 fetch_all_by_Slice_and_score
|
|
326
|
|
327 Arg [1] : Bio::EnsEMBL::Slice $slice
|
|
328 the slice from which to obtain features
|
|
329 Arg [2] : (optional) float $score
|
|
330 lower bound of the score of the features retrieved
|
|
331 Arg [3] : (optional) string $logic_name
|
|
332 the logic name of the type of features to obtain
|
|
333 Example : $fts = $a->fetch_all_by_Slice_and_score($slice,90,'Swall');
|
|
334 Description: Returns a list of features created from the database which are
|
|
335 on the Slice defined by $slice and which have a score
|
|
336 greater than $score. If $logic_name is defined,
|
|
337 only features with an analysis of type $logic_name will be
|
|
338 returned.
|
|
339 Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
|
|
340 Exceptions : none
|
|
341 Caller : Bio::EnsEMBL::Slice
|
|
342 Status : Stable
|
|
343
|
|
344 =cut
|
|
345
|
|
# Fetches the features on $slice whose score is strictly greater than
# $score, optionally restricted to $logic_name. With no $score the call
# is passed on to fetch_all_by_Slice_constraint() with an undefined
# constraint.
sub fetch_all_by_Slice_and_score {
  my ( $self, $slice, $score, $logic_name ) = @_;

  my $constraint;

  if ( defined $score ) {
    # The first entry returned by _tables() describes the primary table;
    # its second element is the alias used to qualify the score column.
    my ($primary_table) = $self->_tables();
    my $alias = $primary_table->[1];

    $constraint = sprintf( "%s.score > %s",
                           $alias,
                           $self->dbc()->db_handle()->quote( $score, SQL_FLOAT ) );
  }

  return $self->fetch_all_by_Slice_constraint( $slice, $constraint,
                                               $logic_name );
}
|
|
364
|
|
365
|
|
366 =head2 fetch_all_by_Slice_constraint
|
|
367
|
|
368 Arg [1] : Bio::EnsEMBL::Slice $slice
|
|
369 the slice from which to obtain features
|
|
370 Arg [2] : (optional) string $constraint
|
|
371 An SQL query constraint (i.e. part of the WHERE clause)
|
|
372 Arg [3] : (optional) string $logic_name
|
|
373 the logic name of the type of features to obtain
|
|
374 Example : $fs = $a->fetch_all_by_Slice_constraint($slc, 'perc_ident > 5');
|
|
375 Description: Returns a listref of features created from the database which
|
|
376 are on the Slice defined by $slice and fulfill the SQL
|
|
377 constraint defined by $constraint. If logic name is defined,
|
|
378 only features with an analysis of type $logic_name will be
|
|
379 returned.
|
|
380 Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
|
|
381 Exceptions : thrown if $slice is not defined
|
|
382 Caller : Bio::EnsEMBL::Slice
|
|
383 Status : Stable
|
|
384
|
|
385 =cut
|
|
386
|
|
# Fetches all features on $slice that satisfy the SQL $constraint and,
# optionally, belong to the analysis named $logic_name.
#
# Results are cached per slice name / constraint / bound parameters
# unless the database adaptor has caching disabled. Features found on
# other seq_regions of the normalized projection of $slice are shifted
# into the coordinates of $slice and re-attached to it.
#
# Returns an ARRAY ref of features (empty if the logic name is unknown).
# Throws if $slice is not a Slice/LRGSlice or if no normalized projection
# can be retrieved.
sub fetch_all_by_Slice_constraint {
  my ( $self, $slice, $constraint, $logic_name ) = @_;

  my @result = ();

  unless ( ref($slice)
        && (    $slice->isa('Bio::EnsEMBL::Slice')
             || $slice->isa('Bio::EnsEMBL::LRGSlice') ) )
  {
    throw("Bio::EnsEMBL::Slice argument expected.");
  }

  $constraint ||= '';
  $constraint =
    $self->_logic_name_to_constraint( $constraint, $logic_name );

  # An undefined constraint signals an invalid logic name.
  return [] if !defined $constraint;

  my ( $key, $cache );

  # The feature cache is consulted only when no_cache is not set.
  unless ( defined( $self->db()->no_cache() ) && $self->db()->no_cache() ) {

    # For strain slices a tautology naming the strain is appended so that
    # the constraint, and hence the cache key, differs per strain.
    if ( $slice->isa('Bio::EnsEMBL::StrainSlice') ) {
      my $string =
        $self->dbc()->db_handle()->quote( $slice->strain_name() );

      $constraint .=
        ( $constraint ne "" ? " AND" : "" ) . " $string = $string ";
    }

    # Cache key: upper-cased slice name and constraint, followed by the
    # bound parameters when there are any.
    $key = uc( join( ':', $slice->name(), $constraint ) );

    my $bind_params = $self->bind_param_generic_fetch();

    if ( defined $bind_params ) {
      my @bound = map { join( '/', $_->[0], $_->[1] ) } @{$bind_params};
      $key .= ':' . join( ':', @bound );
    }

    $cache = $self->_slice_feature_cache();

    if ( exists $cache->{$key} ) {
      # Cache hit: discard the bound parameters and hand back the stored
      # result.
      $self->{'_bind_param_generic_fetch'} = undef;
      return $cache->{$key};
    }
  }

  my $sa = $slice->adaptor();

  # Hap/PAR support: retrieve normalized 'non-symlinked' slices.
  my @proj = @{ $sa->fetch_normalized_slice_projection($slice) };

  if ( !@proj ) {
    throw( 'Could not retrieve normalized Slices. '
         . 'Database contains incorrect assembly_exception information.'
    );
  }

  # Keep only the projection segments lying on a different seq_region,
  # then add one segment covering the full original slice.
  my $sr_id = $slice->get_seq_region_id();

  @proj = grep { $_->to_Slice->get_seq_region_id() != $sr_id } @proj;

  push( @proj,
        bless( [ 1, $slice->length(), $slice ],
               'Bio::EnsEMBL::ProjectionSegment' ) );

  # Hap/PAR boundaries of the whole seq_region, expressed in the
  # coordinates of $slice. The first projection segment is skipped.
  my $ent_slice = $sa->fetch_by_seq_region_id($sr_id);
  $ent_slice = $ent_slice->invert() if $slice->strand() == -1;

  my ( undef, @later_segments ) =
    @{ $sa->fetch_normalized_slice_projection($ent_slice) };

  my @bounds =
    map { $_->from_start() - $slice->start() + 1 } @later_segments;

  # Fetch features for the primary slice and for every symlinked slice.
  foreach my $seg (@proj) {
    my $offset    = $seg->from_start();
    my $seg_slice = $seg->to_Slice();
    my $features  = $self->_slice_fetch( $seg_slice, $constraint );

    if ( $seg_slice->name() eq $slice->name() ) {
      # Features on the original slice need no adjustment.
      push( @result, @{$features} );
      next;
    }

    # Symlinked slice: shift coordinates, then drop features that cross a
    # boundary.
    my $shift = $offset - 1;

    FEATURE:
    foreach my $f ( @{$features} ) {
      if ( $shift != 0 ) {
        $f->{'start'} += $shift;
        $f->{'end'}   += $shift;
      }

      foreach my $bound (@bounds) {
        next FEATURE
          if $f->{'start'} < $bound && $f->{'end'} >= $bound;
      }

      $f->{'slice'} = $slice;
      push( @result, $f );
    }
  }

  # $key is only defined when caching is in use.
  $cache->{$key} = \@result if defined $key;

  return \@result;
}
|
|
531
|
|
532
|
|
533 =head2 fetch_all_by_logic_name
|
|
534
|
|
535 Arg [1] : string $logic_name
|
|
536 the logic name of the type of features to obtain
|
|
537 Example : $fs = $a->fetch_all_by_logic_name('foobar');
|
|
538 Description: Returns a listref of features created from the database.
|
|
539 only features with an analysis of type $logic_name will
|
|
540 be returned. If the logic name is invalid (not in the
|
|
541 analysis table), a reference to an empty list will be
|
|
542 returned.
|
|
543 Returntype : listref of Bio::EnsEMBL::SeqFeatures
|
|
544 Exceptions : thrown if no $logic_name
|
|
545 Caller : General
|
|
546 Status : Stable
|
|
547
|
|
548 =cut
|
|
549
|
|
# Fetches every feature whose analysis has the given logic name.
# Throws when no logic name is supplied. When the logic name cannot be
# turned into a constraint, a warning is issued and an empty ARRAY ref
# is returned.
sub fetch_all_by_logic_name {
  my ( $self, $logic_name ) = @_;

  throw("Need a logic_name") unless defined $logic_name;

  my $constraint = $self->_logic_name_to_constraint( '', $logic_name );

  if ( defined $constraint ) {
    return $self->generic_fetch($constraint);
  }

  warning("Invalid logic name: $logic_name");
  return [];
}
|
|
566
|
|
567 # Method that creates an object. Called by the _objs_from_sth() method
|
|
568 # in the sub-classes (the various feature adaptors). Overridden by the
|
|
569 # feature collection classes.
|
|
570
|
|
# Creates a feature by calling the regular new() constructor of
# $feature_type with the key/value pairs held in the hash ref $args.
sub _create_feature {
  my $self = shift;
  my ( $feature_type, $args ) = @_;

  return $feature_type->new( %$args );
}
|
|
575
|
|
576 # This is the same as the above, but calls the new_fast() constructor of
|
|
577 # the feature type.
|
|
578
|
|
# Creates a feature by calling the new_fast() constructor of
# $feature_type, passing the hash ref $args through unchanged.
sub _create_feature_fast {
  my $self = shift;
  my ( $feature_type, $args ) = @_;

  return $feature_type->new_fast($args);
}
|
|
583
|
|
584 #
|
|
585 # helper function used by fetch_all_by_Slice_constraint method
|
|
586 #
|
|
# Helper for fetch_all_by_Slice_constraint(): fetches the features that
# overlap $slice and satisfy $orig_constraint, from every coordinate
# system the features are stored in.
#
#   $slice           - the slice to fetch features for
#   $orig_constraint - SQL constraint (may be empty) that the seq_region
#                      and coordinate restrictions are appended to
#
# Returns an ARRAY ref of features after they have been passed through
# _remap().
sub _slice_fetch {
  my ( $self, $slice, $orig_constraint ) = @_;

  my $slice_start      = $slice->start();
  my $slice_end        = $slice->end();
  my $slice_strand     = $slice->strand();
  my $slice_cs         = $slice->coord_system();
  my $slice_seq_region = $slice->seq_region_name();

  # Name and SQL alias of the primary table (first entry of _tables()).
  my @tabs = $self->_tables;
  my ( $tab_name, $tab_syn ) = @{ $tabs[0] };

  # Find out what coordinate systems the features are in. If the meta
  # table holds a "<table>build.level" key and the slice is toplevel, only
  # the slice's own coordinate system is queried.
  my $mcc      = $self->db->get_MetaCoordContainer();
  my @feat_css = ();

  my $mca        = $self->db->get_MetaContainer();
  my $value_list = $mca->list_value_by_key( $tab_name . "build.level" );
  if ( @$value_list and $slice->is_toplevel() ) {
    push @feat_css, $slice_cs;
  } else {
    @feat_css =
      @{ $mcc->fetch_all_CoordSystems_by_feature_type($tab_name) };
  }

  my $asma = $self->db->get_AssemblyMapperAdaptor();
  my @features;

  # fetch the features from each coordinate system they are stored in
  COORD_SYSTEM: foreach my $feat_cs (@feat_css) {
    my $mapper;
    my @coords;
    my @ids;

    if ( $feat_cs->equals($slice_cs) ) {
      # no mapping is required if this is the same coord system

      my $max_len = $self->_max_feature_length()
        || $mcc->fetch_max_length_by_CoordSystem_feature_type( $feat_cs,
                                                               $tab_name );

      my $constraint = $orig_constraint;

      my $sr_id;
      if ( $slice->adaptor() ) {
        $sr_id = $slice->adaptor()->get_seq_region_id($slice);
      } else {
        $sr_id =
          $self->db()->get_SliceAdaptor()->get_seq_region_id($slice);
      }

      # If there is mapping information, use the external_seq_region_id
      # to get features. The chain of external ids is followed until an
      # id maps to itself.
      my @sr_ids = ($sr_id);

      while (1) {
        my $ext_sr_id = $self->get_seq_region_id_external($sr_id);

        if ( $ext_sr_id == $sr_id ) { last }

        push( @sr_ids, $ext_sr_id );
        $sr_id = $ext_sr_id;
      }

      $constraint .= " AND " if ($constraint);

      # This clause ends in "AND": every branch below appends exactly one
      # coordinate condition and must not start with another "AND".
      $constraint .= "${tab_syn}.seq_region_id IN ("
        . join( ',', @sr_ids ) . ") AND";

      # faster query for 1bp slices where SNP data is not compressed
      if ( $self->start_equals_end && $slice_start == $slice_end ) {
        $constraint .=
            " ${tab_syn}.seq_region_start = $slice_end"
          . " AND ${tab_syn}.seq_region_end = $slice_start";

      } else {

        if ( !$slice->is_circular() ) {
          # Deal with the default case of a non-circular chromosome.
          $constraint .= " ${tab_syn}.seq_region_start <= $slice_end AND "
            . "${tab_syn}.seq_region_end >= $slice_start";

          # A known maximum feature length gives a lower bound on the
          # start of any overlapping feature.
          if ($max_len) {
            my $min_start = $slice_start - $max_len;
            $constraint .= " AND ${tab_syn}.seq_region_start >= $min_start";
          }

        } else {
          # Deal with the case of a circular chromosome.
          if ( $slice_start > $slice_end ) {
            $constraint .= " ( ${tab_syn}.seq_region_start >= $slice_start "
              . "OR ${tab_syn}.seq_region_start <= $slice_end "
              . "OR ${tab_syn}.seq_region_end >= $slice_start "
              . "OR ${tab_syn}.seq_region_end <= $slice_end "
              . "OR ${tab_syn}.seq_region_start > ${tab_syn}.seq_region_end)";

          } else {
            $constraint .= " ((${tab_syn}.seq_region_start <= $slice_end "
              . "AND ${tab_syn}.seq_region_end >= $slice_start) "
              . "OR (${tab_syn}.seq_region_start > ${tab_syn}.seq_region_end "
              . "AND (${tab_syn}.seq_region_start <= $slice_end "
              . "OR ${tab_syn}.seq_region_end >= $slice_start)))";
          }
        }

      }

      my $fs = $self->generic_fetch( $constraint, undef, $slice );

      # features may still have to have coordinates made relative to slice
      # start
      $fs = $self->_remap( $fs, $mapper, $slice );

      push @features, @$fs;
    } else {
      $mapper = $asma->fetch_by_CoordSystems( $slice_cs, $feat_cs );

      next unless defined $mapper;

      # Get list of coordinates and corresponding internal ids for
      # regions the slice spans
      @coords =
        $mapper->map( $slice_seq_region, $slice_start, $slice_end,
                      $slice_strand, $slice_cs );

      @coords = grep { !$_->isa('Bio::EnsEMBL::Mapper::Gap') } @coords;

      next COORD_SYSTEM if ( !@coords );

      # coords carry seq_region ids rather than names
      @ids = map { $_->id() } @coords;

      # When regions are large and only partially spanned
      # by slice it is faster to limit the query with
      # start and end constraints. Take simple approach:
      # use regional constraints if there are less than a
      # specific number of regions covered.

      if ( @coords > $MAX_SPLIT_QUERY_SEQ_REGIONS ) {
        my $constraint = $orig_constraint;
        my $id_str = join( ',', @ids );
        $constraint .= " AND " if ($constraint);
        $constraint .= "${tab_syn}.seq_region_id IN ($id_str)";
        my $fs = $self->generic_fetch( $constraint, $mapper, $slice );

        $fs = $self->_remap( $fs, $mapper, $slice );

        push @features, @$fs;

      } else {
        # do multiple split queries using start / end constraints

        my $max_len = (
          $self->_max_feature_length()
            || $mcc->fetch_max_length_by_CoordSystem_feature_type(
                                                       $feat_cs, $tab_name
            ) );

        my $len = @coords;
        for ( my $i = 0; $i < $len; $i++ ) {
          my $constraint = $orig_constraint;
          $constraint .= " AND " if ($constraint);
          $constraint .=
              "${tab_syn}.seq_region_id = "
            . $ids[$i] . " AND "
            . "${tab_syn}.seq_region_start <= "
            . $coords[$i]->end() . " AND "
            . "${tab_syn}.seq_region_end >= "
            . $coords[$i]->start();

          if ($max_len) {
            my $min_start = $coords[$i]->start() - $max_len;
            $constraint .=
              " AND ${tab_syn}.seq_region_start >= $min_start";
          }
          my $fs = $self->generic_fetch( $constraint, $mapper, $slice );

          $fs = $self->_remap( $fs, $mapper, $slice );

          push @features, @$fs;
        }
      }
    }
  }

  return \@features;
}
|
|
779
|
|
780
|
|
781 #for a given seq_region_id, gets the one used in an external database, if present, otherwise, returns the internal one
|
|
# Translates a seq_region_id through the '_internal_seq_region_mapping'
# table of the CoordSystemAdaptor. Ids with no entry are returned
# unchanged.
sub get_seq_region_id_external {
  my ( $self, $sr_id ) = @_;

  my $cs_adaptor = $self->db()->get_CoordSystemAdaptor();

  if ( exists $cs_adaptor->{'_internal_seq_region_mapping'}->{$sr_id} ) {
    return $cs_adaptor->{'_internal_seq_region_mapping'}->{$sr_id};
  }

  return $sr_id;
}
|
|
789
|
|
790 #for a given seq_region_id and coord_system, gets the one used in the internal (core) database
|
|
# Translates a seq_region_id through the '_external_seq_region_mapping'
# table of the CoordSystemAdaptor. Ids with no entry are returned
# unchanged.
sub get_seq_region_id_internal {
  my ( $self, $sr_id ) = @_;

  my $cs_adaptor = $self->db()->get_CoordSystemAdaptor();

  if ( exists $cs_adaptor->{'_external_seq_region_mapping'}->{$sr_id} ) {
    return $cs_adaptor->{'_external_seq_region_mapping'}->{$sr_id};
  }

  return $sr_id;
}
|
|
798
|
|
799 #
|
|
800 # Helper function containing some common feature storing functionality
|
|
801 #
|
|
802 # Given a Feature this will return a copy (or the same feature if no changes
|
|
803 # to the feature are needed) of the feature which is relative to the start
|
|
804 # of the seq_region it is on. The seq_region_id of the seq_region it is on
|
|
805 # is also returned.
|
|
806 #
|
|
807 # This method will also ensure that the database knows which coordinate
|
|
808 # systems that this feature is stored in.
|
|
809 #
|
|
810
|
|
# Common preparation shared by feature store methods.
#
# Takes a feature and returns a two element list: the feature expressed
# relative to the start of its whole seq_region (the same object when no
# transfer was needed), and the seq_region_id of that seq_region.
#
# Also registers the feature's table and length for the slice's
# coordinate system with the MetaCoordContainer.
#
# Throws if the argument is not a Bio::EnsEMBL::Feature, if the feature
# is not attached to a Slice/LRGSlice, if start/end/strand are invalid,
# if the feature cannot be transferred to the whole seq_region, or if
# the seq_region is not known to this database.
sub _pre_store {
  my $self    = shift;
  my $feature = shift;

  if ( !ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature') ) {
    throw('Expected Feature argument.');
  }

  my $slice = $feature->slice();

  # Validate the slice first: _check_start_end_strand() may call
  # is_circular() on it, which would fail with an unhelpful error when the
  # feature has no slice.
  if ( !ref($slice)
    || !(    $slice->isa('Bio::EnsEMBL::Slice')
          or $slice->isa('Bio::EnsEMBL::LRGSlice') ) )
  {
    throw('Feature must be attached to Slice to be stored.');
  }

  $self->_check_start_end_strand( $feature->start(), $feature->end(),
                                  $feature->strand(), $slice );

  my $db            = $self->db();
  my $slice_adaptor = $db->get_SliceAdaptor();

  # make sure feature coords are relative to start of entire seq_region
  if ( $slice->start != 1 || $slice->strand != 1 ) {
    # move feature onto a slice of the entire seq_region
    $slice = $slice_adaptor->fetch_by_region(
      $slice->coord_system->name(),
      $slice->seq_region_name(),
      undef,    # start
      undef,    # end
      undef,    # strand
      $slice->coord_system->version() );

    $feature = $feature->transfer($slice);

    if ( !$feature ) {
      throw( 'Could not transfer Feature to slice of '
           . 'entire seq_region prior to storing' );
    }
  }

  # Ensure this type of feature is known to be stored in this coord system.
  my $cs = $slice->coord_system;

  my ($tab) = $self->_tables();
  my $tabname = $tab->[0];

  my $mcc = $db->get_MetaCoordContainer();

  $mcc->add_feature_type( $cs, $tabname, $feature->length );

  my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);

  if ( !$seq_region_id ) {
    throw('Feature is associated with seq_region which is not in this DB.');
  }

  return ( $feature, $seq_region_id );
}
|
|
869
|
|
870
|
|
871 # The same function as _pre_store
|
|
872 # This one is used to store user uploaded features in XXX_userdata db
|
|
873
|
|
# Variant of _pre_store() that takes the SliceAdaptor from the feature's
# own slice rather than from this adaptor's database.
#
# Takes a feature and returns a two element list: the feature expressed
# relative to the start of its whole seq_region (the same object when no
# transfer was needed), and the seq_region_id reported by the slice's
# adaptor.
#
# Also registers the feature's table and length for the slice's
# coordinate system with this adaptor's MetaCoordContainer.
#
# Throws if the argument is not a Bio::EnsEMBL::Feature, if the feature
# is not attached to a Slice/LRGSlice, if start/end/strand are invalid,
# if the feature cannot be transferred to the whole seq_region, or if
# no seq_region_id is found.
sub _pre_store_userdata {
  my $self    = shift;
  my $feature = shift;

  if ( !ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature') ) {
    throw('Expected Feature argument.');
  }

  my $slice = $feature->slice();

  # Validate the slice before it is dereferenced, so that a feature with
  # no slice produces the intended exception rather than a method call on
  # an undefined value.
  if ( !ref($slice)
    || !(    $slice->isa('Bio::EnsEMBL::Slice')
          or $slice->isa('Bio::EnsEMBL::LRGSlice') ) )
  {
    throw('Feature must be attached to Slice to be stored.');
  }

  my $slice_adaptor = $slice->adaptor;

  $self->_check_start_end_strand( $feature->start(), $feature->end(),
                                  $feature->strand(), $slice );

  # make sure feature coords are relative to start of entire seq_region
  if ( $slice->start != 1 || $slice->strand != 1 ) {
    # move feature onto a slice of the entire seq_region
    $slice = $slice_adaptor->fetch_by_region(
      $slice->coord_system->name(),
      $slice->seq_region_name(),
      undef,    # start
      undef,    # end
      undef,    # strand
      $slice->coord_system->version() );

    $feature = $feature->transfer($slice);

    if ( !$feature ) {
      throw( 'Could not transfer Feature to slice of '
           . 'entire seq_region prior to storing' );
    }
  }

  # Ensure this type of feature is known to be stored in this coord system.
  my $cs = $slice->coord_system;

  my ($tab) = $self->_tables();
  my $tabname = $tab->[0];

  my $db  = $self->db;
  my $mcc = $db->get_MetaCoordContainer();

  $mcc->add_feature_type( $cs, $tabname, $feature->length );

  my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);

  if ( !$seq_region_id ) {
    throw('Feature is associated with seq_region which is not in this DB.');
  }

  return ( $feature, $seq_region_id );
}
|
|
931
|
|
932
|
|
933 #
|
|
934 # helper function used to validate start/end/strand and
|
|
935 # hstart/hend/hstrand etc.
|
|
936 #
|
|
# Validates feature coordinates. Throws when start or end is not an
# integer, when strand is not one of -1, 0 or 1, or when end is less than
# start on a slice that is not circular. Returns 1 when everything is
# valid.
sub _check_start_end_strand {
  my ( $self, $start, $end, $strand, $slice ) = @_;

  throw("Invalid Feature start [$start]. Must be integer.")
    if int($start) != $start;

  throw("Invalid Feature end [$end]. Must be integer.")
    if int($end) != $end;

  my $strand_ok = int($strand) == $strand && $strand >= -1 && $strand <= 1;
  throw("Invalid Feature strand [$strand]. Must be -1, 0 or 1.")
    if !$strand_ok;

  # The slice is only consulted when end precedes start.
  if ( $end < $start && !$slice->is_circular() ) {
    throw( "Invalid Feature start/end [$start/$end]. Start must be less "
         . "than or equal to end." );
  }

  return 1;
}
|
|
963
|
|
964
|
|
#
# Places a list of features onto the requested slice.
#
# Only the first feature is inspected to decide whether work is needed: if
# it is not a Bio::EnsEMBL::Feature, or is already attached to $slice, the
# list is handed back untouched.  Otherwise each feature is converted
# (through $mapper when its coordinate system differs from the slice's),
# shifted into slice-relative coordinates and re-attached to $slice.
# Features that map to a gap, or that land outside the slice or on another
# seq_region, are left out of the result.
#
# Returns a listref of features.
#
sub _remap {
  my ( $self, $features, $mapper, $slice ) = @_;

  # check whether any remapping is actually needed
  if (@$features) {
    my $first = $features->[0];
    if (!$first->isa('Bio::EnsEMBL::Feature') || $first->slice == $slice) {
      return $features;
    }
  }

  # remapping has not been done; convert to slice coordinates ourselves

  my @remapped;

  my $slice_start  = $slice->start();
  my $slice_end    = $slice->end();
  my $slice_strand = $slice->strand();
  my $slice_cs     = $slice->coord_system();

  # the seq_region_id is looked up here but not consulted below
  my $slice_seq_region_id = $slice->get_seq_region_id();
  my $slice_seq_region    = $slice->seq_region_name();

  foreach my $feat (@$features) {
    my $feat_slice = $feat->slice();
    throw("Feature does not have attached slice.\n") unless $feat_slice;

    my $feat_seq_region    = $feat_slice->seq_region_name();
    my $feat_seq_region_id = $feat_slice->get_seq_region_id();
    my $feat_cs            = $feat_slice->coord_system();

    my ($seq_region, $start, $end, $strand);

    if ($slice_cs->equals($feat_cs)) {
      # same coordinate system: the feature's own coordinates are used
      $start      = $feat->start();
      $end        = $feat->end();
      $strand     = $feat->strand();
      $seq_region = $feat->slice->seq_region_name();
    }
    else {
      # feature lives in a different coordinate system, mapping required
      ($seq_region, $start, $end, $strand) =
        $mapper->fastmap($feat_seq_region_id, $feat->start(), $feat->end(),
                         $feat->strand(), $feat_cs);

      # an undefined start means the feature fell into a gap
      next unless defined $start;
    }

    # skip anything that maps outside the desired area
    my $outside = ($start > $slice_end)
               || ($end < $slice_start)
               || ($slice_seq_region ne $seq_region);
    next if $outside;

    # shift the feature start, end and strand in one call
    if ($slice_strand == -1) {
      $feat->move( $slice_end - $end + 1, $slice_end - $start + 1, $strand * -1 );
    }
    else {
      $feat->move( $start - $slice_start + 1, $end - $slice_start + 1, $strand );
    }

    $feat->slice($slice);

    push @remapped, $feat;
  }

  return \@remapped;
}
|
|
1037
|
|
1038
|
|
#
# Extends an SQL constraint so that it also restricts on the analysis with
# the given logic name.
#
#   - no logic name given: the constraint is returned unchanged
#   - the primary table has no analysis_id among the adaptor's columns:
#     a warning is issued and the constraint is returned unchanged
#   - no analysis with that logic name exists: undef is returned
#   - otherwise: the constraint with an analysis_id condition appended
#
sub _logic_name_to_constraint {
  my ($self, $constraint, $logic_name) = @_;

  return $constraint unless $logic_name;

  # the first table returned by _tables is the primary one; element 1 of
  # its entry is the synonym used to qualify column names
  my ($primary_table) = $self->_tables();
  my $primary_alias   = $primary_table->[1];

  # look for "<primary synonym>.analysis_id" among the columns
  my $has_analysis_column = 0;
  COLUMN:
  foreach my $column ($self->_columns) {
    my ($alias, $name) = split(/\./, $column);
    if ($alias eq $primary_alias && $name eq 'analysis_id') {
      $has_analysis_column = 1;
      last COLUMN;
    }
  }

  unless ($has_analysis_column) {
    warning("This feature is not associated with an analysis.\n" .
            "Ignoring logic_name argument = [$logic_name].\n");
    return $constraint;
  }

  my $analysis_adaptor = $self->db->get_AnalysisAdaptor();
  my $analysis         = $analysis_adaptor->fetch_by_logic_name($logic_name);

  # unknown logic name
  return undef if !defined($analysis);

  my $analysis_id = $analysis->dbID();

  if ($constraint) {
    $constraint .= ' AND';
  }
  $constraint .= " ${primary_alias}.analysis_id = $analysis_id";

  return $constraint;
}
|
|
1085
|
|
1086
|
|
=head2 store

  Arg [1]    : list of Bio::EnsEMBL::SeqFeature
  Example    : $adaptor->store(@feats);
  Description: ABSTRACT  Subclasses are responsible for implementing this
               method.  It should take a list of features and store them in
               the database.
  Returntype : none
  Exceptions : thrown if the method is not implemented by the subclass
  Caller     : general
  Status     : At Risk
             : throws if called.

=cut

sub store {
  # List-context unpacking: a plain "my $self = @_" would store the number
  # of arguments rather than the invocant.
  my ($self) = @_;

  throw("Abstract method store not defined by implementing subclass\n");
}
|
|
1107
|
|
1108
|
|
=head2 remove

  Arg [1]    : A feature $feature
  Example    : $feature_adaptor->remove($feature);
  Description: This removes a feature from the database.  The table the
               feature is removed from is the first table returned by
               the _tables method, and the primary key of that table is
               assumed to be the table name followed by '_id'.  The row
               whose primary key equals $feature->dbID is deleted, after
               which the dbID and adaptor of the feature are unset.
  Returntype : none
  Exceptions : thrown if $feature is not a Bio::EnsEMBL::Feature, or if
               the feature is not stored in this database
  Caller     : general
  Status     : Stable

=cut


sub remove {
  my ($self, $feature) = @_;

  my $is_feature = $feature
                && ref($feature)
                && $feature->isa('Bio::EnsEMBL::Feature');
  throw('Feature argument is required') unless $is_feature;

  throw("This feature is not stored in this database")
    unless $feature->is_stored($self->db);

  # first element of the first _tables entry is the table name
  my ($primary_table) = $self->_tables;
  my $table           = $primary_table->[0];

  my $sth = $self->prepare("DELETE FROM $table WHERE ${table}_id = ?");
  $sth->bind_param(1, $feature->dbID, SQL_INTEGER);
  $sth->execute();

  # the feature no longer exists in the database: unset its dbID and adaptor
  $feature->dbID(undef);
  $feature->adaptor(undef);

  return;
}
|
|
1153
|
|
1154
|
|
=head2 remove_by_Slice

  Arg [1]    : Bio::EnsEMBL::Slice $slice
  Example    : $feature_adaptor->remove_by_Slice($slice);
  Description: This removes features from the database which lie on a region
               represented by the passed in slice.  Only features which are
               fully contained by the slice are deleted; features which
               overlap the edge of the slice are not removed.
               The table the features are removed from is the first table
               returned by the _tables method.
  Returntype : none
  Exceptions : thrown if no slice is supplied
  Caller     : general
  Status     : Stable

=cut

sub remove_by_Slice {
  my ($self, $slice) = @_;

  my $is_slice = $slice
              && ref($slice)
              && ( $slice->isa('Bio::EnsEMBL::Slice')
                   || $slice->isa('Bio::EnsEMBL::LRGSlice') );
  throw("Slice argument is required") unless $is_slice;

  # first element of the first _tables entry is the table name
  my ($primary_table) = $self->_tables;
  my $table_name      = $primary_table->[0];

  # values for the three placeholders, in statement order
  my @bind_values = (
    $self->db->get_SliceAdaptor->get_seq_region_id($slice),
    $slice->start(),
    $slice->end(),
  );

  #
  # Delete only features fully on the slice, not overlapping ones
  #
  my $sql = join ' ',
    "DELETE FROM $table_name",
    "WHERE seq_region_id = ?",
    "AND seq_region_start >= ?",
    "AND seq_region_end <= ?";

  my $sth = $self->prepare($sql);

  my $position = 0;
  foreach my $value (@bind_values) {
    $sth->bind_param(++$position, $value, SQL_INTEGER);
  }

  $sth->execute();
  $sth->finish();
}
|
|
1200
|
|
1201
|
|
#
# Internal accessor for an overriding maximum feature length.  The maximum
# is normally retrieved from the meta_coord table; setting a value here
# allows significant optimisations when it is known that the requested
# features will not be over a certain size.
#
# Called with an argument it stores that value (undef included); in both
# cases the currently stored value is returned.
#
sub _max_feature_length {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_max_feature_length'} = $new_value[0];
  }

  return $self->{'_max_feature_length'};
}
|
|
1213
|
|
1214
|
|
#
# Lists the distinct seq_region_ids referenced by rows of the given feature
# table, restricted to coordinate systems of this adaptor's species.
# Useful e.g. for finding out which seq_regions have genes.
#
#   $table : name of the feature table to inspect
#
# Returns a listref of seq_region_ids.
#
sub _list_seq_region_ids {
  my ($self, $table) = @_;

  my $sql = qq(
  SELECT DISTINCT
            sr.seq_region_id
  FROM      seq_region sr,
            $table a,
            coord_system cs
  WHERE     sr.seq_region_id = a.seq_region_id
    AND     sr.coord_system_id = cs.coord_system_id
    AND     cs.species_id = ?);

  my $sth = $self->prepare($sql);
  $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
  $sth->execute();

  # each fetched row holds a single column: the seq_region_id
  my @seq_region_ids;
  while ( my @row = $sth->fetchrow ) {
    push @seq_region_ids, $row[0];
  }

  $sth->finish;

  return \@seq_region_ids;
}
|
|
1249
|
|
1250
|
|
1251 =head1 DEPRECATED METHODS
|
|
1252
|
|
1253 =cut
|
|
1254
|
|
1255
|
|
=head2 fetch_all_by_RawContig_constraint

  Description: DEPRECATED use fetch_all_by_Slice_constraint instead

=cut

sub fetch_all_by_RawContig_constraint {
  my $self = shift;
  deprecate('Use fetch_all_by_Slice_constraint() instead.');

  # Delegate to the replacement named in the deprecation message; method
  # names are case sensitive, so the capital "S" matters.
  return $self->fetch_all_by_Slice_constraint(@_);
}
|
|
1267
|
|
=head2 fetch_all_by_RawContig

  Description: DEPRECATED use fetch_all_by_Slice instead

=cut

sub fetch_all_by_RawContig {
  my ($self, @slice_args) = @_;

  # warn about the old name, then hand all arguments to the replacement
  deprecate('Use fetch_all_by_Slice() instead.');

  return $self->fetch_all_by_Slice(@slice_args);
}
|
|
1279
|
|
=head2 fetch_all_by_RawContig_and_score

  Description: DEPRECATED use fetch_all_by_Slice_and_score instead

=cut

sub fetch_all_by_RawContig_and_score {
  my ($self, @slice_args) = @_;

  # warn about the old name, then hand all arguments to the replacement
  deprecate('Use fetch_all_by_Slice_and_score() instead.');

  return $self->fetch_all_by_Slice_and_score(@slice_args);
}
|
|
1291
|
|
=head2 remove_by_RawContig

  Description: DEPRECATED use remove_by_Slice instead

=cut

sub remove_by_RawContig {
  my ($self, @slice_args) = @_;

  # warn about the old name, then hand all arguments to the replacement
  deprecate("Use remove_by_Slice instead");

  return $self->remove_by_Slice(@slice_args);
}
|
|
1303
|
|
1304
|
|
#
# Deletes every row of this adaptor's primary table (the first table
# returned by _tables) whose analysis_id equals the given id.
#
#   Arg [1]    : int $analysis_id
#   Exceptions : thrown if $analysis_id is not a true value
#
sub remove_by_analysis_id {
  my ($self, $analysis_id) = @_;

  $analysis_id or throw("Must call with analysis id");

  my @tabs = $self->_tables;
  my ($tablename) = @{$tabs[0]};

  # Bind the id as an integer placeholder instead of interpolating it into
  # the SQL text, in the same way remove() binds the feature dbID.
  my $sth = $self->prepare("DELETE FROM $tablename WHERE analysis_id = ?");
  $sth->bind_param(1, $analysis_id, SQL_INTEGER);
  $sth->execute();
  $sth->finish();
}
|
|
1320
|
|
#
# Deletes the rows of this adaptor's primary table (the first table
# returned by _tables) whose primary key, assumed to be the table name
# followed by '_id', is one of the given ids.
#
#   Arg [1]    : listref of ids
#   Exceptions : thrown if the argument is undefined or an empty list
#
sub remove_by_feature_id {
  my ($self, $features_list) = @_;

  # Check for undef before dereferencing so the caller gets this exception
  # rather than a bare "undefined value as an ARRAY reference" error.
  if (!defined($features_list) || !@{$features_list}) {
    throw("Must call with a list of feature ids");
  }
  my @feature_ids = @{$features_list};

  my @tabs = $self->_tables;
  my ($tablename) = @{$tabs[0]};

  # One placeholder per id: the ids are bound as integers instead of being
  # interpolated into the SQL text.
  my $placeholders = join ', ', ('?') x scalar(@feature_ids);
  my $sth = $self->prepare(
    "DELETE FROM $tablename WHERE ${tablename}_id IN ($placeholders)");

  my $position = 0;
  foreach my $feature_id (@feature_ids) {
    $sth->bind_param(++$position, $feature_id, SQL_INTEGER);
  }

  $sth->execute();
  $sth->finish();
}
|
|
1336
|
|
1337
|
|
1338 1;
|