comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor - An Abstract Base class for all
24 FeatureAdaptors
25
26 =head1 SYNOPSIS
27
28 Abstract class - should not be instantiated. Implementation of
29 abstract methods must be performed by subclasses.
30
31 =head1 DESCRIPTION
32
33 This is a base adaptor for feature adaptors. This base class is simply a way
34 of eliminating code duplication through the implementation of methods
35 common to all feature adaptors.
36
37 =head1 METHODS
38
39 =cut
40
41 package Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
42 use vars qw(@ISA @EXPORT);
43 use strict;
44
45 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
46 use Bio::EnsEMBL::Utils::Cache;
47 use Bio::EnsEMBL::Utils::Exception qw(warning throw deprecate stack_trace_dump);
48 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
49 use Bio::EnsEMBL::Utils::Iterator;
50
51 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
52
53 @EXPORT = (@{$DBI::EXPORT_TAGS{'sql_types'}});
54
55 our $SLICE_FEATURE_CACHE_SIZE = 4;
56 our $MAX_SPLIT_QUERY_SEQ_REGIONS = 3;
57 our $SILENCE_CACHE_WARNINGS = 0;
58
59 =head2 new
60
61 Arg [1] : list of args @args
62 Superclass constructor arguments
63 Example : none
64 Description: Constructor which warns if caching has been switched off
65 Returntype : Bio::EnsEMBL::BaseFeatureAdaptor
66 Exceptions : none
67 Caller : implementing subclass constructors
68 Status : Stable
69
70 =cut
71
72 sub new {
73 my ($class, @args) = @_;
74 my $self = $class->SUPER::new(@args);
75 if ( defined $self->db->no_cache() && $self->db->no_cache() && ! $SILENCE_CACHE_WARNINGS) {
76 warning( "You are using the API without caching most recent features. "
77 . "Performance might be affected." );
78 }
79 return $self;
80 }
81
82 =head2 start_equals_end
83
84 Arg [1] : (optional) boolean $newval
85 Example : $bfa->start_equals_end(1);
86 Description: Getter/Setter for the start_equals_end flag. If set
87 to true sub _slice_fetch will use a simplified sql to retrieve 1bp slices.
88 Returntype : boolean
89 Exceptions : none
90 Caller : EnsemblGenomes variation DB build
91 Status : Stable
92
93 =cut
94
95 sub start_equals_end {
96 my ( $self, $value ) = @_;
97
98 if ( defined($value) ) {
99 $self->{'start_equals_end'} = $value;
100 }
101 return $self->{'start_equals_end'};
102 }
103
104
105 =head2 clear_cache
106
107 Args : None
108 Example : my $sa =
109 $registry->get_adaptor( 'Mus musculus', 'Core',
110 'Slice' );
111 my $ga =
112 $registry->get_adaptor( 'Mus musculus', 'Core',
113 'Gene' );
114
115 my $slice =
116 $sa->fetch_by_region( 'Chromosome', '1', 1e8,
117 1.05e8 );
118
119 my $genes = $ga->fetch_all_by_Slice($slice);
120
121 $ga->clear_cache();
122
123 Description : Empties the feature cache associated with this
124 feature adaptor.
125 Return type : None
126 Exceptions : None
127 Caller : General
128 Status : At risk (under development)
129
130 =cut
131
132 sub clear_cache {
133 my ($self) = @_;
134 %{$self->{_slice_feature_cache}} = ();
135 return;
136 }
137
138 =head2 _slice_feature_cache
139
140 Description : Returns the feature cache if we are allowed to cache and
141 will build it if we need to. We will never return a reference
142 to the hash to avoid unintentional auto-vivfying caching
143 Returntype : Bio::EnsEMBL::Utils::Cache
144 Exceptions : None
145 Caller : Internal
146
147 =cut
148
149 sub _slice_feature_cache {
150 my ($self) = @_;
151 return if $self->db()->no_cache();
152 if(! exists $self->{_slice_feature_cache}) {
153 tie my %cache, 'Bio::EnsEMBL::Utils::Cache', $SLICE_FEATURE_CACHE_SIZE;
154 $self->{_slice_feature_cache} = \%cache;
155 }
156 return $self->{_slice_feature_cache};
157 }
158
159 =head2 fetch_all_by_Slice
160
161 Arg [1] : Bio::EnsEMBL::Slice $slice
162 the slice from which to obtain features
163 Arg [2] : (optional) string $logic_name
164 the logic name of the type of features to obtain
165 Example : $fts = $a->fetch_all_by_Slice($slice, 'Swall');
166 Description: Returns a listref of features created from the database
167 which are on the Slice defined by $slice. If $logic_name is
168 defined only features with an analysis of type $logic_name
169 will be returned.
170 NOTE: only features that are entirely on the slice's seq_region
171 will be returned (i.e. if they hang off the start/end of a
172 seq_region they will be discarded). Features can extend over the
173 slice boundaries though (in cases where you have a slice that
174 doesn't span the whole seq_region).
175 Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
176 Exceptions : none
177 Caller : Bio::EnsEMBL::Slice
178 Status : Stable
179
180 =cut
181
182 sub fetch_all_by_Slice {
183 my ($self, $slice, $logic_name) = @_;
184 #fetch by constraint with empty constraint
185 return $self->fetch_all_by_Slice_constraint($slice, '', $logic_name);
186 }
187
188
189
190 =head2 fetch_Iterator_by_Slice_method
191
192 Arg [1] : CODE ref of Slice fetch method
193 Arg [2] : ARRAY ref of parameters for Slice fetch method
194 Arg [3] : Optional int: Slice index in parameters array
195 Arg [4] : Optional int: Slice chunk size. Default=500000
196 Example : my $slice_iter = $feature_adaptor->fetch_Iterator_by_Slice_method
197 ($feature_adaptor->can('fetch_all_by_Slice_Arrays'),
198 \@fetch_method_params,
199 0,#Slice idx
200 );
201
202 while(my $feature = $slice_iter->next && defined $feature){
203 #Do something here
204 }
205
206 Description: Creates an Iterator which chunks the query Slice to facilitate
207 large Slice queries which would have previously run out of memory
208 Returntype : Bio::EnsEMBL::Utils::Iterator
209 Exceptions : Throws if mandatory params not valid
210 Caller : general
211 Status : at risk
212
213 =cut
214
215 #Does not support Collections. See Funcgen ResultFeatureAdaptor::fetch_collection_Iterator_by_Slice_method
216
217 sub fetch_Iterator_by_Slice_method{
218 my ($self, $slice_method_ref, $params_ref, $slice_idx, $chunk_size) = @_;
219
220 if(! ( defined $slice_method_ref &&
221 ref($slice_method_ref) eq 'CODE')
222 ){
223 throw('Must pass a valid Slice fetch method CODE ref');
224 }
225
226 if (! ($params_ref &&
227 ref($params_ref) eq 'ARRAY')) {
228 #Don't need to check size here so long as we have valid Slice
229 throw('You must pass a method params ARRAYREF');
230 }
231
232 $slice_idx = 0 if(! defined $slice_idx);
233 my $slice = $params_ref->[$slice_idx];
234 $chunk_size ||= 1000000;
235
236 my @feat_cache;
237 my $finished = 0;
238 my $start = 1; #local coord for sub slice
239 my $end = $slice->length;
240 my $num_overlaps = 0;
241
242 my $coderef =
243 sub {
244
245 while (scalar(@feat_cache) == 0 &&
246 ! $finished) {
247
248 my $new_end = ($start + $chunk_size - 1);
249
250 if ($new_end >= $end) {
251 # this is our last chunk
252 $new_end = $end;
253 $finished = 1;
254 }
255
256 #Chunk by sub slicing
257 my $sub_slice = $slice->sub_Slice($start, $new_end);
258 $params_ref->[$slice_idx] = $sub_slice;
259 @feat_cache = @{ $slice_method_ref->($self, @$params_ref)};
260
261 #Remove & count overlapping features
262 splice(@feat_cache, 0, $num_overlaps) if($num_overlaps);
263 my $i;
264
265 if (scalar(@feat_cache) > 0) {
266
267 my $feat_end = $feat_cache[$#feat_cache]->seq_region_end;
268 my $slice_end = $sub_slice->end;
269 $num_overlaps = 0;
270
271 for ($i = $#feat_cache; $i >=0; $i--) {
272
273 if ($feat_end > $slice_end) {
274 $feat_end = $feat_cache[$i]->end;
275 $num_overlaps ++;
276 } else {
277 last;
278 }
279
280 }
281 }
282
283 # update the start coordinate
284 $start = $new_end + 1;
285 }
286
287 #this maybe returning from an undef cache
288 #Need to sub this out even more?
289 return shift @feat_cache;
290 };
291
292 return Bio::EnsEMBL::Utils::Iterator->new($coderef);
293 }
294
295
296 =head2 fetch_Iterator_by_Slice
297
298 Arg [1] : Bio::EnsEMBL::Slice
299 Arg [2] : Optional string: logic name of analysis
300 Arg [3] : Optional int: Chunk size to iterate over. Default is 500000
301 Example : my $slice_iter = $feature_adaptor->fetch_Iterator_by_Slice($slice);
302
303 while(my $feature = $slice_iter->next && defined $feature){
304 #Do something here
305 }
306
307 Description: Creates an Iterator which chunks the query Slice to facilitate
308 large Slice queries which would have previously run out of memory
309 Returntype : Bio::EnsEMBL::Utils::Iterator
310 Exceptions : None
311 Caller : general
312 Status : at risk
313
314 =cut
315
316 sub fetch_Iterator_by_Slice{
317 my ($self, $slice, $logic_name, $chunk_size) = @_;
318
319 my $method_ref = $self->can('fetch_all_by_Slice');
320
321 return $self->fetch_Iterator_by_Slice_method($method_ref, [$slice, $logic_name], 0, $chunk_size);
322 }
323
324
325 =head2 fetch_all_by_Slice_and_score
326
327 Arg [1] : Bio::EnsEMBL::Slice $slice
328 the slice from which to obtain features
329 Arg [2] : (optional) float $score
330 lower bound of the the score of the features retrieved
331 Arg [3] : (optional) string $logic_name
332 the logic name of the type of features to obtain
333 Example : $fts = $a->fetch_all_by_Slice_and_score($slice,90,'Swall');
334 Description: Returns a list of features created from the database which are
335 are on the Slice defined by $slice and which have a score
336 greater than $score. If $logic_name is defined,
337 only features with an analysis of type $logic_name will be
338 returned.
339 Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
340 Exceptions : none
341 Caller : Bio::EnsEMBL::Slice
342 Status : Stable
343
344 =cut
345
346 sub fetch_all_by_Slice_and_score {
347 my ( $self, $slice, $score, $logic_name ) = @_;
348
349 my $constraint;
350 if ( defined($score) ) {
351 # Get the synonym of the primary_table
352 my @tabs = $self->_tables();
353 my $syn = $tabs[0]->[1];
354
355 $constraint = sprintf( "%s.score > %s",
356 $syn,
357 $self->dbc()->db_handle()->quote( $score, SQL_FLOAT ) );
358 }
359
360 return
361 $self->fetch_all_by_Slice_constraint( $slice, $constraint,
362 $logic_name );
363 }
364
365
366 =head2 fetch_all_by_Slice_constraint
367
368 Arg [1] : Bio::EnsEMBL::Slice $slice
369 the slice from which to obtain features
370 Arg [2] : (optional) string $constraint
371 An SQL query constraint (i.e. part of the WHERE clause)
372 Arg [3] : (optional) string $logic_name
373 the logic name of the type of features to obtain
374 Example : $fs = $a->fetch_all_by_Slice_constraint($slc, 'perc_ident > 5');
375 Description: Returns a listref of features created from the database which
376 are on the Slice defined by $slice and fulfill the SQL
377 constraint defined by $constraint. If logic name is defined,
378 only features with an analysis of type $logic_name will be
379 returned.
380 Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
381 Exceptions : thrown if $slice is not defined
382 Caller : Bio::EnsEMBL::Slice
383 Status : Stable
384
385 =cut
386
387 sub fetch_all_by_Slice_constraint {
388 my ( $self, $slice, $constraint, $logic_name ) = @_;
389
390
391 my @result = ();
392
393 if ( !ref($slice)
394 || !( $slice->isa('Bio::EnsEMBL::Slice')
395 or $slice->isa('Bio::EnsEMBL::LRGSlice') ) )
396 {
397 throw("Bio::EnsEMBL::Slice argument expected.");
398 }
399
400 $constraint ||= '';
401 $constraint =
402 $self->_logic_name_to_constraint( $constraint, $logic_name );
403
404 # If the logic name was invalid, undef was returned
405 if ( !defined($constraint) ) { return [] }
406
407 my $key;
408 my $cache;
409
410 # Will only use feature_cache if hasn't been no_cache attribute set
411 if (
412 !( defined( $self->db()->no_cache() ) && $self->db()->no_cache() ) )
413 {
414
415 #strain test and add to constraint if so to stop caching.
416 if ( $slice->isa('Bio::EnsEMBL::StrainSlice') ) {
417 my $string =
418 $self->dbc()->db_handle()->quote( $slice->strain_name() );
419
420 if ( $constraint ne "" ) {
421 $constraint .= " AND $string = $string ";
422 } else {
423 $constraint .= " $string = $string ";
424 }
425 }
426
427 # Check the cache and return the cached results if we have already
428 # done this query. The cache key is the made up from the slice
429 # name, the constraint, and the bound parameters (if there are any).
430 $key = uc( join( ':', $slice->name(), $constraint ) );
431
432 my $bind_params = $self->bind_param_generic_fetch();
433
434 if ( defined($bind_params) ) {
435 $key .= ':'
436 . join( ':', map { $_->[0] . '/' . $_->[1] } @{$bind_params} );
437 }
438
439 $cache = $self->_slice_feature_cache();
440 if ( exists( $cache->{$key} ) ) {
441 # Clear the bound parameters and return the cached data.
442 $self->{'_bind_param_generic_fetch'} = ();
443 return $cache->{$key};
444 }
445 } ## end if ( !( defined( $self...)))
446
447 my $sa = $slice->adaptor();
448
449 # Hap/PAR support: retrieve normalized 'non-symlinked' slices.
450 my @proj = @{ $sa->fetch_normalized_slice_projection($slice) };
451
452
453
454 if ( !@proj ) {
455 throw( 'Could not retrieve normalized Slices. '
456 . 'Database contains incorrect assembly_exception information.'
457 );
458 }
459
460 # Want to get features on the FULL original slice as well as any
461 # symlinked slices.
462
463 # Filter out partial slices from projection that are on same
464 # seq_region as original slice.
465
466 my $sr_id = $slice->get_seq_region_id();
467
468 @proj = grep { $_->to_Slice->get_seq_region_id() != $sr_id } @proj;
469
470 my $segment = bless( [ 1, $slice->length(), $slice ],
471 'Bio::EnsEMBL::ProjectionSegment' );
472 push( @proj, $segment );
473
474 # construct list of Hap/PAR boundaries for entire seq region
475 my @bounds;
476
477 my $ent_slice = $sa->fetch_by_seq_region_id($sr_id);
478 if ( $slice->strand() == -1 ) {
479 $ent_slice = $ent_slice->invert();
480 }
481
482 my @ent_proj =
483 @{ $sa->fetch_normalized_slice_projection($ent_slice) };
484 shift(@ent_proj); # skip first
485
486 @bounds = map { $_->from_start() - $slice->start() + 1 } @ent_proj;
487
488
489 # fetch features for the primary slice AND all symlinked slices
490 foreach my $seg (@proj) {
491
492
493 my $offset = $seg->from_start();
494 my $seg_slice = $seg->to_Slice();
495 my $features =
496 $self->_slice_fetch( $seg_slice, $constraint );
497
498 # If this was a symlinked slice offset the feature coordinates as
499 # needed.
500 if ( $seg_slice->name() ne $slice->name() ) {
501
502 FEATURE:
503 foreach my $f ( @{$features} ) {
504 if ( $offset != 1 ) {
505 $f->{'start'} += $offset - 1;
506 $f->{'end'} += $offset - 1;
507 }
508
509 # discard boundary crossing features from symlinked regions
510 foreach my $bound (@bounds) {
511 if ( $f->{'start'} < $bound && $f->{'end'} >= $bound ) {
512 next FEATURE;
513 }
514 }
515
516 $f->{'slice'} = $slice;
517 push( @result, $f );
518 }
519 } else {
520 push( @result, @{$features} );
521 }
522 } ## end foreach my $seg (@proj)
523
524 # Will only use feature_cache when set attribute no_cache in DBAdaptor
525 if ( defined($key) ) {
526 $cache->{$key} = \@result;
527 }
528
529 return \@result;
530 } ## end sub fetch_all_by_Slice_constraint
531
532
533 =head2 fetch_all_by_logic_name
534
535 Arg [3] : string $logic_name
536 the logic name of the type of features to obtain
537 Example : $fs = $a->fetch_all_by_logic_name('foobar');
538 Description: Returns a listref of features created from the database.
539 only features with an analysis of type $logic_name will
540 be returned. If the logic name is invalid (not in the
541 analysis table), a reference to an empty list will be
542 returned.
543 Returntype : listref of Bio::EnsEMBL::SeqFeatures
544 Exceptions : thrown if no $logic_name
545 Caller : General
546 Status : Stable
547
548 =cut
549
550 sub fetch_all_by_logic_name {
551 my ( $self, $logic_name ) = @_;
552
553 if ( !defined($logic_name) ) {
554 throw("Need a logic_name");
555 }
556
557 my $constraint = $self->_logic_name_to_constraint( '', $logic_name );
558
559 if ( !defined($constraint) ) {
560 warning("Invalid logic name: $logic_name");
561 return [];
562 }
563
564 return $self->generic_fetch($constraint);
565 }
566
567 # Method that creates an object. Called by the _objs_from_sth() method
568 # in the sub-classes (the various feature adaptors). Overridden by the
569 # feature collection classes.
570
571 sub _create_feature {
572 my ( $self, $feature_type, $args ) = @_;
573 return $feature_type->new( %{$args} );
574 }
575
576 # This is the same as the above, but calls the new_fast() constructor of
577 # the feature type.
578
579 sub _create_feature_fast {
580 my ( $self, $feature_type, $args ) = @_;
581 return $feature_type->new_fast($args);
582 }
583
584 #
585 # helper function used by fetch_all_by_Slice_constraint method
586 #
587 sub _slice_fetch {
588 my ( $self, $slice, $orig_constraint ) = @_;
589
590 my $slice_start = $slice->start();
591 my $slice_end = $slice->end();
592 my $slice_strand = $slice->strand();
593 my $slice_cs = $slice->coord_system();
594 my $slice_seq_region = $slice->seq_region_name();
595 my $slice_seq_region_id = $slice->get_seq_region_id();
596
597 #get the synonym and name of the primary_table
598 my @tabs = $self->_tables;
599 my ( $tab_name, $tab_syn ) = @{ $tabs[0] };
600
601 #find out what coordinate systems the features are in
602 my $mcc = $self->db->get_MetaCoordContainer();
603 my @feat_css = ();
604
605 my $mca = $self->db->get_MetaContainer();
606 my $value_list = $mca->list_value_by_key( $tab_name . "build.level" );
607 if ( @$value_list and $slice->is_toplevel() ) {
608 push @feat_css, $slice_cs;
609 } else {
610 @feat_css =
611 @{ $mcc->fetch_all_CoordSystems_by_feature_type($tab_name) };
612 }
613
614 my $asma = $self->db->get_AssemblyMapperAdaptor();
615 my @features;
616
617 # fetch the features from each coordinate system they are stored in
618 COORD_SYSTEM: foreach my $feat_cs (@feat_css) {
619 my $mapper;
620 my @coords;
621 my @ids;
622
623 if ( $feat_cs->equals($slice_cs) ) {
624 # no mapping is required if this is the same coord system
625
626 my $max_len = $self->_max_feature_length()
627 || $mcc->fetch_max_length_by_CoordSystem_feature_type( $feat_cs,
628 $tab_name );
629
630 my $constraint = $orig_constraint;
631
632 my $sr_id;
633 if ( $slice->adaptor() ) {
634 $sr_id = $slice->adaptor()->get_seq_region_id($slice);
635 } else {
636 $sr_id =
637 $self->db()->get_SliceAdaptor()->get_seq_region_id($slice);
638 }
639
640 # If there is mapping information, use the external_seq_region_id
641 # to get features.
642
643 my @sr_ids = ($sr_id);
644
645 while (1) {
646 my $ext_sr_id = $self->get_seq_region_id_external($sr_id);
647
648 if ( $ext_sr_id == $sr_id ) { last }
649
650 push( @sr_ids, $ext_sr_id );
651 $sr_id = $ext_sr_id;
652 }
653
654 $constraint .= " AND " if ($constraint);
655
656
657 $constraint .= "${tab_syn}.seq_region_id IN ("
658 . join( ',', @sr_ids ) . ") AND";
659
660 #faster query for 1bp slices where SNP data is not compressed
661 if ( $self->start_equals_end && $slice_start == $slice_end ) {
662 $constraint .=
663 " AND ${tab_syn}.seq_region_start = $slice_end" .
664 " AND ${tab_syn}.seq_region_end = $slice_start";
665
666 } else {
667
668 if ( !$slice->is_circular() ) {
669 # Deal with the default case of a non-circular chromosome.
670 $constraint .= " ${tab_syn}.seq_region_start <= $slice_end AND "
671 . "${tab_syn}.seq_region_end >= $slice_start";
672
673 if ( $max_len ) {
674 my $min_start = $slice_start - $max_len;
675 $constraint .= " AND ${tab_syn}.seq_region_start >= $min_start";
676 }
677
678 } else {
679 # Deal with the case of a circular chromosome.
680 if ( $slice_start > $slice_end ) {
681 $constraint .= " ( ${tab_syn}.seq_region_start >= $slice_start "
682 . "OR ${tab_syn}.seq_region_start <= $slice_end "
683 . "OR ${tab_syn}.seq_region_end >= $slice_start "
684 . "OR ${tab_syn}.seq_region_end <= $slice_end "
685 . "OR ${tab_syn}.seq_region_start > ${tab_syn}.seq_region_end)";
686
687 } else {
688 $constraint .= " ((${tab_syn}.seq_region_start <= $slice_end "
689 . "AND ${tab_syn}.seq_region_end >= $slice_start) "
690 . "OR (${tab_syn}.seq_region_start > ${tab_syn}.seq_region_end "
691 . "AND (${tab_syn}.seq_region_start <= $slice_end "
692 . "OR ${tab_syn}.seq_region_end >= $slice_start)))";
693 }
694 }
695
696 }
697
698 my $fs = $self->generic_fetch( $constraint, undef, $slice );
699
700 # features may still have to have coordinates made relative to slice
701 # start
702 $fs = $self->_remap( $fs, $mapper, $slice );
703
704 push @features, @$fs;
705 } else {
706 $mapper = $asma->fetch_by_CoordSystems( $slice_cs, $feat_cs );
707
708 next unless defined $mapper;
709
710 # Get list of coordinates and corresponding internal ids for
711 # regions the slice spans
712 @coords =
713 $mapper->map( $slice_seq_region, $slice_start, $slice_end,
714 $slice_strand, $slice_cs );
715
716 @coords = grep { !$_->isa('Bio::EnsEMBL::Mapper::Gap') } @coords;
717
718 next COORD_SYSTEM if ( !@coords );
719
720 @ids = map { $_->id() } @coords;
721 #coords are now id rather than name
722 # @ids = @{$asma->seq_regions_to_ids($feat_cs, \@ids)};
723
724 # When regions are large and only partially spanned
725 # by slice it is faster to to limit the query with
726 # start and end constraints. Take simple approach:
727 # use regional constraints if there are less than a
728 # specific number of regions covered.
729
730 if ( @coords > $MAX_SPLIT_QUERY_SEQ_REGIONS ) {
731 my $constraint = $orig_constraint;
732 my $id_str = join( ',', @ids );
733 $constraint .= " AND " if ($constraint);
734 $constraint .= "${tab_syn}.seq_region_id IN ($id_str)";
735 my $fs = $self->generic_fetch( $constraint, $mapper, $slice );
736
737 $fs = $self->_remap( $fs, $mapper, $slice );
738
739 push @features, @$fs;
740
741 } else {
742 # do multiple split queries using start / end constraints
743
744 my $max_len = (
745 $self->_max_feature_length()
746 || $mcc->fetch_max_length_by_CoordSystem_feature_type(
747 $feat_cs, $tab_name
748 ) );
749
750 my $len = @coords;
751 for ( my $i = 0; $i < $len; $i++ ) {
752 my $constraint = $orig_constraint;
753 $constraint .= " AND " if ($constraint);
754 $constraint .=
755 "${tab_syn}.seq_region_id = "
756 . $ids[$i] . " AND "
757 . "${tab_syn}.seq_region_start <= "
758 . $coords[$i]->end() . " AND "
759 . "${tab_syn}.seq_region_end >= "
760 . $coords[$i]->start();
761
762 if ($max_len) {
763 my $min_start = $coords[$i]->start() - $max_len;
764 $constraint .=
765 " AND ${tab_syn}.seq_region_start >= $min_start";
766 }
767 my $fs = $self->generic_fetch( $constraint, $mapper, $slice );
768
769 $fs = $self->_remap( $fs, $mapper, $slice );
770
771 push @features, @$fs;
772 }
773 } ## end else [ if ( @coords > $MAX_SPLIT_QUERY_SEQ_REGIONS)]
774 } ## end else [ if ( $feat_cs->equals(...))]
775 } ## end foreach my $feat_cs (@feat_css)
776
777 return \@features;
778 } ## end sub _slice_fetch
779
780
781 #for a given seq_region_id, gets the one used in an external database, if present, otherwise, returns the internal one
782 sub get_seq_region_id_external {
783 my ( $self, $sr_id ) = @_;
784 my $cs_a = $self->db()->get_CoordSystemAdaptor();
785 return ( exists( $cs_a->{'_internal_seq_region_mapping'}->{$sr_id} )
786 ? $cs_a->{'_internal_seq_region_mapping'}->{$sr_id}
787 : $sr_id );
788 }
789
790 #for a given seq_region_id and coord_system, gets the one used in the internal (core) database
791 sub get_seq_region_id_internal{
792 my ( $self, $sr_id ) = @_;
793 my $cs_a = $self->db()->get_CoordSystemAdaptor();
794 return ( exists $cs_a->{'_external_seq_region_mapping'}->{$sr_id}
795 ? $cs_a->{'_external_seq_region_mapping'}->{$sr_id}
796 : $sr_id);
797 }
798
799 #
800 # Helper function containing some common feature storing functionality
801 #
802 # Given a Feature this will return a copy (or the same feature if no changes
803 # to the feature are needed) of the feature which is relative to the start
804 # of the seq_region it is on. The seq_region_id of the seq_region it is on
805 # is also returned.
806 #
807 # This method will also ensure that the database knows which coordinate
808 # systems that this feature is stored in.
809 #
810
811 sub _pre_store {
812 my $self = shift;
813 my $feature = shift;
814
815 if(!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
816 throw('Expected Feature argument.');
817 }
818 my $slice = $feature->slice();
819
820 $self->_check_start_end_strand($feature->start(),$feature->end(),
821 $feature->strand(), $slice);
822
823
824 my $db = $self->db();
825
826 my $slice_adaptor = $db->get_SliceAdaptor();
827
828 if(!ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
829 throw('Feature must be attached to Slice to be stored.');
830 }
831
832 # make sure feature coords are relative to start of entire seq_region
833
834 if($slice->start != 1 || $slice->strand != 1) {
835 #move feature onto a slice of the entire seq_region
836 $slice = $slice_adaptor->fetch_by_region($slice->coord_system->name(),
837 $slice->seq_region_name(),
838 undef, #start
839 undef, #end
840 undef, #strand
841 $slice->coord_system->version());
842
843 $feature = $feature->transfer($slice);
844
845 if(!$feature) {
846 throw('Could not transfer Feature to slice of ' .
847 'entire seq_region prior to storing');
848 }
849 }
850
851 # Ensure this type of feature is known to be stored in this coord system.
852 my $cs = $slice->coord_system;
853
854 my ($tab) = $self->_tables();
855 my $tabname = $tab->[0];
856
857 my $mcc = $db->get_MetaCoordContainer();
858
859 $mcc->add_feature_type($cs, $tabname, $feature->length);
860
861 my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);
862
863 if(!$seq_region_id) {
864 throw('Feature is associated with seq_region which is not in this DB.');
865 }
866
867 return ($feature, $seq_region_id);
868 }
869
870
871 # The same function as _pre_store
872 # This one is used to store user uploaded features in XXX_userdata db
873
874 sub _pre_store_userdata {
875 my $self = shift;
876 my $feature = shift;
877
878 if(!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
879 throw('Expected Feature argument.');
880 }
881
882 my $slice = $feature->slice();
883 my $slice_adaptor = $slice->adaptor;
884
885 $self->_check_start_end_strand($feature->start(),$feature->end(),
886 $feature->strand(), $slice);
887
888
889 if(!ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
890 throw('Feature must be attached to Slice to be stored.');
891 }
892
893 # make sure feature coords are relative to start of entire seq_region
894
895 if($slice->start != 1 || $slice->strand != 1) {
896 #move feature onto a slice of the entire seq_region
897 $slice = $slice_adaptor->fetch_by_region($slice->coord_system->name(),
898 $slice->seq_region_name(),
899 undef, #start
900 undef, #end
901 undef, #strand
902 $slice->coord_system->version());
903
904 $feature = $feature->transfer($slice);
905
906 if(!$feature) {
907 throw('Could not transfer Feature to slice of ' .
908 'entire seq_region prior to storing');
909 }
910 }
911
912 # Ensure this type of feature is known to be stored in this coord system.
913 my $cs = $slice->coord_system;
914
915 my ($tab) = $self->_tables();
916 my $tabname = $tab->[0];
917
918 my $db = $self->db;
919 my $mcc = $db->get_MetaCoordContainer();
920
921 $mcc->add_feature_type($cs, $tabname, $feature->length);
922
923 my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);
924
925 if(!$seq_region_id) {
926 throw('Feature is associated with seq_region which is not in this DB.');
927 }
928
929 return ($feature, $seq_region_id);
930 }
931
932
933 #
934 # helper function used to validate start/end/strand and
935 # hstart/hend/hstrand etc.
936 #
937 sub _check_start_end_strand {
938 my $self = shift;
939 my $start = shift;
940 my $end = shift;
941 my $strand = shift;
942 my $slice = shift;
943
944 #
945 # Make sure that the start, end, strand are valid
946 #
947 if(int($start) != $start) {
948 throw("Invalid Feature start [$start]. Must be integer.");
949 }
950 if(int($end) != $end) {
951 throw("Invalid Feature end [$end]. Must be integer.");
952 }
953 if(int($strand) != $strand || $strand < -1 || $strand > 1) {
954 throw("Invalid Feature strand [$strand]. Must be -1, 0 or 1.");
955 }
956 if($end < $start && !$slice->is_circular()) {
957 throw("Invalid Feature start/end [$start/$end]. Start must be less " .
958 "than or equal to end.");
959 }
960
961 return 1;
962 }
963
964
965 #
966 # Given a list of features checks if they are in the correct coord system
967 # by looking at the first features slice. If they are not then they are
968 # converted and placed on the slice.
969 #
970 sub _remap {
971 my ( $self, $features, $mapper, $slice ) = @_;
972
973 #check if any remapping is actually needed
974 if(@$features && (!$features->[0]->isa('Bio::EnsEMBL::Feature') ||
975 $features->[0]->slice == $slice)) {
976 return $features;
977 }
978
979 #remapping has not been done, we have to do our own conversion from
980 #to slice coords
981
982 my @out;
983
984 my $slice_start = $slice->start();
985 my $slice_end = $slice->end();
986 my $slice_strand = $slice->strand();
987 my $slice_cs = $slice->coord_system();
988
989 my ($seq_region, $start, $end, $strand);
990
991 my $slice_seq_region_id = $slice->get_seq_region_id();
992 my $slice_seq_region = $slice->seq_region_name();
993
994 foreach my $f (@$features) {
995 #since feats were obtained in contig coords, attached seq is a contig
996 my $fslice = $f->slice();
997 if(!$fslice) {
998 throw("Feature does not have attached slice.\n");
999 }
1000 my $fseq_region = $fslice->seq_region_name();
1001 my $fseq_region_id = $fslice->get_seq_region_id();
1002 my $fcs = $fslice->coord_system();
1003
1004 if(!$slice_cs->equals($fcs)) {
1005 #slice of feature in different coord system, mapping required
1006
1007 ($seq_region, $start, $end, $strand) =
1008 $mapper->fastmap($fseq_region_id,$f->start(),$f->end(),$f->strand(),$fcs);
1009
1010 # undefined start means gap
1011 next if(!defined $start);
1012 } else {
1013 $start = $f->start();
1014 $end = $f->end();
1015 $strand = $f->strand();
1016 $seq_region = $f->slice->seq_region_name();
1017 }
1018
1019 # maps to region outside desired area
1020 next if ($start > $slice_end) || ($end < $slice_start) ||
1021 ($slice_seq_region ne $seq_region);
1022
1023 #shift the feature start, end and strand in one call
1024 if($slice_strand == -1) {
1025 $f->move( $slice_end - $end + 1, $slice_end - $start + 1, $strand * -1 );
1026 } else {
1027 $f->move( $start - $slice_start + 1, $end - $slice_start + 1, $strand );
1028 }
1029
1030 $f->slice($slice);
1031
1032 push @out,$f;
1033 }
1034
1035 return \@out;
1036 }
1037
1038
1039 #
1040 # Given a logic name and an existing constraint this will
1041 # add an analysis table constraint to the feature. Note that if no
1042 # analysis_id exists in the columns of the primary table then no
1043 # constraint is added at all
1044 #
1045 sub _logic_name_to_constraint {
1046 my $self = shift;
1047 my $constraint = shift;
1048 my $logic_name = shift;
1049
1050 return $constraint if(!$logic_name);
1051
1052 #make sure that an analysis_id exists in the primary table
1053 my ($prim_tab) = $self->_tables();
1054 my $prim_synonym = $prim_tab->[1];
1055
1056 my $found_analysis=0;
1057 foreach my $col ($self->_columns) {
1058 my ($syn,$col_name) = split(/\./,$col);
1059 next if($syn ne $prim_synonym);
1060 if($col_name eq 'analysis_id') {
1061 $found_analysis = 1;
1062 last;
1063 }
1064 }
1065
1066 if(!$found_analysis) {
1067 warning("This feature is not associated with an analysis.\n" .
1068 "Ignoring logic_name argument = [$logic_name].\n");
1069 return $constraint;
1070 }
1071
1072 my $aa = $self->db->get_AnalysisAdaptor();
1073 my $an = $aa->fetch_by_logic_name($logic_name);
1074
1075 if ( !defined($an) ) {
1076 return undef;
1077 }
1078
1079 my $an_id = $an->dbID();
1080
1081 $constraint .= ' AND' if($constraint);
1082 $constraint .= " ${prim_synonym}.analysis_id = $an_id";
1083 return $constraint;
1084 }
1085
1086
1087 =head2 store
1088
1089 Arg [1] : list of Bio::EnsEMBL::SeqFeature
1090 Example : $adaptor->store(@feats);
1091 Description: ABSTRACT Subclasses are responsible for implementing this
1092 method. It should take a list of features and store them in
1093 the database.
1094 Returntype : none
1095 Exceptions : thrown method is not implemented by subclass
1096 Caller : general
1097 Status : At Risk
1098 : throws if called.
1099
1100 =cut
1101
1102 sub store{
1103 my $self = @_;
1104
1105 throw("Abstract method store not defined by implementing subclass\n");
1106 }
1107
1108
1109 =head2 remove
1110
1111 Arg [1] : A feature $feature
1112 Example : $feature_adaptor->remove($feature);
1113 Description: This removes a feature from the database. The table the
1114 feature is removed from is defined by the abstract method
1115 _tablename, and the primary key of the table is assumed
1116 to be _tablename() . '_id'. The feature argument must
1117 be an object implementing the dbID method, and for the
1118 feature to be removed from the database a dbID value must
1119 be returned.
1120 Returntype : none
1121 Exceptions : thrown if $feature arg does not implement dbID(), or if
1122 $feature->dbID is not a true value
1123 Caller : general
1124 Status : Stable
1125
1126 =cut
1127
1128
1129 sub remove {
1130 my ($self, $feature) = @_;
1131
1132 if(!$feature || !ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
1133 throw('Feature argument is required');
1134 }
1135
1136 if(!$feature->is_stored($self->db)) {
1137 throw("This feature is not stored in this database");
1138 }
1139
1140 my @tabs = $self->_tables;
1141 my ($table) = @{$tabs[0]};
1142
1143 my $sth = $self->prepare("DELETE FROM $table WHERE ${table}_id = ?");
1144 $sth->bind_param(1,$feature->dbID,SQL_INTEGER);
1145 $sth->execute();
1146
1147 #unset the feature dbID ad adaptor
1148 $feature->dbID(undef);
1149 $feature->adaptor(undef);
1150
1151 return;
1152 }
1153
1154
1155 =head2 remove_by_Slice
1156
1157 Arg [1] : Bio::Ensembl::Slice $slice
1158 Example : $feature_adaptor->remove_by_Slice($slice);
1159 Description: This removes features from the database which lie on a region
1160 represented by the passed in slice. Only features which are
1161 fully contained by the slice are deleted; features which overlap
1162 the edge of the slice are not removed.
1163 The table the features are removed from is defined by
1164 the abstract method_tablename.
1165 Returntype : none
1166 Exceptions : thrown if no slice is supplied
1167 Caller : general
1168 Status : Stable
1169
1170 =cut
1171
1172 sub remove_by_Slice {
1173 my ($self, $slice) = @_;
1174
1175 if(!$slice || !ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
1176 throw("Slice argument is required");
1177 }
1178
1179 my @tabs = $self->_tables;
1180 my ($table_name) = @{$tabs[0]};
1181
1182 my $seq_region_id = $self->db->get_SliceAdaptor->get_seq_region_id($slice);
1183 my $start = $slice->start();
1184 my $end = $slice->end();
1185
1186 #
1187 # Delete only features fully on the slice, not overlapping ones
1188 #
1189 my $sth = $self->prepare("DELETE FROM $table_name " .
1190 "WHERE seq_region_id = ? " .
1191 "AND seq_region_start >= ? " .
1192 "AND seq_region_end <= ?");
1193
1194 $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
1195 $sth->bind_param(2,$start,SQL_INTEGER);
1196 $sth->bind_param(3,$end,SQL_INTEGER);
1197 $sth->execute();
1198 $sth->finish();
1199 }
1200
1201
1202 #
1203 # Internal function. Allows the max feature length which is normally
1204 # retrieved from the meta_coord table to be overridden. This allows
1205 # for some significant optimizations to be put in when it is known
1206 # that requested features will not be over a certain size.
1207 #
1208 sub _max_feature_length {
1209 my $self = shift;
1210 return $self->{'_max_feature_length'} = shift if(@_);
1211 return $self->{'_max_feature_length'};
1212 }
1213
1214
1215 #
1216 # Lists all seq_region_ids that a particular feature type is found on.
1217 # Useful e.g. for finding out which seq_regions have genes.
1218 # Returns a listref of seq_region_ids.
1219 #
1220 sub _list_seq_region_ids {
1221 my ($self, $table) = @_;
1222
1223 my @out;
1224
1225 my $sql = qq(
1226 SELECT DISTINCT
1227 sr.seq_region_id
1228 FROM seq_region sr,
1229 $table a,
1230 coord_system cs
1231 WHERE sr.seq_region_id = a.seq_region_id
1232 AND sr.coord_system_id = cs.coord_system_id
1233 AND cs.species_id = ?);
1234
1235 my $sth = $self->prepare($sql);
1236
1237 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
1238
1239 $sth->execute();
1240
1241 while (my ($id) = $sth->fetchrow) {
1242 push(@out, $id);
1243 }
1244
1245 $sth->finish;
1246
1247 return \@out;
1248 }
1249
1250
1251 =head1 DEPRECATED METHODS
1252
1253 =cut
1254
1255
1256 =head2 fetch_all_by_RawContig_constraint
1257
1258 Description: DEPRECATED use fetch_all_by_RawContig_constraint instead
1259
1260 =cut
1261
1262 sub fetch_all_by_RawContig_constraint {
1263 my $self = shift;
1264 deprecate('Use fetch_all_by_Slice_constraint() instead.');
1265 return $self->fetch_all_by_slice_constraint(@_);
1266 }
1267
1268 =head2 fetch_all_by_RawContig
1269
1270 Description: DEPRECATED use fetch_all_by_Slice instead
1271
1272 =cut
1273
1274 sub fetch_all_by_RawContig {
1275 my $self = shift;
1276 deprecate('Use fetch_all_by_Slice() instead.');
1277 return $self->fetch_all_by_Slice(@_);
1278 }
1279
1280 =head2 fetch_all_by_RawContig_and_score
1281
1282 Description: DEPRECATED use fetch_all_by_Slice_and_score instead
1283
1284 =cut
1285
1286 sub fetch_all_by_RawContig_and_score{
1287 my $self = shift;
1288 deprecate('Use fetch_all_by_Slice_and_score() instead.');
1289 return $self->fetch_all_by_Slice_and_score(@_);
1290 }
1291
1292 =head2 remove_by_RawContig
1293
1294 Description: DEPRECATED use remove_by_Slice instead
1295
1296 =cut
1297
1298 sub remove_by_RawContig {
1299 my $self = shift;
1300 deprecate("Use remove_by_Slice instead");
1301 return $self->remove_by_Slice(@_);
1302 }
1303
1304
1305 sub remove_by_analysis_id {
1306 my ($self, $analysis_id) = @_;
1307
1308 $analysis_id or throw("Must call with analysis id");
1309
1310 my @tabs = $self->_tables;
1311 my ($tablename) = @{$tabs[0]};
1312
1313 my $sql = "DELETE FROM $tablename WHERE analysis_id = $analysis_id";
1314 # warn "SQL : $sql";
1315
1316 my $sth = $self->prepare($sql);
1317 $sth->execute();
1318 $sth->finish();
1319 }
1320
1321 sub remove_by_feature_id {
1322 my ($self, $features_list) = @_;
1323
1324 my @feats = @$features_list or throw("Must call store with features");
1325
1326 my @tabs = $self->_tables;
1327 my ($tablename) = @{$tabs[0]};
1328
1329 my $sql = sprintf "DELETE FROM $tablename WHERE ${tablename}_id IN (%s)", join ', ', @feats;
1330 # warn "SQL : $sql";
1331
1332 my $sth = $self->prepare($sql);
1333 $sth->execute();
1334 $sth->finish();
1335 }
1336
1337
1338 1;