comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/AssemblyExceptionFeatureAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::DBSQL::AssemblyExceptionFeatureAdaptor
24
25 =head1 SYNOPSIS
26
27 my $assembly_exception_feature_adaptor =
28 $database_adaptor->get_AssemblyExceptionFeatureAdaptor();
29
30 @assembly_exception_features =
31 $assembly_exception_feature_adaptor->fetch_all_by_Slice($slice);
32
33 =head1 DESCRIPTION
34
35 Assembly Exception Feature Adaptor - database access for assembly
36 exception features.
37
38 =head1 METHODS
39
40 =cut
41
42 package Bio::EnsEMBL::DBSQL::AssemblyExceptionFeatureAdaptor;
43
44 use strict;
45 use warnings;
46 no warnings qw(uninitialized);
47
48 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
49 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
50 use Bio::EnsEMBL::AssemblyExceptionFeature;
51 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
52 use Bio::EnsEMBL::Utils::Cache;
53
54 our @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
55
56 # set the number of slices you'd like to cache
57 our $ASSEMBLY_EXCEPTION_FEATURE_CACHE_SIZE = 100;
58
59 =head2 new
60
61 Arg [1] : list of args @args
62 Superclass constructor arguments
63 Example : none
64 Description: Constructor which just initializes internal cache structures
65 Returntype : Bio::EnsEMBL::DBSQL::AssemblyExceptionFeatureAdaptor
66 Exceptions : none
67 Caller : implementing subclass constructors
68 Status : Stable
69
70 =cut
71
72 sub new {
73 my $caller = shift;
74 my $class = ref($caller) || $caller;
75
76 my $self = $class->SUPER::new(@_);
77
78 # initialize an LRU cache for slices
79 my %cache;
80 tie(%cache, 'Bio::EnsEMBL::Utils::Cache',
81 $ASSEMBLY_EXCEPTION_FEATURE_CACHE_SIZE);
82
83 $self->{'_aexc_slice_cache'} = \%cache;
84
85 return $self;
86 }
87
88 =head2 fetch_all
89
90 Arg [1] : none
91 Example : my @axfs = @{$axfa->fetch_all()};
92 Description: Retrieves all assembly exception features which are in the
93 database and builds internal caches of the features.
94 Returntype : reference to list of Bio::EnsEMBL::AssemblyExceptionFeatures
95 Exceptions : none
96 Caller : fetch_by_dbID, fetch_by_Slice
97 Status : Stable
98
99 =cut
100
101 sub fetch_all {
102 my $self = shift;
103
104 # this is the "global" cache for all assembly exception features in the db
105 if(defined($self->{'_aexc_cache'})) {
106 return $self->{'_aexc_cache'};
107 }
108
109 my $statement = qq(
110 SELECT ae.assembly_exception_id,
111 ae.seq_region_id,
112 ae.seq_region_start,
113 ae.seq_region_end,
114 ae.exc_type,
115 ae.exc_seq_region_id,
116 ae.exc_seq_region_start,
117 ae.exc_seq_region_end,
118 ae.ori
119 FROM assembly_exception ae,
120 coord_system cs,
121 seq_region sr
122 WHERE cs.species_id = ?
123 AND sr.coord_system_id = cs.coord_system_id
124 AND sr.seq_region_id = ae.seq_region_id);
125
126 my $sth = $self->prepare($statement);
127
128 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
129
130 $sth->execute();
131
132 my ($ax_id, $sr_id, $sr_start, $sr_end,
133 $x_type, $x_sr_id, $x_sr_start, $x_sr_end, $ori);
134
135 $sth->bind_columns(\$ax_id, \$sr_id, \$sr_start, \$sr_end,
136 \$x_type, \$x_sr_id, \$x_sr_start, \$x_sr_end, \$ori);
137
138 my @features;
139 my $sa = $self->db()->get_SliceAdaptor();
140
141 $self->{'_aexc_dbID_cache'} = {};
142
143 while($sth->fetch()) {
144 my $slice = $sa->fetch_by_seq_region_id($sr_id);
145 my $x_slice = $sa->fetch_by_seq_region_id($x_sr_id);
146
147 # each row creates TWO features, each of which has alternate_slice
148 # pointing to the "other" one
149
150
151 my $a = Bio::EnsEMBL::AssemblyExceptionFeature->new
152 ('-dbID' => $ax_id,
153 '-start' => $sr_start,
154 '-end' => $sr_end,
155 '-strand' => 1,
156 '-adaptor' => $self,
157 '-slice' => $slice,
158 '-alternate_slice' => $x_slice->sub_Slice($x_sr_start, $x_sr_end),
159 '-type' => $x_type);
160
161 push @features, $a;
162 $self->{'_aexc_dbID_cache'}->{$ax_id} = $a;
163
164 push @features, Bio::EnsEMBL::AssemblyExceptionFeature->new
165 ('-dbID' => $ax_id,
166 '-start' => $x_sr_start,
167 '-end' => $x_sr_end,
168 '-strand' => 1,
169 '-adaptor' => $self,
170 '-slice' => $x_slice,
171 '-alternate_slice' => $slice->sub_Slice($sr_start, $sr_end),
172 '-type' => "$x_type REF" );
173 }
174
175 $sth->finish();
176
177 $self->{'_aexc_cache'} = \@features;
178
179 return \@features;
180 }
181
182
183 =head2 fetch_by_dbID
184
185 Arg [1] : int $dbID
186 Example : my $axf = $axfa->fetch_by_dbID(3);
187 Description: Retrieves a single assembly exception feature via its internal
188 identifier. Note that this only retrieves one of the two
189 assembly exception features which are represented by a single
190 row in the assembly_exception table.
191 Returntype : Bio::EnsEMBL::AssemblyExceptionFeature
192 Exceptions : none
193 Caller : general
194 Status : Stable
195
196 =cut
197
198 sub fetch_by_dbID {
199 my $self = shift;
200 my $dbID = shift;
201
202 if(!exists($self->{'_aexc_dbID_cache'})) {
203 # force loading of cache
204 $self->fetch_all();
205 }
206
207 return $self->{'_aexc_dbID_cache'}->{$dbID};
208 }
209
210
211 =head2 fetch_all_by_Slice
212
213 Arg [1] : Bio::EnsEMBL::Slice $slice
214 Example : my @axfs = @{$axfa->fetch_all_by_Slice($slice)};
215 Description: Retrieves all assembly exception features which overlap the
216 provided slice. The returned features will be in coordinate
217 system of the slice.
218 Returntype : reference to list of Bio::EnsEMBL::AssemblyException features
219 Exceptions : none
220 Caller : Feature::get_all_alt_locations, general
221 Status : Stable
222
223 =cut
224
225 sub fetch_all_by_Slice {
226 my $self = shift;
227 my $slice = shift;
228
229 my $key= uc($slice->name());
230
231 # return features from the slice cache if present
232 if(exists($self->{'_aexc_slice_cache'}->{$key})) {
233 return $self->{'_aexc_slice_cache'}->{$key};
234 }
235
236 my $all_features = $self->fetch_all();
237
238 my $mcc = $self->db()->get_MetaCoordContainer();
239 my $css = $mcc->fetch_all_CoordSystems_by_feature_type('assembly_exception');
240
241 my @features;
242
243 my $ma = $self->db()->get_AssemblyMapperAdaptor();
244
245 foreach my $cs (@$css) {
246 my $mapper;
247 if($cs->equals($slice->coord_system)) {
248 $mapper = undef;
249 } else {
250 $mapper = $ma->fetch_by_CoordSystems($cs,$slice->coord_system());
251 }
252
253 push @features, @{ $self->_remap($all_features, $mapper, $slice) };
254 }
255
256 $self->{'_aexc_slice_cache'}->{$key} = \@features;
257
258 return \@features;
259 }
260
261
262 #
263 # Given a list of features checks if they are in the correct coord system
264 # by looking at the first features slice. If they are not then they are
265 # converted and placed on the slice.
266 #
267 # Note that this is a re-implementation of a method with the same name in
268 # BaseFeatureAdaptor, and in contrast to the latter which maps features in
269 # place, this method returns a remapped copy of each feature. The reason for
270 # this is to get around conflicts with caching.
271 #
272 sub _remap {
273 my ($self, $features, $mapper, $slice) = @_;
274
275 # check if any remapping is actually needed
276 if(@$features && (!$features->[0]->isa('Bio::EnsEMBL::Feature') ||
277 $features->[0]->slice == $slice)) {
278 return $features;
279 }
280
281 # remapping has not been done, we have to do our own conversion from
282 # to slice coords
283
284 my @out;
285
286 my $slice_start = $slice->start();
287 my $slice_end = $slice->end();
288 my $slice_strand = $slice->strand();
289 my $slice_cs = $slice->coord_system();
290
291 my ($seq_region, $start, $end, $strand);
292
293 my $slice_seq_region = $slice->seq_region_name();
294
295 foreach my $f (@$features) {
296 # since feats were obtained in contig coords, attached seq is a contig
297 my $fslice = $f->slice();
298 if(!$fslice) {
299 throw("Feature does not have attached slice.\n");
300 }
301 my $fseq_region = $fslice->seq_region_name();
302 my $fcs = $fslice->coord_system();
303
304 if(!$slice_cs->equals($fcs)) {
305 # slice of feature in different coord system, mapping required
306 ($seq_region, $start, $end, $strand) =
307 $mapper->fastmap($fseq_region,$f->start(),$f->end(),$f->strand(),$fcs);
308
309 # undefined start means gap
310 next if(!defined $start);
311 } else {
312 $start = $f->start();
313 $end = $f->end();
314 $strand = $f->strand();
315 $seq_region = $f->slice->seq_region_name();
316 }
317
318 # maps to region outside desired area
319 next if ($start > $slice_end) || ($end < $slice_start) ||
320 ($slice_seq_region ne $seq_region);
321
322 # create new copies of successfully mapped feaatures with shifted start,
323 # end and strand
324 my ($new_start, $new_end);
325 if($slice_strand == -1) {
326 $new_start = $slice_end - $end + 1;
327 $new_end = $slice_end - $start + 1;
328 } else {
329 $new_start = $start - $slice_start + 1;
330 $new_end = $end - $slice_start + 1;
331 }
332
333 push @out, Bio::EnsEMBL::AssemblyExceptionFeature->new(
334 '-dbID' => $f->dbID,
335 '-start' => $new_start,
336 '-end' => $new_end,
337 '-strand' => $strand * $slice_strand,
338 '-adaptor' => $self,
339 '-slice' => $slice,
340 '-alternate_slice' => $f->alternate_slice,
341 '-type' => $f->type,
342 );
343 }
344
345 return \@out;
346 }
347
348 =head2 store
349
350 Arg[1] : Bio::EnsEMBL::AssemblyException $asx
351 Arg[2] : Bio::EnsEMBL::AssemblyException $asx2
352
353 Example : $asx = Bio::EnsEMBL::AssemblyExceptionFeature->new(...)
354 $asx2 = Bio::EnsEMBL::AssemblyExceptionFeature->new(...)
355 $asx_seq_region_id = $asx_adaptor->store($asx);
356 Description: This stores a assembly exception feature in the
357 assembly_exception table and returns the assembly_exception_id.
358 Needs 2 features: one pointing to the Assembly_exception, and the
359 other pointing to the region in the reference that is being mapped to
360 Will check that start, end and type are defined, and the alternate
361 slice is present as well.
362 ReturnType: int
363 Exceptions: throw if assembly exception not defined (needs start, end,
364 type and alternate_slice) of if $asx not a Bio::EnsEMBL::AssemblyException
365 Caller: general
366 Status: Stable
367
368 =cut
369
370 sub store{
371 my $self = shift;
372 my $asx = shift;
373 my $asx2 = shift;
374
375
376 if (! $asx->isa('Bio::EnsEMBL::AssemblyExceptionFeature')){
377 throw("$asx is not a Ensembl assemlby exception -- not stored");
378 }
379 #if already present, return ID in the database
380 my $db = $self->db();
381 if ($asx->is_stored($db)){
382 return $asx->dbID();
383 }
384 #do some checkings for the object
385 #at the moment, the orientation is always 1
386 if (! $asx->start || ! $asx->end ){
387 throw("Assembly exception does not have coordinates");
388 }
389 if ($asx->type !~ /PAR|HAP|PATCH_NOVEL|PATCH_FIX/){
390 throw("Only types of assembly exception features valid are PAR, HAP, PATCH_FIX or PATCH_NOVEL");
391 }
392 if ( !($asx->alternate_slice->isa('Bio::EnsEMBL::Slice')) ){
393 throw("Alternate slice should be a Bio::EnsEMBL::Slice");
394 }
395 #now check the other Assembly exception feature, the one pointing to the REF
396 # region
397 if (!$asx2->isa('Bio::EnsEMBL::AssemblyExceptionFeature')){
398 throw("$asx2 is not a Ensembl assemlby exception -- not stored");
399 }
400 if (! $asx2->start || ! $asx2->end ){
401 throw("Assembly exception does not have coordinates");
402 }
403 if ($asx2->type !~ /HAP REF|PAR REF|PATCH_NOVEL REF|PATCH_FIX REF/){
404 throw("$asx2 should have type of assembly exception features HAP REF, PAR REF, PATCH_FIX REF or PATCH_NOVEL REF");
405 }
406 if (! ($asx2->alternate_slice->isa('Bio::EnsEMBL::Slice')) ){
407 throw("Alternate slice should be a Bio::EnsEMBL::Slice");
408 }
409 #finally check that both features are pointing to each other slice
410 if ($asx->slice != $asx2->alternate_slice || $asx->alternate_slice != $asx2->slice){
411 throw("Slice and alternate slice in both features are not pointing to each other");
412 }
413 #prepare the SQL
414 my $asx_sql = q{
415 INSERT INTO assembly_exception( seq_region_id, seq_region_start,
416 seq_region_end,
417 exc_type, exc_seq_region_id,
418 exc_seq_region_start, exc_seq_region_end,
419 ori)
420 VALUES (?, ?, ?, ?, ?, ?, ?, 1)
421 };
422
423 my $asx_st = $self->prepare($asx_sql);
424 my $asx_id = undef;
425 my $asx_seq_region_id;
426 my $asx2_seq_region_id;
427 my $original = $asx;
428 my $original2 = $asx2;
429 #check all feature information
430 ($asx, $asx_seq_region_id) = $self->_pre_store($asx);
431 ($asx2, $asx2_seq_region_id) = $self->_pre_store($asx2);
432
433 #and store it
434 $asx_st->bind_param(1, $asx_seq_region_id, SQL_INTEGER);
435 $asx_st->bind_param(2, $asx->start(), SQL_INTEGER);
436 $asx_st->bind_param(3, $asx->end(), SQL_INTEGER);
437 $asx_st->bind_param(4, $asx->type(), SQL_VARCHAR);
438 $asx_st->bind_param(5, $asx2_seq_region_id, SQL_INTEGER);
439 $asx_st->bind_param(6, $asx2->start(), SQL_INTEGER);
440 $asx_st->bind_param(7, $asx2->end(), SQL_INTEGER);
441
442 $asx_st->execute();
443 $asx_id = $asx_st->{'mysql_insertid'};
444
445 #finally, update the dbID and adaptor of the asx and asx2
446 $original->adaptor($self);
447 $original->dbID($asx_id);
448 $original2->adaptor($self);
449 $original2->dbID($asx_id);
450 #and finally update dbID cache with new assembly exception
451 $self->{'_aexc_dbID_cache'}->{$asx_id} = $original;
452 #and update the other caches as well
453 push @{$self->{'_aexc_slice_cache'}->{uc($asx->slice->name)}},$original, $original2;
454 push @{$self->{'_aexc_cache'}}, $original, $original2;
455
456 return $asx_id;
457 }
458
459 #
460 # Helper function containing some common feature storing functionality
461 #
462 # Given a Feature this will return a copy (or the same feature if no changes
463 # to the feature are needed) of the feature which is relative to the start
464 # of the seq_region it is on. The seq_region_id of the seq_region it is on
465 # is also returned.
466 #
467 # This method will also ensure that the database knows which coordinate
468 # systems that this feature is stored in.
469 # Since this adaptor doesn't inherit from BaseFeatureAdaptor, we need to copy
470 # the code
471 #
472
473 sub _pre_store {
474 my $self = shift;
475 my $feature = shift;
476
477 if(!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
478 throw('Expected Feature argument.');
479 }
480
481 $self->_check_start_end_strand($feature->start(),$feature->end(),
482 $feature->strand());
483
484
485 my $db = $self->db();
486
487 my $slice_adaptor = $db->get_SliceAdaptor();
488 my $slice = $feature->slice();
489
490 if(!ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
491 throw('Feature must be attached to Slice to be stored.');
492 }
493
494 # make sure feature coords are relative to start of entire seq_region
495
496 if($slice->start != 1 || $slice->strand != 1) {
497 #move feature onto a slice of the entire seq_region
498 $slice = $slice_adaptor->fetch_by_region($slice->coord_system->name(),
499 $slice->seq_region_name(),
500 undef, #start
501 undef, #end
502 undef, #strand
503 $slice->coord_system->version());
504
505 $feature = $feature->transfer($slice);
506
507 if(!$feature) {
508 throw('Could not transfer Feature to slice of ' .
509 'entire seq_region prior to storing');
510 }
511 }
512
513 # Ensure this type of feature is known to be stored in this coord system.
514 my $cs = $slice->coord_system;
515
516 my $mcc = $db->get_MetaCoordContainer();
517
518 $mcc->add_feature_type($cs, 'assembly_exception', $feature->length);
519
520 my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);
521
522 if(!$seq_region_id) {
523 throw('Feature is associated with seq_region which is not in this DB.');
524 }
525
526 return ($feature, $seq_region_id);
527 }
528
529 #
530 # helper function used to validate start/end/strand and
531 # hstart/hend/hstrand etc.
532 #
533 sub _check_start_end_strand {
534 my $self = shift;
535 my $start = shift;
536 my $end = shift;
537 my $strand = shift;
538
539 #
540 # Make sure that the start, end, strand are valid
541 #
542 if(int($start) != $start) {
543 throw("Invalid Feature start [$start]. Must be integer.");
544 }
545 if(int($end) != $end) {
546 throw("Invalid Feature end [$end]. Must be integer.");
547 }
548 if(int($strand) != $strand || $strand < -1 || $strand > 1) {
549 throw("Invalid Feature strand [$strand]. Must be -1, 0 or 1.");
550 }
551 if($end < $start) {
552 throw("Invalid Feature start/end [$start/$end]. Start must be less " .
553 "than or equal to end.");
554 }
555
556 return 1;
557 }
558
559 =head2 remove
560
561 Arg [1] : $asx Bio::EnsEMBL::AssemblyFeatureException
562 Example : $asx_adaptor->remove($asx);
563 Description: This removes a assembly exception feature from the database.
564 Returntype : none
565 Exceptions : thrown if $asx arg does not implement dbID(), or if
566 $asx->dbID is not a true value
567 Caller : general
568 Status : Stable
569
570 =cut
571
572 #again, this method is generic in BaseFeatureAdaptor, but since this class
573 #is not inheriting, need to copy&paste
574 sub remove {
575 my ($self, $feature) = @_;
576
577 if(!$feature || !ref($feature) || !$feature->isa('Bio::EnsEMBL::AssemblyExceptionFeature')) {
578 throw('AssemblyExceptionFeature argument is required');
579 }
580
581 if(!$feature->is_stored($self->db)) {
582 throw("This feature is not stored in this database");
583 }
584
585 my $asx_id = $feature->dbID();
586 my $key = uc($feature->slice->name);
587 my $sth = $self->prepare("DELETE FROM assembly_exception WHERE assembly_exception_id = ?");
588 $sth->bind_param(1,$feature->dbID,SQL_INTEGER);
589 $sth->execute();
590
591 #and clear cache
592 #and finally update dbID cache
593 delete $self->{'_aexc_dbID_cache'}->{$asx_id};
594 #and remove from cache feature
595 my @features;
596 foreach my $asx (@{$self->{'_aexc_slice_cache'}->{$key}}){
597 if ($asx->dbID != $asx_id){
598 push @features, $asx;
599 }
600 }
601 $self->{'_aexc_slice_cache'}->{$key} = \@features;
602 @features = ();
603 foreach my $asx (@{$self->{'_aexc_cache'}}){
604 if ($asx->dbID != $asx_id){
605 push @features, $asx;
606 }
607 }
608 $self->{'_aexc_cache'} = \@features;
609
610 #unset the feature dbID ad adaptor
611 $feature->dbID(undef);
612 $feature->adaptor(undef);
613
614 return;
615 }
616
617
618 1;