comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/MiscFeatureAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::DBSQL::MiscFeatureAdaptor
24
25 =head1 SYNOPSIS
26
27 $mfa = $database_adaptor->get_MiscFeatureAdaptor();
28
29 # retrieve a misc feature by its dbID
30 my $misc_feat = $mfa->fetch_by_dbID(1234);
31
32 # retrieve all misc features in a given region
33 my @misc_feats = @{ $mfa->fetch_all_by_Slice($slice) };
34
35 # retrieve all misc features in a given region with a given set code
36 my @misc_clones =
37 @{ $mfa->fetch_all_by_Slice_and_set_code('cloneset') };
38
39 # store some misc features in the database
40 $mfa->store(@misc_features);
41
42 =head1 DESCRIPTION
43
44 This is an adaptor for the retrieval and storage of MiscFeatures.
45 Misc Features are extremely generic features that can be added with
46 minimal effort to the database. Currently misc features are used to
47 describe the locations of clone sets and tiling path information,
48 but arbitrary features can be stored. Misc features are grouped
49 into sets and can be fetched according to their grouping using the
50 fetch_all_by_Slice_and_set_code and fetch_all_by_set_code methods.
51 MiscFeatures may belong to more than one set.
52
53 =head1 METHODS
54
55 =cut
56
57 package Bio::EnsEMBL::DBSQL::MiscFeatureAdaptor;
58
59 use strict;
60 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
61 use Bio::EnsEMBL::MiscFeature;
62 use Bio::EnsEMBL::Attribute;
63 use Bio::EnsEMBL::MiscSet;
64 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
65
66 use vars qw(@ISA);
67
68 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor);
69
70
71
72 =head2 fetch_all_by_Slice_and_set_code
73
74 Arg [1] : Bio::EnsEMBL::Slice $slice
75 A slice representing the region to fetch from
76 Arg [2...] : string $set_code
77 The code of the set to retrieve features from
78 Example : @feats = @{$mfa->fetch_all_by_Slice_and_set_code('cloneset')};
79 Description: Retrieves a set of MiscFeatures which have a particular set code
80 and which lie in a particular region. All features with the
81 provide set code and which overlap the given slice are returned.
82 Returntype : listref of Bio::EnsEMBL::MiscFeatures
83 Exceptions : throw if set_code is not provided
84 warning if no set for provided set code exists
85 Caller : general
86 Status : Stable
87
88 =cut
89
90 sub fetch_all_by_Slice_and_set_code {
91 my $self = shift;
92 my $slice = shift;
93
94 throw('Set code argument is required.') unless @_;
95
96 my $msa = $self->db->get_MiscSetAdaptor();
97 my @sets = ();
98 my $max_len = 0;
99 foreach my $set_code (@_) {
100 my $set = $msa->fetch_by_code($set_code);
101 if($set) {
102 $max_len = $set->longest_feature if $set->longest_feature > $max_len;
103 push @sets, $set->dbID;
104 } else {
105 warning("No misc_set with code [$set_code] exists");
106 }
107 }
108 my $constraint;
109 if( @sets > 1 ) {
110 $constraint = " mfms.misc_set_id in ( @{[join ',',@sets]} ) ";
111 } elsif( @sets == 1 ) {
112 $constraint = " mfms.misc_set_id = $sets[0] ";
113 } else {
114 return [];
115 }
116
117 $self->_max_feature_length($max_len);
118
119 my $results = $self->fetch_all_by_Slice_constraint($slice, $constraint);
120
121 $self->_max_feature_length(undef);
122
123 return $results;
124 }
125
126
127
128 =head2 fetch_all_by_attribute_type_value
129
130 Arg [1] : string $attrib_type_code
131 The code of the attribute type to fetch features for
132 Arg [2] : (optional) string $attrib_value
133 The value of the attribute to fetch features for
134 Example :
135 #get all misc features that have an embl accession
136 @feats = @{$mfa->fetch_all_by_attrib_type_value('embl_acc')};
137 #get the misc feature with synonym 'AL014121'
138 ($feat)=@{$mfa->fetch_all_by_attrib_type_value('synonym','AL014121');
139 Description: Retrieves MiscFeatures which have a particular attribute.
140 If the attribute value argument is also provided only
141 features which have the attribute AND a particular value
142 are returned. The features are returned in their native
143 coordinate system (i.e. the coordinate system that they
144 are stored in).
145 Returntype : listref of Bio::EnsEMBL::MiscFeatures
146 Exceptions : throw if attrib_type code arg is not provided
147 Caller : general
148 Status : Stable
149
150 =cut
151
152 sub fetch_all_by_attribute_type_value {
153 my $self = shift;
154 my $attrib_type_code = shift;
155 my $attrib_value = shift;
156
157 throw("Attrib type code argument is required.")
158 if ( !$attrib_type_code );
159
160 # Need to do 2 queries so that all of the ids come back with the
161 # features. The problem with adding attrib constraints to filter the
162 # misc_features which come back is that not all of the attributes will
163 # come back
164
165 my $sql = qq(
166 SELECT DISTINCT
167 ma.misc_feature_id
168 FROM misc_attrib ma,
169 attrib_type at,
170 misc_feature mf,
171 seq_region sr,
172 coord_system cs
173 WHERE ma.attrib_type_id = at.attrib_type_id
174 AND at.code = ?
175 AND ma.misc_feature_id = mf.misc_feature_id
176 AND mf.seq_region_id = sr.seq_region_id
177 AND sr.coord_system_id = cs.coord_system_id
178 AND cs.species_id = ?);
179
180 if ($attrib_value) {
181 $sql .= " AND ma.value = ?";
182 }
183
184 my $sth = $self->prepare($sql);
185
186 $sth->bind_param( 1, $attrib_type_code, SQL_VARCHAR );
187 $sth->bind_param( 2, $self->species_id(), SQL_INTEGER );
188 if ($attrib_value) {
189 $sth->bind_param( 3, $attrib_value, SQL_VARCHAR );
190 }
191
192 $sth->execute();
193
194 my @ids = map { $_->[0] } @{ $sth->fetchall_arrayref() };
195
196 $sth->finish();
197
198 # Construct constraints from the list of ids. Split ids into groups
199 # of 1000 to ensure that the query is not too big.
200 my @constraints;
201 while (@ids) {
202 my @subset = splice( @ids, 0, 1000 );
203 if ( @subset == 1 ) {
204 push @constraints, "mf.misc_feature_id = $subset[0]";
205 } else {
206 my $id_str = join( ',', @subset );
207 push @constraints, "mf.misc_feature_id in ($id_str)";
208 }
209 }
210
211 my @results;
212 foreach my $constraint (@constraints) {
213 push @results, @{ $self->generic_fetch($constraint) };
214 }
215
216 return \@results;
217 } ## end sub fetch_all_by_attribute_type_value
218
219
220 #_tables
221 #
222 # Arg [1] : none
223 # Example : none
224 # Description: PROTECTED Implementation of abstract superclass method to
225 # provide the name of the tables to query
226 # Returntype : string
227 # Exceptions : none
228 # Caller : internal
229
230
231 sub _tables {
232 my $self = shift;
233
234 return (['misc_feature', 'mf'],
235 ['misc_feature_misc_set', 'mfms'],
236 ['misc_attrib', 'ma'],
237 ['attrib_type', 'at']);
238 }
239
240
241 #_columns
242
243 # Arg [1] : none
244 # Example : none
245 # Description: PROTECTED Implementation of abstract superclass method to
246 # provide the name of the columns to query
247 # Returntype : list of strings
248 # Exceptions : none
249 # Caller : internal
250
251 sub _columns {
252 my $self = shift;
253
254 #warning _objs_from_sth implementation depends on ordering
255 return qw (mf.misc_feature_id
256 mf.seq_region_id
257 mf.seq_region_start
258 mf.seq_region_end
259 mf.seq_region_strand
260 ma.value
261 at.code
262 mfms.misc_set_id
263 at.name
264 at.description);
265 }
266
267
268
269 # _default_where_clause
270
271 # Arg [1] : none
272 # Example : none
273 # Description: Overrides superclass method to provide an additional
274 # table joining constraint before the SQL query is performed.
275 # Returntype : string
276 # Exceptions : none
277 # Caller : generic_fetch
278
279 sub _default_where_clause {
280 my $self = shift;
281
282 return '';
283 }
284
285
286 sub _left_join {
287 my $self = shift;
288
289 return(
290 ['misc_feature_misc_set','mf.misc_feature_id = mfms.misc_feature_id'],
291 ['misc_attrib', 'mf.misc_feature_id = ma.misc_feature_id'],
292 ['attrib_type','ma.attrib_type_id = at.attrib_type_id']);
293 }
294
295
296 sub _final_clause {
297 my $self = shift;
298
299 return " ORDER BY mf.misc_feature_id";
300 }
301
302
303 # _objs_from_sth
304
305 # Arg [1] : StatementHandle $sth
306 # Example : none
307 # Description: PROTECTED implementation of abstract superclass method.
308 # responsible for the creation of MiscFeatures from a
309 # hashref generated from an SQL query
310 # Returntype : listref of Bio::EnsEMBL::MiscFeatures
311 # Exceptions : none
312 # Caller : internal
313
314 sub _objs_from_sth {
315 my ($self, $sth, $mapper, $dest_slice) = @_;
316
317 #
318 # This code is ugly because an attempt has been made to remove as many
319 # function calls as possible for speed purposes. Thus many caches and
320 # a fair bit of gymnastics is used.
321 #
322
323 my $sa = $self->db()->get_SliceAdaptor();
324 my $msa = $self->db->get_MiscSetAdaptor();
325
326 my @features;
327 my %ms_hash;
328 my %slice_hash;
329 my %sr_name_hash;
330 my %sr_cs_hash;
331
332 my($misc_feature_id, $seq_region_id, $seq_region_start, $seq_region_end,
333 $seq_region_strand, $attrib_value, $attrib_type_code, $misc_set_id,
334 $attrib_type_name, $attrib_type_description );
335
336 $sth->bind_columns( \$misc_feature_id, \$seq_region_id, \$seq_region_start,
337 \$seq_region_end, \$seq_region_strand,
338 \$attrib_value, \$attrib_type_code,\$misc_set_id,
339 \$attrib_type_name, \$attrib_type_description );
340
341 my $asm_cs;
342 my $cmp_cs;
343 my $asm_cs_vers;
344 my $asm_cs_name;
345 my $cmp_cs_vers;
346 my $cmp_cs_name;
347 if($mapper) {
348 $asm_cs = $mapper->assembled_CoordSystem();
349 $cmp_cs = $mapper->component_CoordSystem();
350 $asm_cs_name = $asm_cs->name();
351 $asm_cs_vers = $asm_cs->version();
352 $cmp_cs_name = $cmp_cs->name();
353 $cmp_cs_vers = $cmp_cs->version();
354 }
355
356 my $dest_slice_start;
357 my $dest_slice_end;
358 my $dest_slice_strand;
359 my $dest_slice_length;
360 my $dest_slice_sr_name;
361 my $dest_slice_sr_id;
362 if($dest_slice) {
363 $dest_slice_start = $dest_slice->start();
364 $dest_slice_end = $dest_slice->end();
365 $dest_slice_strand = $dest_slice->strand();
366 $dest_slice_length = $dest_slice->length();
367 $dest_slice_sr_name = $dest_slice->seq_region_name();
368 $dest_slice_sr_id = $dest_slice->get_seq_region_id();
369 }
370
371 my $current = -1;
372 my $throw_away = -1;
373 my $feat;
374 my $feat_misc_sets;
375 my $feat_attribs;
376 my $seen_attribs;
377
378 FEATURE: while($sth->fetch()) {
379 #if this feature is not being used, skip all rows related to it
380 next if($throw_away == $misc_feature_id);
381
382 if ($current == $misc_feature_id) {
383 #still working on building up attributes and sets for current feature
384
385 #if there is a misc_set, add it to the current feature
386 if ($misc_set_id) {
387 my $misc_set = $ms_hash{$misc_set_id} ||=
388 $msa->fetch_by_dbID($misc_set_id);
389 if ( ! exists $feat_misc_sets->{$misc_set->{'code'}} ) {
390 $feat->add_MiscSet( $misc_set );
391 $feat_misc_sets->{$misc_set->{'code'}} = $misc_set;
392 }
393 }
394
395 #if there is a new attribute add it to the current feature
396 if ($attrib_value && $attrib_type_code &&
397 !$seen_attribs->{"$attrib_type_code:$attrib_value"}) {
398 my $attrib = Bio::EnsEMBL::Attribute->new
399 ( -CODE => $attrib_type_code,
400 -NAME => $attrib_type_name,
401 -DESC => $attrib_type_description,
402 -VALUE => $attrib_value
403 );
404
405
406 $feat_attribs ||= [];
407 push @$feat_attribs, $attrib;
408 $seen_attribs->{"$attrib_type_code:$attrib_value"} = 1;
409 }
410
411 } else {
412 if ($feat) {
413 #start working on a new feature, discard references to last one
414 $feat = {};
415 $feat_attribs = [];
416 $feat_misc_sets = {};
417 $seen_attribs = {};
418 }
419
420 $current = $misc_feature_id;
421 #need to get the internal_seq_region, if present
422 $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
423 my $slice = $slice_hash{"ID:".$seq_region_id};
424
425 if (!$slice) {
426 $slice = $sa->fetch_by_seq_region_id($seq_region_id);
427 $slice_hash{"ID:".$seq_region_id} = $slice;
428 $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
429 $sr_cs_hash{$seq_region_id} = $slice->coord_system();
430 }
431
432 my $sr_name = $sr_name_hash{$seq_region_id};
433 my $sr_cs = $sr_cs_hash{$seq_region_id};
434 #
435 # remap the feature coordinates to another coord system
436 # if a mapper was provided
437 #
438 if ($mapper) {
439
440
441 if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) {
442 ( $seq_region_id, $seq_region_start,$seq_region_end, $seq_region_strand ) =
443 $mapper->map( $sr_name, $seq_region_start, $seq_region_end,
444 $seq_region_strand, $sr_cs, 1, $dest_slice);
445
446 } else {
447
448 ( $seq_region_id, $seq_region_start,$seq_region_end, $seq_region_strand ) =
449 $mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end,$seq_region_strand, $sr_cs );
450 }
451
452 #skip features that map to gaps or coord system boundaries
453 if(!defined($seq_region_id)) {
454 $throw_away = $misc_feature_id;
455 next FEATURE;
456 }
457
458 #get a slice in the coord system we just mapped to
459 # if ($asm_cs == $sr_cs ||
460 # ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
461 $slice = $slice_hash{"ID:".$seq_region_id} ||=
462 $sa->fetch_by_seq_region_id($seq_region_id);
463 # } else {
464 # $slice = $slice_hash{"NAME:$sr_name:$asm_cs_name:$asm_cs_vers"} ||=
465 # $sa->fetch_by_region($asm_cs_name, $sr_name, undef, undef, undef,
466 # $asm_cs_vers);
467 # }
468 }
469
470 #
471 # If a destination slice was provided convert the coords
472 # If the dest_slice starts at 1 and is foward strand, nothing needs doing
473 #
474 if ($dest_slice) {
475 if ($dest_slice_start != 1 || $dest_slice_strand != 1) {
476 if ($dest_slice_strand == 1) {
477 $seq_region_start = $seq_region_start - $dest_slice_start + 1;
478 $seq_region_end = $seq_region_end - $dest_slice_start + 1;
479 } else {
480 my $tmp_seq_region_start = $seq_region_start;
481 $seq_region_start = $dest_slice_end - $seq_region_end + 1;
482 $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1;
483 $seq_region_strand *= -1;
484 }
485 }
486 #throw away features off the end of the requested slice
487 if ($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
488 ( $dest_slice_sr_id ne $seq_region_id )) {
489 #flag this feature as one to throw away
490 $throw_away = $misc_feature_id;
491 next FEATURE;
492 }
493 $slice = $dest_slice;
494 }
495
496
497 if ($attrib_value && $attrib_type_code) {
498 my $attrib = Bio::EnsEMBL::Attribute->new
499 ( -CODE => $attrib_type_code,
500 -NAME => $attrib_type_name,
501 -DESC => $attrib_type_description,
502 -VALUE => $attrib_value
503 );
504 $feat_attribs = [$attrib];
505 $seen_attribs->{"$attrib_type_code:$attrib_value"} = 1;
506 }
507
508 $feat =
509 $self->_create_feature_fast( 'Bio::EnsEMBL::MiscFeature', {
510 'start' => $seq_region_start,
511 'end' => $seq_region_end,
512 'strand' => $seq_region_strand,
513 'slice' => $slice,
514 'adaptor' => $self,
515 'dbID' => $misc_feature_id,
516 'attributes' => $feat_attribs
517 } );
518
519 push @features, $feat;
520
521 if ($misc_set_id) {
522 #get the misc_set object
523 my $misc_set = $ms_hash{$misc_set_id} ||=
524 $msa->fetch_by_dbID($misc_set_id);
525 if ( ! exists $feat_misc_sets->{$misc_set->{'code'}} ) {
526 $feat->add_MiscSet( $misc_set );
527 $feat_misc_sets->{$misc_set->{'code'}} = $misc_set;
528 }
529 }
530 }
531 }
532
533 return \@features;
534 }
535
536
537
538 =head2 list_dbIDs
539
540 Arg [1] : none
541 Example : @feature_ids = @{$misc_feature_adaptor->list_dbIDs()};
542 Description: Gets an array of internal ids for all misc_features in the
543 current db
544 Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region.
545 Returntype : list of ints
546 Exceptions : none
547 Caller : ?
548 Status : Stable
549
550 =cut
551
552 sub list_dbIDs {
553 my ($self,$ordered) = @_;
554
555 return $self->_list_dbIDs("misc_feature",undef,$ordered);
556 }
557
558
559 =head2 store
560
561 Arg [1] : list of Bio::EnsEMBL::MiscFeatures @misc_features
562 Example : $misc_feature_adaptor->store(@misc_features);
563 Description: Stores a list of MiscFeatures in this database. The stored
564 features will have their
565 Returntype : none
566 Exceptions : throw on invalid arguments
567 warning if misc feature is already stored in this database
568 throw if start/end/strand attribs are not valid
569 Caller : general
570 Status : Stable
571
572 =cut
573
574 sub store {
575 my $self = shift;
576 my @misc_features = @_;
577
578 my $db = $self->db();
579
580 my $feature_sth = $self->prepare
581 ("INSERT INTO misc_feature SET " .
582 " seq_region_id = ?, " .
583 " seq_region_start = ?, " .
584 " seq_region_end = ?, " .
585 " seq_region_strand = ?");
586
587 my $feature_set_sth = $self->prepare
588 ("INSERT IGNORE misc_feature_misc_set SET " .
589 " misc_feature_id = ?, " .
590 " misc_set_id = ?");
591
592 my $msa = $db->get_MiscSetAdaptor();
593 my $aa = $db->get_AttributeAdaptor();
594
595 FEATURE:
596 foreach my $mf (@misc_features) {
597 if(!ref($mf) || !$mf->isa('Bio::EnsEMBL::MiscFeature')) {
598 throw("List of MiscFeature arguments expeceted");
599 }
600
601 if($mf->is_stored($db)) {
602 warning("MiscFeature [" .$mf->dbID."] is already stored in database.");
603 next FEATURE;
604 }
605
606 # do some checking of the start/end and convert to seq_region coords
607 my $original = $mf;
608 my $seq_region_id;
609 ($mf, $seq_region_id) = $self->_pre_store($mf);
610
611 # store the actual MiscFeature
612 $feature_sth->bind_param(1,$seq_region_id,SQL_INTEGER);
613 $feature_sth->bind_param(2,$mf->start,SQL_INTEGER);
614 $feature_sth->bind_param(3,$mf->end,SQL_INTEGER);
615 $feature_sth->bind_param(4,$mf->strand,SQL_TINYINT);
616 $feature_sth->execute();
617
618 my $dbID = $feature_sth->{'mysql_insertid'};
619
620 $mf->dbID($dbID);
621 $mf->adaptor($self);
622
623 # store all the attributes
624 my $attribs = $mf->get_all_Attributes();
625 $aa->store_on_MiscFeature($mf, $attribs);
626
627 # store all the sets that have not been stored yet
628 my $sets = $mf->get_all_MiscSets();
629 foreach my $set (@$sets) {
630 $msa->store($set) if(!$set->is_stored($db));
631
632 # update the misc_feat_misc_set table to store the set relationship
633 $feature_set_sth->bind_param(1,$dbID,SQL_INTEGER);
634 $feature_set_sth->bind_param(2,$set->dbID,SQL_INTEGER);
635
636 $feature_set_sth->execute();
637 }
638 }
639
640 return;
641 }
642
643 1;
644
645
646
647
648