comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/DnaAlignFeatureAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor - Adaptor for DnaAlignFeatures
24
25 =head1 SYNOPSIS
26
27 $dafa = $registry->get_adaptor( 'Human', 'Core', 'DnaAlignFeature' );
28
29 my @features = @{ $dafa->fetch_all_by_Slice($slice) };
30
31 $dafa->store(@features);
32
33 =head1 DESCRIPTION
34
35 This is an adaptor responsible for the retrieval and storage of
36 DnaDnaAlignFeatures from the database. This adaptor inherits most of its
37 functionality from the BaseAlignFeatureAdaptor and BaseFeatureAdaptor
38 superclasses.
39
40 =head1 METHODS
41
42 =cut
43
44
45 package Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor;
46 use vars qw(@ISA);
47 use strict;
48 use Bio::EnsEMBL::DnaDnaAlignFeature;
49 use Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor;
50 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
51
52 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor);
53
54
55 =head2 _tables
56
57 Args : none
58 Example : @tabs = $self->_tables
59 Description: PROTECTED implementation of the abstract method inherited from
60 BaseFeatureAdaptor. Returns list of [tablename, alias] pairs
61 Returntype : list of listrefs of strings
62 Exceptions : none
63 Caller : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::generic_fetch
64 Status : Stable
65
66 =cut
67
68 sub _tables {
69 my $self = shift;
70
71 return (['dna_align_feature', 'daf'],['external_db','exdb']);
72 }
73
74
75 sub _left_join{
76 return (['external_db',"exdb.external_db_id = daf.external_db_id"]);
77 }
78
79 =head2 _columns
80
81 Args : none
82 Example : @columns = $self->_columns
83 Description: PROTECTED implementation of abstract superclass method.
84 Returns a list of columns that are needed for object creation.
85 Returntype : list of strings
86 Exceptions : none
87 Caller : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::generic_fetch
88 Status : Stable
89
90 =cut
91
92 sub _columns {
93 my $self = shift;
94
95 #warning, implementation of _objs_from_sth method depends on order of list
96 return qw(daf.dna_align_feature_id
97 daf.seq_region_id
98 daf.analysis_id
99 daf.seq_region_start
100 daf.seq_region_end
101 daf.seq_region_strand
102 daf.hit_start
103 daf.hit_end
104 daf.hit_name
105 daf.hit_strand
106 daf.cigar_line
107 daf.evalue
108 daf.perc_ident
109 daf.score
110 daf.external_db_id
111 daf.hcoverage
112 daf.external_data
113 daf.pair_dna_align_feature_id
114 exdb.db_name
115 exdb.db_display_name);
116 }
117
118
119 =head2 store
120
121 Arg [1] : list of Bio::EnsEMBL::DnaAlignFeatures @feats
122 the features to store in the database
123 Example : $dna_align_feature_adaptor->store(@features);
124 Description: Stores a list of DnaAlignFeatures in the database
125 Returntype : none
126 Exceptions : throw if any of the provided features cannot be stored
127 which may occur if:
128 * The feature does not have an associate Slice
129 * The feature does not have an associated analysis
130 * The Slice the feature is associated with is on a seq_region
131 unknown to this database
132 A warning is given if:
133 * The feature has already been stored in this db
134 Caller : Pipeline
135 Status : Stable
136
137 =cut
138
139 sub store {
140 my ( $self, @feats ) = @_;
141
142 throw("Must call store with features") if ( scalar(@feats) == 0 );
143
144 my @tabs = $self->_tables;
145 my ($tablename) = @{ $tabs[0] };
146
147 my $db = $self->db();
148 my $analysis_adaptor = $db->get_AnalysisAdaptor();
149
150 my $sth = $self->prepare(
151 "INSERT INTO $tablename (seq_region_id, seq_region_start,
152 seq_region_end, seq_region_strand,
153 hit_start, hit_end, hit_strand, hit_name,
154 cigar_line, analysis_id, score, evalue,
155 perc_ident, external_db_id, hcoverage,
156 pair_dna_align_feature_id)
157 VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)" # 16 arguments
158 );
159
160 FEATURE:
161 foreach my $feat (@feats) {
162 if ( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaDnaAlignFeature") )
163 {
164 throw("feature must be a Bio::EnsEMBL::DnaDnaAlignFeature,"
165 . " not a ["
166 . ref($feat)
167 . "]." );
168 }
169
170 if ( $feat->is_stored($db) ) {
171 warning( "DnaDnaAlignFeature ["
172 . $feat->dbID()
173 . "] is already stored in this database." );
174 next FEATURE;
175 }
176
177 my $hstart = $feat->hstart();
178 my $hend = $feat->hend();
179 my $hstrand = $feat->hstrand();
180 $self->_check_start_end_strand( $hstart, $hend, $hstrand );
181
182 my $cigar_string = $feat->cigar_string();
183 if ( !$cigar_string ) {
184 $cigar_string = $feat->length() . 'M';
185 warning( "DnaDnaAlignFeature does not define a cigar_string.\n"
186 . "Assuming ungapped block with cigar_line=$cigar_string ." );
187 }
188
189 my $hseqname = $feat->hseqname();
190 if ( !$hseqname ) {
191 throw("DnaDnaAlignFeature must define an hseqname.");
192 }
193
194 if ( !defined( $feat->analysis ) ) {
195 throw(
196 "An analysis must be attached to the features to be stored.");
197 }
198
199 #store the analysis if it has not been stored yet
200 if ( !$feat->analysis->is_stored($db) ) {
201 $analysis_adaptor->store( $feat->analysis() );
202 }
203
204 my $original = $feat;
205 my $seq_region_id;
206 ( $feat, $seq_region_id ) = $self->_pre_store($feat);
207
208 $sth->bind_param( 1, $seq_region_id, SQL_INTEGER );
209 $sth->bind_param( 2, $feat->start, SQL_INTEGER );
210 $sth->bind_param( 3, $feat->end, SQL_INTEGER );
211 $sth->bind_param( 4, $feat->strand, SQL_TINYINT );
212 $sth->bind_param( 5, $hstart, SQL_INTEGER );
213 $sth->bind_param( 6, $hend, SQL_INTEGER );
214 $sth->bind_param( 7, $hstrand, SQL_TINYINT );
215 $sth->bind_param( 8, $hseqname, SQL_VARCHAR );
216 $sth->bind_param( 9, $cigar_string, SQL_LONGVARCHAR );
217 $sth->bind_param( 10, $feat->analysis->dbID, SQL_INTEGER );
218 $sth->bind_param( 11, $feat->score, SQL_DOUBLE );
219 $sth->bind_param( 12, $feat->p_value, SQL_DOUBLE );
220 $sth->bind_param( 13, $feat->percent_id, SQL_FLOAT );
221 $sth->bind_param( 14, $feat->external_db_id, SQL_INTEGER );
222 $sth->bind_param( 15, $feat->hcoverage, SQL_DOUBLE );
223 $sth->bind_param( 16, $feat->pair_dna_align_feature_id,
224 SQL_INTEGER );
225
226 $sth->execute();
227
228 $original->dbID( $sth->{'mysql_insertid'} );
229 $original->adaptor($self);
230 } ## end foreach my $feat (@feats)
231
232 $sth->finish();
233 } ## end sub store
234
235
236 sub save {
237 my ($self, $features) = @_;
238
239 my @feats = @$features;
240 throw("Must call store with features") if( scalar(@feats) == 0 );
241
242 my @tabs = $self->_tables;
243 my ($tablename) = @{$tabs[0]};
244
245 my $db = $self->db();
246 my $analysis_adaptor = $db->get_AnalysisAdaptor();
247
248 my $sql = qq{INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end, seq_region_strand, hit_start, hit_end, hit_strand, hit_name, cigar_line, analysis_id, score, evalue, perc_ident, external_db_id, hcoverage, pair_dna_align_feature_id, external_data) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)};
249
250 my %analyses = ();
251
252 my $sth = $self->prepare($sql);
253
254 FEATURE: foreach my $feat ( @feats ) {
255 if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaDnaAlignFeature") ) {
256 throw("feature must be a Bio::EnsEMBL::DnaDnaAlignFeature,"
257 . " not a [".ref($feat)."].");
258 }
259
260 if($feat->is_stored($db)) {
261 warning("DnaDnaAlignFeature [".$feat->dbID."] is already stored" .
262 " in this database.");
263 next FEATURE;
264 }
265
266 my $hstart = $feat->hstart || 0; # defined $feat->hstart ? $feat->hstart : $feat->start ;
267 my $hend = $feat->hend || 0; # defined $feat->hend ? $feat->hend : $feat->end;
268 my $hstrand = $feat->hstrand|| 0; # defined $feat->hstrand ? $feat->hstrand : $feat->strand;
269 if( $hstart && $hend ) {
270 if($hend < $hstart) {
271 throw("Invalid Feature start/end [$hstart/$hend]. Start must be less than or equal to end.");
272 }
273 }
274 my $cigar_string = $feat->cigar_string();
275 if(!$cigar_string) {
276 $cigar_string = $feat->length() . 'M';
277 warning("DnaDnaAlignFeature does not define a cigar_string.\n" .
278 "Assuming ungapped block with cigar_line=$cigar_string .");
279 }
280
281 my $hseqname = $feat->hseqname();
282 if(!$hseqname) {
283 throw("DnaDnaAlignFeature must define an hseqname.");
284 }
285
286 if(!defined($feat->analysis)) {
287 throw("An analysis must be attached to the features to be stored.");
288 }
289
290 #store the analysis if it has not been stored yet
291 if(!$feat->analysis->is_stored($db)) {
292 $analysis_adaptor->store($feat->analysis());
293 }
294
295 $analyses{ $feat->analysis->dbID }++;
296
297 my $original = $feat;
298 my $seq_region_id;
299 ($feat, $seq_region_id) = $self->_pre_store_userdata($feat);
300
301 my $extra_data = $feat->extra_data ? $self->dump_data($feat->extra_data) : '';
302
303 $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
304 $sth->bind_param(2,$feat->start,SQL_INTEGER);
305 $sth->bind_param(3,$feat->end,SQL_INTEGER);
306 $sth->bind_param(4,$feat->strand,SQL_TINYINT);
307 $sth->bind_param(5,$hstart,SQL_INTEGER);
308 $sth->bind_param(6,$hend,SQL_INTEGER);
309 $sth->bind_param(7,$hstrand,SQL_TINYINT);
310 $sth->bind_param(8,$hseqname,SQL_VARCHAR);
311 $sth->bind_param(9,$cigar_string,SQL_LONGVARCHAR);
312 $sth->bind_param(10,$feat->analysis->dbID,SQL_INTEGER);
313 $sth->bind_param(11,$feat->score,SQL_DOUBLE);
314 # $sth->bind_param(11,$feat->score); # if the above statement does not work it means you need to upgrade DBD::mysql, meantime you can replace it with this line
315 $sth->bind_param(12,$feat->p_value,SQL_DOUBLE);
316 $sth->bind_param(13,$feat->percent_id,SQL_FLOAT);
317 $sth->bind_param(14,$feat->external_db_id,SQL_INTEGER);
318 $sth->bind_param(15,$feat->hcoverage,SQL_DOUBLE);
319 $sth->bind_param(16,$feat->pair_dna_align_feature_id,SQL_INTEGER);
320 $sth->bind_param(17,$extra_data,SQL_LONGVARCHAR);
321
322
323 $sth->execute();
324 $original->dbID($sth->{'mysql_insertid'});
325 $original->adaptor($self);
326 }
327
328 $sth->finish();
329
330 ## js5 hack to update meta_coord table...
331 if( keys %analyses ) {
332
333 my $sth = $self->prepare( 'select sr.coord_system_id, max(daf.seq_region_end-daf.seq_region_start) from seq_region as sr, dna_align_feature as daf where daf.seq_region_id=sr.seq_region_id and analysis_id in ('.join(',',keys %analyses).') group by coord_system_id' );
334 $sth->execute;
335
336 foreach( @{ $sth->fetchall_arrayref } ) {
337 my $sth2 = $self->prepare( qq(insert ignore into meta_coord values("dna_align_feature",$_->[0],$_->[1])) );
338 $sth2->execute;
339 $sth2->finish;
340
341 $sth2 = $self->prepare( qq(update meta_coord set max_length = $_->[1] where coord_system_id = $_->[0] and table_name="dna_align_feature" and max_length < $_->[1]) );
342 $sth2->execute;
343 $sth2->finish;
344 }
345
346 $sth->finish;
347 }
348
349 }
350
351
352 =head2 _objs_from_sth
353
354 Arg [1] : DBI statement handle $sth
355 an exectuted DBI statement handle generated by selecting
356 the columns specified by _columns() from the table specified
357 by _table()
358 Example : @dna_dna_align_feats = $self->_obj_from_hashref
359 Description: PROTECTED implementation of superclass abstract method.
360 Creates DnaDnaAlignFeature objects from a DBI hashref
361 Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures
362 Exceptions : none
363 Caller : Bio::EnsEMBL::BaseFeatureAdaptor::generic_fetch
364 Status : Stable
365
366 =cut
367
368 sub _objs_from_sth {
369 my ( $self, $sth, $mapper, $dest_slice ) = @_;
370
371 #
372 # This code is ugly because an attempt has been made to remove as many
373 # function calls as possible for speed purposes. Thus many caches and
374 # a fair bit of gymnastics is used.
375 #
376
377 # In case of userdata we need the features on the dest_slice. In case
378 # of get_all_supporting_features dest_slice is not provided.
379 my $sa = ( $dest_slice
380 ? $dest_slice->adaptor()
381 : $self->db()->get_SliceAdaptor() );
382 my $aa = $self->db->get_AnalysisAdaptor();
383
384 my @features;
385 my %analysis_hash;
386 my %slice_hash;
387 my %sr_name_hash;
388 my %sr_cs_hash;
389
390 my ( $dna_align_feature_id, $seq_region_id,
391 $analysis_id, $seq_region_start,
392 $seq_region_end, $seq_region_strand,
393 $hit_start, $hit_end,
394 $hit_name, $hit_strand,
395 $cigar_line, $evalue,
396 $perc_ident, $score,
397 $external_db_id, $hcoverage,
398 $extra_data, $pair_dna_align_feature_id,
399 $external_db_name, $external_display_db_name );
400
401 $sth->bind_columns( \( $dna_align_feature_id, $seq_region_id,
402 $analysis_id, $seq_region_start,
403 $seq_region_end, $seq_region_strand,
404 $hit_start, $hit_end,
405 $hit_name, $hit_strand,
406 $cigar_line, $evalue,
407 $perc_ident, $score,
408 $external_db_id, $hcoverage,
409 $extra_data, $pair_dna_align_feature_id,
410 $external_db_name, $external_display_db_name )
411 );
412
413 my $asm_cs;
414 my $cmp_cs;
415 my $asm_cs_vers;
416 my $asm_cs_name;
417 my $cmp_cs_vers;
418 my $cmp_cs_name;
419
420 if ( defined($mapper) ) {
421 $asm_cs = $mapper->assembled_CoordSystem();
422 $cmp_cs = $mapper->component_CoordSystem();
423 $asm_cs_name = $asm_cs->name();
424 $asm_cs_vers = $asm_cs->version();
425 $cmp_cs_name = $cmp_cs->name();
426 $cmp_cs_vers = $cmp_cs->version();
427 }
428
429 my $dest_slice_start;
430 my $dest_slice_end;
431 my $dest_slice_strand;
432 my $dest_slice_length;
433 my $dest_slice_sr_name;
434 my $dest_slice_seq_region_id;
435
436 if ( defined($dest_slice) ) {
437 $dest_slice_start = $dest_slice->start();
438 $dest_slice_end = $dest_slice->end();
439 $dest_slice_strand = $dest_slice->strand();
440 $dest_slice_length = $dest_slice->length();
441 $dest_slice_sr_name = $dest_slice->seq_region_name();
442 $dest_slice_seq_region_id = $dest_slice->get_seq_region_id();
443 }
444
445 FEATURE:
446 while ( $sth->fetch() ) {
447 # Get the analysis object.
448 my $analysis = $analysis_hash{$analysis_id} ||=
449 $aa->fetch_by_dbID($analysis_id);
450
451 # Get the slice object.
452 my $slice = $slice_hash{ "ID:" . $seq_region_id };
453
454 if ( !defined($slice) ) {
455 $slice = $sa->fetch_by_seq_region_id($seq_region_id);
456 if ( defined($slice) ) {
457 $slice_hash{ "ID:" . $seq_region_id } = $slice;
458 $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
459 $sr_cs_hash{$seq_region_id} = $slice->coord_system();
460 }
461 }
462
463 my $sr_name = $sr_name_hash{$seq_region_id};
464 my $sr_cs = $sr_cs_hash{$seq_region_id};
465
466 # Remap the feature coordinates to another coord system
467 # if a mapper was provided.
468 if ( defined($mapper) ) {
469
470 if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) {
471 ( $seq_region_id, $seq_region_start,
472 $seq_region_end, $seq_region_strand )
473 =
474 $mapper->map( $sr_name, $seq_region_start, $seq_region_end,
475 $seq_region_strand, $sr_cs, 1, $dest_slice);
476
477 } else {
478
479 ( $seq_region_id, $seq_region_start,
480 $seq_region_end, $seq_region_strand )
481 =
482 $mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end,
483 $seq_region_strand, $sr_cs );
484 }
485
486 # Skip features that map to gaps or coord system boundaries.
487 if ( !defined($seq_region_id) ) { next FEATURE }
488
489 # Get a slice in the coord system we just mapped to.
490 if ( $asm_cs == $sr_cs
491 || ( $cmp_cs != $sr_cs && $asm_cs->equals($sr_cs) ) )
492 {
493 $slice = $slice_hash{ "ID:" . $seq_region_id } ||=
494 $sa->fetch_by_seq_region_id($seq_region_id);
495 } else {
496 $slice = $slice_hash{ "ID:" . $seq_region_id } ||=
497 $sa->fetch_by_seq_region_id($seq_region_id);
498 }
499 }
500
501 # If a destination slice was provided, convert the coords. If the
502 # dest_slice starts at 1 and is forward strand, nothing needs doing.
503 if ( defined($dest_slice) ) {
504 if ( $dest_slice_start != 1 || $dest_slice_strand != 1 ) {
505 if ( $dest_slice_strand == 1 ) {
506 $seq_region_start = $seq_region_start - $dest_slice_start + 1;
507 $seq_region_end = $seq_region_end - $dest_slice_start + 1;
508 } else {
509 my $tmp_seq_region_start = $seq_region_start;
510 $seq_region_start = $dest_slice_end - $seq_region_end + 1;
511 $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1;
512 $seq_region_strand = -$seq_region_strand;
513 }
514
515 # Throw away features off the end of the requested slice.
516 if ( $seq_region_end < 1
517 || $seq_region_start > $dest_slice_length
518 || ( $dest_slice_seq_region_id ne $seq_region_id ) )
519 {
520 next FEATURE;
521 }
522 }
523 $slice = $dest_slice;
524 }
525
526 # Finally, create the new DnaAlignFeature.
527 push( @features,
528 $self->_create_feature_fast(
529 'Bio::EnsEMBL::DnaDnaAlignFeature', {
530 'slice' => $slice,
531 'start' => $seq_region_start,
532 'end' => $seq_region_end,
533 'strand' => $seq_region_strand,
534 'hseqname' => $hit_name,
535 'hstart' => $hit_start,
536 'hend' => $hit_end,
537 'hstrand' => $hit_strand,
538 'score' => $score,
539 'p_value' => $evalue,
540 'percent_id' => $perc_ident,
541 'cigar_string' => $cigar_line,
542 'analysis' => $analysis,
543 'adaptor' => $self,
544 'dbID' => $dna_align_feature_id,
545 'external_db_id' => $external_db_id,
546 'hcoverage' => $hcoverage,
547 'extra_data' => (
548 $extra_data
549 ? $self->get_dumped_data($extra_data)
550 : '' ),
551 'dbname' => $external_db_name,
552 'db_display_name' => $external_display_db_name,
553 'pair_dna_align_feature_id' => $pair_dna_align_feature_id
554 } ) );
555
556 } ## end while ( $sth->fetch() )
557
558 return \@features;
559 } ## end sub _objs_from_sth
560
561 =head2 list_dbIDs
562
563 Arg [1] : none
564 Example : @feature_ids = @{$dna_align_feature_adaptor->list_dbIDs()};
565 Description: Gets an array of internal ids for all dna align features in
566 the current db
567 Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region.
568 Returntype : list of ints
569 Exceptions : none
570 Caller : ?
571 Status : Stable
572
573 =cut
574
575 sub list_dbIDs {
576 my ($self, $ordered) = @_;
577
578 return $self->_list_dbIDs("dna_align_feature",undef, $ordered);
579 }
580
581 1;
582
583