comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/PredictionTranscriptAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::DBSQL::PredictionTranscriptAdaptor -
24 Performs database interaction related to PredictionTranscripts
25
26 =head1 SYNOPSIS
27
28 # get a prediction transcript adaptor from the database
29 $pta = $database_adaptor->get_PredictionTranscriptAdaptor();
30
31 # get a slice on a region of chromosome 1
32 $sa = $database_adaptor->get_SliceAdaptor();
33
34 $slice = $sa->fetch_by_region( 'chromosome', 'x', 100000, 200000 );
35
36 # get all the prediction transcripts from the slice region
37 $prediction_transcripts = @{ $pta->fetch_all_by_Slice($slice) };
38
39 =head1 METHODS
40
41 =cut
42
43 package Bio::EnsEMBL::DBSQL::PredictionTranscriptAdaptor;
44
45 use vars qw( @ISA );
46 use strict;
47
48 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
49 use Bio::EnsEMBL::DBSQL::AnalysisAdaptor;
50 use Bio::EnsEMBL::PredictionTranscript;
51 use Bio::EnsEMBL::Utils::Exception qw(deprecate throw warning);
52
53 @ISA = qw( Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor );
54
55
56 # _tables
57 #
58 # Arg [1] : none
59 # Example : none
60 # Description: Implements abstract superclass method to define the table used
61 # to retrieve prediction transcripts from the database
62 # Returntype : string
63 # Exceptions : none
64 # Caller : generic_fetch
65
66 sub _tables {
67 my $self = shift;
68
69 return ['prediction_transcript', 'pt'];
70 }
71
72
73 # _columns
74
75 # Arg [1] : none
76 # Example : none
77 # Description: Implements abstract superclass method to define the columns
78 # retrieved in database queries used to create prediction
79 # transcripts.
80 # Returntype : list of strings
81 # Exceptions : none
82 # Caller : generic_fetch
83 #
84
85 sub _columns {
86 my $self = shift;
87
88 return qw( pt.prediction_transcript_id
89 pt.seq_region_id
90 pt.seq_region_start
91 pt.seq_region_end
92 pt.seq_region_strand
93 pt.analysis_id
94 pt.display_label);
95 }
96
97
98 =head2 fetch_by_stable_id
99
100 Arg [1] : string $stable_id
101 The stable id of the transcript to retrieve
102 Example : $trans = $trans_adptr->fetch_by_stable_id('GENSCAN00000001234');
103 Description: Retrieves a prediction transcript via its display_label.
104 This method is called fetch_by_stable_id for polymorphism with
105 the TranscriptAdaptor. Prediction transcript display_labels are
106 not necessarily stable in that the same identifier may be reused
107 for a completely different prediction transcript in a subsequent
108 database release.
109 Returntype : Bio::EnsEMBL::PredictionTranscript
110 Caller : general
111 Status : Stable
112
113 =cut
114
115 sub fetch_by_stable_id {
116 my $self = shift;
117 my $stable_id = shift;
118
119 throw('Stable_id argument expected') if(!$stable_id);
120
121 my $syn = $self->_tables()->[1];
122
123 $self->bind_param_generic_fetch($stable_id,SQL_VARCHAR);
124 my $pts = $self->generic_fetch("$syn.display_label = ?");
125
126 return (@$pts) ? $pts->[0] : undef;
127 }
128
129
130
131 =head2 fetch_all_by_Slice
132
133 Arg [1] : Bio::EnsEMBL::Slice $slice
134 The slice to fetch transcripts on.
135 Arg [3] : (optional) boolean $load_exons
136 if true, exons will be loaded immediately rather than
137 lazy loaded later.
138 Example : $transcripts = $
139 Description: Overrides superclass method to optionally load exons
140 immediately rather than lazy-loading them later. This
141 is more efficient when there are a lot of transcripts whose
142 exons are going to be used.
143 Returntype : reference to list of transcripts
144 Exceptions : thrown if exon cannot be placed on transcript slice
145 Caller : Slice::get_all_Transcripts
146 Status : Stable
147
148 =cut
149
150 sub fetch_all_by_Slice {
151 my $self = shift;
152 my $slice = shift;
153 my $logic_name = shift;
154 my $load_exons = shift;
155
156 my $transcripts = $self->SUPER::fetch_all_by_Slice($slice,$logic_name);
157
158 # if there are 0 or 1 transcripts still do lazy-loading
159 if(!$load_exons || @$transcripts < 2) {
160 return $transcripts;
161 }
162
163 # preload all of the exons now, instead of lazy loading later
164 # faster than 1 query per transcript
165
166 # get extent of region spanned by transcripts
167 my ($min_start, $max_end);
168 foreach my $tr (@$transcripts) {
169 if(!defined($min_start) || $tr->seq_region_start() < $min_start) {
170 $min_start = $tr->seq_region_start();
171 }
172 if(!defined($max_end) || $tr->seq_region_end() > $max_end) {
173 $max_end = $tr->seq_region_end();
174 }
175 }
176
177 # mades no sense, the limit for the slice will be defined by the transcripts
178 # $min_start += $slice->start() - 1;
179 # $max_end += $slice->start() - 1;
180
181 my $ext_slice;
182
183 if($min_start >= $slice->start() && $max_end <= $slice->end()) {
184 $ext_slice = $slice;
185 } else {
186 my $sa = $self->db()->get_SliceAdaptor();
187 $ext_slice = $sa->fetch_by_region
188 ($slice->coord_system->name(), $slice->seq_region_name(),
189 $min_start,$max_end, $slice->strand(), $slice->coord_system->version());
190 }
191
192 # associate exon identifiers with transcripts
193
194 my %tr_hash = map {$_->dbID => $_} @$transcripts;
195
196 my $tr_id_str = '(' . join(',', keys %tr_hash) . ')';
197
198 my $sth = $self->prepare
199 ("SELECT prediction_transcript_id, prediction_exon_id, exon_rank " .
200 "FROM prediction_exon " .
201 "WHERE prediction_transcript_id IN $tr_id_str");
202
203 $sth->execute();
204
205 my ($ex_id, $tr_id, $rank);
206 $sth->bind_columns(\$tr_id, \$ex_id, \$rank);
207
208 my %ex_tr_hash;
209
210 while($sth->fetch()) {
211 $ex_tr_hash{$ex_id} ||= [];
212 push @{$ex_tr_hash{$ex_id}}, [$tr_hash{$tr_id}, $rank];
213 }
214
215 $sth->finish();
216
217 my $ea = $self->db()->get_PredictionExonAdaptor();
218 my $exons = $ea->fetch_all_by_Slice($ext_slice);
219
220 # move exons onto transcript slice, and add them to transcripts
221 foreach my $ex (@$exons) {
222 $ex = $ex->transfer($slice) if($slice != $ext_slice);
223
224 if(!$ex) {
225 throw("Unexpected. PredictionExon could not be transfered onto " .
226 "PredictionTranscript slice.");
227 }
228
229 foreach my $row (@{$ex_tr_hash{$ex->dbID()}}) {
230 my ($tr, $rank) = @$row;
231 $tr->add_Exon($ex, $rank);
232 }
233 }
234
235 return $transcripts;
236 }
237
238
239
240
241
242 =head2 _objs_from_sth
243
244 Arg [1] : DBI:st $sth
245 An executed DBI statement handle
246 Arg [2] : (optional) Bio::EnsEMBL::Mapper $mapper
247 An mapper to be used to convert contig coordinates
248 to assembly coordinates.
249 Arg [3] : (optional) Bio::EnsEMBL::Slice $slice
250 A slice to map the prediction transcript to.
251 Example : $p_transcripts = $self->_objs_from_sth($sth);
252 Description: Creates a list of Prediction transcripts from an executed DBI
253 statement handle. The columns retrieved via the statement
254 handle must be in the same order as the columns defined by the
255 _columns method. If the slice argument is provided then the
256 the prediction transcripts will be in returned in the coordinate
257 system of the $slice argument. Otherwise the prediction
258 transcripts will be returned in the RawContig coordinate system.
259 Returntype : reference to a list of Bio::EnsEMBL::PredictionTranscripts
260 Exceptions : none
261 Caller : superclass generic_fetch
262 Status : Stable
263
264 =cut
265
266 sub _objs_from_sth {
267 my ($self, $sth, $mapper, $dest_slice) = @_;
268
269 #
270 # This code is ugly because an attempt has been made to remove as many
271 # function calls as possible for speed purposes. Thus many caches and
272 # a fair bit of gymnastics is used.
273 #
274
275 my $sa = $self->db()->get_SliceAdaptor();
276 my $aa = $self->db()->get_AnalysisAdaptor();
277
278 my @ptranscripts;
279 my %analysis_hash;
280 my %slice_hash;
281 my %sr_name_hash;
282 my %sr_cs_hash;
283
284 my ($prediction_transcript_id,
285 $seq_region_id,
286 $seq_region_start,
287 $seq_region_end,
288 $seq_region_strand,
289 $analysis_id,
290 $display_label);
291
292 $sth->bind_columns(\$prediction_transcript_id,
293 \$seq_region_id,
294 \$seq_region_start,
295 \$seq_region_end,
296 \$seq_region_strand,
297 \$analysis_id,
298 \$display_label);
299
300 my $asm_cs;
301 my $cmp_cs;
302 my $asm_cs_vers;
303 my $asm_cs_name;
304 my $cmp_cs_vers;
305 my $cmp_cs_name;
306 if($mapper) {
307 $asm_cs = $mapper->assembled_CoordSystem();
308 $cmp_cs = $mapper->component_CoordSystem();
309 $asm_cs_name = $asm_cs->name();
310 $asm_cs_vers = $asm_cs->version();
311 $cmp_cs_name = $cmp_cs->name();
312 $cmp_cs_vers = $cmp_cs->version();
313 }
314
315 my $dest_slice_start;
316 my $dest_slice_end;
317 my $dest_slice_strand;
318 my $dest_slice_length;
319 my $dest_slice_sr_name;
320 my $dest_slice_sr_id;
321 if($dest_slice) {
322 $dest_slice_start = $dest_slice->start();
323 $dest_slice_end = $dest_slice->end();
324 $dest_slice_strand = $dest_slice->strand();
325 $dest_slice_length = $dest_slice->length();
326 $dest_slice_sr_name = $dest_slice->seq_region_name();
327 $dest_slice_sr_id = $dest_slice->get_seq_region_id();
328 }
329
330 FEATURE: while($sth->fetch()) {
331
332 #get the analysis object
333 my $analysis = $analysis_hash{$analysis_id} ||=
334 $aa->fetch_by_dbID($analysis_id);
335 #need to get the internal_seq_region, if present
336 $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
337 my $slice = $slice_hash{"ID:".$seq_region_id};
338
339 if(!$slice) {
340 $slice = $sa->fetch_by_seq_region_id($seq_region_id);
341 $slice_hash{"ID:".$seq_region_id} = $slice;
342 $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
343 $sr_cs_hash{$seq_region_id} = $slice->coord_system();
344 }
345
346 my $sr_name = $sr_name_hash{$seq_region_id};
347 my $sr_cs = $sr_cs_hash{$seq_region_id};
348 #
349 # remap the feature coordinates to another coord system
350 # if a mapper was provided
351 #
352 if($mapper) {
353
354 if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) {
355 ( $seq_region_id, $seq_region_start,
356 $seq_region_end, $seq_region_strand )
357 =
358 $mapper->map( $sr_name, $seq_region_start, $seq_region_end,
359 $seq_region_strand, $sr_cs, 1, $dest_slice);
360
361 } else {
362
363 ( $seq_region_id, $seq_region_start,
364 $seq_region_end, $seq_region_strand )
365 =
366 $mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end,
367 $seq_region_strand, $sr_cs );
368 }
369
370 #skip features that map to gaps or coord system boundaries
371 next FEATURE if(!defined($seq_region_id));
372
373 #get a slice in the coord system we just mapped to
374 # if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
375 $slice = $slice_hash{"ID:".$seq_region_id} ||=
376 $sa->fetch_by_seq_region_id($seq_region_id);
377 # } else {
378 # $slice = $slice_hash{"NAME:$sr_name:$asm_cs_name:$asm_cs_vers"} ||=
379 # $sa->fetch_by_region($asm_cs_name, $sr_name, undef, undef, undef,
380 # $asm_cs_vers);
381 # }
382 }
383
384 #
385 # If a destination slice was provided convert the coords
386 # If the dest_slice starts at 1 and is foward strand, nothing needs doing
387 #
388 if($dest_slice) {
389 if($dest_slice_start != 1 || $dest_slice_strand != 1) {
390 if($dest_slice_strand == 1) {
391 $seq_region_start = $seq_region_start - $dest_slice_start + 1;
392 $seq_region_end = $seq_region_end - $dest_slice_start + 1;
393 } else {
394 my $tmp_seq_region_start = $seq_region_start;
395 $seq_region_start = $dest_slice_end - $seq_region_end + 1;
396 $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1;
397 $seq_region_strand *= -1;
398 }
399 }
400
401 #throw away features off the end of the requested slice
402 if($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
403 ( $dest_slice_sr_id ne $seq_region_id )) {
404 next FEATURE;
405 }
406
407
408 $slice = $dest_slice;
409 }
410
411 # Finally, create the new PredictionTranscript.
412 push( @ptranscripts,
413 $self->_create_feature('Bio::EnsEMBL::PredictionTranscript', {
414 '-start' => $seq_region_start,
415 '-end' => $seq_region_end,
416 '-strand' => $seq_region_strand,
417 '-adaptor' => $self,
418 '-slice' => $slice,
419 '-analysis' => $analysis,
420 '-dbID' => $prediction_transcript_id,
421 '-display_label' => $display_label
422 } ) );
423
424 }
425
426 return \@ptranscripts;
427 }
428
429
430
431 =head2 store
432
433 Arg [1] : list of Bio::EnsEMBL::PredictionTranscript @pre_transcripts
434 Example : $prediction_transcript_adaptor->store(@pre_transcripts);
435 Description: Stores a list of given prediction transcripts in database.
436 Puts dbID and Adaptor into each object stored object.
437 Returntype : none
438 Exceptions : on wrong argument type
439 Caller : general
440 Status : Stable
441
442 =cut
443
444 sub store {
445 my ( $self, @pre_transcripts ) = @_;
446
447 my $ptstore_sth = $self->prepare
448 (qq{INSERT INTO prediction_transcript (seq_region_id, seq_region_start,
449 seq_region_end, seq_region_strand,
450 analysis_id, display_label)
451 VALUES( ?, ?, ?, ?, ?, ?)});
452
453 my $ptupdate_sth = $self->prepare
454 (qq{UPDATE prediction_transcript SET display_label = ?
455 WHERE prediction_transcript_id = ?});
456
457 my $db = $self->db();
458 my $analysis_adaptor = $db->get_AnalysisAdaptor();
459 my $pexon_adaptor = $db->get_PredictionExonAdaptor();
460
461 FEATURE: foreach my $pt (@pre_transcripts) {
462 if(!ref($pt) || !$pt->isa('Bio::EnsEMBL::PredictionTranscript')) {
463 throw('Expected PredictionTranscript argument not [' . ref($pt).']');
464 }
465
466 #skip prediction transcripts that have already been stored
467 if($pt->is_stored($db)) {
468 warning('Not storing already stored prediction transcript '. $pt->dbID);
469 next FEATURE;
470 }
471
472 #get analysis and store it if it is not in the db
473 my $analysis = $pt->analysis();
474 if(!$analysis) {
475 throw('Prediction transcript must have analysis to be stored.');
476 }
477 if(!$analysis->is_stored($db)) {
478 $analysis_adaptor->store($analysis);
479 }
480
481 #ensure that the transcript coordinates are correct, they may not be,
482 #if somebody has done some exon coordinate juggling and not recalculated
483 #the transcript coords.
484 $pt->recalculate_coordinates();
485
486 my $original = $pt;
487 my $seq_region_id;
488 ($pt, $seq_region_id) = $self->_pre_store($pt);
489
490 #store the prediction transcript
491 $ptstore_sth->bind_param(1,$seq_region_id,SQL_INTEGER);
492 $ptstore_sth->bind_param(2,$pt->start,SQL_INTEGER);
493 $ptstore_sth->bind_param(3,$pt->end,SQL_INTEGER);
494 $ptstore_sth->bind_param(4,$pt->strand,SQL_TINYINT);
495 $ptstore_sth->bind_param(5,$analysis->dbID,SQL_INTEGER);
496 $ptstore_sth->bind_param(6,$pt->display_label,SQL_VARCHAR);
497
498 $ptstore_sth->execute();
499
500 my $pt_id = $ptstore_sth->{'mysql_insertid'};
501 $original->dbID($pt_id);
502 $original->adaptor($self);
503
504 #store the exons
505 my $rank = 1;
506 foreach my $pexon (@{$original->get_all_Exons}) {
507 $pexon_adaptor->store($pexon, $pt_id, $rank++);
508 }
509
510 # if a display label was not defined autogenerate one
511 if(!defined($pt->display_label())) {
512 my $zeros = '0' x (11 - length($pt_id));
513 my $display_label = uc($analysis->logic_name()) . $zeros . $pt_id;
514 $ptupdate_sth->bind_param(1,$display_label,SQL_VARCHAR);
515 $ptupdate_sth->bind_param(2,$pt_id,SQL_INTEGER);
516 $ptupdate_sth->execute();
517 $original->display_label($display_label);
518 }
519 }
520 }
521
522
523
524 =head2 remove
525
526 Arg [1] : Bio::EnsEMBL::PredictionTranscript $pt
527 Example : $prediction_transcript_adaptor->remove($pt);
528 Description: removes given prediction transcript $pt from database.
529 Returntype : none
530 Exceptions : throws if argument not a Bio::EnsEMBL::PredictionTranscript
531 Caller : general
532 Status : Stable
533
534 =cut
535
536 sub remove {
537 my $self = shift;
538 my $pre_trans = shift;
539
540 if(!ref($pre_trans)||!$pre_trans->isa('Bio::EnsEMBL::PredictionTranscript')){
541 throw('Expected PredictionTranscript argument.');
542 }
543
544 if(!$pre_trans->is_stored($self->db())) {
545 warning('PredictionTranscript is not stored in this DB - not removing.');
546 return;
547 }
548
549 #remove all associated prediction exons
550 my $pexon_adaptor = $self->db()->get_PredictionExonAdaptor();
551 foreach my $pexon (@{$pre_trans->get_all_Exons}) {
552 $pexon_adaptor->remove($pexon);
553 }
554
555 #remove the prediction transcript
556 my $sth = $self->prepare( "DELETE FROM prediction_transcript
557 WHERE prediction_transcript_id = ?" );
558 $sth->bind_param(1,$pre_trans->dbID,SQL_INTEGER);
559 $sth->execute();
560
561 #unset the adaptor and internal id
562 $pre_trans->dbID(undef);
563 $pre_trans->adaptor(undef);
564 }
565
566
567 =head2 list_dbIDs
568
569 Arg [1] : none
570 Example : @feature_ids = @{$prediction_transcript_adaptor->list_dbIDs()};
571 Description: Gets an array of internal ids for all prediction transcript
572 features in the current db
573 Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region.
574 Returntype : list of ints
575 Exceptions : none
576 Caller : ?
577 Status : Stable
578
579 =cut
580
581 sub list_dbIDs {
582 my ($self, $ordered) = @_;
583
584 return $self->_list_dbIDs("prediction_transcript", undef, $ordered);
585 }
586
587 1;