Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/PredictionTranscriptAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::DBSQL::PredictionTranscriptAdaptor - | |
24 Performs database interaction related to PredictionTranscripts | |
25 | |
26 =head1 SYNOPSIS | |
27 | |
28 # get a prediction transcript adaptor from the database | |
29 $pta = $database_adaptor->get_PredictionTranscriptAdaptor(); | |
30 | |
31 # get a slice on a region of chromosome 1 | |
32 $sa = $database_adaptor->get_SliceAdaptor(); | |
33 | |
34 $slice = $sa->fetch_by_region( 'chromosome', 'x', 100000, 200000 ); | |
35 | |
36 # get all the prediction transcripts from the slice region | |
37 $prediction_transcripts = @{ $pta->fetch_all_by_Slice($slice) }; | |
38 | |
39 =head1 METHODS | |
40 | |
41 =cut | |
42 | |
43 package Bio::EnsEMBL::DBSQL::PredictionTranscriptAdaptor; | |
44 | |
45 use vars qw( @ISA ); | |
46 use strict; | |
47 | |
48 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor; | |
49 use Bio::EnsEMBL::DBSQL::AnalysisAdaptor; | |
50 use Bio::EnsEMBL::PredictionTranscript; | |
51 use Bio::EnsEMBL::Utils::Exception qw(deprecate throw warning); | |
52 | |
53 @ISA = qw( Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor ); | |
54 | |
55 | |
56 # _tables | |
57 # | |
58 # Arg [1] : none | |
59 # Example : none | |
60 # Description: Implements abstract superclass method to define the table used | |
61 # to retrieve prediction transcripts from the database | |
62 # Returntype : string | |
63 # Exceptions : none | |
64 # Caller : generic_fetch | |
65 | |
66 sub _tables { | |
67 my $self = shift; | |
68 | |
69 return ['prediction_transcript', 'pt']; | |
70 } | |
71 | |
72 | |
73 # _columns | |
74 | |
75 # Arg [1] : none | |
76 # Example : none | |
77 # Description: Implements abstract superclass method to define the columns | |
78 # retrieved in database queries used to create prediction | |
79 # transcripts. | |
80 # Returntype : list of strings | |
81 # Exceptions : none | |
82 # Caller : generic_fetch | |
83 # | |
84 | |
85 sub _columns { | |
86 my $self = shift; | |
87 | |
88 return qw( pt.prediction_transcript_id | |
89 pt.seq_region_id | |
90 pt.seq_region_start | |
91 pt.seq_region_end | |
92 pt.seq_region_strand | |
93 pt.analysis_id | |
94 pt.display_label); | |
95 } | |
96 | |
97 | |
98 =head2 fetch_by_stable_id | |
99 | |
100 Arg [1] : string $stable_id | |
101 The stable id of the transcript to retrieve | |
102 Example : $trans = $trans_adptr->fetch_by_stable_id('GENSCAN00000001234'); | |
103 Description: Retrieves a prediction transcript via its display_label. | |
104 This method is called fetch_by_stable_id for polymorphism with | |
105 the TranscriptAdaptor. Prediction transcript display_labels are | |
106 not necessarily stable in that the same identifier may be reused | |
107 for a completely different prediction transcript in a subsequent | |
108 database release. | |
109 Returntype : Bio::EnsEMBL::PredictionTranscript | |
110 Caller : general | |
111 Status : Stable | |
112 | |
113 =cut | |
114 | |
115 sub fetch_by_stable_id { | |
116 my $self = shift; | |
117 my $stable_id = shift; | |
118 | |
119 throw('Stable_id argument expected') if(!$stable_id); | |
120 | |
121 my $syn = $self->_tables()->[1]; | |
122 | |
123 $self->bind_param_generic_fetch($stable_id,SQL_VARCHAR); | |
124 my $pts = $self->generic_fetch("$syn.display_label = ?"); | |
125 | |
126 return (@$pts) ? $pts->[0] : undef; | |
127 } | |
128 | |
129 | |
130 | |
131 =head2 fetch_all_by_Slice | |
132 | |
133 Arg [1] : Bio::EnsEMBL::Slice $slice | |
134 The slice to fetch transcripts on. | |
135 Arg [3] : (optional) boolean $load_exons | |
136 if true, exons will be loaded immediately rather than | |
137 lazy loaded later. | |
138 Example : $transcripts = $ | |
139 Description: Overrides superclass method to optionally load exons | |
140 immediately rather than lazy-loading them later. This | |
141 is more efficient when there are a lot of transcripts whose | |
142 exons are going to be used. | |
143 Returntype : reference to list of transcripts | |
144 Exceptions : thrown if exon cannot be placed on transcript slice | |
145 Caller : Slice::get_all_Transcripts | |
146 Status : Stable | |
147 | |
148 =cut | |
149 | |
150 sub fetch_all_by_Slice { | |
151 my $self = shift; | |
152 my $slice = shift; | |
153 my $logic_name = shift; | |
154 my $load_exons = shift; | |
155 | |
156 my $transcripts = $self->SUPER::fetch_all_by_Slice($slice,$logic_name); | |
157 | |
158 # if there are 0 or 1 transcripts still do lazy-loading | |
159 if(!$load_exons || @$transcripts < 2) { | |
160 return $transcripts; | |
161 } | |
162 | |
163 # preload all of the exons now, instead of lazy loading later | |
164 # faster than 1 query per transcript | |
165 | |
166 # get extent of region spanned by transcripts | |
167 my ($min_start, $max_end); | |
168 foreach my $tr (@$transcripts) { | |
169 if(!defined($min_start) || $tr->seq_region_start() < $min_start) { | |
170 $min_start = $tr->seq_region_start(); | |
171 } | |
172 if(!defined($max_end) || $tr->seq_region_end() > $max_end) { | |
173 $max_end = $tr->seq_region_end(); | |
174 } | |
175 } | |
176 | |
177 # mades no sense, the limit for the slice will be defined by the transcripts | |
178 # $min_start += $slice->start() - 1; | |
179 # $max_end += $slice->start() - 1; | |
180 | |
181 my $ext_slice; | |
182 | |
183 if($min_start >= $slice->start() && $max_end <= $slice->end()) { | |
184 $ext_slice = $slice; | |
185 } else { | |
186 my $sa = $self->db()->get_SliceAdaptor(); | |
187 $ext_slice = $sa->fetch_by_region | |
188 ($slice->coord_system->name(), $slice->seq_region_name(), | |
189 $min_start,$max_end, $slice->strand(), $slice->coord_system->version()); | |
190 } | |
191 | |
192 # associate exon identifiers with transcripts | |
193 | |
194 my %tr_hash = map {$_->dbID => $_} @$transcripts; | |
195 | |
196 my $tr_id_str = '(' . join(',', keys %tr_hash) . ')'; | |
197 | |
198 my $sth = $self->prepare | |
199 ("SELECT prediction_transcript_id, prediction_exon_id, exon_rank " . | |
200 "FROM prediction_exon " . | |
201 "WHERE prediction_transcript_id IN $tr_id_str"); | |
202 | |
203 $sth->execute(); | |
204 | |
205 my ($ex_id, $tr_id, $rank); | |
206 $sth->bind_columns(\$tr_id, \$ex_id, \$rank); | |
207 | |
208 my %ex_tr_hash; | |
209 | |
210 while($sth->fetch()) { | |
211 $ex_tr_hash{$ex_id} ||= []; | |
212 push @{$ex_tr_hash{$ex_id}}, [$tr_hash{$tr_id}, $rank]; | |
213 } | |
214 | |
215 $sth->finish(); | |
216 | |
217 my $ea = $self->db()->get_PredictionExonAdaptor(); | |
218 my $exons = $ea->fetch_all_by_Slice($ext_slice); | |
219 | |
220 # move exons onto transcript slice, and add them to transcripts | |
221 foreach my $ex (@$exons) { | |
222 $ex = $ex->transfer($slice) if($slice != $ext_slice); | |
223 | |
224 if(!$ex) { | |
225 throw("Unexpected. PredictionExon could not be transfered onto " . | |
226 "PredictionTranscript slice."); | |
227 } | |
228 | |
229 foreach my $row (@{$ex_tr_hash{$ex->dbID()}}) { | |
230 my ($tr, $rank) = @$row; | |
231 $tr->add_Exon($ex, $rank); | |
232 } | |
233 } | |
234 | |
235 return $transcripts; | |
236 } | |
237 | |
238 | |
239 | |
240 | |
241 | |
242 =head2 _objs_from_sth | |
243 | |
244 Arg [1] : DBI:st $sth | |
245 An executed DBI statement handle | |
246 Arg [2] : (optional) Bio::EnsEMBL::Mapper $mapper | |
247 An mapper to be used to convert contig coordinates | |
248 to assembly coordinates. | |
249 Arg [3] : (optional) Bio::EnsEMBL::Slice $slice | |
250 A slice to map the prediction transcript to. | |
251 Example : $p_transcripts = $self->_objs_from_sth($sth); | |
252 Description: Creates a list of Prediction transcripts from an executed DBI | |
253 statement handle. The columns retrieved via the statement | |
254 handle must be in the same order as the columns defined by the | |
255 _columns method. If the slice argument is provided then the | |
256 the prediction transcripts will be in returned in the coordinate | |
257 system of the $slice argument. Otherwise the prediction | |
258 transcripts will be returned in the RawContig coordinate system. | |
259 Returntype : reference to a list of Bio::EnsEMBL::PredictionTranscripts | |
260 Exceptions : none | |
261 Caller : superclass generic_fetch | |
262 Status : Stable | |
263 | |
264 =cut | |
265 | |
266 sub _objs_from_sth { | |
267 my ($self, $sth, $mapper, $dest_slice) = @_; | |
268 | |
269 # | |
270 # This code is ugly because an attempt has been made to remove as many | |
271 # function calls as possible for speed purposes. Thus many caches and | |
272 # a fair bit of gymnastics is used. | |
273 # | |
274 | |
275 my $sa = $self->db()->get_SliceAdaptor(); | |
276 my $aa = $self->db()->get_AnalysisAdaptor(); | |
277 | |
278 my @ptranscripts; | |
279 my %analysis_hash; | |
280 my %slice_hash; | |
281 my %sr_name_hash; | |
282 my %sr_cs_hash; | |
283 | |
284 my ($prediction_transcript_id, | |
285 $seq_region_id, | |
286 $seq_region_start, | |
287 $seq_region_end, | |
288 $seq_region_strand, | |
289 $analysis_id, | |
290 $display_label); | |
291 | |
292 $sth->bind_columns(\$prediction_transcript_id, | |
293 \$seq_region_id, | |
294 \$seq_region_start, | |
295 \$seq_region_end, | |
296 \$seq_region_strand, | |
297 \$analysis_id, | |
298 \$display_label); | |
299 | |
300 my $asm_cs; | |
301 my $cmp_cs; | |
302 my $asm_cs_vers; | |
303 my $asm_cs_name; | |
304 my $cmp_cs_vers; | |
305 my $cmp_cs_name; | |
306 if($mapper) { | |
307 $asm_cs = $mapper->assembled_CoordSystem(); | |
308 $cmp_cs = $mapper->component_CoordSystem(); | |
309 $asm_cs_name = $asm_cs->name(); | |
310 $asm_cs_vers = $asm_cs->version(); | |
311 $cmp_cs_name = $cmp_cs->name(); | |
312 $cmp_cs_vers = $cmp_cs->version(); | |
313 } | |
314 | |
315 my $dest_slice_start; | |
316 my $dest_slice_end; | |
317 my $dest_slice_strand; | |
318 my $dest_slice_length; | |
319 my $dest_slice_sr_name; | |
320 my $dest_slice_sr_id; | |
321 if($dest_slice) { | |
322 $dest_slice_start = $dest_slice->start(); | |
323 $dest_slice_end = $dest_slice->end(); | |
324 $dest_slice_strand = $dest_slice->strand(); | |
325 $dest_slice_length = $dest_slice->length(); | |
326 $dest_slice_sr_name = $dest_slice->seq_region_name(); | |
327 $dest_slice_sr_id = $dest_slice->get_seq_region_id(); | |
328 } | |
329 | |
330 FEATURE: while($sth->fetch()) { | |
331 | |
332 #get the analysis object | |
333 my $analysis = $analysis_hash{$analysis_id} ||= | |
334 $aa->fetch_by_dbID($analysis_id); | |
335 #need to get the internal_seq_region, if present | |
336 $seq_region_id = $self->get_seq_region_id_internal($seq_region_id); | |
337 my $slice = $slice_hash{"ID:".$seq_region_id}; | |
338 | |
339 if(!$slice) { | |
340 $slice = $sa->fetch_by_seq_region_id($seq_region_id); | |
341 $slice_hash{"ID:".$seq_region_id} = $slice; | |
342 $sr_name_hash{$seq_region_id} = $slice->seq_region_name(); | |
343 $sr_cs_hash{$seq_region_id} = $slice->coord_system(); | |
344 } | |
345 | |
346 my $sr_name = $sr_name_hash{$seq_region_id}; | |
347 my $sr_cs = $sr_cs_hash{$seq_region_id}; | |
348 # | |
349 # remap the feature coordinates to another coord system | |
350 # if a mapper was provided | |
351 # | |
352 if($mapper) { | |
353 | |
354 if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) { | |
355 ( $seq_region_id, $seq_region_start, | |
356 $seq_region_end, $seq_region_strand ) | |
357 = | |
358 $mapper->map( $sr_name, $seq_region_start, $seq_region_end, | |
359 $seq_region_strand, $sr_cs, 1, $dest_slice); | |
360 | |
361 } else { | |
362 | |
363 ( $seq_region_id, $seq_region_start, | |
364 $seq_region_end, $seq_region_strand ) | |
365 = | |
366 $mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end, | |
367 $seq_region_strand, $sr_cs ); | |
368 } | |
369 | |
370 #skip features that map to gaps or coord system boundaries | |
371 next FEATURE if(!defined($seq_region_id)); | |
372 | |
373 #get a slice in the coord system we just mapped to | |
374 # if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) { | |
375 $slice = $slice_hash{"ID:".$seq_region_id} ||= | |
376 $sa->fetch_by_seq_region_id($seq_region_id); | |
377 # } else { | |
378 # $slice = $slice_hash{"NAME:$sr_name:$asm_cs_name:$asm_cs_vers"} ||= | |
379 # $sa->fetch_by_region($asm_cs_name, $sr_name, undef, undef, undef, | |
380 # $asm_cs_vers); | |
381 # } | |
382 } | |
383 | |
384 # | |
385 # If a destination slice was provided convert the coords | |
386 # If the dest_slice starts at 1 and is foward strand, nothing needs doing | |
387 # | |
388 if($dest_slice) { | |
389 if($dest_slice_start != 1 || $dest_slice_strand != 1) { | |
390 if($dest_slice_strand == 1) { | |
391 $seq_region_start = $seq_region_start - $dest_slice_start + 1; | |
392 $seq_region_end = $seq_region_end - $dest_slice_start + 1; | |
393 } else { | |
394 my $tmp_seq_region_start = $seq_region_start; | |
395 $seq_region_start = $dest_slice_end - $seq_region_end + 1; | |
396 $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1; | |
397 $seq_region_strand *= -1; | |
398 } | |
399 } | |
400 | |
401 #throw away features off the end of the requested slice | |
402 if($seq_region_end < 1 || $seq_region_start > $dest_slice_length || | |
403 ( $dest_slice_sr_id ne $seq_region_id )) { | |
404 next FEATURE; | |
405 } | |
406 | |
407 | |
408 $slice = $dest_slice; | |
409 } | |
410 | |
411 # Finally, create the new PredictionTranscript. | |
412 push( @ptranscripts, | |
413 $self->_create_feature('Bio::EnsEMBL::PredictionTranscript', { | |
414 '-start' => $seq_region_start, | |
415 '-end' => $seq_region_end, | |
416 '-strand' => $seq_region_strand, | |
417 '-adaptor' => $self, | |
418 '-slice' => $slice, | |
419 '-analysis' => $analysis, | |
420 '-dbID' => $prediction_transcript_id, | |
421 '-display_label' => $display_label | |
422 } ) ); | |
423 | |
424 } | |
425 | |
426 return \@ptranscripts; | |
427 } | |
428 | |
429 | |
430 | |
431 =head2 store | |
432 | |
433 Arg [1] : list of Bio::EnsEMBL::PredictionTranscript @pre_transcripts | |
434 Example : $prediction_transcript_adaptor->store(@pre_transcripts); | |
435 Description: Stores a list of given prediction transcripts in database. | |
436 Puts dbID and Adaptor into each object stored object. | |
437 Returntype : none | |
438 Exceptions : on wrong argument type | |
439 Caller : general | |
440 Status : Stable | |
441 | |
442 =cut | |
443 | |
444 sub store { | |
445 my ( $self, @pre_transcripts ) = @_; | |
446 | |
447 my $ptstore_sth = $self->prepare | |
448 (qq{INSERT INTO prediction_transcript (seq_region_id, seq_region_start, | |
449 seq_region_end, seq_region_strand, | |
450 analysis_id, display_label) | |
451 VALUES( ?, ?, ?, ?, ?, ?)}); | |
452 | |
453 my $ptupdate_sth = $self->prepare | |
454 (qq{UPDATE prediction_transcript SET display_label = ? | |
455 WHERE prediction_transcript_id = ?}); | |
456 | |
457 my $db = $self->db(); | |
458 my $analysis_adaptor = $db->get_AnalysisAdaptor(); | |
459 my $pexon_adaptor = $db->get_PredictionExonAdaptor(); | |
460 | |
461 FEATURE: foreach my $pt (@pre_transcripts) { | |
462 if(!ref($pt) || !$pt->isa('Bio::EnsEMBL::PredictionTranscript')) { | |
463 throw('Expected PredictionTranscript argument not [' . ref($pt).']'); | |
464 } | |
465 | |
466 #skip prediction transcripts that have already been stored | |
467 if($pt->is_stored($db)) { | |
468 warning('Not storing already stored prediction transcript '. $pt->dbID); | |
469 next FEATURE; | |
470 } | |
471 | |
472 #get analysis and store it if it is not in the db | |
473 my $analysis = $pt->analysis(); | |
474 if(!$analysis) { | |
475 throw('Prediction transcript must have analysis to be stored.'); | |
476 } | |
477 if(!$analysis->is_stored($db)) { | |
478 $analysis_adaptor->store($analysis); | |
479 } | |
480 | |
481 #ensure that the transcript coordinates are correct, they may not be, | |
482 #if somebody has done some exon coordinate juggling and not recalculated | |
483 #the transcript coords. | |
484 $pt->recalculate_coordinates(); | |
485 | |
486 my $original = $pt; | |
487 my $seq_region_id; | |
488 ($pt, $seq_region_id) = $self->_pre_store($pt); | |
489 | |
490 #store the prediction transcript | |
491 $ptstore_sth->bind_param(1,$seq_region_id,SQL_INTEGER); | |
492 $ptstore_sth->bind_param(2,$pt->start,SQL_INTEGER); | |
493 $ptstore_sth->bind_param(3,$pt->end,SQL_INTEGER); | |
494 $ptstore_sth->bind_param(4,$pt->strand,SQL_TINYINT); | |
495 $ptstore_sth->bind_param(5,$analysis->dbID,SQL_INTEGER); | |
496 $ptstore_sth->bind_param(6,$pt->display_label,SQL_VARCHAR); | |
497 | |
498 $ptstore_sth->execute(); | |
499 | |
500 my $pt_id = $ptstore_sth->{'mysql_insertid'}; | |
501 $original->dbID($pt_id); | |
502 $original->adaptor($self); | |
503 | |
504 #store the exons | |
505 my $rank = 1; | |
506 foreach my $pexon (@{$original->get_all_Exons}) { | |
507 $pexon_adaptor->store($pexon, $pt_id, $rank++); | |
508 } | |
509 | |
510 # if a display label was not defined autogenerate one | |
511 if(!defined($pt->display_label())) { | |
512 my $zeros = '0' x (11 - length($pt_id)); | |
513 my $display_label = uc($analysis->logic_name()) . $zeros . $pt_id; | |
514 $ptupdate_sth->bind_param(1,$display_label,SQL_VARCHAR); | |
515 $ptupdate_sth->bind_param(2,$pt_id,SQL_INTEGER); | |
516 $ptupdate_sth->execute(); | |
517 $original->display_label($display_label); | |
518 } | |
519 } | |
520 } | |
521 | |
522 | |
523 | |
524 =head2 remove | |
525 | |
526 Arg [1] : Bio::EnsEMBL::PredictionTranscript $pt | |
527 Example : $prediction_transcript_adaptor->remove($pt); | |
528 Description: removes given prediction transcript $pt from database. | |
529 Returntype : none | |
530 Exceptions : throws if argument not a Bio::EnsEMBL::PredictionTranscript | |
531 Caller : general | |
532 Status : Stable | |
533 | |
534 =cut | |
535 | |
536 sub remove { | |
537 my $self = shift; | |
538 my $pre_trans = shift; | |
539 | |
540 if(!ref($pre_trans)||!$pre_trans->isa('Bio::EnsEMBL::PredictionTranscript')){ | |
541 throw('Expected PredictionTranscript argument.'); | |
542 } | |
543 | |
544 if(!$pre_trans->is_stored($self->db())) { | |
545 warning('PredictionTranscript is not stored in this DB - not removing.'); | |
546 return; | |
547 } | |
548 | |
549 #remove all associated prediction exons | |
550 my $pexon_adaptor = $self->db()->get_PredictionExonAdaptor(); | |
551 foreach my $pexon (@{$pre_trans->get_all_Exons}) { | |
552 $pexon_adaptor->remove($pexon); | |
553 } | |
554 | |
555 #remove the prediction transcript | |
556 my $sth = $self->prepare( "DELETE FROM prediction_transcript | |
557 WHERE prediction_transcript_id = ?" ); | |
558 $sth->bind_param(1,$pre_trans->dbID,SQL_INTEGER); | |
559 $sth->execute(); | |
560 | |
561 #unset the adaptor and internal id | |
562 $pre_trans->dbID(undef); | |
563 $pre_trans->adaptor(undef); | |
564 } | |
565 | |
566 | |
567 =head2 list_dbIDs | |
568 | |
569 Arg [1] : none | |
570 Example : @feature_ids = @{$prediction_transcript_adaptor->list_dbIDs()}; | |
571 Description: Gets an array of internal ids for all prediction transcript | |
572 features in the current db | |
573 Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region. | |
574 Returntype : list of ints | |
575 Exceptions : none | |
576 Caller : ? | |
577 Status : Stable | |
578 | |
579 =cut | |
580 | |
581 sub list_dbIDs { | |
582 my ($self, $ordered) = @_; | |
583 | |
584 return $self->_list_dbIDs("prediction_transcript", undef, $ordered); | |
585 } | |
586 | |
587 1; |