comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::DBSQL::TranscriptAdaptor - An adaptor which performs database
24 interaction relating to the storage and retrieval of Transcripts
25
26 =head1 SYNOPSIS
27
28 use Bio::EnsEMBL::Registry;
29
30 Bio::EnsEMBL::Registry->load_registry_from_db(
31 -host => 'ensembldb.ensembl.org',
32 -user => 'anonymous'
33 );
34
35 $transcript_adaptor =
36 Bio::EnsEMBL::Registry->get_adaptor( 'Human', 'Core',
37 'Transcript' );
38
39 $transcript = $transcript_adaptor->fetch_by_dbID(1234);
40
41 $transcript =
42 $transcript_adaptor->fetch_by_stable_id('ENST00000201961');
43
44 $slice =
45 $slice_adaptor->fetch_by_region( 'Chromosome', '3', 1, 1000000 );
46 @transcripts = @{ $transcript_adaptor->fetch_all_by_Slice($slice) };
47
48 ($transcript) =
49 @{ $transcript_adaptor->fetch_all_by_external_name('NP_065811.1') };
50
51 =head1 DESCRIPTION
52
53 This adaptor provides a means to retrieve and store information related
54 to Transcripts. Primarily this involves the retrieval or storage of
55 Bio::EnsEMBL::Transcript objects from a database.
56
57 See Bio::EnsEMBL::Transcript for details of the Transcript class.
58
59 =cut
60
61 package Bio::EnsEMBL::DBSQL::TranscriptAdaptor;
62
63 use strict;
64
65 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
66 use Bio::EnsEMBL::Gene;
67 use Bio::EnsEMBL::Exon;
68 use Bio::EnsEMBL::Transcript;
69 use Bio::EnsEMBL::Translation;
70 use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning );
71
72 use vars qw(@ISA);
73 @ISA = qw( Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor );
74
75
76 # _tables
77 #
78 # Description: PROTECTED implementation of superclass abstract method.
79 # Returns the names, aliases of the tables to use for queries.
80 # Returntype : list of listrefs of strings
81 # Exceptions : none
82 # Caller : internal
83 # Status : Stable
84
sub _tables {
  # Each entry pairs a table name with the alias used for it in the
  # column list and join conditions of this adaptor.
  return (
    [ qw( transcript  t    ) ],
    [ qw( xref        x    ) ],
    [ qw( external_db exdb ) ]
  );
}
91
92
93 #_columns
94 #
95 # Description: PROTECTED implementation of superclass abstract method.
96 # Returns a list of columns to use for queries.
97 # Returntype : list of strings
98 # Exceptions : none
99 # Caller : internal
100 # Status : Stable
101
sub _columns {
  my ($self) = @_;

  # The two date columns are not selected raw: the database connection
  # supplies the SQL expression used in place of each column name.
  my $dbc           = $self->db()->dbc();
  my $created_expr  = $dbc->from_date_to_seconds('created_date');
  my $modified_expr = $dbc->from_date_to_seconds('modified_date');

  # Column order matters to whatever consumes the result rows, so the
  # sequence below must not be rearranged.
  return (
    qw( t.transcript_id     t.seq_region_id
        t.seq_region_start  t.seq_region_end
        t.seq_region_strand t.analysis_id
        t.gene_id           t.is_current
        t.stable_id         t.version ),
    $created_expr, $modified_expr,
    qw( t.description   t.biotype
        t.status        exdb.db_name
        exdb.status     exdb.db_display_name
        x.xref_id       x.display_label
        x.dbprimary_acc x.version
        x.description   x.info_type
        x.info_text )
  );
}
126
sub _left_join {
  # Each entry pairs a table name with the join condition to use for
  # it: xref via the transcript's display xref, external_db via the xref.
  return (
    [ xref        => 'x.xref_id = t.display_xref_id' ],
    [ external_db => 'exdb.external_db_id = x.external_db_id' ]
  );
}
133
134
135 =head2 fetch_by_stable_id
136
137 Arg [1] : String $stable_id
138 The stable id of the transcript to retrieve
139 Example : my $tr = $tr_adaptor->fetch_by_stable_id('ENST00000309301');
140 Description: Retrieves a transcript via its stable id.
141 Returntype : Bio::EnsEMBL::Transcript
142 Exceptions : none
143 Caller : general
144 Status : Stable
145
146 =cut
147
sub fetch_by_stable_id {
  my $self      = shift;
  my $stable_id = shift;

  # The stable id is passed as a bound parameter, never interpolated.
  $self->bind_param_generic_fetch( $stable_id, SQL_VARCHAR );

  # Only the first matching current transcript is handed back; the
  # result is undef when nothing matches.
  my ($current) =
    @{ $self->generic_fetch('t.stable_id = ? AND t.is_current = 1') };

  return $current;
}
159
160
sub fetch_all {
  my $self = shift;

  # Fetch every current transcript whose biotype is not "LRG_gene".
  my $fetched =
    $self->generic_fetch(q{t.biotype != "LRG_gene" and t.is_current = 1});

  # Hand back a fresh list reference holding the fetched transcripts.
  return [ @{$fetched} ];
}
168
169 =head2 fetch_all_versions_by_stable_id
170
171 Arg [1] : String $stable_id
172 The stable ID of the transcript to retrieve
173 Example : my $trs = $tr_adaptor->fetch_all_versions_by_stable_id
174 ('ENST00000309301');
175 Description : Similar to fetch_by_stable_id, but retrieves all versions of a
176 transcript stored in the database.
177 Returntype : listref of Bio::EnsEMBL::Transcript objects
178 Exceptions : if we cant get the gene in given coord system
179 Caller : general
180 Status : At Risk
181
182 =cut
183
sub fetch_all_versions_by_stable_id {
  my $self      = shift;
  my $stable_id = shift;

  # No is_current filter here: every stored version of the stable id
  # is wanted, not just the current one.
  $self->bind_param_generic_fetch( $stable_id, SQL_VARCHAR );

  return $self->generic_fetch('t.stable_id = ?');
}
193
194
195 =head2 fetch_by_translation_stable_id
196
197 Arg [1] : String $transl_stable_id
198 The stable identifier of the translation of the transcript to
199 retrieve
200 Example : my $tr = $tr_adaptor->fetch_by_translation_stable_id
201 ('ENSP00000311007');
202 Description: Retrieves a Transcript object using the stable identifier of
203 its translation.
204 Returntype : Bio::EnsEMBL::Transcript
205 Exceptions : none
206 Caller : general
207 Status : Stable
208
209 =cut
210
sub fetch_by_translation_stable_id {
  my ( $self, $transl_stable_id ) = @_;

  # Look up the dbID of the current transcript owning the translation.
  my $lookup = $self->prepare(qq(
        SELECT t.transcript_id
        FROM   translation tl,
               transcript t
        WHERE  tl.stable_id = ?
        AND    tl.transcript_id = t.transcript_id
        AND    t.is_current = 1
  ));

  $lookup->bind_param( 1, $transl_stable_id, SQL_VARCHAR );
  $lookup->execute();

  my ($transcript_id) = $lookup->fetchrow_array;
  $lookup->finish;

  # No (true) id found: signal absence with an explicit undef.
  if ( !$transcript_id ) {
    return undef;
  }

  return $self->fetch_by_dbID($transcript_id);
}
234
235
236 =head2 fetch_by_translation_id
237
238 Arg [1] : Int $id
239 The internal identifier of the translation whose transcript
240 is to be retrieved
241 Example : my $tr = $tr_adaptor->fetch_by_translation_id($transl->dbID);
242 Description: Given the internal identifier of a translation this method
243 retrieves the transcript associated with that translation.
244 If the transcript cannot be found undef is returned instead.
245 Returntype : Bio::EnsEMBL::Transcript or undef
246 Exceptions : none
247 Caller : general
248 Status : Stable
249
250 =cut
251
sub fetch_by_translation_id {
  my $self   = shift;
  my $p_dbID = shift;

  throw("dbID argument is required") if !defined($p_dbID);

  # Map the translation's internal id onto its transcript's id.
  my $lookup = $self->prepare(
    "SELECT transcript_id FROM translation WHERE translation_id = ?");

  $lookup->bind_param( 1, $p_dbID, SQL_INTEGER );
  $lookup->execute();

  my ($transcript_id) = $lookup->fetchrow_array();
  $lookup->finish();

  # Unknown translation: signal absence with an explicit undef.
  if ( !$transcript_id ) {
    return undef;
  }

  return $self->fetch_by_dbID($transcript_id);
}
276
277 =head2 fetch_all_by_Gene
278
279 Arg [1] : Bio::EnsEMBL::Gene $gene
280 The gene to fetch transcripts of
281 Example : my $gene = $gene_adaptor->fetch_by_stable_id('ENSG0000123');
282 my @transcripts = @{ $tr_adaptor->fetch_all_by_Gene($gene) };
283 Description: Retrieves Transcript objects for given gene. Puts Genes slice
284 in each Transcript.
285 Returntype : Listref of Bio::EnsEMBL::Transcript objects
286 Exceptions : none
287 Caller : Gene->get_all_Transcripts()
288 Status : Stable
289
290 =cut
291
sub fetch_all_by_Gene {
  my ( $self, $gene ) = @_;

  my $constraint = "t.gene_id = " . $gene->dbID();

  # Transcripts are fetched through fetch_all_by_Slice_constraint
  # because it handles the difficult Haps/PARs and coordinate
  # remapping.

  my $gslice = $gene->slice();
  throw("Gene must have attached slice to retrieve transcripts.")
    if !defined($gslice);

  # By default fetch on the gene's own slice (fast path).  If the gene
  # sticks out of that slice, fetch on a slice that entirely overlaps
  # the gene instead so that no transcript is missed -- unless the
  # gene's slice is circular, in which case it is used as-is.
  my $fetch_slice = $gslice;

  my $gene_overhangs =
    ( $gene->start() < 1 || $gene->end() > $gslice->length() );

  if ( $gene_overhangs && !$gslice->is_circular() ) {
    $fetch_slice = $self->db->get_SliceAdaptor->fetch_by_Feature($gene);
  }

  my $transcripts =
    $self->fetch_all_by_Slice_constraint( $fetch_slice, $constraint );

  # If a different slice was used for fetching, move the transcripts
  # back onto the gene's slice.
  if ( $fetch_slice != $gslice ) {
    $transcripts = [ map { $_->transfer($gslice) } @{$transcripts} ];
  }

  # Flag the (first) transcript equal to the gene's canonical one.
  my $canonical_t = $gene->canonical_transcript();

  for my $candidate ( @{$transcripts} ) {
    next unless $candidate->equals($canonical_t);
    $candidate->is_canonical(1);
    last;
  }

  return $transcripts;
} ## end sub fetch_all_by_Gene
346
347
348 =head2 fetch_all_by_Slice
349
350 Arg [1] : Bio::EnsEMBL::Slice $slice
351 The slice to fetch transcripts on
352 Arg [2] : (optional) Boolean $load_exons
353 If true, exons will be loaded immediately rather than
354 lazy loaded later
355 Arg [3] : (optional) String $logic_name
356 The logic name of the type of features to obtain
357 Arg [4] : (optional) String $constraint
358 An extra constraint.
359 Example : my @transcripts = @{ $tr_adaptor->fetch_all_by_Slice($slice) };
360 Description: Overrides superclass method to optionally load exons
361 immediately rather than lazy-loading them later. This
362 is more efficient when there are a lot of transcripts whose
363 exons are going to be used.
364 Returntype : Listref of Bio::EnsEMBL::Transcript objects
365 Exceptions : thrown if exon cannot be placed on transcript slice
366 Caller : Slice::get_all_Transcripts
367 Status : Stable
368
369 =cut
370
sub fetch_all_by_Slice {
  my ( $self, $slice, $load_exons, $logic_name, $constraint ) = @_;

  # Only current transcripts are fetched; a caller-supplied constraint
  # is ANDed onto that restriction.
  my $sql_constraint = 't.is_current = 1';
  if ( defined($constraint) && $constraint ne '' ) {
    $sql_constraint .= ' AND ' . $constraint;
  }

  my $transcripts =
    $self->SUPER::fetch_all_by_Slice_constraint( $slice, $sql_constraint,
                                                 $logic_name );

  # Lazy-loading stays in place when preloading was not requested or
  # when there are only 0 or 1 transcripts.
  return $transcripts if !$load_exons;
  return $transcripts if @$transcripts < 2;

  # Preloading all exons in one go is faster than one query per
  # transcript.  Skip it if the exons look preloaded already.
  # @todo FIXME: Should test all exons.
  return $transcripts
    if exists( $transcripts->[0]->{'_trans_exon_array'} );

  # Work out the extent of the region spanned by the transcripts.
  my ( $min_start, $max_end );
  for my $tr ( @{$transcripts} ) {
    my $tr_start = $tr->seq_region_start();
    my $tr_end   = $tr->seq_region_end();

    $min_start = $tr_start
      if !defined($min_start) || $tr_start < $min_start;
    $max_end = $tr_end
      if !defined($max_end) || $tr_end > $max_end;
  }

  # Use the caller's slice when it covers that extent, otherwise fetch
  # a slice on the same seq_region that does.
  my $ext_slice = $slice;

  if ( $min_start < $slice->start() || $max_end > $slice->end() ) {
    my $slice_adaptor = $self->db()->get_SliceAdaptor();
    $ext_slice =
      $slice_adaptor->fetch_by_region( $slice->coord_system->name(),
                                       $slice->seq_region_name(),
                                       $min_start,
                                       $max_end,
                                       $slice->strand(),
                                       $slice->coord_system->version() );
  }

  # Associate exon identifiers with transcripts.
  my %tr_hash   = map { $_->dbID => $_ } @{$transcripts};
  my $tr_id_str = join( ',', keys(%tr_hash) );

  my $sth = $self->prepare(
    sprintf( "SELECT transcript_id, exon_id, rank "
               . "FROM exon_transcript "
               . "WHERE transcript_id IN (%s)",
             $tr_id_str ) );

  $sth->execute();

  my ( $tr_id, $ex_id, $rank );
  $sth->bind_columns( \$tr_id, \$ex_id, \$rank );

  # exon dbID => list of [ transcript object, rank of exon in it ]
  my %ex_tr_hash;
  while ( $sth->fetch() ) {
    push( @{ $ex_tr_hash{$ex_id} }, [ $tr_hash{$tr_id}, $rank ] );
  }

  # Fetch all of the linked exons with a single query.
  my $exon_id_str = join( ',', sort { $a <=> $b } keys(%ex_tr_hash) );
  my $exons =
    $self->db()->get_ExonAdaptor()
    ->fetch_all_by_Slice_constraint( $ext_slice,
                                     sprintf( "e.exon_id IN (%s)",
                                              $exon_id_str ) );

  # Move exons onto the transcript slice, and add them to transcripts.
  my $needs_transfer = ( $slice != $ext_slice );

  for my $ex ( @{$exons} ) {
    my $placed_ex = $ex;

    if ($needs_transfer) {
      $placed_ex = $ex->transfer($slice);
      if ( !defined($placed_ex) ) {
        throw(   "Unexpected. "
               . "Exon could not be transfered onto Transcript slice." );
      }
    }

    for my $link ( @{ $ex_tr_hash{ $placed_ex->dbID() } } ) {
      my ( $owner, $exon_rank ) = @{$link};
      $owner->add_Exon( $placed_ex, $exon_rank );
    }
  }

  # Load all of the translations at once.
  $self->db()->get_TranslationAdaptor()
    ->fetch_all_by_Transcript_list($transcripts);

  return $transcripts;
} ## end sub fetch_all_by_Slice
476
477
478 =head2 fetch_all_by_external_name
479
480 Arg [1] : String $external_name
481 An external identifier of the transcript to be obtained
482 Arg [2] : (optional) String $external_db_name
483 The name of the external database from which the
484 identifier originates.
485 Arg [3] : Boolean override. Force SQL regex matching for users
486 who really do want to find all 'NM%'
487 Example : my @transcripts =
488 @{ $tr_adaptor->fetch_all_by_external_name( 'NP_065811.1') };
489 my @more_transcripts =
490 @{$tr_adaptor->fetch_all_by_external_name( 'NP_0658__._')};
491 Description: Retrieves all transcripts which are associated with
492 an external identifier such as a GO term, Swissprot
493 identifer, etc. Usually there will only be a single
494 transcript returned in the list reference, but not
495 always. Transcripts are returned in their native
496 coordinate system, i.e. the coordinate system in which
497 they are stored in the database. If they are required
498 in another coordinate system the Transcript::transfer or
499 Transcript::transform method can be used to convert them.
500 If no transcripts with the external identifier are found,
501 a reference to an empty list is returned.
502 SQL wildcards % and _ are supported in the $external_name
503 but their use is somewhat restricted for performance reasons.
504 Users that really do want % and _ in the first three characters
505 should use argument 3 to prevent optimisations
506 Returntype : listref of Bio::EnsEMBL::Transcript
507 Exceptions : none
508 Caller : general
509 Status : Stable
510
511 =cut
512
sub fetch_all_by_external_name {
  my ( $self, $external_name, $external_db_name, $override ) = @_;

  # The DBEntry adaptor resolves the external identifier to transcript
  # dbIDs; the transcripts themselves are then fetched by dbID.
  my @transcript_ids =
    $self->db->get_DBEntryAdaptor()
    ->list_transcript_ids_by_extids( $external_name, $external_db_name,
                                     $override );

  return $self->fetch_all_by_dbID_list( \@transcript_ids );
}
524
525 =head2 fetch_all_by_GOTerm
526
527 Arg [1] : Bio::EnsEMBL::OntologyTerm
528 The GO term for which transcripts should be fetched.
529
530 Example: @transcripts = @{
531 $transcript_adaptor->fetch_all_by_GOTerm(
532 $go_adaptor->fetch_by_accession('GO:0030326') ) };
533
534 Description : Retrieves a list of transcripts that are
535 associated with the given GO term, or with any of
536 its descendent GO terms. The transcripts returned
537 are in their native coordinate system, i.e. in
538 the coordinate system in which they are stored
539 in the database. If another coordinate system
540 is required then the Transcript::transfer or
541 Transcript::transform method can be used.
542
543 Return type : listref of Bio::EnsEMBL::Transcript
544 Exceptions : Throws if argument is not a GO term
545 Caller : general
546 Status : Stable
547
548 =cut
549
sub fetch_all_by_GOTerm {
  my ( $self, $term ) = @_;

  # Validate the argument type inline.  This package does not import
  # assert_ref(), so calling it here would die with "Undefined
  # subroutine"; throw() is imported and gives a proper exception.
  # The eval guards against unblessed references, on which isa()
  # cannot be called as a method.
  my $is_ontology_term =
    ref($term) && eval { $term->isa('Bio::EnsEMBL::OntologyTerm') };

  if ( !$is_ontology_term ) {
    throw('Argument is not a Bio::EnsEMBL::OntologyTerm');
  }

  if ( $term->ontology() ne 'GO' ) {
    throw('Argument is not a GO term');
  }

  my $entryAdaptor = $self->db->get_DBEntryAdaptor();

  # Collect the transcript dbIDs linked to the term itself and to each
  # of its descendants; the hash removes duplicates.
  my %unique_dbIDs;
  foreach my $accession ( map { $_->accession() }
                          ( $term, @{ $term->descendants() } ) )
  {
    my @ids =
      $entryAdaptor->list_transcript_ids_by_extids( $accession, 'GO' );
    foreach my $dbID (@ids) { $unique_dbIDs{$dbID} = 1 }
  }

  # Fetch in ascending dbID order and return a fresh list reference.
  my @result = @{
    $self->fetch_all_by_dbID_list(
                              [ sort { $a <=> $b } keys(%unique_dbIDs) ]
    ) };

  return \@result;
} ## end sub fetch_all_by_GOTerm
576
577 =head2 fetch_all_by_GOTerm_accession
578
579 Arg [1] : String
580 The GO term accession for which transcripts should be
581 fetched.
582
583 Example :
584
585 @transcripts =
586 @{ $transcript_adaptor->fetch_all_by_GOTerm_accession(
587 'GO:0030326') };
588
589 Description : Retrieves a list of transcripts that are associated with
590 the given GO term, or with any of its descendant
591 GO terms. The transcripts returned are in their native
592 coordinate system, i.e. in the coordinate system
593 in which they are stored in the database. If
594 another coordinate system is required then the
595 Transcript::transfer or Transcript::transform method can be
596 used.
597
598 Return type : listref of Bio::EnsEMBL::Transcript
599 Exceptions : Throws if argument is not a GO term accession
600 Caller : general
601 Status : Stable
602
603 =cut
604
sub fetch_all_by_GOTerm_accession {
  my $self      = shift;
  my $accession = shift;

  # Anything that does not start with "GO:" is rejected outright.
  throw('Argument is not a GO term accession')
    unless $accession =~ /^GO:/;

  # Resolve the accession to an ontology term via the registry's
  # ontology adaptor, then delegate to the term-based fetch.
  my $term =
    Bio::EnsEMBL::Registry->get_adaptor( 'Multi', 'Ontology',
                                         'OntologyTerm' )
    ->fetch_by_accession($accession);

  return $self->fetch_all_by_GOTerm($term);
}
620
621 =head2 fetch_by_display_label
622
623 Arg [1] : String $label - display label of transcript to fetch
624 Example : my $tr = $tr_adaptor->fetch_by_display_label("BRCA2");
625 Description: Returns the transcript which has the given display label or
626 undef if there is none. If there are more than 1, only the first
627 is reported.
628 Returntype : Bio::EnsEMBL::Transcript
629 Exceptions : none
630 Caller : general
631 Status : Stable
632
633 =cut
634
sub fetch_by_display_label {
  my ( $self, $label ) = @_;

  # The label is matched against the display xref joined onto the
  # transcript; it is passed as a bound parameter.
  $self->bind_param_generic_fetch( $label, SQL_VARCHAR );

  # When several current transcripts share the label only the first
  # one is returned; undef when there is none.
  my ($first_match) =
    @{ $self->generic_fetch('x.display_label = ? AND t.is_current = 1') };

  return $first_match;
}
647
648
649 =head2 fetch_all_by_exon_stable_id
650
651 Arg [1] : String $stable_id
652 The stable id of an exon in a transcript
653 Example : my $tr = $tr_adaptor->fetch_all_by_exon_stable_id
654 ('ENSE00000309301');
655 Description: Retrieves a list of transcripts via an exon stable id.
656 Returntype : Listref of Bio::EnsEMBL::Transcript objects
657 Exceptions : none
658 Caller : general
659 Status : Stable
660
661 =cut
662
sub fetch_all_by_exon_stable_id {
  my ( $self, $stable_id ) = @_;

  # Find the dbIDs of all current transcripts that contain the exon.
  my $lookup = $self->prepare(qq(
      SELECT t.transcript_id
      FROM exon_transcript et, exon e, transcript t
      WHERE e.exon_id = et.exon_id
      AND et.transcript_id = t.transcript_id
      AND e.stable_id = ?
      AND t.is_current = 1
  ));

  $lookup->bind_param( 1, $stable_id, SQL_VARCHAR );
  $lookup->execute();

  # Fetch each transcript by dbID, silently dropping any that cannot
  # be retrieved.
  my @found;
  while ( my $transcript_id = $lookup->fetchrow_array ) {
    my $transcript = $self->fetch_by_dbID($transcript_id);
    if ($transcript) {
      push( @found, $transcript );
    }
  }

  # Note: "no transcripts" is reported as undef, not as an empty list.
  return undef unless @found;

  return \@found;
}
691
692 =head2 fetch_all_by_biotype
693
694 Arg [1] : String $biotype
695 listref of $biotypes
696 The biotype of the transcripts to retrieve. You can also have a reference
697 to a list of biotypes in the event of needing several.
698 Example : $transcript = $transcript_adaptor->fetch_all_by_biotype('pseudogene');
699 $transcript = $transcript_adaptor->fetch_all_by_biotype(['protein_coding','ambiguous_orf']);
700 Description: Retrieves an array reference of transcript objects from the
701 database via its biotype or biotypes.
702 The transcript will be retrieved in its native coordinate system
703 (i.e. in the coordinate system it is stored in the database).
704 It may be converted to a different coordinate system through a
705 call to transform() or transfer(). If no transcripts are found
706 a reference to an empty list is returned.
707 Returntype : listref of Bio::EnsEMBL::Transcript
708 Exceptions : if we cant get the transcript in given coord system
709 Caller : general
710 Status : Stable
711
712 =cut
713
sub fetch_all_by_biotype {
  my ( $self, $biotype ) = @_;

  if ( !defined $biotype ) {
    throw("Biotype or listref of biotypes expected");
  }

  my $constraint;

  if ( ref($biotype) eq 'ARRAY' ) {
    # An empty list of biotypes cannot match anything.  Return early:
    # building the constraint would otherwise yield the invalid SQL
    # fragment "t.biotype IN ()".
    if ( !@{$biotype} ) {
      return [];
    }

    # One bound parameter, and one placeholder, per requested biotype.
    foreach my $wanted_biotype ( @{$biotype} ) {
      $self->bind_param_generic_fetch( $wanted_biotype, SQL_VARCHAR );
    }

    my $placeholders = join( ',', ('?') x scalar( @{$biotype} ) );
    $constraint = "t.biotype IN ($placeholders) and t.is_current = 1";
  }
  else {
    $constraint = "t.biotype = ? and t.is_current = 1";
    $self->bind_param_generic_fetch( $biotype, SQL_VARCHAR );
  }

  # Always a list reference, possibly empty.
  my @transcripts = @{ $self->generic_fetch($constraint) };
  return \@transcripts;
}
738
739
740 =head2 store
741
742 Arg [1] : Bio::EnsEMBL::Transcript $transcript
743 The transcript to be written to the database
744 Arg [2] : Int $gene_dbID
745 The identifier of the gene that this transcript is associated
746 with
747 Arg [3] : DEPRECATED (optional) Int $analysis_id
748 The analysis_id to use when storing this gene. This is for
749 backward compatibility only and used to fall back to the gene
750 analysis_id if no analysis object is attached to the transcript
751 (which you should do for new code).
752 Example : $transID = $tr_adaptor->store($transcript, $gene->dbID);
753 Description: Stores a transcript in the database and returns the new
754 internal identifier for the stored transcript.
755 Returntype : Int
756 Exceptions : none
757 Caller : general
758 Status : Stable
759
760 =cut
761
762 sub store {
763 my ( $self, $transcript, $gene_dbID, $analysis_id ) = @_;
764
765 if ( !ref($transcript)
766 || !$transcript->isa('Bio::EnsEMBL::Transcript') )
767 {
768 throw("$transcript is not a EnsEMBL transcript - not storing");
769 }
770
771 my $db = $self->db();
772
773 if ( $transcript->is_stored($db) ) {
774 return $transcript->dbID();
775 }
776
777 # Force lazy-loading of exons and ensure coords are correct.
778 $transcript->recalculate_coordinates();
779
780 my $is_current = ( defined( $transcript->is_current() )
781 ? $transcript->is_current()
782 : 1 );
783
784 # store analysis
785 my $analysis = $transcript->analysis();
786 my $new_analysis_id;
787
788 if ($analysis) {
789 if ( $analysis->is_stored($db) ) {
790 $new_analysis_id = $analysis->dbID;
791 } else {
792 $new_analysis_id = $db->get_AnalysisAdaptor->store($analysis);
793 }
794 } elsif ($analysis_id) {
795 # Fall back to analysis passed in (usually from gene) if analysis
796 # wasn't set explicitely for the transcript. This is deprectated
797 # though.
798 warning( "You should explicitely attach "
799 . "an analysis object to the Transcript. "
800 . "Will fall back to Gene analysis, "
801 . "but this behaviour is deprecated." );
802 $new_analysis_id = $analysis_id;
803 } else {
804 throw("Need an analysis_id to store the Transcript.");
805 }
806
807 #
808 # Store exons - this needs to be done before the possible transfer
809 # of the transcript to another slice (in _prestore()). Transfering
810 # results in copies being made of the exons and we need to preserve
811 # the object identity of the exons so that they are not stored twice
812 # by different transcripts.
813 #
814 my $exons = $transcript->get_all_Exons();
815 my $exonAdaptor = $db->get_ExonAdaptor();
816 foreach my $exon ( @{$exons} ) {
817 $exonAdaptor->store($exon);
818 }
819
820 my $original_translation = $transcript->translation();
821 my $original = $transcript;
822 my $seq_region_id;
823 ( $transcript, $seq_region_id ) = $self->_pre_store($transcript);
824
825 # First store the transcript without a display xref. The display xref
826 # needs to be set after xrefs are stored which needs to happen after
827 # transcript is stored.
828
829 #
830 # Store transcript
831 #
832 my $store_transcript_sql = qq(
833 INSERT INTO transcript
834 SET gene_id = ?,
835 analysis_id = ?,
836 seq_region_id = ?,
837 seq_region_start = ?,
838 seq_region_end = ?,
839 seq_region_strand = ?,
840 biotype = ?,
841 status = ?,
842 description = ?,
843 is_current = ?,
844 canonical_translation_id = ?
845 );
846
847 if ( defined( $transcript->stable_id() ) ) {
848
849 my $created = $self->db->dbc->from_seconds_to_date($transcript->created_date());
850 my $modified = $self->db->dbc->from_seconds_to_date($transcript->modified_date());
851 $store_transcript_sql .= ", stable_id = ?, version = ?, created_date = " . $created . " , modified_date = " . $modified;
852
853 }
854
855 my $tst = $self->prepare($store_transcript_sql);
856 $tst->bind_param( 1, $gene_dbID, SQL_INTEGER );
857 $tst->bind_param( 2, $new_analysis_id, SQL_INTEGER );
858 $tst->bind_param( 3, $seq_region_id, SQL_INTEGER );
859 $tst->bind_param( 4, $transcript->start(), SQL_INTEGER );
860 $tst->bind_param( 5, $transcript->end(), SQL_INTEGER );
861 $tst->bind_param( 6, $transcript->strand(), SQL_TINYINT );
862 $tst->bind_param( 7, $transcript->biotype(), SQL_VARCHAR );
863 $tst->bind_param( 8, $transcript->status(), SQL_VARCHAR );
864 $tst->bind_param( 9, $transcript->description(), SQL_LONGVARCHAR );
865 $tst->bind_param( 10, $is_current, SQL_TINYINT );
866
867 # If the transcript has a translation, this is updated later:
868 $tst->bind_param( 11, undef, SQL_INTEGER );
869
870 if ( defined( $transcript->stable_id() ) ) {
871
872 $tst->bind_param( 12, $transcript->stable_id(), SQL_VARCHAR );
873 my $version = ($transcript->version()) ? $transcript->version() : 1;
874 $tst->bind_param( 13, $version, SQL_INTEGER );
875 }
876
877
878 $tst->execute();
879 $tst->finish();
880
881 my $transc_dbID = $tst->{'mysql_insertid'};
882
883 #
884 # Store translation
885 #
886
887 my $alt_translations =
888 $transcript->get_all_alternative_translations();
889 my $translation = $transcript->translation();
890
891 if ( defined($translation) ) {
892 # Make sure that the start and end exon are set correctly.
893 my $start_exon = $translation->start_Exon();
894 my $end_exon = $translation->end_Exon();
895
896 if ( !defined($start_exon) ) {
897 throw("Translation does not define a start exon.");
898 }
899
900 if ( !defined($end_exon) ) {
901 throw("Translation does not defined an end exon.");
902 }
903
904 # If the dbID is not set, this means the exon must have been a
905 # different object in memory than the the exons of the transcript.
906 # Try to find the matching exon in all of the exons we just stored.
907 if ( !defined( $start_exon->dbID() ) ) {
908 my $key = $start_exon->hashkey();
909 ($start_exon) = grep { $_->hashkey() eq $key } @$exons;
910
911 if ( defined($start_exon) ) {
912 $translation->start_Exon($start_exon);
913 } else {
914 throw( "Translation's start_Exon does not appear "
915 . "to be one of the exons in "
916 . "its associated Transcript" );
917 }
918 }
919
920 if ( !defined( $end_exon->dbID() ) ) {
921 my $key = $end_exon->hashkey();
922 ($end_exon) = grep { $_->hashkey() eq $key } @$exons;
923
924 if ( defined($end_exon) ) {
925 $translation->end_Exon($end_exon);
926 } else {
927 throw( "Translation's end_Exon does not appear "
928 . "to be one of the exons in "
929 . "its associated Transcript." );
930 }
931 }
932
933 my $old_dbid = $translation->dbID();
934 $db->get_TranslationAdaptor()->store( $translation, $transc_dbID );
935
936 # Need to update the canonical_translation_id for this transcript.
937
938 my $sth = $self->prepare(
939 q(
940 UPDATE transcript
941 SET canonical_translation_id = ?
942 WHERE transcript_id = ?)
943 );
944
945 $sth->bind_param( 1, $translation->dbID(), SQL_INTEGER );
946 $sth->bind_param( 2, $transc_dbID, SQL_INTEGER );
947
948 $sth->execute();
949
950 # Set values of the original translation, we may have copied it when
951 # we transformed the transcript.
952 $original_translation->dbID( $translation->dbID() );
953 $original_translation->adaptor( $translation->adaptor() );
954 } ## end if ( defined($translation...))
955
956 #
957 # Store the alternative translations, if there are any.
958 #
959
960 if ( defined($alt_translations)
961 && scalar( @{$alt_translations} ) > 0 )
962 {
963 foreach my $alt_translation ( @{$alt_translations} ) {
964 my $start_exon = $alt_translation->start_Exon();
965 my $end_exon = $alt_translation->end_Exon();
966
967 if ( !defined($start_exon) ) {
968 throw("Translation does not define a start exon.");
969 } elsif ( !defined($end_exon) ) {
970 throw("Translation does not defined an end exon.");
971 }
972
973 if ( !defined( $start_exon->dbID() ) ) {
974 my $key = $start_exon->hashkey();
975 ($start_exon) = grep { $_->hashkey() eq $key } @{$exons};
976
977 if ( defined($start_exon) ) {
978 $alt_translation->start_Exon($start_exon);
979 } else {
980 throw( "Translation's start_Exon does not appear "
981 . "to be one of the exon in"
982 . "its associated Transcript" );
983 }
984 } elsif ( !defined( $end_exon->dbID() ) ) {
985 my $key = $end_exon->hashkey();
986 ($end_exon) = grep { $_->hashkey() eq $key } @$exons;
987
988 if ( defined($end_exon) ) {
989 $translation->end_Exon($end_exon);
990 } else {
991 throw( "Translation's end_Exon does not appear "
992 . "to be one of the exons in "
993 . "its associated Transcript." );
994 }
995 }
996
997 $db->get_TranslationAdaptor()
998 ->store( $alt_translation, $transc_dbID );
999 } ## end foreach my $alt_translation...
1000 } ## end if ( defined($alt_translations...))
1001
1002 #
1003 # Store the xrefs/object xref mapping.
1004 #
1005 my $dbEntryAdaptor = $db->get_DBEntryAdaptor();
1006
1007 foreach my $dbe ( @{ $transcript->get_all_DBEntries() } ) {
1008 $dbEntryAdaptor->store( $dbe, $transc_dbID, "Transcript", 1 );
1009 }
1010
1011 #
1012 # Update transcript to point to display xref if it is set.
1013 #
1014 if ( my $dxref = $transcript->display_xref() ) {
1015 my $dxref_id;
1016
1017 if ( $dxref->is_stored($db) ) {
1018 $dxref_id = $dxref->dbID();
1019 } else {
1020 $dxref_id = $dbEntryAdaptor->exists($dxref);
1021 }
1022
1023 if ( defined($dxref_id) ) {
1024 my $sth =
1025 $self->prepare( "UPDATE transcript "
1026 . "SET display_xref_id = ? "
1027 . "WHERE transcript_id = ?" );
1028 $sth->bind_param( 1, $dxref_id, SQL_INTEGER );
1029 $sth->bind_param( 2, $transc_dbID, SQL_INTEGER );
1030 $sth->execute();
1031 $dxref->dbID($dxref_id);
1032 $dxref->adaptor($dbEntryAdaptor);
1033 $sth->finish();
1034 } else {
1035 warning(sprintf(
1036 "Display_xref %s:%s is not stored in database.\n"
1037 . "Not storing relationship to this transcript.",
1038 $dxref->dbname(), $dxref->display_id() ) );
1039 $dxref->dbID(undef);
1040 $dxref->adaptor(undef);
1041 }
1042 } ## end if ( my $dxref = $transcript...)
1043
1044 #
1045 # Link transcript to exons in exon_transcript table
1046 #
1047 my $etst = $self->prepare(
1048 "INSERT INTO exon_transcript (exon_id,transcript_id,rank) "
1049 . "VALUES (?,?,?)" );
1050 my $rank = 1;
1051 foreach my $exon ( @{ $transcript->get_all_Exons } ) {
1052 $etst->bind_param( 1, $exon->dbID, SQL_INTEGER );
1053 $etst->bind_param( 2, $transc_dbID, SQL_INTEGER );
1054 $etst->bind_param( 3, $rank, SQL_INTEGER );
1055 $etst->execute();
1056 $rank++;
1057 }
1058
1059 $etst->finish();
1060
1061 # Now the supporting evidence
1062 my $tsf_adaptor = $db->get_TranscriptSupportingFeatureAdaptor();
1063 $tsf_adaptor->store( $transc_dbID,
1064 $transcript->get_all_supporting_features() );
1065
1066 # store transcript attributes if there are any
1067 my $attr_adaptor = $db->get_AttributeAdaptor();
1068
1069 $attr_adaptor->store_on_Transcript( $transc_dbID,
1070 $transcript->get_all_Attributes() );
1071
1072 # store the IntronSupportingEvidence features
1073 my $ise_adaptor = $db->get_IntronSupportingEvidenceAdaptor();
1074 my $intron_supporting_evidence = $transcript->get_all_IntronSupportingEvidence();
1075 foreach my $ise (@{$intron_supporting_evidence}) {
1076 $ise_adaptor->store($ise);
1077 $ise_adaptor->store_transcript_linkage($ise, $transcript, $transc_dbID);
1078 }
1079
1080 # Update the original transcript object - not the transferred copy that
1081 # we might have created.
1082 $original->dbID($transc_dbID);
1083 $original->adaptor($self);
1084
1085 return $transc_dbID;
1086 } ## end sub store
1087
1088
=head2 get_Interpro_by_transid

  Arg [1]    : String $trans_stable_id
               The stable id of the transcript to look up
  Example    : @i = @{ $tr_adaptor->get_Interpro_by_transid($trans->stable_id()) };
  Description: Gets Interpro accession numbers by transcript stable id.
               The query walks from the current transcript with that
               stable id through its translation and protein features to
               the interpro and xref tables.  Each accession is reported
               once, using the first description returned for it.
               A hack really - we should have a much more structured
               system than this.
  Returntype : listref of strings (Interpro_acc:description)
  Exceptions : none
  Caller     : domainview? , GeneView
  Status     : Stable

=cut

sub get_Interpro_by_transid {
  my ( $self, $trans_stable_id ) = @_;

  my $interpro_sql = qq(
  SELECT STRAIGHT_JOIN i.interpro_ac, x.description
  FROM transcript t,
  translation tl,
  protein_feature pf,
  interpro i,
  xref x
  WHERE t.stable_id = ?
  AND tl.transcript_id = t.transcript_id
  AND tl.translation_id = pf.translation_id
  AND i.id = pf.hit_name
  AND i.interpro_ac = x.dbprimary_acc
  AND t.is_current = 1
  );

  my $sth = $self->prepare($interpro_sql);
  $sth->bind_param( 1, $trans_stable_id, SQL_VARCHAR );
  $sth->execute();

  my @interpro;
  my %seen;

  while ( my $row = $sth->fetchrow_arrayref() ) {
    my ( $accession, $description ) = @{$row};

    # Only the first row for each accession is kept.
    next if $seen{$accession}++;

    push( @interpro, $accession . ":" . $description );
  }

  return \@interpro;
}
1136
=head2 is_Transcript_canonical()

  Arg [1]     : Bio::EnsEMBL::Transcript $transcript
                The transcript to query with
  Example     : $tr_adaptor->is_Transcript_canonical($transcript);
  Description : Counts the rows of the gene table whose
                canonical_transcript_id is the dbID of the given
                transcript.  The count is true when the transcript is
                the canonical transcript of a gene and 0 otherwise.
  Returntype  : Boolean (the count returned by the query)
  Exceptions  : None
  Caller      : Bio::EnsEMBL::Transcript
  Status      : Beta

=cut

sub is_Transcript_canonical {
  my ( $self, $transcript ) = @_;

  my $helper = $self->dbc()->sql_helper();
  my @params = ( $transcript->dbID() );

  return $helper->execute_single_result(
    -SQL    => 'select count(*) from gene where canonical_transcript_id =?',
    -PARAMS => \@params
  );
}
1159
1160
=head2 remove

  Arg [1]    : Bio::EnsEMBL::Transcript $transcript
               The transcript to remove from the database
  Example    : $tr_adaptor->remove($transcript);
  Description: Removes a transcript completely from the database, and all
               associated information: transcript supporting features
               (the align features themselves only when nothing else
               references them), intron supporting evidence linkage,
               xref linkage, attributes, the translation, exons that are
               referenced by no other transcript, and finally the
               exon_transcript and transcript rows.
               This method is usually called by the GeneAdaptor::remove method
               because this method will not perform the removal of genes
               which are associated with this transcript. Do not call this
               method directly unless you know there are no genes associated
               with the transcript!
  Returntype : none
  Exceptions : throw on incorrect arguments
               warning if transcript is not in this database
  Caller     : GeneAdaptor::remove
  Status     : Stable

=cut

sub remove {
  my $self       = shift;
  my $transcript = shift;

  if ( !ref($transcript) || !$transcript->isa('Bio::EnsEMBL::Transcript') ) {
    throw("Bio::EnsEMBL::Transcript argument expected");
  }

  # sanity check: make sure nobody tries to slip past a prediction transcript
  # which inherits from transcript but actually uses different tables
  if ( $transcript->isa('Bio::EnsEMBL::PredictionTranscript') ) {
    throw( "TranscriptAdaptor can only remove Transcripts "
         . "not PredictionTranscripts" );
  }

  if ( !$transcript->is_stored( $self->db() ) ) {
    warning( "Cannot remove transcript " . $transcript->dbID
           . ". Is not stored " . "in this database." );
    return;
  }

  # remove the supporting features of this transcript

  my $prot_adp = $self->db->get_ProteinAlignFeatureAdaptor;
  my $dna_adp  = $self->db->get_DnaAlignFeatureAdaptor;

  my $sfsth = $self->prepare( "SELECT feature_type, feature_id "
                            . "FROM transcript_supporting_feature "
                            . "WHERE transcript_id = ?" );

  $sfsth->bind_param( 1, $transcript->dbID, SQL_INTEGER );
  $sfsth->execute();

  # statements to check for shared align_features
  my $sth1 = $self->prepare( "SELECT count(*) FROM supporting_feature "
                           . "WHERE feature_type = ? AND feature_id = ?" );
  my $sth2 = $self->prepare( "SELECT count(*) "
                           . "FROM transcript_supporting_feature "
                           . "WHERE feature_type = ? AND feature_id = ?" );

 SUPPORTING_FEATURE:
  while ( my ( $type, $feature_id ) = $sfsth->fetchrow() ) {

    # only remove align_feature if this is the last reference to it
    $sth1->bind_param( 1, $type,       SQL_VARCHAR );
    $sth1->bind_param( 2, $feature_id, SQL_INTEGER );
    $sth1->execute;
    $sth2->bind_param( 1, $type,       SQL_VARCHAR );
    $sth2->bind_param( 2, $feature_id, SQL_INTEGER );
    $sth2->execute;
    my ($count1) = $sth1->fetchrow;
    my ($count2) = $sth2->fetchrow;

    # The link from this transcript is itself one reference, so anything
    # above one means the feature is shared with an exon or another
    # transcript.
    if ( $count1 + $count2 > 1 ) {
      next SUPPORTING_FEATURE;
    }

    if ( $type eq 'protein_align_feature' ) {
      my $f = $prot_adp->fetch_by_dbID($feature_id);
      $prot_adp->remove($f);
    }
    elsif ( $type eq 'dna_align_feature' ) {
      my $f = $dna_adp->fetch_by_dbID($feature_id);
      $dna_adp->remove($f);
    }
    else {
      warning("Unknown supporting feature type $type. Not removing feature.");
    }
  }
  $sfsth->finish();
  $sth1->finish();
  $sth2->finish();

  # delete the association to supporting features

  $sfsth = $self->prepare(
      "DELETE FROM transcript_supporting_feature WHERE transcript_id = ?");
  $sfsth->bind_param( 1, $transcript->dbID, SQL_INTEGER );
  $sfsth->execute();
  $sfsth->finish();

  # delete the associated IntronSupportingEvidence and if the ISE had no more
  # linked transcripts remove it
  my $ise_adaptor = $self->db->get_IntronSupportingEvidenceAdaptor();
  foreach my $ise ( @{ $transcript->get_all_IntronSupportingEvidence() } ) {
    $ise_adaptor->remove_transcript_linkage( $ise, $transcript );
    if ( !$ise->has_linked_transcripts() ) {
      $ise_adaptor->remove($ise);
    }
  }

  # remove all xref linkages to this transcript

  my $dbeAdaptor = $self->db->get_DBEntryAdaptor();
  foreach my $dbe ( @{ $transcript->get_all_DBEntries } ) {
    $dbeAdaptor->remove_from_object( $dbe, $transcript, 'Transcript' );
  }

  # remove the attributes associated with this transcript
  my $attrib_adp = $self->db->get_AttributeAdaptor;
  $attrib_adp->remove_from_Transcript($transcript);

  # remove the translation associated with this transcript

  my $translationAdaptor = $self->db->get_TranslationAdaptor();
  if ( defined( $transcript->translation() ) ) {
    $translationAdaptor->remove( $transcript->translation );
  }

  # remove exon associations to this transcript

  my $exonAdaptor = $self->db->get_ExonAdaptor();

  # The reference-count query is the same for every exon, so it is
  # prepared once and re-executed with a new exon id on each iteration.
  my $exon_count_sth = $self->prepare( "SELECT count(*) "
                                     . "FROM exon_transcript "
                                     . "WHERE exon_id = ?" );

  foreach my $exon ( @{ $transcript->get_all_Exons() } ) {
    # get the number of transcript references to this exon
    # only remove the exon if this is the last transcript to
    # reference it
    $exon_count_sth->bind_param( 1, $exon->dbID, SQL_INTEGER );
    $exon_count_sth->execute();
    my ($count) = $exon_count_sth->fetchrow_array();
    $exon_count_sth->finish();

    # This transcript's own exon_transcript row is still present here,
    # so a count of exactly one means no other transcript uses the exon.
    if ( $count == 1 ) {
      $exonAdaptor->remove($exon);
    }
  }

  my $sth = $self->prepare( "DELETE FROM exon_transcript "
                          . "WHERE transcript_id = ?" );
  $sth->bind_param( 1, $transcript->dbID, SQL_INTEGER );
  $sth->execute();
  $sth->finish();

  $sth = $self->prepare( "DELETE FROM transcript "
                       . "WHERE transcript_id = ?" );
  $sth->bind_param( 1, $transcript->dbID, SQL_INTEGER );
  $sth->execute();
  $sth->finish();

  # The object no longer corresponds to a database row.
  $transcript->dbID(undef);
  $transcript->adaptor(undef);

  return;
}
1330
1331
=head2 update

  Arg [1]    : Bio::EnsEMBL::Transcript $transcript
               The transcript to update
  Example    : $tr_adaptor->update($transcript);
  Description: Updates a transcript in the database.  The columns written
               are analysis_id, display_xref_id, description, biotype,
               status, is_current and canonical_translation_id, for the
               row whose transcript_id is the dbID of $transcript.
               No check is made that the transcript is stored in the
               database, and the attached exons are not updated.
  Returntype : None
  Exceptions : thrown if the $transcript is not a Bio::EnsEMBL::Transcript.
  Caller     : general
  Status     : Stable

=cut

sub update {
  my ( $self, $transcript ) = @_;

  unless ( defined($transcript)
        && ref($transcript)
        && $transcript->isa('Bio::EnsEMBL::Transcript') )
  {
    throw("Must update a transcript object, not a $transcript");
  }

  # A display xref without a dbID is written as NULL.
  my $display_xref    = $transcript->display_xref();
  my $display_xref_id = undef;
  if ( defined($display_xref) && $display_xref->dbID() ) {
    $display_xref_id = $display_xref->dbID();
  }

  my $sth = $self->prepare(
    qq(
    UPDATE transcript
    SET analysis_id = ?,
    display_xref_id = ?,
    description = ?,
    biotype = ?,
    status = ?,
    is_current = ?,
    canonical_translation_id = ?
    WHERE transcript_id = ?
    ) );

  $sth->bind_param( 1, $transcript->analysis()->dbID(), SQL_INTEGER );
  $sth->bind_param( 2, $display_xref_id,                SQL_INTEGER );
  $sth->bind_param( 3, $transcript->description(),      SQL_LONGVARCHAR );
  $sth->bind_param( 4, $transcript->biotype(),          SQL_VARCHAR );
  $sth->bind_param( 5, $transcript->status(),           SQL_VARCHAR );
  $sth->bind_param( 6, $transcript->is_current(),       SQL_TINYINT );

  # Transcripts without a translation get a NULL canonical translation.
  my $canonical_translation_id;
  if ( defined( $transcript->translation() ) ) {
    $canonical_translation_id = $transcript->translation()->dbID();
  }
  $sth->bind_param( 7, $canonical_translation_id, SQL_INTEGER );

  $sth->bind_param( 8, $transcript->dbID(), SQL_INTEGER );

  $sth->execute();
} ## end sub update
1397
1398
=head2 list_dbIDs

  Arg [1]    : <optional> int. not 0 for the ids to be sorted by the
               seq_region.
  Example    : @transcript_ids = @{ $t_adaptor->list_dbIDs };
  Description: Gets a list of internal ids for all transcripts in the db.
               The work is delegated to _list_dbIDs for the "transcript"
               table.
  Returntype : Listref of Ints
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub list_dbIDs {
  my $self    = shift;
  my $ordered = shift;

  # The second argument (column name) is deliberately left undefined.
  my @list_args = ( "transcript", undef, $ordered );

  return $self->_list_dbIDs(@list_args);
}
1415
1416
=head2 list_stable_ids

  Example    : @stable_trans_ids = @{ $transcript_adaptor->list_stable_ids };
  Description: Gets a list of stable ids for all transcripts in the current
               database.  The work is delegated to _list_dbIDs for the
               "stable_id" column of the "transcript" table.
  Returntype : Listref of Strings
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub list_stable_ids {
  my $self = shift;

  my ( $table, $column ) = ( "transcript", "stable_id" );

  return $self->_list_dbIDs( $table, $column );
}
1434
1435
#_objs_from_sth

#  Arg [1]    : StatementHandle $sth
#               An executed handle whose rows provide the 25 columns
#               bound below, in that order.
#  Arg [2]    : Bio::EnsEMBL::AssemblyMapper $mapper
#               Optional.  When given, feature coordinates are remapped
#               through it.
#  Arg [3]    : Bio::EnsEMBL::Slice $dest_slice
#               Optional.  When given, coordinates are made relative to
#               this slice and features that fall outside it, or on a
#               different seq_region, are dropped.
#  Description: PROTECTED implementation of abstract superclass method.
#               Responsible for the creation of Transcripts.
#  Returntype : Listref of Bio::EnsEMBL::Transcripts in target coord system
#  Exceptions : none
#  Caller     : internal
#  Status     : Stable

sub _objs_from_sth {
  my ( $self, $sth, $mapper, $dest_slice ) = @_;

  #
  # This code is ugly because an attempt has been made to remove as many
  # function calls as possible for speed purposes.  Thus many caches and
  # a fair bit of gymnastics is used.
  #

  my $sa             = $self->db()->get_SliceAdaptor();
  my $aa             = $self->db->get_AnalysisAdaptor();
  my $dbEntryAdaptor = $self->db()->get_DBEntryAdaptor();

  my @transcripts;

  # Per-call caches, keyed by analysis id or seq_region id.
  my %analysis_hash;
  my %slice_hash;
  my %sr_name_hash;
  my %sr_cs_hash;

  my (
    $transcript_id,  $seq_region_id,      $seq_region_start,
    $seq_region_end, $seq_region_strand,  $analysis_id,
    $gene_id,        $is_current,         $stable_id,
    $version,        $created_date,       $modified_date,
    $description,    $biotype,            $status,
    $external_db,    $external_status,    $external_db_name,
    $xref_id,        $xref_display_label, $xref_primary_acc,
    $xref_version,   $xref_description,   $xref_info_type,
    $xref_info_text
  );

  $sth->bind_columns(
    \(
      $transcript_id,  $seq_region_id,      $seq_region_start,
      $seq_region_end, $seq_region_strand,  $analysis_id,
      $gene_id,        $is_current,         $stable_id,
      $version,        $created_date,       $modified_date,
      $description,    $biotype,            $status,
      $external_db,    $external_status,    $external_db_name,
      $xref_id,        $xref_display_label, $xref_primary_acc,
      $xref_version,   $xref_description,   $xref_info_type,
      $xref_info_text
    ) );

  # NOTE(review): none of the coordinate-system locals below is read
  # anywhere in this sub.  The lookups are kept so that a missing or
  # unusable mapper still fails at the same point as before.
  my $asm_cs;
  my $cmp_cs;
  my $asm_cs_vers;
  my $asm_cs_name;
  my $cmp_cs_vers;
  my $cmp_cs_name;
  if ($mapper) {
    $asm_cs      = $mapper->assembled_CoordSystem();
    $cmp_cs      = $mapper->component_CoordSystem();
    $asm_cs_name = $asm_cs->name();
    $asm_cs_vers = $asm_cs->version();
    $cmp_cs_name = $cmp_cs->name();
    $cmp_cs_vers = $cmp_cs->version();
  }

  my $dest_slice_start;
  my $dest_slice_end;
  my $dest_slice_strand;
  my $dest_slice_length;
  my $dest_slice_cs;
  my $dest_slice_sr_name;
  my $dest_slice_sr_id;

  my $asma;
  if ($dest_slice) {
    $dest_slice_start   = $dest_slice->start();
    $dest_slice_end     = $dest_slice->end();
    $dest_slice_strand  = $dest_slice->strand();
    $dest_slice_length  = $dest_slice->length();
    $dest_slice_cs      = $dest_slice->coord_system();
    $dest_slice_sr_name = $dest_slice->seq_region_name();
    $dest_slice_sr_id   = $dest_slice->get_seq_region_id();
    $asma               = $self->db->get_AssemblyMapperAdaptor();
  }

 FEATURE: while ( $sth->fetch() ) {

    # get the analysis object
    my $analysis = $analysis_hash{$analysis_id} ||=
      $aa->fetch_by_dbID($analysis_id);

    # need to get the internal_seq_region, if present
    $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
    my $slice       = $slice_hash{ "ID:" . $seq_region_id };
    my $dest_mapper = $mapper;

    if ( !$slice ) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{ "ID:" . $seq_region_id } = $slice;
      $sr_name_hash{$seq_region_id}         = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id}           = $slice->coord_system();
    }

    # obtain a mapper if none was defined, but a dest_seq_region was
    if ( !$dest_mapper
      && $dest_slice
      && !$dest_slice_cs->equals( $slice->coord_system ) )
    {
      $dest_mapper =
        $asma->fetch_by_CoordSystems( $dest_slice_cs, $slice->coord_system );
      $asm_cs      = $dest_mapper->assembled_CoordSystem();
      $cmp_cs      = $dest_mapper->component_CoordSystem();
      $asm_cs_name = $asm_cs->name();
      $asm_cs_vers = $asm_cs->version();
      $cmp_cs_name = $cmp_cs->name();
      $cmp_cs_vers = $cmp_cs->version();
    }

    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs   = $sr_cs_hash{$seq_region_id};

    #
    # remap the feature coordinates to another coord system
    # if a mapper was provided
    #
    if ($dest_mapper) {

      if ( defined $dest_slice
        && $dest_mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') )
      {
        ( $seq_region_id, $seq_region_start,
          $seq_region_end, $seq_region_strand )
          = $dest_mapper->map( $sr_name, $seq_region_start, $seq_region_end,
                               $seq_region_strand, $sr_cs, 1, $dest_slice );

      } else {

        ( $seq_region_id, $seq_region_start,
          $seq_region_end, $seq_region_strand )
          = $dest_mapper->fastmap( $sr_name, $seq_region_start,
                                   $seq_region_end, $seq_region_strand,
                                   $sr_cs );
      }

      # skip features that map to gaps or coord system boundaries
      next FEATURE if ( !defined($seq_region_id) );

      # get a slice in the coord system we just mapped to
      $slice = $slice_hash{ "ID:" . $seq_region_id } ||=
        $sa->fetch_by_seq_region_id($seq_region_id);
    }

    #
    # If a destination slice was provided convert the coords.
    #
    if ( defined($dest_slice) ) {
      if ( $dest_slice_strand == 1 ) {
        $seq_region_start = $seq_region_start - $dest_slice_start + 1;
        $seq_region_end   = $seq_region_end - $dest_slice_start + 1;

        if ( $dest_slice->is_circular ) {
          if ( $seq_region_start > $seq_region_end ) {
            # Looking at a feature overlapping the chromosome origin.
            if ( $seq_region_end > $dest_slice_start ) {
              # Looking at the region in the beginning of the chromosome
              $seq_region_start -= $dest_slice->seq_region_length();
            }
            if ( $seq_region_end < 0 ) {
              $seq_region_end += $dest_slice->seq_region_length();
            }
          } else {
            if ( $dest_slice_start > $dest_slice_end
              && $seq_region_end < 0 )
            {
              # Looking at the region overlapping the chromosome
              # origin and a feature which is at the beginning of the
              # chromosome.
              $seq_region_start += $dest_slice->seq_region_length();
              $seq_region_end   += $dest_slice->seq_region_length();
            }
          }
        }
      } else {
        if ( $dest_slice->is_circular()
          && $seq_region_start > $seq_region_end )
        {
          if ( $seq_region_end > $dest_slice_start ) {
            # Looking at the region in the beginning of the chromosome.
            $seq_region_start = $dest_slice_end - $seq_region_end + 1;
            $seq_region_end =
              $seq_region_end -
              $dest_slice->seq_region_length() -
              $dest_slice_start + 1;
          } else {
            my $tmp_seq_region_start = $seq_region_start;
            $seq_region_start =
              $dest_slice_end -
              $seq_region_end -
              $dest_slice->seq_region_length() + 1;
            $seq_region_end =
              $dest_slice_end - $tmp_seq_region_start + 1;
          }

        } else {
          my $tmp_seq_region_start = $seq_region_start;
          $seq_region_start = $dest_slice_end - $seq_region_end + 1;
          $seq_region_end   = $dest_slice_end - $tmp_seq_region_start + 1;
        }

        $seq_region_strand = -$seq_region_strand;
      } ## end else [ if ( $dest_slice_strand...)]

      # Throw away features off the end of the requested slice
      if ( $seq_region_end < 1
        || $seq_region_start > $dest_slice_length
        || ( $dest_slice_sr_id ne $seq_region_id ) )
      {
        next FEATURE;
      }

      $slice = $dest_slice;
    }

    my $display_xref;

    if ($xref_id) {
      $display_xref = Bio::EnsEMBL::DBEntry->new_fast( {
          'dbID'            => $xref_id,
          'display_id'      => $xref_display_label,
          'primary_id'      => $xref_primary_acc,
          'version'         => $xref_version,
          'description'     => $xref_description,
          'info_type'       => $xref_info_type,
          'info_text'       => $xref_info_text,
          'adaptor'         => $dbEntryAdaptor,
          'db_display_name' => $external_db_name,
          'dbname'          => $external_db
      } );
    }

    # Finally, create the new Transcript.
    push(
      @transcripts,
      $self->_create_feature_fast(
        'Bio::EnsEMBL::Transcript',
        {
          'analysis'              => $analysis,
          'start'                 => $seq_region_start,
          'end'                   => $seq_region_end,
          'strand'                => $seq_region_strand,
          'adaptor'               => $self,
          'slice'                 => $slice,
          'dbID'                  => $transcript_id,
          'stable_id'             => $stable_id,
          'version'               => $version,
          'created_date'          => $created_date || undef,
          'modified_date'         => $modified_date || undef,
          'external_name'         => $xref_display_label,
          'external_db'           => $external_db,
          'external_status'       => $external_status,
          'external_display_name' => $external_db_name,
          'display_xref'          => $display_xref,
          'description'           => $description,
          'biotype'               => $biotype,
          'status'                => $status,
          'is_current'            => $is_current,
          'edits_enabled'         => 1
        } ) );

  }

  return \@transcripts;
}
1716
1717
=head2 fetch_all_by_exon_supporting_evidence

  Arg [1]    : String $hit_name
               Name of supporting feature
  Arg [2]    : String $feature_type
               one of "dna_align_feature" or "protein_align_feature"
  Arg [3]    : (optional) Bio::Ensembl::Analysis
  Example    : $tr = $tr_adaptor->fetch_all_by_exon_supporting_evidence
                  ('XYZ', 'dna_align_feature');
  Description: Gets all the current transcripts with exons which have a
               specified hit on a particular type of feature. Optionally
               filter by analysis.
  Returntype : Listref of Bio::EnsEMBL::Transcript objects
  Exceptions : If feature_type is not exactly one of the two allowed
               values.
  Caller     : general
  Status     : Stable

=cut

sub fetch_all_by_exon_supporting_evidence {
  my ( $self, $hit_name, $feature_type, $analysis ) = @_;

  # $feature_type is interpolated into the SQL below as a table name and
  # as part of a column name, so it must match one of the two allowed
  # values exactly.  The pattern is anchored and the alternation grouped.
  if ( !defined($feature_type)
    || $feature_type !~ /\A(?:dna|protein)_align_feature\z/ )
  {
    throw("feature type must be dna_align_feature or protein_align_feature");
  }

  my $anal_from = "";
  $anal_from = ", analysis a " if ($analysis);
  my $anal_where = "";
  $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? "
    if ($analysis);

  my $sql = qq(
  SELECT DISTINCT(t.transcript_id)
  FROM transcript t,
  exon_transcript et,
  supporting_feature sf,
  $feature_type f
  $anal_from
  WHERE t.transcript_id = et.transcript_id
  AND t.is_current = 1
  AND et.exon_id = sf.exon_id
  AND sf.feature_id = f.${feature_type}_id
  AND sf.feature_type = ?
  AND f.hit_name=?
  $anal_where
  );

  my $sth = $self->prepare($sql);

  $sth->bind_param( 1, $feature_type,     SQL_VARCHAR );
  $sth->bind_param( 2, $hit_name,         SQL_VARCHAR );
  $sth->bind_param( 3, $analysis->dbID(), SQL_INTEGER ) if ($analysis);

  $sth->execute();

  my @transcripts;

  # Fetch in list context: the loop ends when there are no more rows,
  # not when a fetched value happens to be false.
  while ( my ($id) = $sth->fetchrow_array() ) {
    my $transcript = $self->fetch_by_dbID($id);
    push( @transcripts, $transcript ) if $transcript;
  }

  return \@transcripts;
}
1782
1783
=head2 fetch_all_by_transcript_supporting_evidence

  Arg [1]    : String $hit_name
               Name of supporting feature
  Arg [2]    : String $feature_type
               one of "dna_align_feature" or "protein_align_feature"
  Arg [3]    : (optional) Bio::Ensembl::Analysis
  Example    : $transcripts = $transcript_adaptor->fetch_all_by_transcript_supporting_evidence('XYZ', 'dna_align_feature');
  Description: Gets all the current transcripts with evidence from a
               specified hit_name on a particular type of feature, stored
               in the transcript_supporting_feature table. Optionally
               filter by analysis.  For hits stored in the
               supporting_feature table (linked to exons) use
               fetch_all_by_exon_supporting_evidence instead.
  Returntype : Listref of Bio::EnsEMBL::Transcript objects
  Exceptions : If feature_type is not exactly one of the two allowed
               values.
  Caller     : general
  Status     : Stable

=cut

sub fetch_all_by_transcript_supporting_evidence {
  my ( $self, $hit_name, $feature_type, $analysis ) = @_;

  # $feature_type is interpolated into the SQL below as a table name and
  # as part of a column name, so it must match one of the two allowed
  # values exactly.  The pattern is anchored and the alternation grouped.
  if ( !defined($feature_type)
    || $feature_type !~ /\A(?:dna|protein)_align_feature\z/ )
  {
    throw("feature type must be dna_align_feature or protein_align_feature");
  }

  my $anal_from = "";
  $anal_from = ", analysis a " if ($analysis);
  my $anal_where = "";
  $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? "
    if ($analysis);

  my $sql = qq(
  SELECT DISTINCT(t.transcript_id)
  FROM transcript t,
  transcript_supporting_feature sf,
  $feature_type f
  $anal_from
  WHERE t.transcript_id = sf.transcript_id
  AND t.is_current = 1
  AND sf.feature_id = f.${feature_type}_id
  AND sf.feature_type = ?
  AND f.hit_name=?
  $anal_where
  );

  my $sth = $self->prepare($sql);

  $sth->bind_param( 1, $feature_type,     SQL_VARCHAR );
  $sth->bind_param( 2, $hit_name,         SQL_VARCHAR );
  $sth->bind_param( 3, $analysis->dbID(), SQL_INTEGER ) if ($analysis);

  $sth->execute();

  my @transcripts;

  # Fetch in list context: the loop ends when there are no more rows,
  # not when a fetched value happens to be false.
  while ( my ($id) = $sth->fetchrow_array() ) {
    my $transcript = $self->fetch_by_dbID($id);
    push( @transcripts, $transcript ) if $transcript;
  }

  return \@transcripts;
}
1847
1848
1849 ##########################
1850 # #
1851 # DEPRECATED METHODS #
1852 # #
1853 ##########################
1854
1855
=head2 get_display_xref

  Arg [1]    : Bio::EnsEMBL::Transcript $transcript
  Description: DEPRECATED. Use $transcript->display_xref() instead.
               Looks up the xref referenced by the display_xref_id of the
               transcript's row and wraps it in a Bio::EnsEMBL::DBEntry.
  Returntype : Bio::EnsEMBL::DBEntry, or undef if the transcript row has
               no matching display xref
  Exceptions : throw if no transcript is given

=cut

sub get_display_xref {
  my ( $self, $transcript ) = @_;

  deprecate("display_xref should be retreived from Transcript object directly.");

  if ( !defined $transcript ) {
    throw("Must call with a Transcript object");
  }

  my $sth = $self->prepare(qq(
  SELECT e.db_name,
  x.display_label,
  e.db_external_name,
  x.xref_id
  FROM transcript t,
  xref x,
  external_db e
  WHERE t.transcript_id = ?
  AND t.display_xref_id = x.xref_id
  AND x.external_db_id = e.external_db_id
  ));

  $sth->bind_param( 1, $transcript->dbID, SQL_INTEGER );
  $sth->execute();

  # The variables are listed in the same order as the selected columns.
  my ( $db_name, $display_label, $display_db_name, $xref_id ) =
    $sth->fetchrow_array();

  # Only one row is read, so release the handle explicitly.
  $sth->finish();

  if ( !defined $xref_id ) {
    return undef;
  }

  my $db_entry = Bio::EnsEMBL::DBEntry->new(
    -dbid            => $xref_id,
    -adaptor         => $self->db->get_DBEntryAdaptor(),
    -dbname          => $db_name,
    -display_id      => $display_label,
    -db_display_name => $display_db_name
  );

  return $db_entry;
}
1904
1905
=head2 get_stable_entry_info

  Arg [1]    : Bio::EnsEMBL::Transcript $transcript
  Description: DEPRECATED. Use $transcript->stable_id() instead.
               Reads stable_id and version from the transcript's row and
               stores them on the given object.
  Returntype : 1
  Exceptions : throw if the argument is not a Bio::EnsEMBL::Transcript

=cut

sub get_stable_entry_info {
  my ( $self, $transcript ) = @_;

  deprecate("Stable ids should be loaded directly now");

  unless ( defined $transcript
        && ref $transcript
        && $transcript->isa('Bio::EnsEMBL::Transcript') )
  {
    throw("Needs a Transcript object, not a $transcript");
  }

  my $sth = $self->prepare(qq(
  SELECT stable_id, version
  FROM transcript
  WHERE transcript_id = ?
  ));

  $sth->bind_param( 1, $transcript->dbID, SQL_INTEGER );
  $sth->execute();

  my ( $stable_id, $version ) = $sth->fetchrow_array();

  # Only one row is read, so release the handle explicitly.
  $sth->finish();

  # Legacy keys, kept for any code that still reads them directly.
  $transcript->{'_stable_id'} = $stable_id;
  $transcript->{'_version'}   = $version;

  # Transcripts built by _objs_from_sth in this adaptor carry these
  # values under 'stable_id' and 'version', so also set them through
  # the accessors.
  # NOTE(review): assumes Bio::EnsEMBL::Transcript provides stable_id()
  # and version() setters - confirm against the Transcript class.
  if ( defined $stable_id ) {
    $transcript->stable_id($stable_id);
    $transcript->version($version);
  }

  return 1;
}
1937
1938
=head2 fetch_all_by_DBEntry

  Description: DEPRECATED. Use fetch_all_by_external_name() instead.
               Emits a deprecation message and passes every argument on
               to fetch_all_by_external_name().

=cut

sub fetch_all_by_DBEntry {
  my ( $self, @external_name_args ) = @_;

  deprecate('Use fetch_all_by_external_name instead.');

  return $self->fetch_all_by_external_name(@external_name_args);
}
1950
1951
1952 1;