0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::DBSQL::TranscriptAdaptor - An adaptor which performs database
|
|
24 interaction relating to the storage and retrieval of Transcripts
|
|
25
|
|
26 =head1 SYNOPSIS
|
|
27
|
|
28 use Bio::EnsEMBL::Registry;
|
|
29
|
|
30 Bio::EnsEMBL::Registry->load_registry_from_db(
|
|
31 -host => 'ensembldb.ensembl.org',
|
|
32 -user => 'anonymous'
|
|
33 );
|
|
34
|
|
35 $transcript_adaptor =
|
|
36 Bio::EnsEMBL::Registry->get_adaptor( 'Human', 'Core',
|
|
37 'Transcript' );
|
|
38
|
|
39 $transcript = $transcript_adaptor->fetch_by_dbID(1234);
|
|
40
|
|
41 $transcript =
|
|
42 $transcript_adaptor->fetch_by_stable_id('ENST00000201961');
|
|
43
|
|
44 $slice =
|
|
45 $slice_adaptor->fetch_by_region( 'Chromosome', '3', 1, 1000000 );
|
|
46 @transcripts = @{ $transcript_adaptor->fetch_all_by_Slice($slice) };
|
|
47
|
|
48 ($transcript) =
|
|
49 @{ $transcript_adaptor->fetch_all_by_external_name('NP_065811.1') };
|
|
50
|
|
51 =head1 DESCRIPTION
|
|
52
|
|
53 This adaptor provides a means to retrieve and store information related
|
|
54 to Transcripts. Primarily this involves the retrieval or storage of
|
|
55 Bio::EnsEMBL::Transcript objects from a database.
|
|
56
|
|
57 See Bio::EnsEMBL::Transcript for details of the Transcript class.
|
|
58
|
|
59 =cut
|
|
60
|
|
61 package Bio::EnsEMBL::DBSQL::TranscriptAdaptor;
|
|
62
|
|
63 use strict;
|
|
64
|
|
65 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
|
|
66 use Bio::EnsEMBL::Gene;
|
|
67 use Bio::EnsEMBL::Exon;
|
|
68 use Bio::EnsEMBL::Transcript;
|
|
69 use Bio::EnsEMBL::Translation;
|
|
70 use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning );
|
|
71
|
|
72 use vars qw(@ISA);
|
|
73 @ISA = qw( Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor );
|
|
74
|
|
75
|
|
76 # _tables
|
|
77 #
|
|
78 # Description: PROTECTED implementation of superclass abstract method.
|
|
79 # Returns the names, aliases of the tables to use for queries.
|
|
80 # Returntype : list of listrefs of strings
|
|
81 # Exceptions : none
|
|
82 # Caller : internal
|
|
83 # Status : Stable
|
|
84
|
|
# Tables (and their aliases) that transcript queries are built from.
# Each entry is a [ table_name, alias ] pair; the aliases are the ones
# referenced by the column names in _columns() and the join conditions
# in _left_join().
sub _tables {
  return (
    [ qw( transcript t ) ],
    [ qw( xref x ) ],
    [ qw( external_db exdb ) ],
  );
}
|
|
91
|
|
92
|
|
93 #_columns
|
|
94 #
|
|
95 # Description: PROTECTED implementation of superclass abstract method.
|
|
96 # Returns a list of columns to use for queries.
|
|
97 # Returntype : list of strings
|
|
98 # Exceptions : none
|
|
99 # Caller : internal
|
|
100 # Status : Stable
|
|
101
|
|
# Column expressions selected for every transcript query, in the order
# the result rows are expected to carry them.  The two date columns are
# not selected directly: they are passed through the connection's
# from_date_to_seconds() helper (presumably yielding a database-specific
# date-to-seconds SQL expression -- confirm against the DBConnection
# class).
sub _columns {
  my ($self) = @_;

  my $dbc           = $self->db()->dbc();
  my $created_expr  = $dbc->from_date_to_seconds("created_date");
  my $modified_expr = $dbc->from_date_to_seconds("modified_date");

  return (
    qw(
      t.transcript_id     t.seq_region_id
      t.seq_region_start  t.seq_region_end
      t.seq_region_strand t.analysis_id
      t.gene_id           t.is_current
      t.stable_id         t.version
      ),
    $created_expr, $modified_expr,
    qw(
      t.description       t.biotype
      t.status            exdb.db_name
      exdb.status         exdb.db_display_name
      x.xref_id           x.display_label
      x.dbprimary_acc     x.version
      x.description       x.info_type
      x.info_text
      ),
  );
}
|
|
126
|
|
# Join conditions for the optional tables: each entry is a
# [ table_name, condition ] pair.  The display xref hangs off the
# transcript, and the external database hangs off that xref.
sub _left_join {
  return (
    [ xref        => 'x.xref_id = t.display_xref_id' ],
    [ external_db => 'exdb.external_db_id = x.external_db_id' ],
  );
}
|
|
133
|
|
134
|
|
135 =head2 fetch_by_stable_id
|
|
136
|
|
137 Arg [1] : String $stable_id
|
|
138 The stable id of the transcript to retrieve
|
|
139 Example : my $tr = $tr_adaptor->fetch_by_stable_id('ENST00000309301');
|
|
140 Description: Retrieves a transcript via its stable id.
|
|
141 Returntype : Bio::EnsEMBL::Transcript
|
|
142 Exceptions : none
|
|
143 Caller : general
|
|
144 Status : Stable
|
|
145
|
|
146 =cut
|
|
147
|
|
# Fetches the current transcript carrying the given stable id.  Only the
# first row returned by generic_fetch() is handed back; undef results
# when nothing matched.
sub fetch_by_stable_id {
  my ( $self, $stable_id ) = @_;

  # The bound value fills the single placeholder in the constraint.
  $self->bind_param_generic_fetch( $stable_id, SQL_VARCHAR );

  my @matches =
    @{ $self->generic_fetch("t.stable_id = ? AND t.is_current = 1") };

  return $matches[0];
}
|
|
159
|
|
160
|
|
# Fetches every current transcript whose biotype is not "LRG_gene".
# Returns a reference to a fresh array holding the fetched objects.
sub fetch_all {
  my ($self) = @_;

  my $current_non_lrg = 't.biotype != "LRG_gene" and t.is_current = 1';

  # Copy the fetched list so the caller owns the returned array.
  return [ @{ $self->generic_fetch($current_non_lrg) } ];
}
|
|
168
|
|
169 =head2 fetch_all_versions_by_stable_id
|
|
170
|
|
171 Arg [1] : String $stable_id
|
|
172 The stable ID of the transcript to retrieve
|
|
173 Example : my $tr = $tr_adaptor->fetch_all_versions_by_stable_id
|
|
174 ('ENST00000309301');
|
|
175 Description : Similar to fetch_by_stable_id, but retrieves all versions of a
|
|
176 transcript stored in the database.
|
|
177 Returntype : listref of Bio::EnsEMBL::Transcript objects
|
|
178 Exceptions : if we cant get the gene in given coord system
|
|
179 Caller : general
|
|
180 Status : At Risk
|
|
181
|
|
182 =cut
|
|
183
|
|
# Fetches every stored version of the transcript with the given stable
# id.  Unlike fetch_by_stable_id() there is no is_current filter, so
# whatever generic_fetch() yields for the stable id is returned as is.
sub fetch_all_versions_by_stable_id {
  my ( $self, $stable_id ) = @_;

  # The bound value fills the single placeholder in the constraint.
  $self->bind_param_generic_fetch( $stable_id, SQL_VARCHAR );

  return $self->generic_fetch("t.stable_id = ?");
}
|
|
193
|
|
194
|
|
195 =head2 fetch_by_translation_stable_id
|
|
196
|
|
197 Arg [1] : String $transl_stable_id
|
|
198 The stable identifier of the translation of the transcript to
|
|
199 retrieve
|
|
200 Example : my $tr = $tr_adaptor->fetch_by_translation_stable_id
|
|
201 ('ENSP00000311007');
|
|
202 Description: Retrieves a Transcript object using the stable identifier of
|
|
203 its translation.
|
|
204 Returntype : Bio::EnsEMBL::Transcript
|
|
205 Exceptions : none
|
|
206 Caller : general
|
|
207 Status : Stable
|
|
208
|
|
209 =cut
|
|
210
|
|
# Looks up the current transcript that owns the translation with the
# given stable id and fetches it by its internal id.  Returns undef when
# no (true) transcript id comes back from the lookup.
sub fetch_by_translation_stable_id {
  my ( $self, $transl_stable_id ) = @_;

  my $sth = $self->prepare(qq(
      SELECT t.transcript_id
      FROM   translation tl,
             transcript t
      WHERE  tl.stable_id = ?
      AND    tl.transcript_id = t.transcript_id
      AND    t.is_current = 1
  ));

  $sth->bind_param( 1, $transl_stable_id, SQL_VARCHAR );
  $sth->execute();

  # Only the first matching row is used.
  my ($transcript_id) = $sth->fetchrow_array;
  $sth->finish;

  return undef if !$transcript_id;

  return $self->fetch_by_dbID($transcript_id);
}
|
|
234
|
|
235
|
|
236 =head2 fetch_by_translation_id
|
|
237
|
|
238 Arg [1] : Int $id
|
|
239 The internal identifier of the translation whose transcript
|
|
240 is to be retrieved
|
|
241 Example : my $tr = $tr_adaptor->fetch_by_translation_id($transl->dbID);
|
|
242 Description: Given the internal identifier of a translation this method
|
|
243 retrieves the transcript associated with that translation.
|
|
244 If the transcript cannot be found undef is returned instead.
|
|
245 Returntype : Bio::EnsEMBL::Transcript or undef
|
|
246 Exceptions : none
|
|
247 Caller : general
|
|
248 Status : Stable
|
|
249
|
|
250 =cut
|
|
251
|
|
# Fetches the transcript that owns the translation with the given
# internal id.  Throws when no id is supplied; returns undef when the
# translation table yields no (true) transcript id.
sub fetch_by_translation_id {
  my ( $self, $p_dbID ) = @_;

  throw("dbID argument is required") unless defined($p_dbID);

  my $sth = $self->prepare(
    "SELECT transcript_id FROM translation WHERE translation_id = ?");

  $sth->bind_param( 1, $p_dbID, SQL_INTEGER );
  $sth->execute();

  # Only the first matching row is used.
  my ($transcript_id) = $sth->fetchrow_array();
  $sth->finish();

  return $transcript_id ? $self->fetch_by_dbID($transcript_id) : undef;
}
|
|
276
|
|
277 =head2 fetch_all_by_Gene
|
|
278
|
|
279 Arg [1] : Bio::EnsEMBL::Gene $gene
|
|
280 The gene to fetch transcripts of
|
|
281 Example : my $gene = $gene_adaptor->fetch_by_stable_id('ENSG0000123');
|
|
282 my @transcripts = @{ $tr_adaptor->fetch_all_by_Gene($gene) };
|
|
283 Description: Retrieves Transcript objects for given gene. Puts Genes slice
|
|
284 in each Transcript.
|
|
285 Returntype : Listref of Bio::EnsEMBL::Transcript objects
|
|
286 Exceptions : none
|
|
287 Caller : Gene->get_all_Transcripts()
|
|
288 Status : Stable
|
|
289
|
|
290 =cut
|
|
291
|
|
# Fetches the transcripts of a gene, placed on the gene's own slice, and
# flags the one that equals the gene's canonical transcript.  Throws if
# the gene has no slice attached.
sub fetch_all_by_Gene {
  my ( $self, $gene ) = @_;

  my $gene_constraint = "t.gene_id = " . $gene->dbID();

  my $gene_slice = $gene->slice();
  throw("Gene must have attached slice to retrieve transcripts.")
    unless defined($gene_slice);

  # The query goes through fetch_all_by_Slice_constraint because it
  # handles the difficult Haps/PARs and coordinate remapping.
  #
  # All transcripts of the gene are wanted, not only those overlapping
  # the slice the gene sits on, so when the gene sticks out of its slice
  # a slice covering the whole gene is used instead.  Circular slices
  # are left alone, and for speed the gene's slice is reused whenever
  # the gene lies entirely inside it.
  my $gene_overhangs =
    $gene->start() < 1 || $gene->end() > $gene_slice->length();

  my $fetch_slice = $gene_slice;
  if ( $gene_overhangs && !$gene_slice->is_circular() ) {
    $fetch_slice = $self->db->get_SliceAdaptor->fetch_by_Feature($gene);
  }

  my $transcripts =
    $self->fetch_all_by_Slice_constraint( $fetch_slice, $gene_constraint );

  # Move the transcripts back onto the gene's slice if a different
  # slice was used for the query.
  if ( $fetch_slice != $gene_slice ) {
    $transcripts = [ map { $_->transfer($gene_slice) } @{$transcripts} ];
  }

  # Mark the first transcript matching the gene's canonical transcript.
  my $canonical = $gene->canonical_transcript();

  for my $candidate ( @{$transcripts} ) {
    next unless $candidate->equals($canonical);
    $candidate->is_canonical(1);
    last;
  }

  return $transcripts;
} ## end sub fetch_all_by_Gene
|
|
346
|
|
347
|
|
348 =head2 fetch_all_by_Slice
|
|
349
|
|
350 Arg [1] : Bio::EnsEMBL::Slice $slice
|
|
351 The slice to fetch transcripts on
|
|
352 Arg [2] : (optional) Boolean $load_exons
|
|
353 If true, exons will be loaded immediately rather than
|
|
354 lazy loaded later
|
|
355 Arg [3] : (optional) String $logic_name
|
|
356 The logic name of the type of features to obtain
|
|
357 Arg [4] : (optional) String $constraint
|
|
358 An extra constraint.
|
|
359 Example : my @transcripts = @{ $tr_adaptor->fetch_all_by_Slice($slice) };
|
|
360 Description: Overrides superclass method to optionally load exons
|
|
361 immediately rather than lazy-loading them later. This
|
|
362 is more efficient when there are a lot of transcripts whose
|
|
363 exons are going to be used.
|
|
364 Returntype : Listref of Bio::EnsEMBL::Transcript objects
|
|
365 Exceptions : thrown if exon cannot be placed on transcript slice
|
|
366 Caller : Slice::get_all_Transcripts
|
|
367 Status : Stable
|
|
368
|
|
369 =cut
|
|
370
|
|
# Fetches the current transcripts on a slice, optionally narrowed by a
# logic name and an extra SQL constraint.  When $load_exons is true and
# more than one transcript was found, the exons of all transcripts are
# loaded with one query and attached up front, and the translation
# adaptor is asked to load the translations for the whole list, instead
# of leaving both to be lazy-loaded per transcript.
#
# Throws if a preloaded exon cannot be transferred onto $slice.
sub fetch_all_by_Slice {
  my ( $self, $slice, $load_exons, $logic_name, $constraint ) = @_;

  # Only current transcripts are returned; a caller-supplied constraint
  # is ANDed onto that filter.
  my $full_constraint = 't.is_current = 1';
  if ( defined($constraint) && $constraint ne '' ) {
    $full_constraint .= ' AND ' . $constraint;
  }

  my $transcripts =
    $self->SUPER::fetch_all_by_Slice_constraint( $slice, $full_constraint,
                                                 $logic_name );

  # if there are 0 or 1 transcripts still do lazy-loading
  if ( !$load_exons || @$transcripts < 2 ) {
    return $transcripts;
  }

  # preload all of the exons now, instead of lazy loading later
  # faster than 1 query per transcript

  # first check if the exons are already preloaded
  # @todo FIXME: Should test all exons.
  if ( exists( $transcripts->[0]->{'_trans_exon_array'} ) ) {
    return $transcripts;
  }

  # get extent of region spanned by transcripts
  my ( $min_start, $max_end );
  foreach my $tr (@$transcripts) {
    if ( !defined($min_start) || $tr->seq_region_start() < $min_start ) {
      $min_start = $tr->seq_region_start();
    }
    if ( !defined($max_end) || $tr->seq_region_end() > $max_end ) {
      $max_end = $tr->seq_region_end();
    }
  }

  # Exons are fetched from a slice covering all transcripts; the
  # requested slice is reused when it already spans that region.
  my $ext_slice;

  if ( $min_start >= $slice->start() && $max_end <= $slice->end() ) {
    $ext_slice = $slice;
  } else {
    my $sa = $self->db()->get_SliceAdaptor();
    $ext_slice = $sa->fetch_by_region(
                        $slice->coord_system->name(), $slice->seq_region_name(),
                        $min_start,                   $max_end,
                        $slice->strand(), $slice->coord_system->version() );
  }

  # associate exon identifiers with transcripts

  my %tr_hash = map { $_->dbID => $_ } @{$transcripts};

  my $tr_id_str = join( ',', keys(%tr_hash) );

  my $sth =
    $self->prepare( "SELECT transcript_id, exon_id, rank "
                  . "FROM exon_transcript "
                  . "WHERE transcript_id IN ($tr_id_str)" );

  $sth->execute();

  my ( $tr_id, $ex_id, $rank );
  $sth->bind_columns( \( $tr_id, $ex_id, $rank ) );

  # exon id => list of [ transcript, rank ] pairs using that exon
  my %ex_tr_hash;

  while ( $sth->fetch() ) {
    push( @{ $ex_tr_hash{$ex_id} }, [ $tr_hash{$tr_id}, $rank ] );
  }

  # Without any exon_transcript rows the IN list below would be empty,
  # which is not valid SQL, so the exon fetch is skipped entirely.
  if (%ex_tr_hash) {
    my $ea    = $self->db()->get_ExonAdaptor();
    my $exons = $ea->fetch_all_by_Slice_constraint(
      $ext_slice,
      sprintf( "e.exon_id IN (%s)",
               join( ',', sort { $a <=> $b } keys(%ex_tr_hash) ) ) );

    # move exons onto transcript slice, and add them to transcripts
    foreach my $ex ( @{$exons} ) {
      my $new_ex;
      if ( $slice != $ext_slice ) {
        $new_ex = $ex->transfer($slice);
        if ( !defined($new_ex) ) {
          throw("Unexpected. "
              . "Exon could not be transfered onto Transcript slice." );
        }
      } else {
        $new_ex = $ex;
      }

      foreach my $row ( @{ $ex_tr_hash{ $new_ex->dbID() } } ) {
        my ( $tr, $rank ) = @{$row};
        $tr->add_Exon( $new_ex, $rank );
      }
    }
  }

  my $tla = $self->db()->get_TranslationAdaptor();

  # load all of the translations at once
  $tla->fetch_all_by_Transcript_list($transcripts);

  return $transcripts;
} ## end sub fetch_all_by_Slice
|
|
476
|
|
477
|
|
478 =head2 fetch_all_by_external_name
|
|
479
|
|
480 Arg [1] : String $external_name
|
|
481 An external identifier of the transcript to be obtained
|
|
482 Arg [2] : (optional) String $external_db_name
|
|
483 The name of the external database from which the
|
|
484 identifier originates.
|
|
485 Arg [3] : Boolean override. Force SQL regex matching for users
|
|
486 who really do want to find all 'NM%'
|
|
487 Example : my @transcripts =
|
|
488 @{ $tr_adaptor->fetch_all_by_external_name( 'NP_065811.1') };
|
|
489 my @more_transcripts =
|
|
490 @{$tr_adaptor->fetch_all_by_external_name( 'NP_0658__._')};
|
|
491 Description: Retrieves all transcripts which are associated with
|
|
492 an external identifier such as a GO term, Swissprot
|
|
493 identifier, etc. Usually there will only be a single
|
|
494 transcript returned in the list reference, but not
|
|
495 always. Transcripts are returned in their native
|
|
496 coordinate system, i.e. the coordinate system in which
|
|
497 they are stored in the database. If they are required
|
|
498 in another coordinate system the Transcript::transfer or
|
|
499 Transcript::transform method can be used to convert them.
|
|
500 If no transcripts with the external identifier are found,
|
|
501 a reference to an empty list is returned.
|
|
502 SQL wildcards % and _ are supported in the $external_name
|
|
503 but their use is somewhat restricted for performance reasons.
|
|
504 Users that really do want % and _ in the first three characters
|
|
505 should use argument 3 to prevent optimisations
|
|
506 Returntype : listref of Bio::EnsEMBL::Transcript
|
|
507 Exceptions : none
|
|
508 Caller : general
|
|
509 Status : Stable
|
|
510
|
|
511 =cut
|
|
512
|
|
# Fetches the transcripts associated with an external identifier.  The
# DBEntry adaptor resolves the identifier (optionally restricted to one
# external database, with $override passed straight through) to
# transcript ids, which are then fetched as a list.
sub fetch_all_by_external_name {
  my ( $self, $external_name, $external_db_name, $override ) = @_;

  my @transcript_ids =
    $self->db->get_DBEntryAdaptor()
    ->list_transcript_ids_by_extids( $external_name, $external_db_name,
                                     $override );

  return $self->fetch_all_by_dbID_list( \@transcript_ids );
}
|
|
524
|
|
525 =head2 fetch_all_by_GOTerm
|
|
526
|
|
527 Arg [1] : Bio::EnsEMBL::OntologyTerm
|
|
528 The GO term for which transcripts should be fetched.
|
|
529
|
|
530 Example: @transcripts = @{
|
|
531 $transcript_adaptor->fetch_all_by_GOTerm(
|
|
532 $go_adaptor->fetch_by_accession('GO:0030326') ) };
|
|
533
|
|
534 Description : Retrieves a list of transcripts that are
|
|
535 associated with the given GO term, or with any of
|
|
536 its descendent GO terms. The transcripts returned
|
|
537 are in their native coordinate system, i.e. in
|
|
538 the coordinate system in which they are stored
|
|
539 in the database. If another coordinate system
|
|
540 is required then the Transcript::transfer or
|
|
541 Transcript::transform method can be used.
|
|
542
|
|
543 Return type : listref of Bio::EnsEMBL::Transcript
|
|
544 Exceptions : Throws if argument is not a GO term
|
|
545 Caller : general
|
|
546 Status : Stable
|
|
547
|
|
548 =cut
|
|
549
|
|
# Fetches the transcripts annotated with the given GO term or with any
# of its descendant terms.  Transcript ids are collected per accession
# through the DBEntry adaptor, de-duplicated, and fetched in ascending
# numeric id order.
#
# Throws if the argument is not a Bio::EnsEMBL::OntologyTerm object or
# if its ontology is not 'GO'.
sub fetch_all_by_GOTerm {
  my ( $self, $term ) = @_;

  # The type check is done inline with names already available in this
  # package, so it does not rely on an assert_ref() helper being
  # imported.
  if ( !ref($term)
       || !UNIVERSAL::isa( $term, 'Bio::EnsEMBL::OntologyTerm' ) )
  {
    throw('Argument is not a Bio::EnsEMBL::OntologyTerm');
  }

  if ( $term->ontology() ne 'GO' ) {
    throw('Argument is not a GO term');
  }

  my $entryAdaptor = $self->db->get_DBEntryAdaptor();

  # Keys are transcript ids; a hash is used so that a transcript linked
  # to several of the terms is only fetched once.
  my %unique_dbIDs;
  foreach my $accession ( map { $_->accession() }
                          ( $term, @{ $term->descendants() } ) )
  {
    my @ids =
      $entryAdaptor->list_transcript_ids_by_extids( $accession, 'GO' );
    foreach my $dbID (@ids) { $unique_dbIDs{$dbID} = 1 }
  }

  my @result = @{
    $self->fetch_all_by_dbID_list(
                              [ sort { $a <=> $b } keys(%unique_dbIDs) ]
    ) };

  return \@result;
} ## end sub fetch_all_by_GOTerm
|
|
576
|
|
577 =head2 fetch_all_by_GOTerm_accession
|
|
578
|
|
579 Arg [1] : String
|
|
580 The GO term accession for which transcripts should be
|
|
581 fetched.
|
|
582
|
|
583 Example :
|
|
584
|
|
585 @transcripts =
|
|
586 @{ $transcript_adaptor->fetch_all_by_GOTerm_accession(
|
|
587 'GO:0030326') };
|
|
588
|
|
589 Description : Retrieves a list of transcripts that are associated with
|
|
590 the given GO term, or with any of its descendent
|
|
591 GO terms. The transcripts returned are in their native
|
|
592 coordinate system, i.e. in the coordinate system
|
|
593 in which they are stored in the database. If
|
|
594 another coordinate system is required then the
|
|
595 Transcript::transfer or Transcript::transform method can be
|
|
596 used.
|
|
597
|
|
598 Return type : listref of Bio::EnsEMBL::Transcript
|
|
599 Exceptions : Throws if argument is not a GO term accession
|
|
600 Caller : general
|
|
601 Status : Stable
|
|
602
|
|
603 =cut
|
|
604
|
|
# Fetches the transcripts annotated with the GO term identified by the
# given accession (or with any of its descendant terms) by resolving
# the accession to a term and delegating to fetch_all_by_GOTerm().
#
# Throws if the accession is undefined or does not start with 'GO:', if
# the registry yields no ontology term adaptor, or if no term is found
# for the accession.
sub fetch_all_by_GOTerm_accession {
  my ( $self, $accession ) = @_;

  # The defined() test keeps an undefined argument out of the pattern
  # match, which would otherwise emit an uninitialized-value warning.
  if ( !defined($accession) || $accession !~ /^GO:/ ) {
    throw('Argument is not a GO term accession');
  }

  my $goAdaptor =
    Bio::EnsEMBL::Registry->get_adaptor( 'Multi', 'Ontology',
                                         'OntologyTerm' );

  # Fail with an explicit message rather than a method call on undef.
  if ( !defined($goAdaptor) ) {
    throw('Could not obtain an OntologyTerm adaptor from the registry');
  }

  my $term = $goAdaptor->fetch_by_accession($accession);

  if ( !defined($term) ) {
    throw("GO term '$accession' could not be found");
  }

  return $self->fetch_all_by_GOTerm($term);
}
|
|
620
|
|
621 =head2 fetch_by_display_label
|
|
622
|
|
623 Arg [1] : String $label - display label of transcript to fetch
|
|
624 Example : my $tr = $tr_adaptor->fetch_by_display_label("BRCA2");
|
|
625 Description: Returns the transcript which has the given display label or
|
|
626 undef if there is none. If there are more than 1, only the first
|
|
627 is reported.
|
|
628 Returntype : Bio::EnsEMBL::Transcript
|
|
629 Exceptions : none
|
|
630 Caller : general
|
|
631 Status : Stable
|
|
632
|
|
633 =cut
|
|
634
|
|
# Fetches a current transcript whose display xref carries the given
# label.  Only the first row returned by generic_fetch() is handed
# back; undef results when nothing matched.
sub fetch_by_display_label {
  my ( $self, $label ) = @_;

  # The bound value fills the single placeholder in the constraint.
  $self->bind_param_generic_fetch( $label, SQL_VARCHAR );

  my @matches =
    @{ $self->generic_fetch("x.display_label = ? AND t.is_current = 1") };

  return $matches[0];
}
|
|
647
|
|
648
|
|
649 =head2 fetch_all_by_exon_stable_id
|
|
650
|
|
651 Arg [1] : String $stable_id
|
|
652 The stable id of an exon in a transcript
|
|
653 Example : my $tr = $tr_adaptor->fetch_all_by_exon_stable_id
|
|
654 ('ENSE00000309301');
|
|
655 Description: Retrieves a list of transcripts via an exon stable id.
|
|
656 Returntype : Listref of Bio::EnsEMBL::Transcript objects, or undef if none is found
|
|
657 Exceptions : none
|
|
658 Caller : general
|
|
659 Status : Stable
|
|
660
|
|
661 =cut
|
|
662
|
|
# Fetches the current transcripts that contain the exon with the given
# stable id.  Each transcript id found is fetched individually and kept
# only if the fetch yields a true value.  Returns a listref, or undef
# when no transcript was collected.
sub fetch_all_by_exon_stable_id {
  my ( $self, $stable_id ) = @_;

  my $sth = $self->prepare(qq(
      SELECT t.transcript_id
      FROM   exon_transcript et, exon e, transcript t
      WHERE  e.exon_id = et.exon_id
      AND    et.transcript_id = t.transcript_id
      AND    e.stable_id = ?
      AND    t.is_current = 1
  ));

  $sth->bind_param( 1, $stable_id, SQL_VARCHAR );
  $sth->execute();

  my @transcripts;

  # One id per row; the loop ends on the first false value.
  while ( my $transcript_id = $sth->fetchrow_array ) {
    my $transcript = $self->fetch_by_dbID($transcript_id);
    next unless $transcript;
    push( @transcripts, $transcript );
  }

  return @transcripts ? \@transcripts : undef;
}
|
|
691
|
|
692 =head2 fetch_all_by_biotype
|
|
693
|
|
694 Arg [1] : String $biotype
|
|
695 listref of $biotypes
|
|
696 The biotype of the transcripts to retrieve. You can also have a reference
|
|
697 to a list of biotypes in the event of needing several.
|
|
698 Example : $transcript = $transcript_adaptor->fetch_all_by_biotype('pseudogene');
|
|
699 $transcript = $transcript_adaptor->fetch_all_by_biotype(['protein_coding','ambiguous_orf']);
|
|
700 Description: Retrieves an array reference of transcript objects from the
|
|
701 database via its biotype or biotypes.
|
|
702 The transcript will be retrieved in its native coordinate system
|
|
703 (i.e. in the coordinate system it is stored in the database).
|
|
704 It may be converted to a different coordinate system through a
|
|
705 call to transform() or transfer(). If no transcript is found
|
|
706 a reference to an empty list is returned instead.
|
|
707 Returntype : listref of Bio::EnsEMBL::Transcript
|
|
708 Exceptions : if we cant get the transcript in given coord system
|
|
709 Caller : general
|
|
710 Status : Stable
|
|
711
|
|
712 =cut
|
|
713
|
|
# Fetches the current transcripts having the given biotype, or any of
# the biotypes in the given listref.  Returns a listref of the fetched
# objects; an empty listref of biotypes gives an empty result without
# querying.
#
# Throws if no biotype argument is supplied.
sub fetch_all_by_biotype {
  my ( $self, $biotype ) = @_;

  if ( !defined $biotype ) {
    throw("Biotype or listref of biotypes expected");
  }

  my $constraint;
  if ( ref($biotype) eq 'ARRAY' ) {
    my @biotypes = @{$biotype};

    # "IN ()" is not valid SQL, and an empty set of biotypes cannot
    # match anything, so answer directly.
    return [] if !@biotypes;

    # One placeholder per biotype, bound in the same order.
    foreach my $wanted (@biotypes) {
      $self->bind_param_generic_fetch( $wanted, SQL_VARCHAR );
    }
    $constraint =
        "t.biotype IN ("
      . join( ',', ('?') x scalar(@biotypes) )
      . ") and t.is_current = 1";
  } else {
    $constraint = "t.biotype = ? and t.is_current = 1";
    $self->bind_param_generic_fetch( $biotype, SQL_VARCHAR );
  }

  my @transcripts = @{ $self->generic_fetch($constraint) };
  return \@transcripts;
}
|
|
738
|
|
739
|
|
740 =head2 store
|
|
741
|
|
742 Arg [1] : Bio::EnsEMBL::Transcript $transcript
|
|
743 The transcript to be written to the database
|
|
744 Arg [2] : Int $gene_dbID
|
|
745 The identifier of the gene that this transcript is associated
|
|
746 with
|
|
747 Arg [3] : DEPRECATED (optional) Int $analysis_id
|
|
748 The analysis_id to use when storing this gene. This is for
|
|
749 backward compatibility only and used to fall back to the gene
|
|
750 analysis_id if no analysis object is attached to the transcript
|
|
751 (which you should do for new code).
|
|
752 Example : $transID = $tr_adaptor->store($transcript, $gene->dbID);
|
|
753 Description: Stores a transcript in the database and returns the new
|
|
754 internal identifier for the stored transcript.
|
|
755 Returntype : Int
|
|
756 Exceptions : none
|
|
757 Caller : general
|
|
758 Status : Stable
|
|
759
|
|
760 =cut
|
|
761
|
|
762 sub store {
|
|
763 my ( $self, $transcript, $gene_dbID, $analysis_id ) = @_;
|
|
764
|
|
765 if ( !ref($transcript)
|
|
766 || !$transcript->isa('Bio::EnsEMBL::Transcript') )
|
|
767 {
|
|
768 throw("$transcript is not a EnsEMBL transcript - not storing");
|
|
769 }
|
|
770
|
|
771 my $db = $self->db();
|
|
772
|
|
773 if ( $transcript->is_stored($db) ) {
|
|
774 return $transcript->dbID();
|
|
775 }
|
|
776
|
|
777 # Force lazy-loading of exons and ensure coords are correct.
|
|
778 $transcript->recalculate_coordinates();
|
|
779
|
|
780 my $is_current = ( defined( $transcript->is_current() )
|
|
781 ? $transcript->is_current()
|
|
782 : 1 );
|
|
783
|
|
784 # store analysis
|
|
785 my $analysis = $transcript->analysis();
|
|
786 my $new_analysis_id;
|
|
787
|
|
788 if ($analysis) {
|
|
789 if ( $analysis->is_stored($db) ) {
|
|
790 $new_analysis_id = $analysis->dbID;
|
|
791 } else {
|
|
792 $new_analysis_id = $db->get_AnalysisAdaptor->store($analysis);
|
|
793 }
|
|
794 } elsif ($analysis_id) {
|
|
795 # Fall back to analysis passed in (usually from gene) if analysis
|
|
796 # wasn't set explicitely for the transcript. This is deprectated
|
|
797 # though.
|
|
798 warning( "You should explicitely attach "
|
|
799 . "an analysis object to the Transcript. "
|
|
800 . "Will fall back to Gene analysis, "
|
|
801 . "but this behaviour is deprecated." );
|
|
802 $new_analysis_id = $analysis_id;
|
|
803 } else {
|
|
804 throw("Need an analysis_id to store the Transcript.");
|
|
805 }
|
|
806
|
|
807 #
|
|
808 # Store exons - this needs to be done before the possible transfer
|
|
809 # of the transcript to another slice (in _prestore()). Transfering
|
|
810 # results in copies being made of the exons and we need to preserve
|
|
811 # the object identity of the exons so that they are not stored twice
|
|
812 # by different transcripts.
|
|
813 #
|
|
814 my $exons = $transcript->get_all_Exons();
|
|
815 my $exonAdaptor = $db->get_ExonAdaptor();
|
|
816 foreach my $exon ( @{$exons} ) {
|
|
817 $exonAdaptor->store($exon);
|
|
818 }
|
|
819
|
|
820 my $original_translation = $transcript->translation();
|
|
821 my $original = $transcript;
|
|
822 my $seq_region_id;
|
|
823 ( $transcript, $seq_region_id ) = $self->_pre_store($transcript);
|
|
824
|
|
825 # First store the transcript without a display xref. The display xref
|
|
826 # needs to be set after xrefs are stored which needs to happen after
|
|
827 # transcript is stored.
|
|
828
|
|
829 #
|
|
830 # Store transcript
|
|
831 #
|
|
832 my $store_transcript_sql = qq(
|
|
833 INSERT INTO transcript
|
|
834 SET gene_id = ?,
|
|
835 analysis_id = ?,
|
|
836 seq_region_id = ?,
|
|
837 seq_region_start = ?,
|
|
838 seq_region_end = ?,
|
|
839 seq_region_strand = ?,
|
|
840 biotype = ?,
|
|
841 status = ?,
|
|
842 description = ?,
|
|
843 is_current = ?,
|
|
844 canonical_translation_id = ?
|
|
845 );
|
|
846
|
|
847 if ( defined( $transcript->stable_id() ) ) {
|
|
848
|
|
849 my $created = $self->db->dbc->from_seconds_to_date($transcript->created_date());
|
|
850 my $modified = $self->db->dbc->from_seconds_to_date($transcript->modified_date());
|
|
851 $store_transcript_sql .= ", stable_id = ?, version = ?, created_date = " . $created . " , modified_date = " . $modified;
|
|
852
|
|
853 }
|
|
854
|
|
855 my $tst = $self->prepare($store_transcript_sql);
|
|
856 $tst->bind_param( 1, $gene_dbID, SQL_INTEGER );
|
|
857 $tst->bind_param( 2, $new_analysis_id, SQL_INTEGER );
|
|
858 $tst->bind_param( 3, $seq_region_id, SQL_INTEGER );
|
|
859 $tst->bind_param( 4, $transcript->start(), SQL_INTEGER );
|
|
860 $tst->bind_param( 5, $transcript->end(), SQL_INTEGER );
|
|
861 $tst->bind_param( 6, $transcript->strand(), SQL_TINYINT );
|
|
862 $tst->bind_param( 7, $transcript->biotype(), SQL_VARCHAR );
|
|
863 $tst->bind_param( 8, $transcript->status(), SQL_VARCHAR );
|
|
864 $tst->bind_param( 9, $transcript->description(), SQL_LONGVARCHAR );
|
|
865 $tst->bind_param( 10, $is_current, SQL_TINYINT );
|
|
866
|
|
867 # If the transcript has a translation, this is updated later:
|
|
868 $tst->bind_param( 11, undef, SQL_INTEGER );
|
|
869
|
|
870 if ( defined( $transcript->stable_id() ) ) {
|
|
871
|
|
872 $tst->bind_param( 12, $transcript->stable_id(), SQL_VARCHAR );
|
|
873 my $version = ($transcript->version()) ? $transcript->version() : 1;
|
|
874 $tst->bind_param( 13, $version, SQL_INTEGER );
|
|
875 }
|
|
876
|
|
877
|
|
878 $tst->execute();
|
|
879 $tst->finish();
|
|
880
|
|
881 my $transc_dbID = $tst->{'mysql_insertid'};
|
|
882
|
|
883 #
|
|
884 # Store translation
|
|
885 #
|
|
886
|
|
887 my $alt_translations =
|
|
888 $transcript->get_all_alternative_translations();
|
|
889 my $translation = $transcript->translation();
|
|
890
|
|
891 if ( defined($translation) ) {
|
|
892 # Make sure that the start and end exon are set correctly.
|
|
893 my $start_exon = $translation->start_Exon();
|
|
894 my $end_exon = $translation->end_Exon();
|
|
895
|
|
896 if ( !defined($start_exon) ) {
|
|
897 throw("Translation does not define a start exon.");
|
|
898 }
|
|
899
|
|
900 if ( !defined($end_exon) ) {
|
|
901 throw("Translation does not defined an end exon.");
|
|
902 }
|
|
903
|
|
904 # If the dbID is not set, this means the exon must have been a
|
|
905 # different object in memory than the the exons of the transcript.
|
|
906 # Try to find the matching exon in all of the exons we just stored.
|
|
907 if ( !defined( $start_exon->dbID() ) ) {
|
|
908 my $key = $start_exon->hashkey();
|
|
909 ($start_exon) = grep { $_->hashkey() eq $key } @$exons;
|
|
910
|
|
911 if ( defined($start_exon) ) {
|
|
912 $translation->start_Exon($start_exon);
|
|
913 } else {
|
|
914 throw( "Translation's start_Exon does not appear "
|
|
915 . "to be one of the exons in "
|
|
916 . "its associated Transcript" );
|
|
917 }
|
|
918 }
|
|
919
|
|
920 if ( !defined( $end_exon->dbID() ) ) {
|
|
921 my $key = $end_exon->hashkey();
|
|
922 ($end_exon) = grep { $_->hashkey() eq $key } @$exons;
|
|
923
|
|
924 if ( defined($end_exon) ) {
|
|
925 $translation->end_Exon($end_exon);
|
|
926 } else {
|
|
927 throw( "Translation's end_Exon does not appear "
|
|
928 . "to be one of the exons in "
|
|
929 . "its associated Transcript." );
|
|
930 }
|
|
931 }
|
|
932
|
|
933 my $old_dbid = $translation->dbID();
|
|
934 $db->get_TranslationAdaptor()->store( $translation, $transc_dbID );
|
|
935
|
|
936 # Need to update the canonical_translation_id for this transcript.
|
|
937
|
|
938 my $sth = $self->prepare(
|
|
939 q(
|
|
940 UPDATE transcript
|
|
941 SET canonical_translation_id = ?
|
|
942 WHERE transcript_id = ?)
|
|
943 );
|
|
944
|
|
945 $sth->bind_param( 1, $translation->dbID(), SQL_INTEGER );
|
|
946 $sth->bind_param( 2, $transc_dbID, SQL_INTEGER );
|
|
947
|
|
948 $sth->execute();
|
|
949
|
|
950 # Set values of the original translation, we may have copied it when
|
|
951 # we transformed the transcript.
|
|
952 $original_translation->dbID( $translation->dbID() );
|
|
953 $original_translation->adaptor( $translation->adaptor() );
|
|
954 } ## end if ( defined($translation...))
|
|
955
|
|
956 #
|
|
957 # Store the alternative translations, if there are any.
|
|
958 #
|
|
959
|
|
960 if ( defined($alt_translations)
|
|
961 && scalar( @{$alt_translations} ) > 0 )
|
|
962 {
|
|
963 foreach my $alt_translation ( @{$alt_translations} ) {
|
|
964 my $start_exon = $alt_translation->start_Exon();
|
|
965 my $end_exon = $alt_translation->end_Exon();
|
|
966
|
|
967 if ( !defined($start_exon) ) {
|
|
968 throw("Translation does not define a start exon.");
|
|
969 } elsif ( !defined($end_exon) ) {
|
|
970 throw("Translation does not defined an end exon.");
|
|
971 }
|
|
972
|
|
973 if ( !defined( $start_exon->dbID() ) ) {
|
|
974 my $key = $start_exon->hashkey();
|
|
975 ($start_exon) = grep { $_->hashkey() eq $key } @{$exons};
|
|
976
|
|
977 if ( defined($start_exon) ) {
|
|
978 $alt_translation->start_Exon($start_exon);
|
|
979 } else {
|
|
980 throw( "Translation's start_Exon does not appear "
|
|
981 . "to be one of the exon in"
|
|
982 . "its associated Transcript" );
|
|
983 }
|
|
984 } elsif ( !defined( $end_exon->dbID() ) ) {
|
|
985 my $key = $end_exon->hashkey();
|
|
986 ($end_exon) = grep { $_->hashkey() eq $key } @$exons;
|
|
987
|
|
988 if ( defined($end_exon) ) {
|
|
989 $translation->end_Exon($end_exon);
|
|
990 } else {
|
|
991 throw( "Translation's end_Exon does not appear "
|
|
992 . "to be one of the exons in "
|
|
993 . "its associated Transcript." );
|
|
994 }
|
|
995 }
|
|
996
|
|
997 $db->get_TranslationAdaptor()
|
|
998 ->store( $alt_translation, $transc_dbID );
|
|
999 } ## end foreach my $alt_translation...
|
|
1000 } ## end if ( defined($alt_translations...))
|
|
1001
|
|
1002 #
|
|
1003 # Store the xrefs/object xref mapping.
|
|
1004 #
|
|
1005 my $dbEntryAdaptor = $db->get_DBEntryAdaptor();
|
|
1006
|
|
1007 foreach my $dbe ( @{ $transcript->get_all_DBEntries() } ) {
|
|
1008 $dbEntryAdaptor->store( $dbe, $transc_dbID, "Transcript", 1 );
|
|
1009 }
|
|
1010
|
|
1011 #
|
|
1012 # Update transcript to point to display xref if it is set.
|
|
1013 #
|
|
1014 if ( my $dxref = $transcript->display_xref() ) {
|
|
1015 my $dxref_id;
|
|
1016
|
|
1017 if ( $dxref->is_stored($db) ) {
|
|
1018 $dxref_id = $dxref->dbID();
|
|
1019 } else {
|
|
1020 $dxref_id = $dbEntryAdaptor->exists($dxref);
|
|
1021 }
|
|
1022
|
|
1023 if ( defined($dxref_id) ) {
|
|
1024 my $sth =
|
|
1025 $self->prepare( "UPDATE transcript "
|
|
1026 . "SET display_xref_id = ? "
|
|
1027 . "WHERE transcript_id = ?" );
|
|
1028 $sth->bind_param( 1, $dxref_id, SQL_INTEGER );
|
|
1029 $sth->bind_param( 2, $transc_dbID, SQL_INTEGER );
|
|
1030 $sth->execute();
|
|
1031 $dxref->dbID($dxref_id);
|
|
1032 $dxref->adaptor($dbEntryAdaptor);
|
|
1033 $sth->finish();
|
|
1034 } else {
|
|
1035 warning(sprintf(
|
|
1036 "Display_xref %s:%s is not stored in database.\n"
|
|
1037 . "Not storing relationship to this transcript.",
|
|
1038 $dxref->dbname(), $dxref->display_id() ) );
|
|
1039 $dxref->dbID(undef);
|
|
1040 $dxref->adaptor(undef);
|
|
1041 }
|
|
1042 } ## end if ( my $dxref = $transcript...)
|
|
1043
|
|
1044 #
|
|
1045 # Link transcript to exons in exon_transcript table
|
|
1046 #
|
|
1047 my $etst = $self->prepare(
|
|
1048 "INSERT INTO exon_transcript (exon_id,transcript_id,rank) "
|
|
1049 . "VALUES (?,?,?)" );
|
|
1050 my $rank = 1;
|
|
1051 foreach my $exon ( @{ $transcript->get_all_Exons } ) {
|
|
1052 $etst->bind_param( 1, $exon->dbID, SQL_INTEGER );
|
|
1053 $etst->bind_param( 2, $transc_dbID, SQL_INTEGER );
|
|
1054 $etst->bind_param( 3, $rank, SQL_INTEGER );
|
|
1055 $etst->execute();
|
|
1056 $rank++;
|
|
1057 }
|
|
1058
|
|
1059 $etst->finish();
|
|
1060
|
|
1061 # Now the supporting evidence
|
|
1062 my $tsf_adaptor = $db->get_TranscriptSupportingFeatureAdaptor();
|
|
1063 $tsf_adaptor->store( $transc_dbID,
|
|
1064 $transcript->get_all_supporting_features() );
|
|
1065
|
|
1066 # store transcript attributes if there are any
|
|
1067 my $attr_adaptor = $db->get_AttributeAdaptor();
|
|
1068
|
|
1069 $attr_adaptor->store_on_Transcript( $transc_dbID,
|
|
1070 $transcript->get_all_Attributes() );
|
|
1071
|
|
1072 # store the IntronSupportingEvidence features
|
|
1073 my $ise_adaptor = $db->get_IntronSupportingEvidenceAdaptor();
|
|
1074 my $intron_supporting_evidence = $transcript->get_all_IntronSupportingEvidence();
|
|
1075 foreach my $ise (@{$intron_supporting_evidence}) {
|
|
1076 $ise_adaptor->store($ise);
|
|
1077 $ise_adaptor->store_transcript_linkage($ise, $transcript, $transc_dbID);
|
|
1078 }
|
|
1079
|
|
1080 # Update the original transcript object - not the transfered copy that
|
|
1081 # we might have created.
|
|
1082 $original->dbID($transc_dbID);
|
|
1083 $original->adaptor($self);
|
|
1084
|
|
1085 return $transc_dbID;
|
|
1086 } ## end sub store
|
|
1087
|
|
1088
|
|
1089 =head2 get_Interpro_by_transid
|
|
1090
|
|
1091 Arg [1] : String $trans_stable_id
|
|
1092 The stable id of the transcript to obtain
|
|
1093 Example : @i = $tr_adaptor->get_Interpro_by_transid($trans->stable_id());
|
|
1094 Description: Gets interpro accession numbers by transcript stable id.
|
|
1095 A hack really - we should have a much more structured
|
|
1096 system than this.
|
|
1097 Returntype : listref of strings (Interpro_acc:description)
|
|
1098 Exceptions : none
|
|
1099 Caller : domainview? , GeneView
|
|
1100 Status : Stable
|
|
1101
|
|
1102 =cut
|
|
1103
|
|
sub get_Interpro_by_transid {
  my $self            = shift;
  my $trans_stable_id = shift;

  # Join from the current transcript with the given stable id, through
  # its translation and protein features, to the matching InterPro
  # accession and the description of the xref carrying that accession.
  my $sql = qq(
  SELECT STRAIGHT_JOIN i.interpro_ac, x.description
  FROM transcript t,
  translation tl,
  protein_feature pf,
  interpro i,
  xref x
  WHERE t.stable_id = ?
  AND tl.transcript_id = t.transcript_id
  AND tl.translation_id = pf.translation_id
  AND i.id = pf.hit_name
  AND i.interpro_ac = x.dbprimary_acc
  AND t.is_current = 1
  );

  my $sth = $self->prepare($sql);
  $sth->bind_param( 1, $trans_stable_id, SQL_VARCHAR );
  $sth->execute();

  # Collect one "accession:description" string per distinct accession,
  # keeping the first description seen and the order of the result set.
  my @interpro_strings;
  my %seen_accession;

  while ( my $row = $sth->fetchrow_arrayref() ) {
    my ( $accession, $description ) = @{$row};

    next if $seen_accession{$accession};
    $seen_accession{$accession} = 1;

    push( @interpro_strings, $accession . ":" . $description );
  }

  return \@interpro_strings;
}
|
|
1136
|
|
1137 =head2 is_Transcript_canonical()
|
|
1138
|
|
1139 Arg [1] : Bio::EnsEMBL::Transcript $transcript
|
|
1140 The transcript to query with
|
|
1141 Example : $tr_adaptor->is_Transcript_canonical($transcript);
|
|
1142 Description : Returns a true value if the given transcript is considered
|
|
1143 canonical with respect to a gene
|
|
1144 Returntype : Boolean
|
|
1145 Exceptions : None
|
|
1146 Caller : Bio::EnsEMBL::Transcript
|
|
1147 Status : Beta
|
|
1148
|
|
1149
|
|
1150 =cut
|
|
1151
|
|
sub is_Transcript_canonical {
  my $self       = shift;
  my $transcript = shift;

  # Count the genes whose canonical_transcript_id is this transcript's
  # dbID; the count is returned as-is and doubles as the boolean.
  my $helper        = $self->dbc()->sql_helper();
  my $transcript_id = $transcript->dbID();

  return $helper->execute_single_result(
    -SQL    => 'select count(*) from gene where canonical_transcript_id =?',
    -PARAMS => [$transcript_id] );
}
|
|
1159
|
|
1160
|
|
1161 =head2 remove
|
|
1162
|
|
1163 Arg [1] : Bio::EnsEMBL::Transcript $transcript
|
|
1164 The transcript to remove from the database
|
|
1165 Example : $tr_adaptor->remove($transcript);
|
|
1166 Description: Removes a transcript completely from the database, and all
|
|
1167 associated information.
|
|
1168 This method is usually called by the GeneAdaptor::remove method
|
|
1169 because this method will not perform the removal of genes
|
|
1170 which are associated with this transcript. Do not call this
|
|
1171 method directly unless you know there are no genes associated
|
|
1172 with the transcript!
|
|
1173 Returntype : none
|
|
1174 Exceptions : throw on incorrect arguments
|
|
1175 warning if transcript is not in this database
|
|
1176 Caller : GeneAdaptor::remove
|
|
1177 Status : Stable
|
|
1178
|
|
1179 =cut
|
|
1180
|
|
sub remove {
  my $self       = shift;
  my $transcript = shift;

  if ( !ref($transcript) || !$transcript->isa('Bio::EnsEMBL::Transcript') ) {
    throw("Bio::EnsEMBL::Transcript argument expected");
  }

  # Sanity check: make sure nobody tries to slip past a prediction
  # transcript, which inherits from Transcript but actually uses
  # different tables.
  if ( $transcript->isa('Bio::EnsEMBL::PredictionTranscript') ) {
    throw( "TranscriptAdaptor can only remove Transcripts "
         . "not PredictionTranscripts" );
  }

  # Nothing to do (beyond a warning) for a transcript that is not stored
  # in the database this adaptor is attached to.
  if ( !$transcript->is_stored( $self->db() ) ) {
    warning( "Cannot remove transcript " . $transcript->dbID . ". Is not stored "
           . "in this database." );
    return;
  }

  #
  # Remove the supporting features of this transcript.
  #

  my $prot_adp = $self->db->get_ProteinAlignFeatureAdaptor;
  my $dna_adp  = $self->db->get_DnaAlignFeatureAdaptor;

  my $sfsth = $self->prepare( "SELECT feature_type, feature_id "
                            . "FROM transcript_supporting_feature "
                            . "WHERE transcript_id = ?" );

  $sfsth->bind_param( 1, $transcript->dbID, SQL_INTEGER );
  $sfsth->execute();

  # Statements to check for shared align_features: the same feature may
  # also be referenced from exons (supporting_feature) or from other
  # transcripts (transcript_supporting_feature).
  my $sth1 = $self->prepare( "SELECT count(*) FROM supporting_feature "
                           . "WHERE feature_type = ? AND feature_id = ?" );
  my $sth2 = $self->prepare( "SELECT count(*) "
                           . "FROM transcript_supporting_feature "
                           . "WHERE feature_type = ? AND feature_id = ?" );

SUPPORTING_FEATURE:
  while ( my ( $type, $feature_id ) = $sfsth->fetchrow_array() ) {

    # Only remove the align_feature if this is the last reference to it.
    $sth1->bind_param( 1, $type,       SQL_VARCHAR );
    $sth1->bind_param( 2, $feature_id, SQL_INTEGER );
    $sth1->execute;
    $sth2->bind_param( 1, $type,       SQL_VARCHAR );
    $sth2->bind_param( 2, $feature_id, SQL_INTEGER );
    $sth2->execute;
    my ($count1) = $sth1->fetchrow_array();
    my ($count2) = $sth2->fetchrow_array();

    if ( $count1 + $count2 > 1 ) {
      # Shared feature, leave it alone.
      next SUPPORTING_FEATURE;
    }

    if ( $type eq 'protein_align_feature' ) {
      my $f = $prot_adp->fetch_by_dbID($feature_id);
      $prot_adp->remove($f);
    }
    elsif ( $type eq 'dna_align_feature' ) {
      my $f = $dna_adp->fetch_by_dbID($feature_id);
      $dna_adp->remove($f);
    }
    else {
      warning("Unknown supporting feature type $type. Not removing feature.");
    }
  }
  $sfsth->finish();
  $sth1->finish();
  $sth2->finish();

  # Delete the association to supporting features.
  $sfsth = $self->prepare(
    "DELETE FROM transcript_supporting_feature WHERE transcript_id = ?");
  $sfsth->bind_param( 1, $transcript->dbID, SQL_INTEGER );
  $sfsth->execute();
  $sfsth->finish();

  # Delete the associated IntronSupportingEvidence and, if the ISE has
  # no more linked transcripts, remove it as well.
  my $ise_adaptor = $self->db->get_IntronSupportingEvidenceAdaptor();
  foreach my $ise ( @{ $transcript->get_all_IntronSupportingEvidence() } ) {
    $ise_adaptor->remove_transcript_linkage( $ise, $transcript );
    if ( !$ise->has_linked_transcripts() ) {
      $ise_adaptor->remove($ise);
    }
  }

  # Remove all xref linkages to this transcript.
  my $dbeAdaptor = $self->db->get_DBEntryAdaptor();
  foreach my $dbe ( @{ $transcript->get_all_DBEntries } ) {
    $dbeAdaptor->remove_from_object( $dbe, $transcript, 'Transcript' );
  }

  # Remove the attributes associated with this transcript.
  my $attrib_adp = $self->db->get_AttributeAdaptor;
  $attrib_adp->remove_from_Transcript($transcript);

  # Remove the canonical translation associated with this transcript.
  my $translationAdaptor = $self->db->get_TranslationAdaptor();
  if ( defined( $transcript->translation() ) ) {
    $translationAdaptor->remove( $transcript->translation );
  }

  # Remove the alternative translations too.  store() writes them through
  # the same TranslationAdaptor, so leaving them behind would orphan
  # translation rows that still point at the deleted transcript.
  my $alt_translations = $transcript->get_all_alternative_translations();
  if ( defined($alt_translations) ) {
    foreach my $alt_translation ( @{$alt_translations} ) {
      $translationAdaptor->remove($alt_translation);
    }
  }

  #
  # Remove exon associations to this transcript.
  #

  my $exonAdaptor = $self->db->get_ExonAdaptor();

  # Prepared once and re-executed for every exon instead of being
  # re-prepared on each loop iteration.
  my $exon_count_sth = $self->prepare( "SELECT count(*)
                                        FROM exon_transcript
                                        WHERE exon_id = ?" );

  foreach my $exon ( @{ $transcript->get_all_Exons() } ) {
    # Get the number of transcript references to this exon; only remove
    # the exon if this is the last transcript to reference it.
    $exon_count_sth->bind_param( 1, $exon->dbID, SQL_INTEGER );
    $exon_count_sth->execute();
    my ($count) = $exon_count_sth->fetchrow_array();
    $exon_count_sth->finish();

    if ( $count == 1 ) {
      $exonAdaptor->remove($exon);
    }
  }

  my $sth = $self->prepare( "DELETE FROM exon_transcript
                             WHERE transcript_id = ?" );
  $sth->bind_param( 1, $transcript->dbID, SQL_INTEGER );
  $sth->execute();
  $sth->finish();

  # Finally delete the transcript row itself.
  $sth = $self->prepare( "DELETE FROM transcript
                          WHERE transcript_id = ?" );
  $sth->bind_param( 1, $transcript->dbID, SQL_INTEGER );
  $sth->execute();
  $sth->finish();

  # The in-memory object no longer corresponds to a database row.
  $transcript->dbID(undef);
  $transcript->adaptor(undef);

  return;
}
|
|
1330
|
|
1331
|
|
1332 =head2 update
|
|
1333
|
|
1334 Arg [1] : Bio::EnsEMBL::Transcript $transcript
|
|
1335 The transcript to update
|
|
1336 Example : $tr_adaptor->update($transcript);
|
|
1337 Description: Updates a transcript in the database.
|
|
1338 Returntype : None
|
|
1339 Exceptions : thrown if the $transcript is not a Bio::EnsEMBL::Transcript.
|
|
1340 warn if the method is called on a transcript that does not exist
|
|
1341 in the database.
|
|
1342 Should warn if trying to update the number of attached exons, but
|
|
1343 this is a far more complex process and is not yet implemented.
|
|
1344 Caller : general
|
|
1345 Status : Stable
|
|
1346
|
|
1347 =cut
|
|
1348
|
|
sub update {
  my $self       = shift;
  my $transcript = shift;

  # Only genuine Transcript objects (or subclasses) can be updated.
  unless ( defined($transcript)
        && ref($transcript)
        && $transcript->isa('Bio::EnsEMBL::Transcript') )
  {
    throw("Must update a transcript object, not a $transcript");
  }

  # The display xref is optional; a missing xref, or one without a
  # dbID, is written as NULL.
  my $display_xref    = $transcript->display_xref();
  my $display_xref_id = undef;
  if ( defined($display_xref) ) {
    $display_xref_id = $display_xref->dbID() || undef;
  }

  my $sth = $self->prepare(
    qq(
    UPDATE transcript
    SET analysis_id = ?,
    display_xref_id = ?,
    description = ?,
    biotype = ?,
    status = ?,
    is_current = ?,
    canonical_translation_id = ?
    WHERE transcript_id = ?
    ) );

  $sth->bind_param( 1, $transcript->analysis()->dbID(), SQL_INTEGER );
  $sth->bind_param( 2, $display_xref_id,                SQL_INTEGER );
  $sth->bind_param( 3, $transcript->description(),      SQL_LONGVARCHAR );
  $sth->bind_param( 4, $transcript->biotype(),          SQL_VARCHAR );
  $sth->bind_param( 5, $transcript->status(),           SQL_VARCHAR );
  $sth->bind_param( 6, $transcript->is_current(),       SQL_TINYINT );

  # A transcript without a translation gets a NULL
  # canonical_translation_id.
  my $translation    = $transcript->translation();
  my $translation_id = undef;
  if ( defined($translation) ) {
    $translation_id = $translation->dbID();
  }
  $sth->bind_param( 7, $translation_id, SQL_INTEGER );

  $sth->bind_param( 8, $transcript->dbID(), SQL_INTEGER );

  $sth->execute();
}
|
|
1397
|
|
1398
|
|
1399 =head2 list_dbIDs
|
|
1400
|
|
1401 Example : @transcript_ids = @{ $t_adaptor->list_dbIDs };
|
|
1402 Description: Gets a list of internal ids for all transcripts in the db.
|
|
1403 Arg[1] : <optional> int. Non-zero for the ids to be sorted by the seq_region.
Returntype : Listref of Ints
|
|
1404 Exceptions : none
|
|
1405 Caller : general
|
|
1406 Status : Stable
|
|
1407
|
|
1408 =cut
|
|
1409
|
|
sub list_dbIDs {
  my $self    = shift;
  my $ordered = shift;

  # Delegate to _list_dbIDs for the "transcript" table.  The second
  # argument is left undefined and the caller's ordering flag is passed
  # through untouched as the third.
  my @list_args = ( "transcript", undef, $ordered );

  return $self->_list_dbIDs(@list_args);
}
|
|
1415
|
|
1416
|
|
1417 =head2 list_stable_ids
|
|
1418
|
|
1419 Example : @stable_trans_ids = @{ $transcript_adaptor->list_stable_ids };
|
|
1420 Description: Gets a list of stable ids for all transcripts in the current
|
|
1421 database.
|
|
1422 Returntype : Listref of Strings
|
|
1423 Exceptions : none
|
|
1424 Caller : general
|
|
1425 Status : Stable
|
|
1426
|
|
1427 =cut
|
|
1428
|
|
sub list_stable_ids {
  my $self = shift;

  # Same helper as list_dbIDs, but asking for the "stable_id" column of
  # the "transcript" table.
  my ( $table, $column ) = ( "transcript", "stable_id" );

  return $self->_list_dbIDs( $table, $column );
}
|
|
1434
|
|
1435
|
|
1436 #_objs_from_sth
|
|
1437
|
|
1438 # Arg [1] : StatementHandle $sth
|
|
1439 # Arg [2] : Bio::EnsEMBL::AssemblyMapper $mapper
|
|
1440 # Arg [3] : Bio::EnsEMBL::Slice $dest_slice
|
|
1441 # Description: PROTECTED implementation of abstract superclass method.
|
|
1442 # Responsible for the creation of Transcripts.
|
|
1443 # Returntype : Listref of Bio::EnsEMBL::Transcripts in target coord system
|
|
1444 # Exceptions : none
|
|
1445 # Caller : internal
|
|
1446 # Status : Stable
|
|
1447
|
|
sub _objs_from_sth {
  my ( $self, $sth, $mapper, $dest_slice ) = @_;

  #
  # This code is ugly because an attempt has been made to remove as many
  # function calls as possible for speed purposes.  Thus many caches and
  # a fair bit of gymnastics is used.
  #

  my $sa             = $self->db()->get_SliceAdaptor();
  my $aa             = $self->db()->get_AnalysisAdaptor();
  my $dbEntryAdaptor = $self->db()->get_DBEntryAdaptor();

  my @transcripts;

  # Per-call caches, keyed by analysis_id / seq_region_id.
  my %analysis_hash;
  my %slice_hash;
  my %sr_name_hash;
  my %sr_cs_hash;

  # One variable per result column, in column order.  $gene_id is never
  # read below but has to stay bound so the later columns line up.
  my (
    $transcript_id,  $seq_region_id,      $seq_region_start,
    $seq_region_end, $seq_region_strand,  $analysis_id,
    $gene_id,        $is_current,         $stable_id,
    $version,        $created_date,       $modified_date,
    $description,    $biotype,            $status,
    $external_db,    $external_status,    $external_db_name,
    $xref_id,        $xref_display_label, $xref_primary_acc,
    $xref_version,   $xref_description,   $xref_info_type,
    $xref_info_text
  );

  $sth->bind_columns(
    \(
      $transcript_id,  $seq_region_id,      $seq_region_start,
      $seq_region_end, $seq_region_strand,  $analysis_id,
      $gene_id,        $is_current,         $stable_id,
      $version,        $created_date,       $modified_date,
      $description,    $biotype,            $status,
      $external_db,    $external_status,    $external_db_name,
      $xref_id,        $xref_display_label, $xref_primary_acc,
      $xref_version,   $xref_description,   $xref_info_type,
      $xref_info_text
    ) );

  # Properties of the destination slice, fetched once up front.
  my $dest_slice_start;
  my $dest_slice_end;
  my $dest_slice_strand;
  my $dest_slice_length;
  my $dest_slice_cs;
  my $dest_slice_sr_id;

  my $asma;
  if ($dest_slice) {
    $dest_slice_start  = $dest_slice->start();
    $dest_slice_end    = $dest_slice->end();
    $dest_slice_strand = $dest_slice->strand();
    $dest_slice_length = $dest_slice->length();
    $dest_slice_cs     = $dest_slice->coord_system();
    $dest_slice_sr_id  = $dest_slice->get_seq_region_id();
    $asma              = $self->db->get_AssemblyMapperAdaptor();
  }

FEATURE:
  while ( $sth->fetch() ) {

    # Get the analysis object.
    my $analysis = $analysis_hash{$analysis_id} ||=
      $aa->fetch_by_dbID($analysis_id);

    # Need to get the internal_seq_region, if present.
    $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
    my $slice       = $slice_hash{ "ID:" . $seq_region_id };
    my $dest_mapper = $mapper;

    if ( !$slice ) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{ "ID:" . $seq_region_id } = $slice;
      $sr_name_hash{$seq_region_id}         = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id}           = $slice->coord_system();
    }

    # Obtain a mapper if none was defined, but a dest_seq_region was.
    if ( !$dest_mapper
      && $dest_slice
      && !$dest_slice_cs->equals( $slice->coord_system ) )
    {
      $dest_mapper =
        $asma->fetch_by_CoordSystems( $dest_slice_cs, $slice->coord_system );
    }

    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs   = $sr_cs_hash{$seq_region_id};

    #
    # Remap the feature coordinates to another coord system if a mapper
    # was provided.
    #
    if ($dest_mapper) {

      if ( defined $dest_slice
        && $dest_mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') )
      {
        ( $seq_region_id,  $seq_region_start,
          $seq_region_end, $seq_region_strand )
          = $dest_mapper->map( $sr_name, $seq_region_start, $seq_region_end,
                               $seq_region_strand, $sr_cs, 1, $dest_slice );
      }
      else {
        ( $seq_region_id,  $seq_region_start,
          $seq_region_end, $seq_region_strand )
          = $dest_mapper->fastmap( $sr_name, $seq_region_start,
                                   $seq_region_end, $seq_region_strand,
                                   $sr_cs );
      }

      # Skip features that map to gaps or coord system boundaries.
      next FEATURE if ( !defined($seq_region_id) );

      # Get a slice in the coord system we just mapped to.  (This used
      # to be an if/else on the mapper's coord systems whose two
      # branches were identical.)
      $slice = $slice_hash{ "ID:" . $seq_region_id } ||=
        $sa->fetch_by_seq_region_id($seq_region_id);
    }

    #
    # If a destination slice was provided convert the coords.
    #
    if ( defined($dest_slice) ) {
      if ( $dest_slice_strand == 1 ) {
        $seq_region_start = $seq_region_start - $dest_slice_start + 1;
        $seq_region_end   = $seq_region_end - $dest_slice_start + 1;

        if ( $dest_slice->is_circular ) {
          if ( $seq_region_start > $seq_region_end ) {
            # Looking at a feature overlapping the chromosome origin.
            if ( $seq_region_end > $dest_slice_start ) {
              # Looking at the region in the beginning of the chromosome.
              $seq_region_start -= $dest_slice->seq_region_length();
            }
            if ( $seq_region_end < 0 ) {
              $seq_region_end += $dest_slice->seq_region_length();
            }
          }
          else {
            if ( $dest_slice_start > $dest_slice_end
              && $seq_region_end < 0 )
            {
              # Looking at the region overlapping the chromosome
              # origin and a feature which is at the beginning of the
              # chromosome.
              $seq_region_start += $dest_slice->seq_region_length();
              $seq_region_end   += $dest_slice->seq_region_length();
            }
          }
        }
      }
      else {
        if ( $dest_slice->is_circular()
          && $seq_region_start > $seq_region_end )
        {
          if ( $seq_region_end > $dest_slice_start ) {
            # Looking at the region in the beginning of the chromosome.
            $seq_region_start = $dest_slice_end - $seq_region_end + 1;
            $seq_region_end =
              $seq_region_end -
              $dest_slice->seq_region_length() -
              $dest_slice_start + 1;
          }
          else {
            my $tmp_seq_region_start = $seq_region_start;
            $seq_region_start =
              $dest_slice_end -
              $seq_region_end -
              $dest_slice->seq_region_length() + 1;
            $seq_region_end =
              $dest_slice_end - $tmp_seq_region_start + 1;
          }
        }
        else {
          my $tmp_seq_region_start = $seq_region_start;
          $seq_region_start = $dest_slice_end - $seq_region_end + 1;
          $seq_region_end   = $dest_slice_end - $tmp_seq_region_start + 1;
        }

        $seq_region_strand = -$seq_region_strand;
      }

      # Throw away features off the end of the requested slice, or on a
      # different seq_region than the requested slice.
      if ( $seq_region_end < 1
        || $seq_region_start > $dest_slice_length
        || ( $dest_slice_sr_id ne $seq_region_id ) )
      {
        next FEATURE;
      }

      $slice = $dest_slice;
    }

    # Build the display xref only when the row actually carries one.
    my $display_xref;

    if ($xref_id) {
      $display_xref = Bio::EnsEMBL::DBEntry->new_fast( {
          'dbID'            => $xref_id,
          'display_id'      => $xref_display_label,
          'primary_id'      => $xref_primary_acc,
          'version'         => $xref_version,
          'description'     => $xref_description,
          'info_type'       => $xref_info_type,
          'info_text'       => $xref_info_text,
          'adaptor'         => $dbEntryAdaptor,
          'db_display_name' => $external_db_name,
          'dbname'          => $external_db
      } );
    }

    # Finally, create the new Transcript.
    push(
      @transcripts,
      $self->_create_feature_fast(
        'Bio::EnsEMBL::Transcript',
        {
          'analysis'              => $analysis,
          'start'                 => $seq_region_start,
          'end'                   => $seq_region_end,
          'strand'                => $seq_region_strand,
          'adaptor'               => $self,
          'slice'                 => $slice,
          'dbID'                  => $transcript_id,
          'stable_id'             => $stable_id,
          'version'               => $version,
          'created_date'          => $created_date || undef,
          'modified_date'         => $modified_date || undef,
          'external_name'         => $xref_display_label,
          'external_db'           => $external_db,
          'external_status'       => $external_status,
          'external_display_name' => $external_db_name,
          'display_xref'          => $display_xref,
          'description'           => $description,
          'biotype'               => $biotype,
          'status'                => $status,
          'is_current'            => $is_current,
          'edits_enabled'         => 1
        } ) );

  }

  return \@transcripts;
}
|
|
1716
|
|
1717
|
|
1718 =head2 fetch_all_by_exon_supporting_evidence
|
|
1719
|
|
1720 Arg [1] : String $hit_name
|
|
1721 Name of supporting feature
|
|
1722 Arg [2] : String $feature_type
|
|
1723 one of "dna_align_feature" or "protein_align_feature"
|
|
1724 Arg [3] : (optional) Bio::EnsEMBL::Analysis
|
|
1725 Example : $tr = $tr_adaptor->fetch_all_by_exon_supporting_evidence
|
|
1726 ('XYZ', 'dna_align_feature');
|
|
1727 Description: Gets all the transcripts with exons which have a specified hit
|
|
1728 on a particular type of feature. Optionally filter by analysis.
|
|
1729 Returntype : Listref of Bio::EnsEMBL::Transcript objects
|
|
1730 Exceptions : If feature_type is not of correct type.
|
|
1731 Caller : general
|
|
1732 Status : Stable
|
|
1733
|
|
1734 =cut
|
|
1735
|
|
sub fetch_all_by_exon_supporting_evidence {
  my ( $self, $hit_name, $feature_type, $analysis ) = @_;

  # $feature_type is interpolated into the SQL below as a table name and
  # as part of a column name, so it has to be exactly one of the two
  # supported names.  The previous pattern, /(dna)|(protein)_align_feature/,
  # was unanchored and alternated between "dna" and
  # "protein_align_feature", so it accepted any string merely containing
  # "dna".
  if ( !defined($feature_type)
    || $feature_type !~ /\A(?:dna|protein)_align_feature\z/ )
  {
    throw("feature type must be dna_align_feature or protein_align_feature");
  }

  # Optional restriction to a single analysis.
  my $anal_from = "";
  $anal_from = ", analysis a " if ($analysis);
  my $anal_where = "";
  $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? "
    if ($analysis);

  # Current transcripts having at least one exon whose supporting
  # feature of the requested type carries the requested hit name.
  my $sql = qq(
  SELECT DISTINCT(t.transcript_id)
  FROM transcript t,
  exon_transcript et,
  supporting_feature sf,
  $feature_type f
  $anal_from
  WHERE t.transcript_id = et.transcript_id
  AND t.is_current = 1
  AND et.exon_id = sf.exon_id
  AND sf.feature_id = f.${feature_type}_id
  AND sf.feature_type = ?
  AND f.hit_name=?
  $anal_where
  );

  my $sth = $self->prepare($sql);

  $sth->bind_param( 1, $feature_type, SQL_VARCHAR );
  $sth->bind_param( 2, $hit_name,     SQL_VARCHAR );
  $sth->bind_param( 3, $analysis->dbID(), SQL_INTEGER ) if ($analysis);

  $sth->execute();

  my @transcripts;

  # fetchrow_array() is called in list context: the loop then ends on
  # the empty list that signals "no more rows" rather than on the truth
  # of the id itself.
  while ( my ($id) = $sth->fetchrow_array() ) {
    my $transcript = $self->fetch_by_dbID($id);
    push( @transcripts, $transcript ) if $transcript;
  }

  return \@transcripts;
}
|
|
1782
|
|
1783
|
|
1784 =head2 fetch_all_by_transcript_supporting_evidence
|
|
1785
|
|
1786 Arg [1] : String $hit_name
|
|
1787 Name of supporting feature
|
|
1788 Arg [2] : String $feature_type
|
|
1789 one of "dna_align_feature" or "protein_align_feature"
|
|
1790 Arg [3] : (optional) Bio::EnsEMBL::Analysis
|
|
1791 Example : $transcripts = $transcript_adaptor->fetch_all_by_transcript_supporting_evidence('XYZ', 'dna_align_feature');
|
|
1792 Description: Gets all the transcripts with evidence from a specified hit_name on a particular type of feature, stored in the
|
|
1793 transcript_supporting_feature table. Optionally filter by analysis. For hits stored in the supporting_feature
|
|
1794 table (linked to exons) use fetch_all_by_exon_supporting_evidence instead.
|
|
1795 Returntype : Listref of Bio::EnsEMBL::Transcript objects
|
|
1796 Exceptions : If feature_type is not of correct type.
|
|
1797 Caller : general
|
|
1798 Status : Stable
|
|
1799
|
|
1800 =cut
|
|
1801
|
|
sub fetch_all_by_transcript_supporting_evidence {
  my ( $self, $hit_name, $feature_type, $analysis ) = @_;

  # $feature_type is interpolated into the SQL below as a table name and
  # as part of a column name, so it has to be exactly one of the two
  # supported names.  The previous pattern, /(dna)|(protein)_align_feature/,
  # was unanchored and alternated between "dna" and
  # "protein_align_feature", so it accepted any string merely containing
  # "dna".
  if ( !defined($feature_type)
    || $feature_type !~ /\A(?:dna|protein)_align_feature\z/ )
  {
    throw("feature type must be dna_align_feature or protein_align_feature");
  }

  # Optional restriction to a single analysis.
  my $anal_from = "";
  $anal_from = ", analysis a " if ($analysis);
  my $anal_where = "";
  $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? "
    if ($analysis);

  # Current transcripts linked, through transcript_supporting_feature,
  # to a feature of the requested type carrying the requested hit name.
  my $sql = qq(
  SELECT DISTINCT(t.transcript_id)
  FROM transcript t,
  transcript_supporting_feature sf,
  $feature_type f
  $anal_from
  WHERE t.transcript_id = sf.transcript_id
  AND t.is_current = 1
  AND sf.feature_id = f.${feature_type}_id
  AND sf.feature_type = ?
  AND f.hit_name=?
  $anal_where
  );

  my $sth = $self->prepare($sql);

  $sth->bind_param( 1, $feature_type, SQL_VARCHAR );
  $sth->bind_param( 2, $hit_name,     SQL_VARCHAR );
  $sth->bind_param( 3, $analysis->dbID(), SQL_INTEGER ) if ($analysis);

  $sth->execute();

  my @transcripts;

  # fetchrow_array() is called in list context: the loop then ends on
  # the empty list that signals "no more rows" rather than on the truth
  # of the id itself.
  while ( my ($id) = $sth->fetchrow_array() ) {
    my $transcript = $self->fetch_by_dbID($id);
    push( @transcripts, $transcript ) if $transcript;
  }

  return \@transcripts;
}
|
|
1847
|
|
1848
|
|
1849 ##########################
|
|
1850 # #
|
|
1851 # DEPRECATED METHODS #
|
|
1852 # #
|
|
1853 ##########################
|
|
1854
|
|
1855
|
|
=head2 get_display_xref

  Arg [1]    : Transcript object $transcript
               Must be defined and provide a dbID() method.
  Description: DEPRECATED. Use $transcript->display_xref() instead.
               Looks up the xref referenced by the transcript's
               display_xref_id and wraps it in a Bio::EnsEMBL::DBEntry.
  Returntype : Bio::EnsEMBL::DBEntry, or undef if the query returns no xref
  Exceptions : thrown if no transcript is supplied

=cut

sub get_display_xref {
  my ($self, $transcript) = @_;

  deprecate("display_xref should be retreived from Transcript object directly.");

  if ( !defined $transcript ) {
    throw("Must call with a Transcript object");
  }

  # NOTE(review): e.db_external_name is what gets passed on as
  # -db_display_name below; confirm the column name against the schema
  # version this adaptor is used with.
  my $sth = $self->prepare(qq(
      SELECT e.db_name,
             x.display_label,
             e.db_external_name,
             x.xref_id
        FROM transcript t,
             xref x,
             external_db e
       WHERE t.transcript_id = ?
         AND t.display_xref_id = x.xref_id
         AND x.external_db_id = e.external_db_id
  ));

  $sth->bind_param(1, $transcript->dbID, SQL_INTEGER);
  $sth->execute();

  # The variables must be listed in the same order as the SELECT columns
  # above: db_name, display_label, db_external_name, xref_id.
  my ($db_name, $display_label, $display_db_name, $xref_id) =
    $sth->fetchrow_array();

  # Only the first row is used; release the handle explicitly.
  $sth->finish();

  if ( !defined $xref_id ) {
    # Kept as an explicit undef so existing callers see the same value.
    return undef;
  }

  my $db_entry = Bio::EnsEMBL::DBEntry->new(
    -dbid            => $xref_id,
    -adaptor         => $self->db->get_DBEntryAdaptor(),
    -dbname          => $db_name,
    -display_id      => $display_label,
    -db_display_name => $display_db_name
  );

  return $db_entry;
}
|
|
1904
|
|
1905
|
|
=head2 get_stable_entry_info

  Arg [1]    : Bio::EnsEMBL::Transcript $transcript
  Description: DEPRECATED. Use $transcript->stable_id() instead.
               Reads stable_id and version from the transcript table for
               the transcript's dbID and stores them on the object under
               the '_stable_id' and '_version' keys. Both are set to undef
               when the query returns no row.
  Returntype : 1
  Exceptions : thrown if the argument is not a Bio::EnsEMBL::Transcript
               object

=cut

sub get_stable_entry_info {
  my ($self, $transcript) = @_;

  deprecate("Stable ids should be loaded directly now");

  # blessed() is false for undef, plain strings and unblessed references,
  # so ->isa() is only ever called on a real object and every bad argument
  # ends up in the throw() below instead of a raw Perl method-call error.
  require Scalar::Util;
  my $is_transcript = Scalar::Util::blessed($transcript)
    && $transcript->isa('Bio::EnsEMBL::Transcript');

  if ( !$is_transcript ) {
    my $got = defined $transcript ? $transcript : 'undef';
    throw("Needs a Transcript object, not a $got");
  }

  my $sth = $self->prepare(qq(
      SELECT stable_id, version
        FROM transcript
       WHERE transcript_id = ?
  ));

  $sth->bind_param(1, $transcript->dbID, SQL_INTEGER);
  $sth->execute();

  my ($stable_id, $version) = $sth->fetchrow_array();

  # Only the first row is used; release the handle explicitly.
  $sth->finish();

  $transcript->{'_stable_id'} = $stable_id;
  $transcript->{'_version'}   = $version;

  return 1;
}
|
|
1937
|
|
1938
|
|
=head2 fetch_all_by_DBEntry

  Description: DEPRECATED. Use fetch_all_by_external_name() instead.
               Calls deprecate() and then forwards all of its arguments,
               unchanged, to fetch_all_by_external_name(), returning
               whatever that method returns.

=cut

sub fetch_all_by_DBEntry {
  my ($self, @lookup_args) = @_;

  deprecate('Use fetch_all_by_external_name instead.');

  # Pure forwarder: the replacement method does all of the work.
  return $self->fetch_all_by_external_name(@lookup_args);
}
|
|
1950
|
|
1951
|
|
1952 1;
|