0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::DBSQL::GeneAdaptor - Database adaptor for the retrieval and
|
|
24 storage of Gene objects
|
|
25
|
|
26 =head1 SYNOPSIS
|
|
27
|
|
28 use Bio::EnsEMBL::Registry;
|
|
29
|
|
30 Bio::EnsEMBL::Registry->load_registry_from_db(
|
|
31 -host => 'ensembldb.ensembl.org',
|
|
32 -user => 'anonymous',
|
|
33 );
|
|
34
|
|
35 $gene_adaptor =
|
|
36 Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "gene" );
|
|
37
|
|
38 $gene = $gene_adaptor->fetch_by_dbID(1234);
|
|
39
|
|
40 $gene = $gene_adaptor->fetch_by_stable_id('ENSG00000184129');
|
|
41
|
|
42 @genes = @{ $gene_adaptor->fetch_all_by_external_name('BRCA2') };
|
|
43
|
|
44 $slice_adaptor =
|
|
45 Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "slice" );
|
|
46
|
|
47 $slice =
|
|
48 $slice_adaptor->fetch_by_region( 'chromosome', '1', 1, 1000000 );
|
|
49
|
|
50 @genes = @{ $gene_adaptor->fetch_all_by_Slice($slice) };
|
|
51
|
|
52 =head1 DESCRIPTION
|
|
53
|
|
54 This is a database aware adaptor for the retrieval and storage of gene
|
|
55 objects.
|
|
56
|
|
57 =head1 METHODS
|
|
58
|
|
59 =cut
|
|
60
|
|
61 package Bio::EnsEMBL::DBSQL::GeneAdaptor;
|
|
62
|
|
63 use strict;
|
|
64
|
|
65 use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning );
|
|
66 use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
|
|
67 use Bio::EnsEMBL::DBSQL::SliceAdaptor;
|
|
68 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
|
|
69 use Bio::EnsEMBL::DBSQL::DBAdaptor;
|
|
70 use Bio::EnsEMBL::Gene;
|
|
71
|
|
72 use vars '@ISA';
|
|
73 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor);
|
|
74
|
|
75
|
|
76 # _tables
|
|
77 # Arg [1] : none
|
|
78 # Description: PROTECTED implementation of superclass abstract method.
|
|
79 # Returns the names, aliases of the tables to use for queries.
|
|
80 # Returntype : list of listrefs of strings
|
|
81 # Exceptions : none
|
|
82 # Caller : internal
|
|
83 # Status : Stable
|
|
84
|
|
85 sub _tables {
|
|
86 return (
|
|
87 [ 'gene', 'g' ],
|
|
88 [ 'xref', 'x' ],
|
|
89 [ 'external_db', 'exdb' ] );
|
|
90 }
|
|
91
|
|
92
|
|
93 # _columns
|
|
94 # Arg [1] : none
|
|
95 # Example : none
|
|
96 # Description: PROTECTED implementation of superclass abstract method.
|
|
97 # Returns a list of columns to use for queries.
|
|
98 # Returntype : list of strings
|
|
99 # Exceptions : none
|
|
100 # Caller : internal
|
|
101 # Status : Stable
|
|
102
|
|
103 sub _columns {
|
|
104 my ($self) = @_;
|
|
105
|
|
106 my $created_date =
|
|
107 $self->db()->dbc()->from_date_to_seconds("g.created_date");
|
|
108 my $modified_date =
|
|
109 $self->db()->dbc()->from_date_to_seconds("g.modified_date");
|
|
110
|
|
111 return (
|
|
112 'g.gene_id', 'g.seq_region_id',
|
|
113 'g.seq_region_start', 'g.seq_region_end',
|
|
114 'g.seq_region_strand', 'g.analysis_id',
|
|
115 'g.biotype', 'g.display_xref_id',
|
|
116 'g.description', 'g.status',
|
|
117 'g.source', 'g.is_current',
|
|
118 'g.canonical_transcript_id', 'g.canonical_annotation',
|
|
119 'g.stable_id', 'g.version',
|
|
120 $created_date, $modified_date,
|
|
121 'x.display_label', 'x.dbprimary_acc',
|
|
122 'x.description', 'x.version',
|
|
123 'exdb.db_name', 'exdb.status',
|
|
124 'exdb.db_release', 'exdb.db_display_name',
|
|
125 'x.info_type', 'x.info_text'
|
|
126 );
|
|
127 } ## end sub _columns
|
|
128
|
|
129
|
|
130 sub _left_join {
|
|
131 return (
|
|
132 [ 'xref', "x.xref_id = g.display_xref_id" ],
|
|
133 [ 'external_db', "exdb.external_db_id = x.external_db_id" ] );
|
|
134 }
|
|
135
|
|
136
|
|
137 =head2 list_dbIDs
|
|
138
|
|
139 Example : @gene_ids = @{$gene_adaptor->list_dbIDs()};
|
|
140 Description: Gets an array of internal ids for all genes in the current db
|
|
141 Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region.
|
|
142 Returntype : Listref of Ints
|
|
143 Exceptions : none
|
|
144 Caller : general
|
|
145 Status : Stable
|
|
146
|
|
147 =cut
|
|
148
|
|
149 sub list_dbIDs {
|
|
150 my ($self,$ordered) = @_;
|
|
151
|
|
152 return $self->_list_dbIDs("gene",undef, $ordered);
|
|
153 }
|
|
154
|
|
155
|
|
156 =head2 list_stable_ids
|
|
157
|
|
158 Example : @stable_gene_ids = @{$gene_adaptor->list_stable_ids()};
|
|
159 Description: Gets an listref of stable ids for all genes in the current db
|
|
160 Returntype : reference to a list of strings
|
|
161 Exceptions : none
|
|
162 Caller : general
|
|
163 Status : Stable
|
|
164
|
|
165 =cut
|
|
166
|
|
167 sub list_stable_ids {
|
|
168 my ($self) = @_;
|
|
169
|
|
170 return $self->_list_dbIDs("gene", "stable_id");
|
|
171 }
|
|
172
|
|
173
|
|
174 sub list_seq_region_ids {
|
|
175 my $self = shift;
|
|
176
|
|
177 return $self->_list_seq_region_ids('gene');
|
|
178 }
|
|
179
|
|
180 =head2 fetch_by_display_label
|
|
181
|
|
182 Arg [1] : String $label - display label of gene to fetch
|
|
183 Example : my $gene = $geneAdaptor->fetch_by_display_label("BRCA2");
|
|
184 Description: Returns the gene which has the given display label or undef if
|
|
185 there is none. If there are more than 1, the gene on the
|
|
186 reference slice is reported or if none are on the reference,
|
|
187 the first one is reported.
|
|
188 Returntype : Bio::EnsEMBL::Gene
|
|
189 Exceptions : none
|
|
190 Caller : general
|
|
191 Status : Stable
|
|
192
|
|
193 =cut
|
|
194
|
|
195 sub fetch_by_display_label {
|
|
196 my $self = shift;
|
|
197 my $label = shift;
|
|
198
|
|
199 my $constraint = "x.display_label = ? AND g.is_current = 1";
|
|
200 $self->bind_param_generic_fetch($label,SQL_VARCHAR);
|
|
201 my @genes = @{ $self->generic_fetch($constraint) };
|
|
202 my $gene;
|
|
203 if (scalar(@genes) > 1) {
|
|
204 foreach my $gene_tmp (@genes) {
|
|
205 if ($gene_tmp->slice->is_reference) {
|
|
206 $gene = $gene_tmp;
|
|
207 }
|
|
208 last if ($gene);
|
|
209 }
|
|
210 if (!$gene) {
|
|
211 $gene = $genes[0];
|
|
212 }
|
|
213
|
|
214 } elsif (scalar(@genes) == 1) {
|
|
215 $gene = $genes[0];
|
|
216 }
|
|
217
|
|
218 return $gene;
|
|
219 }
|
|
220
|
|
221 =head2 fetch_all_by_display_label
|
|
222
|
|
223 Arg [1] : String $label - display label of genes to fetch
|
|
224 Example : my @genes = @{$geneAdaptor->fetch_all_by_display_label("PPP1R2P1")};
|
|
225 Description: Returns all genes which have the given display label or undef if
|
|
226 there are none.
|
|
227 Returntype : listref of Bio::EnsEMBL::Gene objects
|
|
228 Exceptions : none
|
|
229 Caller : general
|
|
230 Status : Stable
|
|
231
|
|
232 =cut
|
|
233
|
|
234 sub fetch_all_by_display_label {
|
|
235 my $self = shift;
|
|
236 my $label = shift;
|
|
237
|
|
238 my $constraint = "x.display_label = ? AND g.is_current = 1";
|
|
239 $self->bind_param_generic_fetch($label,SQL_VARCHAR);
|
|
240 my $genes = $self->generic_fetch($constraint) ;
|
|
241
|
|
242 return $genes;
|
|
243 }
|
|
244
|
|
245 =head2 fetch_by_stable_id
|
|
246
|
|
247 Arg [1] : String $id
|
|
248 The stable ID of the gene to retrieve
|
|
249 Example : $gene = $gene_adaptor->fetch_by_stable_id('ENSG00000148944');
|
|
250 Description: Retrieves a gene object from the database via its stable id.
|
|
251 The gene will be retrieved in its native coordinate system (i.e.
|
|
252 in the coordinate system it is stored in the database). It may
|
|
253 be converted to a different coordinate system through a call to
|
|
254 transform() or transfer(). If the gene or exon is not found
|
|
255 undef is returned instead.
|
|
256 Returntype : Bio::EnsEMBL::Gene or undef
|
|
257 Exceptions : if we cant get the gene in given coord system
|
|
258 Caller : general
|
|
259 Status : Stable
|
|
260
|
|
261 =cut
|
|
262
|
|
263 sub fetch_by_stable_id {
|
|
264 my ($self, $stable_id) = @_;
|
|
265
|
|
266 my $constraint = "g.stable_id = ? AND g.is_current = 1";
|
|
267 $self->bind_param_generic_fetch($stable_id,SQL_VARCHAR);
|
|
268 my ($gene) = @{ $self->generic_fetch($constraint) };
|
|
269
|
|
270 return $gene;
|
|
271 }
|
|
272
|
|
273
|
|
274
|
|
275 =head2 fetch_all_by_biotype
|
|
276
|
|
277 Arg [1] : String $biotype
|
|
278 listref of $biotypes
|
|
279 The biotype of the gene to retrieve. You can have as an argument a reference
|
|
280 to a list of biotypes
|
|
281 Example : $gene = $gene_adaptor->fetch_all_by_biotype('protein_coding');
|
|
282 $gene = $gene_adaptor->fetch_all_by_biotypes(['protein_coding', 'sRNA', 'miRNA']);
|
|
283 Description: Retrieves an array reference of gene objects from the database via its biotype or biotypes.
|
|
284 The genes will be retrieved in its native coordinate system (i.e.
|
|
285 in the coordinate system it is stored in the database). It may
|
|
286 be converted to a different coordinate system through a call to
|
|
287 transform() or transfer(). If the gene or exon is not found
|
|
288 undef is returned instead.
|
|
289 Returntype : listref of Bio::EnsEMBL::Gene
|
|
290 Exceptions : if we cant get the gene in given coord system
|
|
291 Caller : general
|
|
292 Status : Stable
|
|
293
|
|
294 =cut
|
|
295
|
|
296 sub fetch_all_by_biotype {
|
|
297 my ($self, $biotype) = @_;
|
|
298
|
|
299 if (!defined $biotype){
|
|
300 throw("Biotype or listref of biotypes expected");
|
|
301 }
|
|
302 my $constraint;
|
|
303 if (ref($biotype) eq 'ARRAY'){
|
|
304 $constraint = "g.biotype IN (";
|
|
305 foreach my $b (@{$biotype}){
|
|
306 $constraint .= "?,";
|
|
307 $self->bind_param_generic_fetch($b,SQL_VARCHAR);
|
|
308 }
|
|
309 chop($constraint); #remove last , from expression
|
|
310 $constraint .= ") and g.is_current = 1";
|
|
311
|
|
312 }
|
|
313 else{
|
|
314 $constraint = "g.biotype = ? and g.is_current = 1";
|
|
315 $self->bind_param_generic_fetch($biotype,SQL_VARCHAR);
|
|
316 }
|
|
317 my @genes = @{ $self->generic_fetch($constraint) };
|
|
318 return \@genes ;
|
|
319 }
|
|
320
|
|
321
|
|
322 sub fetch_all {
|
|
323 my ($self) = @_;
|
|
324
|
|
325 my $constraint = 'g.biotype != "LRG_gene" and g.is_current = 1';
|
|
326 my @genes = @{ $self->generic_fetch($constraint) };
|
|
327 return \@genes ;
|
|
328 }
|
|
329
|
|
330
|
|
331 =head2 fetch_all_versions_by_stable_id
|
|
332
|
|
333 Arg [1] : String $stable_id
|
|
334 The stable ID of the gene to retrieve
|
|
335 Example : $gene = $gene_adaptor->fetch_all_versions_by_stable_id
|
|
336 ('ENSG00000148944');
|
|
337 Description : Similar to fetch_by_stable_id, but retrieves all versions of a
|
|
338 gene stored in the database.
|
|
339 Returntype : listref of Bio::EnsEMBL::Gene
|
|
340 Exceptions : if we cant get the gene in given coord system
|
|
341 Caller : general
|
|
342 Status : At Risk
|
|
343
|
|
344 =cut
|
|
345
|
|
346 sub fetch_all_versions_by_stable_id {
|
|
347 my ($self, $stable_id) = @_;
|
|
348
|
|
349 my $constraint = "g.stable_id = ?";
|
|
350 $self->bind_param_generic_fetch($stable_id,SQL_VARCHAR);
|
|
351 return $self->generic_fetch($constraint);
|
|
352 }
|
|
353
|
|
354
|
|
355 =head2 fetch_by_exon_stable_id
|
|
356
|
|
357 Arg [1] : String $id
|
|
358 The stable id of an exon of the gene to retrieve
|
|
359 Example : $gene = $gene_adptr->fetch_by_exon_stable_id('ENSE00000148944');
|
|
360 Description: Retrieves a gene object from the database via an exon stable id.
|
|
361 The gene will be retrieved in its native coordinate system (i.e.
|
|
362 in the coordinate system it is stored in the database). It may
|
|
363 be converted to a different coordinate system through a call to
|
|
364 transform() or transfer(). If the gene or exon is not found
|
|
365 undef is returned instead.
|
|
366 Returntype : Bio::EnsEMBL::Gene or undef
|
|
367 Exceptions : none
|
|
368 Caller : general
|
|
369 Status : Stable
|
|
370
|
|
371 =cut
|
|
372
|
|
373 sub fetch_by_exon_stable_id {
|
|
374 my ($self, $stable_id, $version) = @_;
|
|
375
|
|
376 my $sql = qq(
|
|
377 SELECT t.gene_id
|
|
378 FROM transcript as t,
|
|
379 exon_transcript as et,
|
|
380 exon as e
|
|
381 WHERE t.transcript_id = et.transcript_id
|
|
382 AND et.exon_id = e.exon_id
|
|
383 AND e.stable_id = ?
|
|
384 AND e.is_current = 1
|
|
385 );
|
|
386
|
|
387 my $sth = $self->prepare($sql);
|
|
388 $sth->bind_param(1, $stable_id, SQL_VARCHAR);
|
|
389 $sth->execute();
|
|
390
|
|
391 my ($dbID) = $sth->fetchrow_array();
|
|
392
|
|
393 return undef if(!defined($dbID));
|
|
394
|
|
395 my $gene = $self->fetch_by_dbID($dbID);
|
|
396
|
|
397 return $gene;
|
|
398 }
|
|
399
|
|
400
|
|
401 =head2 fetch_all_by_domain
|
|
402
|
|
403 Arg [1] : String $domain
|
|
404 The domain to fetch genes from
|
|
405 Example : my @genes = @{ $gene_adaptor->fetch_all_by_domain($domain) };
|
|
406 Description: Retrieves a listref of genes whose translation contain interpro
|
|
407 domain $domain. The genes are returned in their native coord
|
|
408 system (i.e. the coord_system they are stored in). If the coord
|
|
409 system needs to be changed, then tranform or transfer should be
|
|
410 called on the individual objects returned.
|
|
411 Returntype : list of Bio::EnsEMBL::Genes
|
|
412 Exceptions : none
|
|
413 Caller : domainview
|
|
414 Status : Stable
|
|
415
|
|
416 =cut
|
|
417
|
|
418 sub fetch_all_by_domain {
|
|
419 my ($self, $domain) = @_;
|
|
420
|
|
421 throw("domain argument is required") unless ($domain);
|
|
422
|
|
423 my $sth = $self->prepare(qq(
|
|
424 SELECT tr.gene_id
|
|
425 FROM interpro i,
|
|
426 protein_feature pf,
|
|
427 transcript tr,
|
|
428 translation tl,
|
|
429 seq_region sr,
|
|
430 coord_system cs
|
|
431 WHERE cs.species_id = ?
|
|
432 AND cs.coord_system_id = sr.coord_system_id
|
|
433 AND sr.seq_region_id = tr.seq_region_id
|
|
434 AND tr.is_current = 1
|
|
435 AND tr.transcript_id = tl.transcript_id
|
|
436 AND tl.translation_id = pf.translation_id
|
|
437 AND pf.hit_name = i.id
|
|
438 AND i.interpro_ac = ?
|
|
439 GROUP BY tr.gene_id));
|
|
440
|
|
441 $sth->bind_param( 1, $self->species_id(), SQL_VARCHAR );
|
|
442 $sth->bind_param( 2, $domain, SQL_VARCHAR );
|
|
443
|
|
444 $sth->execute();
|
|
445
|
|
446 my @array = @{$sth->fetchall_arrayref()};
|
|
447 $sth->finish();
|
|
448
|
|
449 my @gene_ids = map {$_->[0]} @array;
|
|
450
|
|
451 return $self->fetch_all_by_dbID_list(\@gene_ids);
|
|
452 }
|
|
453
|
|
454
|
|
455
|
|
456 =head2 fetch_all_by_Slice_and_external_dbname_link
|
|
457
|
|
458 Arg [1] : Bio::EnsEMBL::Slice $slice
|
|
459 The slice to fetch genes on.
|
|
460 Arg [2] : (optional) string $logic_name
|
|
461 the logic name of the type of features to obtain
|
|
462 Arg [3] : (optional) boolean $load_transcripts
|
|
463 if true, transcripts will be loaded immediately
|
|
464 rather than lazy loaded later.
|
|
465 Arg [4] : Name of the external database
|
|
466 Example : @genes = @{
|
|
467 $ga->fetch_all_by_Slice_and_external_dbname_link(
|
|
468 $slice, undef, undef, "HUGO" ) };
|
|
469 Description: Overrides superclass method to optionally load
|
|
470 transcripts immediately rather than lazy-loading them
|
|
471 later. This is more efficient when there are a lot
|
|
472 of genes whose transcripts are going to be used. The
|
|
473 genes are then filtered to return only those with
|
|
474 external database links of the type specified
|
|
475 Returntype : reference to list of genes
|
|
476 Exceptions : thrown if exon cannot be placed on transcript slice
|
|
477 Caller :
|
|
478 Status : Stable
|
|
479
|
|
480 =cut
|
|
481
|
|
482 sub fetch_all_by_Slice_and_external_dbname_link {
|
|
483 my ( $self, $slice, $logic_name, $load_transcripts, $db_name ) = @_;
|
|
484
|
|
485 # Get the external_db_id(s) from the name.
|
|
486 my $sth = $self->prepare(
|
|
487 "SELECT external_db_id FROM external_db WHERE db_name = ?");
|
|
488
|
|
489 $sth->bind_param( 1, $db_name, SQL_VARCHAR );
|
|
490 $sth->execute();
|
|
491
|
|
492 my $external_db_id;
|
|
493 $sth->bind_columns( \$external_db_id );
|
|
494
|
|
495 my @external_db_ids;
|
|
496 while ( $sth->fetch() ) {
|
|
497 push( @external_db_ids, $external_db_id );
|
|
498 }
|
|
499
|
|
500 if ( scalar(@external_db_ids) == 0 ) {
|
|
501 warn sprintf( "Could not find external database "
|
|
502 . "'%s' in the external_db table\n"
|
|
503 . "Available are:\n",
|
|
504 $db_name );
|
|
505
|
|
506 $sth = $self->prepare("SELECT DISTINCT db_name FROM external_db");
|
|
507
|
|
508 $sth->execute();
|
|
509 $sth->bind_columns( \$external_db_id );
|
|
510
|
|
511 while ( $sth->fetch() ) {
|
|
512 warn "\t$external_db_id\n";
|
|
513 }
|
|
514 return [];
|
|
515 }
|
|
516
|
|
517 # Get the gene_ids for those with links.
|
|
518 my $dbe_adaptor = $self->db()->get_DBEntryAdaptor();
|
|
519
|
|
520 my %linked_genes;
|
|
521 foreach $external_db_id (@external_db_ids) {
|
|
522 my @linked_genes =
|
|
523 $dbe_adaptor->list_gene_ids_by_external_db_id($external_db_id);
|
|
524
|
|
525 foreach my $gene_id (@linked_genes) {
|
|
526 $linked_genes{$gene_id} = 1;
|
|
527 }
|
|
528 }
|
|
529
|
|
530 # Get all the genes on the slice.
|
|
531 my $genes = $self->SUPER::fetch_all_by_Slice_constraint( $slice,
|
|
532 'g.is_current = 1', $logic_name );
|
|
533
|
|
534 # Create a list of those that are in the gene_ids list.
|
|
535 my @genes_passed;
|
|
536 foreach my $gene (@$genes) {
|
|
537 if ( exists( $linked_genes{ $gene->dbID() } ) ) {
|
|
538 push( @genes_passed, $gene );
|
|
539 }
|
|
540 }
|
|
541
|
|
542 # Return the list of those that passed.
|
|
543 return \@genes_passed;
|
|
544 } ## end sub fetch_all_by_Slice_and_external_dbname_link
|
|
545
|
|
546 =head2 fetch_all_by_Slice
|
|
547
|
|
548 Arg [1] : Bio::EnsEMBL::Slice $slice
|
|
549 The slice to fetch genes on.
|
|
550 Arg [2] : (optional) string $logic_name
|
|
551 the logic name of the type of features to obtain
|
|
552 Arg [3] : (optional) boolean $load_transcripts
|
|
553 if true, transcripts will be loaded immediately rather than
|
|
554 lazy loaded later.
|
|
555 Arg [4] : (optional) string $source
|
|
556 the source name of the features to obtain.
|
|
557 Arg [5] : (optional) string biotype
|
|
558 the biotype of the features to obtain.
|
|
559 Example : @genes = @{$gene_adaptor->fetch_all_by_Slice()};
|
|
560 Description: Overrides superclass method to optionally load transcripts
|
|
561 immediately rather than lazy-loading them later. This
|
|
562 is more efficient when there are a lot of genes whose
|
|
563 transcripts are going to be used.
|
|
564 Returntype : reference to list of genes
|
|
565 Exceptions : thrown if exon cannot be placed on transcript slice
|
|
566 Caller : Slice::get_all_Genes
|
|
567 Status : Stable
|
|
568
|
|
569 =cut
|
|
570
|
|
571 sub fetch_all_by_Slice {
|
|
572 my ( $self, $slice, $logic_name, $load_transcripts, $source,
|
|
573 $biotype ) = @_;
|
|
574
|
|
575 my $constraint = 'g.is_current = 1';
|
|
576
|
|
577 if ( defined($source) ) {
|
|
578 $constraint .= " and g.source = '$source'";
|
|
579 }
|
|
580 if ( defined($biotype) ) {
|
|
581 $constraint .= " and g.biotype = '$biotype'";
|
|
582 }
|
|
583
|
|
584 my $genes =
|
|
585 $self->SUPER::fetch_all_by_Slice_constraint( $slice, $constraint,
|
|
586 $logic_name );
|
|
587
|
|
588 # If there are less than two genes, still do lazy-loading.
|
|
589 if ( !$load_transcripts || @$genes < 2 ) {
|
|
590 return $genes;
|
|
591 }
|
|
592
|
|
593 # Preload all of the transcripts now, instead of lazy loading later,
|
|
594 # faster than one query per transcript.
|
|
595
|
|
596 # First check if transcripts are already preloaded.
|
|
597 # FIXME: Should check all transcripts.
|
|
598 if ( exists( $genes->[0]->{'_transcript_array'} ) ) {
|
|
599 return $genes;
|
|
600 }
|
|
601
|
|
602 # Get extent of region spanned by transcripts.
|
|
603 my ( $min_start, $max_end );
|
|
604 foreach my $g (@$genes) {
|
|
605 if ( !defined($min_start) || $g->seq_region_start() < $min_start ) {
|
|
606 $min_start = $g->seq_region_start();
|
|
607 }
|
|
608 if ( !defined($max_end) || $g->seq_region_end() > $max_end ) {
|
|
609 $max_end = $g->seq_region_end();
|
|
610 }
|
|
611 }
|
|
612
|
|
613 my $ext_slice;
|
|
614
|
|
615 if ( $min_start >= $slice->start() && $max_end <= $slice->end() ) {
|
|
616 $ext_slice = $slice;
|
|
617 } else {
|
|
618 my $sa = $self->db()->get_SliceAdaptor();
|
|
619 $ext_slice = $sa->fetch_by_region(
|
|
620 $slice->coord_system->name(), $slice->seq_region_name(),
|
|
621 $min_start, $max_end,
|
|
622 $slice->strand(), $slice->coord_system->version() );
|
|
623 }
|
|
624
|
|
625 # Associate transcript identifiers with genes.
|
|
626
|
|
627 my %g_hash = map { $_->dbID => $_ } @{$genes};
|
|
628
|
|
629 my $g_id_str = join( ',', keys(%g_hash) );
|
|
630
|
|
631 my $sth =
|
|
632 $self->prepare( "SELECT gene_id, transcript_id "
|
|
633 . "FROM transcript "
|
|
634 . "WHERE gene_id IN ($g_id_str)" );
|
|
635
|
|
636 $sth->execute();
|
|
637
|
|
638 my ( $g_id, $tr_id );
|
|
639 $sth->bind_columns( \( $g_id, $tr_id ) );
|
|
640
|
|
641 my %tr_g_hash;
|
|
642
|
|
643 while ( $sth->fetch() ) {
|
|
644 $tr_g_hash{$tr_id} = $g_hash{$g_id};
|
|
645 }
|
|
646
|
|
647 my $ta = $self->db()->get_TranscriptAdaptor();
|
|
648 my $transcripts = $ta->fetch_all_by_Slice(
|
|
649 $ext_slice,
|
|
650 1, undef,
|
|
651 sprintf( "t.transcript_id IN (%s)",
|
|
652 join( ',', sort { $a <=> $b } keys(%tr_g_hash) ) ) );
|
|
653
|
|
654 # Move transcripts onto gene slice, and add them to genes.
|
|
655 foreach my $tr ( @{$transcripts} ) {
|
|
656 if ( !exists( $tr_g_hash{ $tr->dbID() } ) ) { next }
|
|
657
|
|
658 my $new_tr;
|
|
659 if ( $slice != $ext_slice ) {
|
|
660 $new_tr = $tr->transfer($slice);
|
|
661 if ( !defined($new_tr) ) {
|
|
662 throw("Unexpected. "
|
|
663 . "Transcript could not be transfered onto Gene slice." );
|
|
664 }
|
|
665 } else {
|
|
666 $new_tr = $tr;
|
|
667 }
|
|
668
|
|
669 $tr_g_hash{ $tr->dbID() }->add_Transcript($new_tr);
|
|
670 }
|
|
671
|
|
672 return $genes;
|
|
673 } ## end sub fetch_all_by_Slice
|
|
674
|
|
675 =head2 fetch_by_transcript_id
|
|
676
|
|
677 Arg [1] : Int $trans_id
|
|
678 Unique database identifier for the transcript whose gene should
|
|
679 be retrieved. The gene is returned in its native coord
|
|
680 system (i.e. the coord_system it is stored in). If the coord
|
|
681 system needs to be changed, then tranform or transfer should
|
|
682 be called on the returned object. undef is returned if the
|
|
683 gene or transcript is not found in the database.
|
|
684 Example : $gene = $gene_adaptor->fetch_by_transcript_id(1241);
|
|
685 Description: Retrieves a gene from the database via the database identifier
|
|
686 of one of its transcripts.
|
|
687 Returntype : Bio::EnsEMBL::Gene
|
|
688 Exceptions : none
|
|
689 Caller : general
|
|
690 Status : Stable
|
|
691
|
|
692 =cut
|
|
693
|
|
694 sub fetch_by_transcript_id {
|
|
695 my ($self, $trans_id) = @_;
|
|
696
|
|
697 # this is a cheap SQL call
|
|
698 my $sth = $self->prepare(qq(
|
|
699 SELECT tr.gene_id
|
|
700 FROM transcript tr
|
|
701 WHERE tr.transcript_id = ?
|
|
702 ));
|
|
703
|
|
704 $sth->bind_param(1, $trans_id, SQL_INTEGER);
|
|
705 $sth->execute();
|
|
706
|
|
707 my ($geneid) = $sth->fetchrow_array();
|
|
708
|
|
709 $sth->finish();
|
|
710
|
|
711 return undef if( !defined $geneid );
|
|
712
|
|
713 my $gene = $self->fetch_by_dbID($geneid);
|
|
714 return $gene;
|
|
715 }
|
|
716
|
|
717
|
|
718 =head2 fetch_by_transcript_stable_id
|
|
719
|
|
720 Arg [1] : string $trans_stable_id
|
|
721 transcript stable ID whose gene should be retrieved
|
|
722 Example : my $gene = $gene_adaptor->fetch_by_transcript_stable_id
|
|
723 ('ENST0000234');
|
|
724 Description: Retrieves a gene from the database via the stable ID of one of
|
|
725 its transcripts
|
|
726 Returntype : Bio::EnsEMBL::Gene
|
|
727 Exceptions : none
|
|
728 Caller : general
|
|
729 Status : Stable
|
|
730
|
|
731 =cut
|
|
732
|
|
733 sub fetch_by_transcript_stable_id {
|
|
734 my ($self, $trans_stable_id) = @_;
|
|
735
|
|
736 my $sth = $self->prepare(qq(
|
|
737 SELECT gene_id
|
|
738 FROM transcript
|
|
739 WHERE stable_id = ?
|
|
740 AND is_current = 1
|
|
741 ));
|
|
742
|
|
743 $sth->bind_param(1, $trans_stable_id, SQL_VARCHAR);
|
|
744 $sth->execute();
|
|
745
|
|
746 my ($geneid) = $sth->fetchrow_array();
|
|
747 $sth->finish;
|
|
748
|
|
749 return undef if (!defined $geneid);
|
|
750
|
|
751 my $gene = $self->fetch_by_dbID($geneid);
|
|
752 return $gene;
|
|
753 }
|
|
754
|
|
755
|
|
756 =head2 fetch_by_translation_stable_id
|
|
757
|
|
758 Arg [1] : String $translation_stable_id
|
|
759 The stable id of a translation of the gene to be obtained
|
|
760 Example : my $gene = $gene_adaptor->fetch_by_translation_stable_id
|
|
761 ('ENSP00000278194');
|
|
762 Description: Retrieves a gene via the stable id of one of its translations.
|
|
763 Returntype : Bio::EnsEMBL::Gene
|
|
764 Exceptions : none
|
|
765 Caller : general
|
|
766 Status : Stable
|
|
767
|
|
768 =cut
|
|
769
|
|
770 sub fetch_by_translation_stable_id {
|
|
771 my ($self, $translation_stable_id) = @_;
|
|
772
|
|
773 my $sth = $self->prepare(qq(
|
|
774 SELECT tr.gene_id
|
|
775 FROM transcript tr,
|
|
776 translation tl
|
|
777 WHERE tl.stable_id = ?
|
|
778 AND tr.transcript_id = tl.transcript_id
|
|
779 AND tr.is_current = 1
|
|
780 ));
|
|
781
|
|
782 $sth->bind_param(1, $translation_stable_id, SQL_VARCHAR);
|
|
783 $sth->execute();
|
|
784
|
|
785 my ($geneid) = $sth->fetchrow_array();
|
|
786 $sth->finish;
|
|
787 if( !defined $geneid ) {
|
|
788 return undef;
|
|
789 }
|
|
790 return $self->fetch_by_dbID($geneid);
|
|
791 }
|
|
792
|
|
793
|
|
794
|
|
795
|
|
796 =head2 fetch_all_by_external_name
|
|
797
|
|
798 Arg [1] : String $external_name
|
|
799 The external identifier for the gene to be obtained
|
|
800 Arg [2] : (optional) String $external_db_name
|
|
801 The name of the external database from which the
|
|
802 identifier originates.
|
|
803 Arg [3] : Boolean override. Force SQL regex matching for users
|
|
804 who really do want to find all 'NM%'
|
|
805 Example : @genes = @{$gene_adaptor->fetch_all_by_external_name('BRCA2')}
|
|
806 @many_genes = @{$gene_adaptor->fetch_all_by_external_name('BRCA%')}
|
|
807 Description: Retrieves a list of genes with an external database
|
|
808 identifier $external_name. The genes returned are in
|
|
809 their native coordinate system, i.e. in the coordinate
|
|
810 system they are stored in the database in. If another
|
|
811 coordinate system is required then the Gene::transfer or
|
|
812 Gene::transform method can be used.
|
|
813 SQL wildcards % and _ are supported in the $external_name,
|
|
814 but their use is somewhat restricted for performance reasons.
|
|
815 Users that really do want % and _ in the first three characters
|
|
816 should use argument 3 to prevent optimisations
|
|
817 Returntype : listref of Bio::EnsEMBL::Gene
|
|
818 Exceptions : none
|
|
819 Caller : goview, general
|
|
820 Status : Stable
|
|
821
|
|
822 =cut
|
|
823
|
|
824 sub fetch_all_by_external_name {
|
|
825 my ( $self, $external_name, $external_db_name, $override ) = @_;
|
|
826
|
|
827 my $entryAdaptor = $self->db->get_DBEntryAdaptor();
|
|
828
|
|
829 my @ids =
|
|
830 $entryAdaptor->list_gene_ids_by_extids( $external_name,
|
|
831 $external_db_name, $override );
|
|
832
|
|
833 my %genes_by_dbIDs =
|
|
834 map { $_->dbID(), $_ } @{ $self->fetch_all_by_dbID_list( \@ids ) };
|
|
835
|
|
836 my @result = map { $genes_by_dbIDs{$_} } @ids;
|
|
837
|
|
838 return \@result;
|
|
839 }
|
|
840
|
|
841 =head2 fetch_all_by_GOTerm
|
|
842
|
|
843 Arg [1] : Bio::EnsEMBL::OntologyTerm
|
|
844 The GO term for which genes should be fetched.
|
|
845
|
|
846 Example: @genes = @{
|
|
847 $gene_adaptor->fetch_all_by_GOTerm(
|
|
848 $go_adaptor->fetch_by_accession('GO:0030326') ) };
|
|
849
|
|
850 Description : Retrieves a list of genes that are associated with
|
|
851 the given GO term, or with any of its descendent
|
|
852 GO terms. The genes returned are in their native
|
|
853 coordinate system, i.e. in the coordinate system
|
|
854 in which they are stored in the database. If
|
|
855 another coordinate system is required then the
|
|
856 Gene::transfer or Gene::transform method can be
|
|
857 used.
|
|
858
|
|
859 Return type : listref of Bio::EnsEMBL::Gene
|
|
860 Exceptions : Throws of argument is not a GO term
|
|
861 Caller : general
|
|
862 Status : Stable
|
|
863
|
|
864 =cut
|
|
865
|
|
866 sub fetch_all_by_GOTerm {
|
|
867 my ( $self, $term ) = @_;
|
|
868
|
|
869 assert_ref( $term, 'Bio::EnsEMBL::OntologyTerm' );
|
|
870 if ( $term->ontology() ne 'GO' ) {
|
|
871 throw('Argument is not a GO term');
|
|
872 }
|
|
873
|
|
874 my $entryAdaptor = $self->db->get_DBEntryAdaptor();
|
|
875
|
|
876 my %unique_dbIDs;
|
|
877 foreach my $accession ( map { $_->accession() }
|
|
878 ( $term, @{ $term->descendants() } ) )
|
|
879 {
|
|
880 my @ids =
|
|
881 $entryAdaptor->list_gene_ids_by_extids( $accession, 'GO' );
|
|
882 foreach my $dbID (@ids) { $unique_dbIDs{$dbID} = 1 }
|
|
883 }
|
|
884
|
|
885 my @result = @{
|
|
886 $self->fetch_all_by_dbID_list(
|
|
887 [ sort { $a <=> $b } keys(%unique_dbIDs) ]
|
|
888 ) };
|
|
889
|
|
890 return \@result;
|
|
891 } ## end sub fetch_all_by_GOTerm
|
|
892
|
|
893 =head2 fetch_all_by_GOTerm_accession
|
|
894
|
|
895 Arg [1] : String
|
|
896 The GO term accession for which genes should be
|
|
897 fetched.
|
|
898
|
|
899 Example :
|
|
900
|
|
901 @genes =
|
|
902 @{ $gene_adaptor->fetch_all_by_GOTerm_accession(
|
|
903 'GO:0030326') };
|
|
904
|
|
905 Description : Retrieves a list of genes that are associated with
|
|
906 the given GO term, or with any of its descendent
|
|
907 GO terms. The genes returned are in their native
|
|
908 coordinate system, i.e. in the coordinate system
|
|
909 in which they are stored in the database. If
|
|
910 another coordinate system is required then the
|
|
911 Gene::transfer or Gene::transform method can be
|
|
912 used.
|
|
913
|
|
914 Return type : listref of Bio::EnsEMBL::Gene
|
|
915 Exceptions : Throws of argument is not a GO term accession
|
|
916 Caller : general
|
|
917 Status : Stable
|
|
918
|
|
919 =cut
|
|
920
|
|
921 sub fetch_all_by_GOTerm_accession {
|
|
922 my ( $self, $accession ) = @_;
|
|
923
|
|
924 if ( $accession !~ /^GO:/ ) {
|
|
925 throw('Argument is not a GO term accession');
|
|
926 }
|
|
927
|
|
928 my $goAdaptor =
|
|
929 Bio::EnsEMBL::Registry->get_adaptor( 'Multi', 'Ontology',
|
|
930 'OntologyTerm' );
|
|
931
|
|
932 my $term = $goAdaptor->fetch_by_accession($accession);
|
|
933
|
|
934 return $self->fetch_all_by_GOTerm($term);
|
|
935 }
|
|
936
|
|
937 =head2 fetch_all_alt_alleles
|
|
938
|
|
939 Arg [1] : Bio::EnsEMBL::Gene $gene
|
|
940 The gene to fetch alternative alleles for
|
|
941 Example : my @alt_genes = @{ $gene_adaptor->fetch_all_alt_alleles($gene) };
|
|
942 foreach my $alt_gene (@alt_genes) {
|
|
943 print "Alternate allele: " . $alt_gene->stable_id() . "\n" ;
|
|
944 }
|
|
945 Description: Retrieves genes which are alternate alleles to a provided gene.
|
|
946 Alternate alleles in Ensembl are genes which are similar and are
|
|
947 on an alternative haplotype of the same region. There are not
|
|
948 currently very many of these. This method will return a
|
|
949 reference to an empty list if no alternative alleles are found.
|
|
950 Returntype : listref of Bio::EnsEMBL::Genes
|
|
951 Exceptions : throw if incorrect arg provided
|
|
952 warning if gene arg does not have dbID
|
|
953 Caller : Gene::get_all_alt_alleles
|
|
954 Status : Stable
|
|
955
|
|
956 =cut
|
|
957
|
|
958 sub fetch_all_alt_alleles {
|
|
959 my $self = shift;
|
|
960 my $gene = shift;
|
|
961
|
|
962 if(!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) {
|
|
963 throw('Bio::EnsEMBL::Gene argument is required');
|
|
964 }
|
|
965
|
|
966 my $gene_id = $gene->dbID();
|
|
967
|
|
968 if(!$gene_id) {
|
|
969 warning('Cannot retrieve alternate alleles for gene without dbID');
|
|
970 return [];
|
|
971 }
|
|
972
|
|
973 my $sth = $self->prepare("SELECT aa1.gene_id " .
|
|
974 "FROM alt_allele aa1, alt_allele aa2 " .
|
|
975 "WHERE aa1.alt_allele_id = aa2.alt_allele_id " .
|
|
976 "AND aa2.gene_id = ? " .
|
|
977 "AND aa1.gene_id <> ?");
|
|
978
|
|
979 $sth->bind_param(1, $gene_id, SQL_INTEGER);
|
|
980 $sth->bind_param(2, $gene_id, SQL_INTEGER);
|
|
981 $sth->execute();
|
|
982
|
|
983 my @alt_ids;
|
|
984 my $row;
|
|
985 while($row = $sth->fetchrow_arrayref()) {
|
|
986 push @alt_ids, $row->[0];
|
|
987 }
|
|
988 $sth->finish();
|
|
989
|
|
990 if (@alt_ids) {
|
|
991 return $self->fetch_all_by_dbID_list(\@alt_ids);
|
|
992 }
|
|
993
|
|
994 return [];
|
|
995 }
|
|
996
|
|
997 sub is_ref{
|
|
998 my ( $self, $gene_id) = @_;
|
|
999 my $is_not_ref;
|
|
1000
|
|
1001 # easier to find if it is not an alt_Allele do this and then negate it.
|
|
1002 my $sth = $self->prepare("select count(1) from alt_allele where gene_id = $gene_id and is_ref = 0");
|
|
1003 $sth->execute();
|
|
1004 $sth->bind_columns(\$is_not_ref);
|
|
1005 $sth->fetch;
|
|
1006
|
|
1007 if(defined($is_not_ref) and $is_not_ref){
|
|
1008 return 0;
|
|
1009 }
|
|
1010
|
|
1011 return 1;
|
|
1012 }
|
|
1013
|
|
1014
|
|
1015 =head2 store_alt_alleles
|
|
1016
|
|
1017
|
|
1018 Arg [1] : reference to list of Bio::EnsEMBL::Genes $genes
|
|
1019 Example : $gene_adaptor->store_alt_alleles([$gene1, $gene2, $gene3]);
|
|
1020 Description: This method creates a group of alternative alleles (i.e. locus)
|
|
1021 from a set of genes. The genes should be genes from alternate
|
|
1022 haplotypes which are similar. The genes must already be stored
|
|
1023 in this database.
|
|
1024 Returntype : int alt_allele_id or undef if no alt_alleles were stored
|
|
1025 Exceptions : throw on incorrect arguments
|
|
1026 throw on sql error (e.g. duplicate unique id)
|
|
1027 Caller : general
|
|
1028 Status : Stable
|
|
1029
|
|
1030 =cut
|
|
1031
|
|
1032 sub store_alt_alleles {
|
|
1033 my $self = shift;
|
|
1034 my $genes = shift;
|
|
1035
|
|
1036 if(!ref($genes) eq 'ARRAY') {
|
|
1037 throw('List reference of Bio::EnsEMBL::Gene argument expected.');
|
|
1038 }
|
|
1039
|
|
1040 my @genes = @$genes;
|
|
1041 my $num_genes = scalar(@genes);
|
|
1042
|
|
1043 if($num_genes < 2) {
|
|
1044 warning('At least 2 genes must be provided to construct alternative alleles (gene id: '. $genes[0]->dbID() .'). Ignoring.');
|
|
1045 return;
|
|
1046 }
|
|
1047
|
|
1048 my @is_ref;
|
|
1049 my @ref_genes = ();
|
|
1050 my @non_ref_genes = ();
|
|
1051 my @gene_ids = ();
|
|
1052
|
|
1053 foreach my $gene (@genes) {
|
|
1054
|
|
1055 if(!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) {
|
|
1056 throw('List reference of Bio::EnsEMBL::Gene argument expected.');
|
|
1057 }
|
|
1058
|
|
1059 my $gene_id = $gene->dbID();
|
|
1060
|
|
1061 if (!$gene_id) {
|
|
1062 throw('Genes must have dbIDs in order to construct alternative alleles.');
|
|
1063 } else {
|
|
1064 push @gene_ids, $gene_id;
|
|
1065 }
|
|
1066
|
|
1067 my $is_ref = $gene->slice->is_reference();
|
|
1068
|
|
1069 push @is_ref, $is_ref;
|
|
1070
|
|
1071 if ($is_ref) {
|
|
1072 push @ref_genes, $gene->dbID();
|
|
1073 } else {
|
|
1074 push @non_ref_genes, $gene->dbID();
|
|
1075 }
|
|
1076 }
|
|
1077 if (scalar(@ref_genes) > 1) {
|
|
1078 warning('More than one alternative allele on the reference sequence (gene ids: ' . join(',',@ref_genes) . '). Ignoring.');
|
|
1079 return;
|
|
1080 }
|
|
1081
|
|
1082 #
|
|
1083 #insert the first gene seperately in order to get a unique identifier for
|
|
1084 #the set of alleles
|
|
1085 #
|
|
1086
|
|
1087 my $sth = $self->prepare("INSERT INTO alt_allele (gene_id, is_ref) VALUES (?,?)");
|
|
1088 $sth->bind_param(1, $gene_ids[0], SQL_INTEGER);
|
|
1089 $sth->bind_param(2, $is_ref[0], SQL_INTEGER);
|
|
1090 eval {
|
|
1091 $sth->execute();
|
|
1092 };
|
|
1093 my $alt_allele_id = $sth->{'mysql_insertid'};
|
|
1094
|
|
1095 if (!$alt_allele_id || $@) {
|
|
1096 throw("An SQL error occured inserting alternative alleles:\n$@");
|
|
1097 }
|
|
1098 $sth->finish();
|
|
1099 #
|
|
1100 # Insert all subsequent alt alleles using the alt_allele identifier
|
|
1101 # from the first insert
|
|
1102 #
|
|
1103
|
|
1104 $sth = $self->prepare("INSERT INTO alt_allele (alt_allele_id, gene_id, is_ref) " .
|
|
1105 "VALUES (?,?,?)");
|
|
1106
|
|
1107 for (my $i = 1; $i < $num_genes; $i++) {
|
|
1108
|
|
1109 $sth->bind_param(1, $alt_allele_id, SQL_INTEGER);
|
|
1110 $sth->bind_param(2, $gene_ids[$i], SQL_INTEGER);
|
|
1111 $sth->bind_param(3, $is_ref[$i], SQL_INTEGER);
|
|
1112 eval {
|
|
1113 $sth->execute();
|
|
1114 };
|
|
1115
|
|
1116 if ($@) {
|
|
1117 # an error occured, revert the db to the previous state
|
|
1118 $sth = $self->prepare("DELETE FROM alt_allele WHERE alt_allele_id = ?");
|
|
1119 $sth->bind_param(1, $alt_allele_id, SQL_INTEGER);
|
|
1120 $sth->execute();
|
|
1121 $sth->finish();
|
|
1122 throw("An SQL error occured inserting alternative alleles:\n$@");
|
|
1123 }
|
|
1124 }
|
|
1125
|
|
1126 $sth->finish();
|
|
1127
|
|
1128 return $alt_allele_id;
|
|
1129 }
|
|
1130
|
|
1131
|
|
1132 =head2 store
|
|
1133
|
|
1134 Arg [1] : Bio::EnsEMBL::Gene $gene
|
|
1135 The gene to store in the database
|
|
1136 Arg [2] : ignore_release in xrefs [default 1] set to 0 to use release info
|
|
1137 in external database references
|
|
1138 Example : $gene_adaptor->store($gene);
|
|
1139 Description: Stores a gene in the database.
|
|
1140 Returntype : the database identifier (dbID) of the newly stored gene
|
|
1141 Exceptions : thrown if the $gene is not a Bio::EnsEMBL::Gene or if
|
|
1142 $gene does not have an analysis object
|
|
1143 Caller : general
|
|
1144 Status : Stable
|
|
1145
|
|
1146 =cut
|
|
1147
|
|
1148 sub store {
|
|
1149 my ($self, $gene, $ignore_release) = @_;
|
|
1150
|
|
1151 if (!ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) {
|
|
1152 throw("Must store a gene object, not a $gene");
|
|
1153 }
|
|
1154 if(!defined($ignore_release)){
|
|
1155 $ignore_release = 1;
|
|
1156 }
|
|
1157 my $db = $self->db();
|
|
1158
|
|
1159 if ($gene->is_stored($db)) {
|
|
1160 return $gene->dbID();
|
|
1161 }
|
|
1162
|
|
1163 # ensure coords are correct before storing
|
|
1164 $gene->recalculate_coordinates();
|
|
1165
|
|
1166 my $analysis = $gene->analysis();
|
|
1167 throw("Genes must have an analysis object.") if(!defined($analysis));
|
|
1168
|
|
1169 my $analysis_id;
|
|
1170 if ($analysis->is_stored($db)) {
|
|
1171 $analysis_id = $analysis->dbID();
|
|
1172 } else {
|
|
1173 $analysis_id = $db->get_AnalysisAdaptor->store($analysis);
|
|
1174 }
|
|
1175
|
|
1176 my $type = $gene->biotype || "";
|
|
1177
|
|
1178 # default to is_current = 1 if this attribute is not set
|
|
1179 my $is_current = $gene->is_current;
|
|
1180 $is_current = 1 unless (defined($is_current));
|
|
1181
|
|
1182 my $original = $gene;
|
|
1183 my $original_transcripts = $gene->get_all_Transcripts();
|
|
1184
|
|
1185 my $seq_region_id;
|
|
1186
|
|
1187 ( $gene, $seq_region_id ) = $self->_pre_store($gene);
|
|
1188
|
|
1189 my $store_gene_sql = qq(
|
|
1190 INSERT INTO gene
|
|
1191 SET biotype = ?,
|
|
1192 analysis_id = ?,
|
|
1193 seq_region_id = ?,
|
|
1194 seq_region_start = ?,
|
|
1195 seq_region_end = ?,
|
|
1196 seq_region_strand = ?,
|
|
1197 description = ?,
|
|
1198 source = ?,
|
|
1199 status = ?,
|
|
1200 is_current = ?,
|
|
1201 canonical_transcript_id = ?,
|
|
1202 canonical_annotation = ?
|
|
1203 );
|
|
1204
|
|
1205 if (defined($gene->stable_id)) {
|
|
1206 my $created = $self->db->dbc->from_seconds_to_date($gene->created_date());
|
|
1207 my $modified = $self->db->dbc->from_seconds_to_date($gene->modified_date());
|
|
1208 $store_gene_sql .= ", stable_id = ?, version = ?, created_date = " . $created . " , modified_date = " . $modified;
|
|
1209
|
|
1210 }
|
|
1211
|
|
1212 # column status is used from schema version 34 onwards (before it was
|
|
1213 # confidence)
|
|
1214
|
|
1215 my $sth = $self->prepare($store_gene_sql);
|
|
1216 $sth->bind_param( 1, $type, SQL_VARCHAR );
|
|
1217 $sth->bind_param( 2, $analysis_id, SQL_INTEGER );
|
|
1218 $sth->bind_param( 3, $seq_region_id, SQL_INTEGER );
|
|
1219 $sth->bind_param( 4, $gene->start(), SQL_INTEGER );
|
|
1220 $sth->bind_param( 5, $gene->end(), SQL_INTEGER );
|
|
1221 $sth->bind_param( 6, $gene->strand(), SQL_TINYINT );
|
|
1222 $sth->bind_param( 7, $gene->description(), SQL_LONGVARCHAR );
|
|
1223 $sth->bind_param( 8, $gene->source(), SQL_VARCHAR );
|
|
1224 $sth->bind_param( 9, $gene->status(), SQL_VARCHAR );
|
|
1225 $sth->bind_param( 10, $is_current, SQL_TINYINT );
|
|
1226
|
|
1227 # Canonical transcript ID will be updated later.
|
|
1228 # Set it to zero for now.
|
|
1229 $sth->bind_param( 11, 0, SQL_TINYINT );
|
|
1230
|
|
1231 $sth->bind_param( 12, $gene->canonical_annotation(), SQL_VARCHAR );
|
|
1232
|
|
1233 if ( defined($gene->stable_id) ) {
|
|
1234
|
|
1235 $sth->bind_param( 13, $gene->stable_id, SQL_VARCHAR );
|
|
1236 my $version = ($gene->version()) ? $gene->version() : 1;
|
|
1237 $sth->bind_param( 14, $version, SQL_INTEGER );
|
|
1238 }
|
|
1239
|
|
1240 $sth->execute();
|
|
1241 $sth->finish();
|
|
1242
|
|
1243 my $gene_dbID = $sth->{'mysql_insertid'};
|
|
1244
|
|
1245 # store the dbentries associated with this gene
|
|
1246 my $dbEntryAdaptor = $db->get_DBEntryAdaptor();
|
|
1247
|
|
1248 foreach my $dbe ( @{ $gene->get_all_DBEntries } ) {
|
|
1249 $dbEntryAdaptor->store( $dbe, $gene_dbID, "Gene", $ignore_release );
|
|
1250 }
|
|
1251
|
|
1252 # We allow transcripts not to share equal exons and instead have
|
|
1253 # copies. For the database we still want sharing though, to have
|
|
1254 # easier time with stable ids. So we need to have a step to merge
|
|
1255 # exons together before store.
|
|
1256 my %exons;
|
|
1257
|
|
1258 foreach my $trans ( @{$gene->get_all_Transcripts} ) {
|
|
1259 foreach my $e ( @{$trans->get_all_Exons} ) {
|
|
1260 my $key = $e->hashkey();
|
|
1261 if( exists $exons{ $key } ) {
|
|
1262 $trans->swap_exons( $e, $exons{$key} );
|
|
1263 } else {
|
|
1264 $exons{$key} = $e;
|
|
1265 }
|
|
1266 }
|
|
1267 }
|
|
1268
|
|
1269 my $transcript_adaptor = $db->get_TranscriptAdaptor();
|
|
1270
|
|
1271 my $transcripts = $gene->get_all_Transcripts();
|
|
1272
|
|
1273 my $new_canonical_transcript_id;
|
|
1274 for ( my $i = 0; $i < @$transcripts; $i++ ) {
|
|
1275 my $new = $transcripts->[$i];
|
|
1276 my $old = $original_transcripts->[$i];
|
|
1277
|
|
1278 $transcript_adaptor->store( $new, $gene_dbID, $analysis_id );
|
|
1279
|
|
1280 if ( !defined($new_canonical_transcript_id)
|
|
1281 && $new->is_canonical() )
|
|
1282 {
|
|
1283 $new_canonical_transcript_id = $new->dbID();
|
|
1284 }
|
|
1285
|
|
1286 # update the original transcripts since we may have made copies of
|
|
1287 # them by transforming the gene
|
|
1288 $old->dbID( $new->dbID() );
|
|
1289 $old->adaptor( $new->adaptor() );
|
|
1290
|
|
1291 if ( $new->translation ) {
|
|
1292 $old->translation->dbID( $new->translation()->dbID );
|
|
1293 $old->translation->adaptor( $new->translation()->adaptor );
|
|
1294 }
|
|
1295 }
|
|
1296
|
|
1297 if ( defined($new_canonical_transcript_id) ) {
|
|
1298 # Now the canonical transcript has been stored, so update the
|
|
1299 # canonical_transcript_id of this gene with the new dbID.
|
|
1300 my $sth = $self->prepare(
|
|
1301 q(
|
|
1302 UPDATE gene
|
|
1303 SET canonical_transcript_id = ?
|
|
1304 WHERE gene_id = ?)
|
|
1305 );
|
|
1306
|
|
1307 $sth->bind_param( 1, $new_canonical_transcript_id, SQL_INTEGER );
|
|
1308 $sth->bind_param( 2, $gene_dbID, SQL_INTEGER );
|
|
1309
|
|
1310 $sth->execute();
|
|
1311 $sth->finish();
|
|
1312 }
|
|
1313
|
|
1314 # update gene to point to display xref if it is set
|
|
1315 if(my $display_xref = $gene->display_xref) {
|
|
1316 my $dxref_id;
|
|
1317 if($display_xref->is_stored($db)) {
|
|
1318 $dxref_id = $display_xref->dbID();
|
|
1319 } else {
|
|
1320 $dxref_id = $dbEntryAdaptor->exists($display_xref);
|
|
1321 }
|
|
1322
|
|
1323 if(defined($dxref_id)) {
|
|
1324 my $sth = $self->prepare
|
|
1325 ("UPDATE gene SET display_xref_id = ? WHERE gene_id = ?");
|
|
1326 $sth->bind_param(1, $dxref_id, SQL_INTEGER);
|
|
1327 $sth->bind_param(2, $gene_dbID, SQL_INTEGER);
|
|
1328 $sth->execute();
|
|
1329 $sth->finish();
|
|
1330 $display_xref->dbID($dxref_id);
|
|
1331 $display_xref->adaptor($dbEntryAdaptor);
|
|
1332 $display_xref->dbID($dxref_id);
|
|
1333 $display_xref->adaptor($dbEntryAdaptor);
|
|
1334 } else {
|
|
1335 warning("Display_xref ".$display_xref->dbname().":".
|
|
1336 $display_xref->display_id() . " is not stored in database.\n".
|
|
1337 "Not storing relationship to this gene.");
|
|
1338 $display_xref->dbID(undef);
|
|
1339 $display_xref->adaptor(undef);
|
|
1340 }
|
|
1341 }
|
|
1342
|
|
1343 # store gene attributes if there are any
|
|
1344 my $attr_adaptor = $db->get_AttributeAdaptor();
|
|
1345 $attr_adaptor->store_on_Gene($gene_dbID, $gene->get_all_Attributes);
|
|
1346
|
|
1347 # store unconventional transcript associations if there are any
|
|
1348 my $utaa = $db->get_UnconventionalTranscriptAssociationAdaptor();
|
|
1349 foreach my $uta (@{$gene->get_all_unconventional_transcript_associations()}) {
|
|
1350 $utaa->store($uta);
|
|
1351 }
|
|
1352
|
|
1353 # set the adaptor and dbID on the original passed in gene not the
|
|
1354 # transfered copy
|
|
1355 $original->adaptor($self);
|
|
1356 $original->dbID($gene_dbID);
|
|
1357
|
|
1358 return $gene_dbID;
|
|
1359 }
|
|
1360
|
|
1361
|
|
1362 =head2 remove
|
|
1363
|
|
1364 Arg [1] : Bio::EnsEMBL::Gene $gene
|
|
1365 the gene to remove from the database
|
|
1366 Example : $gene_adaptor->remove($gene);
|
|
1367 Description: Removes a gene completely from the database. All associated
|
|
1368 transcripts, exons, stable_identifiers, descriptions, etc.
|
|
1369 are removed as well. Use with caution!
|
|
1370 Returntype : none
|
|
1371 Exceptions : throw on incorrect arguments
|
|
1372 warning if gene is not stored in this database
|
|
1373 Caller : general
|
|
1374 Status : Stable
|
|
1375
|
|
1376 =cut
|
|
1377
|
|
1378 sub remove {
|
|
1379 my $self = shift;
|
|
1380 my $gene = shift;
|
|
1381
|
|
1382 if (!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) {
|
|
1383 throw("Bio::EnsEMBL::Gene argument expected.");
|
|
1384 }
|
|
1385
|
|
1386 if ( !$gene->is_stored($self->db()) ) {
|
|
1387 warning("Cannot remove gene " . $gene->dbID() . ". Is not stored in " .
|
|
1388 "this database.");
|
|
1389 return;
|
|
1390 }
|
|
1391
|
|
1392 # remove all object xrefs associated with this gene
|
|
1393
|
|
1394 my $dbe_adaptor = $self->db()->get_DBEntryAdaptor();
|
|
1395 foreach my $dbe (@{$gene->get_all_DBEntries()}) {
|
|
1396 $dbe_adaptor->remove_from_object($dbe, $gene, 'Gene');
|
|
1397 }
|
|
1398
|
|
1399 # remove all alternative allele entries associated with this gene
|
|
1400 my $sth = $self->prepare("DELETE FROM alt_allele WHERE gene_id = ?");
|
|
1401 $sth->bind_param( 1, $gene->dbID, SQL_INTEGER );
|
|
1402 $sth->execute();
|
|
1403 $sth->finish();
|
|
1404
|
|
1405 # remove the attributes associated with this transcript
|
|
1406 my $attrib_adaptor = $self->db->get_AttributeAdaptor;
|
|
1407 $attrib_adaptor->remove_from_Gene($gene);
|
|
1408
|
|
1409 # remove all of the transcripts associated with this gene
|
|
1410 my $transcriptAdaptor = $self->db->get_TranscriptAdaptor();
|
|
1411 foreach my $trans ( @{$gene->get_all_Transcripts()} ) {
|
|
1412 $transcriptAdaptor->remove($trans);
|
|
1413 }
|
|
1414
|
|
1415 # remove any unconventional transcript associations involving this gene
|
|
1416
|
|
1417 $sth =
|
|
1418 $self->prepare( "DELETE FROM unconventional_transcript_association "
|
|
1419 . "WHERE gene_id = ? " );
|
|
1420 $sth->bind_param( 1, $gene->dbID, SQL_INTEGER );
|
|
1421 $sth->execute();
|
|
1422 $sth->finish();
|
|
1423
|
|
1424 # remove this gene from the database
|
|
1425
|
|
1426 $sth = $self->prepare("DELETE FROM gene WHERE gene_id = ? ");
|
|
1427 $sth->bind_param( 1, $gene->dbID, SQL_INTEGER );
|
|
1428 $sth->execute();
|
|
1429 $sth->finish();
|
|
1430
|
|
1431 # unset the gene identifier and adaptor thereby flagging it as unstored
|
|
1432
|
|
1433 $gene->dbID(undef);
|
|
1434 $gene->adaptor(undef);
|
|
1435
|
|
1436 return;
|
|
1437 }
|
|
1438
|
|
1439
|
|
1440 =head2 get_Interpro_by_geneid
|
|
1441
|
|
1442 Arg [1] : String $gene_stable_id
|
|
1443 The stable ID of the gene to obtain
|
|
1444 Example : @i = @{
|
|
1445 $gene_adaptor->get_Interpro_by_geneid(
|
|
1446 $gene->stable_id() ) };
|
|
1447 Description: Gets interpro accession numbers by gene stable id. A hack really
|
|
1448 - we should have a much more structured system than this.
|
|
1449 Returntype : listref of strings (Interpro_acc:description)
|
|
1450 Exceptions : none
|
|
1451 Caller : domainview
|
|
1452 Status : Stable
|
|
1453
|
|
1454 =cut
|
|
1455
|
|
1456 sub get_Interpro_by_geneid {
|
|
1457 my ($self, $gene_stable_id) = @_;
|
|
1458
|
|
1459 my $sql = qq(
|
|
1460 SELECT i.interpro_ac,
|
|
1461 x.description
|
|
1462 FROM transcript t,
|
|
1463 translation tl,
|
|
1464 protein_feature pf,
|
|
1465 interpro i,
|
|
1466 xref x,
|
|
1467 gene g
|
|
1468 WHERE g.stable_id = ?
|
|
1469 AND t.gene_id = g.gene_id
|
|
1470 AND t.is_current = 1
|
|
1471 AND tl.transcript_id = t.transcript_id
|
|
1472 AND tl.translation_id = pf.translation_id
|
|
1473 AND i.id = pf.hit_name
|
|
1474 AND i.interpro_ac = x.dbprimary_acc);
|
|
1475
|
|
1476 my $sth = $self->prepare($sql);
|
|
1477
|
|
1478 $sth->bind_param( 1, $gene_stable_id, SQL_VARCHAR );
|
|
1479
|
|
1480 $sth->execute;
|
|
1481
|
|
1482 my @out;
|
|
1483 my %h;
|
|
1484 while( (my $arr = $sth->fetchrow_arrayref()) ) {
|
|
1485 if( $h{$arr->[0]} ) { next; }
|
|
1486 $h{$arr->[0]}=1;
|
|
1487 my $string = $arr->[0] .":".$arr->[1];
|
|
1488 push(@out,$string);
|
|
1489 }
|
|
1490
|
|
1491 return \@out;
|
|
1492 }
|
|
1493
|
|
1494
|
|
1495 =head2 update
|
|
1496
|
|
1497 Arg [1] : Bio::EnsEMBL::Gene $gene
|
|
1498 The gene to update
|
|
1499 Example : $gene_adaptor->update($gene);
|
|
1500 Description: Updates the type, analysis, display_xref, status, is_current and
|
|
1501 description of a gene in the database.
|
|
1502 Returntype : None
|
|
1503 Exceptions : thrown if the $gene is not a Bio::EnsEMBL::Gene
|
|
1504 Caller : general
|
|
1505 Status : Stable
|
|
1506
|
|
1507 =cut
|
|
1508
|
|
1509 sub update {
|
|
1510 my ($self, $gene) = @_;
|
|
1511 my $update = 0;
|
|
1512
|
|
1513 if ( !defined $gene || !ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) {
|
|
1514 throw("Must update a gene object, not a $gene");
|
|
1515 }
|
|
1516
|
|
1517 my $update_gene_sql = qq(
|
|
1518 UPDATE gene
|
|
1519 SET biotype = ?,
|
|
1520 analysis_id = ?,
|
|
1521 display_xref_id = ?,
|
|
1522 status = ?,
|
|
1523 description = ?,
|
|
1524 is_current = ?,
|
|
1525 canonical_transcript_id = ?,
|
|
1526 canonical_annotation = ?
|
|
1527 WHERE gene_id = ?
|
|
1528 );
|
|
1529
|
|
1530 my $display_xref = $gene->display_xref();
|
|
1531 my $display_xref_id;
|
|
1532
|
|
1533 if ( $display_xref && $display_xref->dbID() ) {
|
|
1534 $display_xref_id = $display_xref->dbID();
|
|
1535 } else {
|
|
1536 $display_xref_id = undef;
|
|
1537 }
|
|
1538
|
|
1539 my $sth = $self->prepare( $update_gene_sql );
|
|
1540
|
|
1541 $sth->bind_param( 1, $gene->biotype(), SQL_VARCHAR );
|
|
1542 $sth->bind_param( 2, $gene->analysis->dbID(), SQL_INTEGER );
|
|
1543 $sth->bind_param( 3, $display_xref_id, SQL_INTEGER );
|
|
1544 $sth->bind_param( 4, $gene->status(), SQL_VARCHAR );
|
|
1545 $sth->bind_param( 5, $gene->description(), SQL_VARCHAR );
|
|
1546 $sth->bind_param( 6, $gene->is_current(), SQL_TINYINT );
|
|
1547
|
|
1548 if ( defined( $gene->canonical_transcript() ) ) {
|
|
1549 $sth->bind_param( 7, $gene->canonical_transcript()->dbID(),
|
|
1550 SQL_INTEGER );
|
|
1551 } else {
|
|
1552 $sth->bind_param( 7, 0, SQL_INTEGER );
|
|
1553 }
|
|
1554
|
|
1555 $sth->bind_param( 8, $gene->canonical_annotation(), SQL_VARCHAR );
|
|
1556 $sth->bind_param( 9, $gene->dbID(), SQL_INTEGER );
|
|
1557
|
|
1558 $sth->execute();
|
|
1559
|
|
1560 # maybe should update stable id ???
|
|
1561 }
|
|
1562
|
|
1563
|
|
1564 # _objs_from_sth
|
|
1565
|
|
1566 # Arg [1] : StatementHandle $sth
|
|
1567 # Arg [2] : Bio::EnsEMBL::AssemblyMapper $mapper
|
|
1568 # Arg [3] : Bio::EnsEMBL::Slice $dest_slice
|
|
1569 # Description: PROTECTED implementation of abstract superclass method.
|
|
1570 # responsible for the creation of Genes
|
|
1571 # Returntype : listref of Bio::EnsEMBL::Genes in target coordinate system
|
|
1572 # Exceptions : none
|
|
1573 # Caller : internal
|
|
1574 # Status : Stable
|
|
1575
|
|
1576 sub _objs_from_sth {
|
|
1577 my ($self, $sth, $mapper, $dest_slice) = @_;
|
|
1578
|
|
1579 #
|
|
1580 # This code is ugly because an attempt has been made to remove as many
|
|
1581 # function calls as possible for speed purposes. Thus many caches and
|
|
1582 # a fair bit of gymnastics is used.
|
|
1583 #
|
|
1584
|
|
1585 my $sa = $self->db()->get_SliceAdaptor();
|
|
1586 my $aa = $self->db()->get_AnalysisAdaptor();
|
|
1587 my $dbEntryAdaptor = $self->db()->get_DBEntryAdaptor();
|
|
1588
|
|
1589 my @genes;
|
|
1590 my %analysis_hash;
|
|
1591 my %slice_hash;
|
|
1592 my %sr_name_hash;
|
|
1593 my %sr_cs_hash;
|
|
1594
|
|
1595 my (
|
|
1596 $gene_id, $seq_region_id,
|
|
1597 $seq_region_start, $seq_region_end,
|
|
1598 $seq_region_strand, $analysis_id,
|
|
1599 $biotype, $display_xref_id,
|
|
1600 $gene_description, $status,
|
|
1601 $source, $is_current,
|
|
1602 $canonical_transcript_id, $canonical_annotation,
|
|
1603 $stable_id, $version,
|
|
1604 $created_date, $modified_date,
|
|
1605 $xref_display_id, $xref_primary_acc,
|
|
1606 $xref_desc, $xref_version,
|
|
1607 $external_db, $external_status,
|
|
1608 $external_release, $external_db_name,
|
|
1609 $info_type, $info_text
|
|
1610 );
|
|
1611
|
|
1612 $sth->bind_columns(
|
|
1613 \(
|
|
1614 $gene_id, $seq_region_id,
|
|
1615 $seq_region_start, $seq_region_end,
|
|
1616 $seq_region_strand, $analysis_id,
|
|
1617 $biotype, $display_xref_id,
|
|
1618 $gene_description, $status,
|
|
1619 $source, $is_current,
|
|
1620 $canonical_transcript_id, $canonical_annotation,
|
|
1621 $stable_id, $version,
|
|
1622 $created_date, $modified_date,
|
|
1623 $xref_display_id, $xref_primary_acc,
|
|
1624 $xref_desc, $xref_version,
|
|
1625 $external_db, $external_status,
|
|
1626 $external_release, $external_db_name,
|
|
1627 $info_type, $info_text
|
|
1628 ) );
|
|
1629
|
|
1630 my $asm_cs;
|
|
1631 my $cmp_cs;
|
|
1632 my $asm_cs_vers;
|
|
1633 my $asm_cs_name;
|
|
1634 my $cmp_cs_vers;
|
|
1635 my $cmp_cs_name;
|
|
1636
|
|
1637 if($mapper) {
|
|
1638 $asm_cs = $mapper->assembled_CoordSystem();
|
|
1639 $cmp_cs = $mapper->component_CoordSystem();
|
|
1640 $asm_cs_name = $asm_cs->name();
|
|
1641 $asm_cs_vers = $asm_cs->version();
|
|
1642 $cmp_cs_name = $cmp_cs->name();
|
|
1643 $cmp_cs_vers = $cmp_cs->version();
|
|
1644 }
|
|
1645
|
|
1646 my $dest_slice_start;
|
|
1647 my $dest_slice_end;
|
|
1648 my $dest_slice_strand;
|
|
1649 my $dest_slice_length;
|
|
1650 my $dest_slice_sr_name;
|
|
1651 my $dest_slice_sr_id;
|
|
1652
|
|
1653 if($dest_slice) {
|
|
1654 $dest_slice_start = $dest_slice->start();
|
|
1655 $dest_slice_end = $dest_slice->end();
|
|
1656 $dest_slice_strand = $dest_slice->strand();
|
|
1657 $dest_slice_length = $dest_slice->length();
|
|
1658 $dest_slice_sr_name = $dest_slice->seq_region_name();
|
|
1659 $dest_slice_sr_id = $dest_slice->get_seq_region_id();
|
|
1660 }
|
|
1661
|
|
1662 FEATURE: while($sth->fetch()) {
|
|
1663 #get the analysis object
|
|
1664 my $analysis = $analysis_hash{$analysis_id} ||=
|
|
1665 $aa->fetch_by_dbID($analysis_id);
|
|
1666
|
|
1667 #need to get the internal_seq_region, if present
|
|
1668 $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
|
|
1669 my $slice = $slice_hash{"ID:".$seq_region_id};
|
|
1670
|
|
1671 if(!$slice) {
|
|
1672 $slice = $sa->fetch_by_seq_region_id($seq_region_id);
|
|
1673 $slice_hash{"ID:".$seq_region_id} = $slice;
|
|
1674 $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
|
|
1675 $sr_cs_hash{$seq_region_id} = $slice->coord_system();
|
|
1676 }
|
|
1677
|
|
1678 my $sr_name = $sr_name_hash{$seq_region_id};
|
|
1679 my $sr_cs = $sr_cs_hash{$seq_region_id};
|
|
1680
|
|
1681 #
|
|
1682 # remap the feature coordinates to another coord system
|
|
1683 # if a mapper was provided
|
|
1684 #
|
|
1685 if($mapper) {
|
|
1686
|
|
1687 if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) {
|
|
1688 ( $seq_region_id, $seq_region_start,
|
|
1689 $seq_region_end, $seq_region_strand )
|
|
1690 =
|
|
1691 $mapper->map( $sr_name, $seq_region_start, $seq_region_end,
|
|
1692 $seq_region_strand, $sr_cs, 1, $dest_slice);
|
|
1693
|
|
1694 } else {
|
|
1695
|
|
1696 ( $seq_region_id, $seq_region_start,
|
|
1697 $seq_region_end, $seq_region_strand )
|
|
1698 =
|
|
1699 $mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end,
|
|
1700 $seq_region_strand, $sr_cs );
|
|
1701 }
|
|
1702
|
|
1703 #skip features that map to gaps or coord system boundaries
|
|
1704 next FEATURE if(!defined($seq_region_id));
|
|
1705
|
|
1706 #get a slice in the coord system we just mapped to
|
|
1707 # if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
|
|
1708 $slice = $slice_hash{"ID:".$seq_region_id} ||=
|
|
1709 $sa->fetch_by_seq_region_id($seq_region_id);
|
|
1710 # } else {
|
|
1711 # $slice = $slice_hash{"NAME:$sr_name:$asm_cs_name:$asm_cs_vers"} ||=
|
|
1712 # $sa->fetch_by_region($asm_cs_name, $sr_name, undef, undef, undef,
|
|
1713 # $asm_cs_vers);
|
|
1714 # }
|
|
1715 }
|
|
1716
|
|
1717 #
|
|
1718 # If a destination slice was provided convert the coords.
|
|
1719 #
|
|
1720 if ( defined($dest_slice) ) {
|
|
1721 if ( $dest_slice_strand == 1 ) {
|
|
1722 # Positive strand.
|
|
1723
|
|
1724 $seq_region_start = $seq_region_start - $dest_slice_start + 1;
|
|
1725 $seq_region_end = $seq_region_end - $dest_slice_start + 1;
|
|
1726
|
|
1727 if ( $dest_slice->is_circular() ) {
|
|
1728 # Handle cicular chromosomes.
|
|
1729
|
|
1730 if ( $seq_region_start > $seq_region_end ) {
|
|
1731 # Looking at a feature overlapping the chromsome origin.
|
|
1732
|
|
1733 if ( $seq_region_end > $dest_slice_start ) {
|
|
1734 # Looking at the region in the beginning of the
|
|
1735 # chromosome.
|
|
1736 $seq_region_start -= $dest_slice->seq_region_length();
|
|
1737 }
|
|
1738
|
|
1739 if ( $seq_region_end < 0 ) {
|
|
1740 $seq_region_end += $dest_slice->seq_region_length();
|
|
1741 }
|
|
1742
|
|
1743 } else {
|
|
1744
|
|
1745 if ( $dest_slice_start > $dest_slice_end
|
|
1746 && $seq_region_end < 0 )
|
|
1747 {
|
|
1748 # Looking at the region overlapping the chromosome
|
|
1749 # origin and a feature which is at the beginning of the
|
|
1750 # chromosome.
|
|
1751 $seq_region_start += $dest_slice->seq_region_length();
|
|
1752 $seq_region_end += $dest_slice->seq_region_length();
|
|
1753 }
|
|
1754 }
|
|
1755
|
|
1756 } ## end if ( $dest_slice->is_circular...)
|
|
1757
|
|
1758 } else {
|
|
1759 # Negative strand.
|
|
1760
|
|
1761 if ( $dest_slice->is_circular()
|
|
1762 && $seq_region_start > $seq_region_end )
|
|
1763 {
|
|
1764 # Handle cicular chromosomes.
|
|
1765
|
|
1766 if ( $seq_region_end > $dest_slice_start ) {
|
|
1767 # Looking at the region in the beginning of the
|
|
1768 # chromosome.
|
|
1769 $seq_region_start = $dest_slice_end - $seq_region_end + 1;
|
|
1770 $seq_region_end =
|
|
1771 $seq_region_end -
|
|
1772 $dest_slice->seq_region_length -
|
|
1773 $dest_slice_start + 1;
|
|
1774 } else {
|
|
1775 my $tmp_seq_region_start = $seq_region_start;
|
|
1776 $seq_region_start =
|
|
1777 $dest_slice_end -
|
|
1778 $seq_region_end -
|
|
1779 $dest_slice->seq_region_length + 1;
|
|
1780 $seq_region_end =
|
|
1781 $dest_slice_end - $tmp_seq_region_start + 1;
|
|
1782 }
|
|
1783
|
|
1784 } else {
|
|
1785 # Non-circular chromosome.
|
|
1786
|
|
1787 my $tmp_seq_region_start = $seq_region_start;
|
|
1788 $seq_region_start = $dest_slice_end - $seq_region_end + 1;
|
|
1789 $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1;
|
|
1790 }
|
|
1791
|
|
1792 $seq_region_strand = -$seq_region_strand;
|
|
1793
|
|
1794 } ## end else [ if ( $dest_slice_strand...)]
|
|
1795
|
|
1796 # Throw away features off the end of the requested slice or on
|
|
1797 # different seq_region.
|
|
1798
|
|
1799 if ( $seq_region_end < 1
|
|
1800 || $seq_region_start > $dest_slice_length
|
|
1801 || ( $dest_slice_sr_id ne $seq_region_id ) )
|
|
1802 {
|
|
1803 next FEATURE;
|
|
1804 }
|
|
1805
|
|
1806 $slice = $dest_slice;
|
|
1807 } ## end if ( defined($dest_slice...))
|
|
1808
|
|
1809 my $display_xref;
|
|
1810
|
|
1811 if ($display_xref_id) {
|
|
1812 $display_xref = Bio::EnsEMBL::DBEntry->new_fast( {
|
|
1813 'dbID' => $display_xref_id,
|
|
1814 'adaptor' => $dbEntryAdaptor,
|
|
1815 'display_id' => $xref_display_id,
|
|
1816 'primary_id' => $xref_primary_acc,
|
|
1817 'version' => $xref_version,
|
|
1818 'description' => $xref_desc,
|
|
1819 'release' => $external_release,
|
|
1820 'dbname' => $external_db,
|
|
1821 'db_display_name' => $external_db_name,
|
|
1822 'info_type' => $info_type,
|
|
1823 'info_text' => $info_text
|
|
1824 } );
|
|
1825 $display_xref->status($external_status);
|
|
1826 }
|
|
1827
|
|
1828 # Finally, create the new Gene.
|
|
1829 push(
|
|
1830 @genes,
|
|
1831 $self->_create_feature_fast(
|
|
1832 'Bio::EnsEMBL::Gene',
|
|
1833 {
|
|
1834 'analysis' => $analysis,
|
|
1835 'biotype' => $biotype,
|
|
1836 'start' => $seq_region_start,
|
|
1837 'end' => $seq_region_end,
|
|
1838 'strand' => $seq_region_strand,
|
|
1839 'adaptor' => $self,
|
|
1840 'slice' => $slice,
|
|
1841 'dbID' => $gene_id,
|
|
1842 'stable_id' => $stable_id,
|
|
1843 'version' => $version,
|
|
1844 'created_date' => $created_date || undef,
|
|
1845 'modified_date' => $modified_date || undef,
|
|
1846 'description' => $gene_description,
|
|
1847 'external_name' => undef, # will use display_id
|
|
1848 # from display_xref
|
|
1849 'external_db' => $external_db,
|
|
1850 'external_status' => $external_status,
|
|
1851 'display_xref' => $display_xref,
|
|
1852 'status' => $status,
|
|
1853 'source' => $source,
|
|
1854 'is_current' => $is_current,
|
|
1855 'canonical_transcript_id' => $canonical_transcript_id,
|
|
1856 'canonical_annotation' => $canonical_annotation
|
|
1857 } ) );
|
|
1858
|
|
1859 }
|
|
1860
|
|
1861 return \@genes;
|
|
1862 }
|
|
1863
|
|
1864
|
|
1865 =head2 cache_gene_seq_mappings
|
|
1866
|
|
1867 Example : $gene_adaptor->cache_gene_seq_mappings();
|
|
1868 Description: caches all the assembly mappings needed for genes
|
|
1869 Returntype : None
|
|
1870 Exceptions : None
|
|
1871 Caller : general
|
|
1872 Status : At Risk
|
|
1873 : New experimental code
|
|
1874
|
|
1875 =cut
|
|
1876
|
|
1877 sub cache_gene_seq_mappings {
|
|
1878 my ($self) = @_;
|
|
1879
|
|
1880 # get the sequence level to map too
|
|
1881
|
|
1882 my $sql =
|
|
1883 'SELECT name '
|
|
1884 . 'FROM coord_system '
|
|
1885 . 'WHERE attrib like "%%sequence_level%%"'
|
|
1886 . 'AND species_id = ?';
|
|
1887
|
|
1888 my $sth = $self->prepare($sql);
|
|
1889 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
|
|
1890 $sth->execute();
|
|
1891
|
|
1892 my $sequence_level = $sth->fetchrow_array();
|
|
1893
|
|
1894 $sth->finish();
|
|
1895
|
|
1896 my $csa = $self->db->get_CoordSystemAdaptor();
|
|
1897 my $ama = $self->db->get_AssemblyMapperAdaptor();
|
|
1898
|
|
1899 my $cs1 = $csa->fetch_by_name($sequence_level);
|
|
1900
|
|
1901 # get level to map to two
|
|
1902
|
|
1903 my $mcc = $self->db->get_MetaCoordContainerAdaptor();
|
|
1904 my $csnew = $mcc->fetch_all_CoordSystems_by_feature_type('gene');
|
|
1905
|
|
1906 foreach my $cs2 (@$csnew) {
|
|
1907 my $am = $ama->fetch_by_CoordSystems( $cs1, $cs2 );
|
|
1908 $am->register_all();
|
|
1909 }
|
|
1910
|
|
1911 } ## end sub cache_gene_seq_mappings
|
|
1912
|
|
1913
|
|
1914 =head2 fetch_all_by_exon_supporting_evidence
|
|
1915
|
|
1916 Arg [1] : String $hit_name
|
|
1917 Name of supporting feature
|
|
1918 Arg [2] : String $feature_type
|
|
1919 one of "dna_align_feature" or "protein_align_feature"
|
|
1920 Arg [3] : (optional) Bio::Ensembl::Analysis
|
|
1921 Example : $genes = $gene_adaptor->fetch_all_by_exon_supporting_evidence(
|
|
1922 'XYZ', 'dna_align_feature');
|
|
1923 Description: Gets all the genes with transcripts with exons which have a
|
|
1924 specified hit on a particular type of feature. Optionally filter
|
|
1925 by analysis.
|
|
1926 Returntype : Listref of Bio::EnsEMBL::Gene
|
|
1927 Exceptions : If feature_type is not of correct type.
|
|
1928 Caller : general
|
|
1929 Status : Stable
|
|
1930
|
|
1931 =cut
|
|
1932
|
|
1933 sub fetch_all_by_exon_supporting_evidence {
|
|
1934 my ($self, $hit_name, $feature_type, $analysis) = @_;
|
|
1935
|
|
1936 if ($feature_type !~ /(dna)|(protein)_align_feature/) {
|
|
1937 throw("feature type must be dna_align_feature or protein_align_feature");
|
|
1938 }
|
|
1939
|
|
1940 my $anal_from = ", analysis a " if ($analysis);
|
|
1941 my $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? " if ($analysis);
|
|
1942
|
|
1943 my $sql = qq(
|
|
1944 SELECT DISTINCT(g.gene_id)
|
|
1945 FROM gene g,
|
|
1946 transcript t,
|
|
1947 exon_transcript et,
|
|
1948 supporting_feature sf,
|
|
1949 $feature_type f
|
|
1950 $anal_from
|
|
1951 WHERE g.gene_id = t.gene_id
|
|
1952 AND g.is_current = 1
|
|
1953 AND t.transcript_id = et.transcript_id
|
|
1954 AND et.exon_id = sf.exon_id
|
|
1955 AND sf.feature_id = f.${feature_type}_id
|
|
1956 AND sf.feature_type = ?
|
|
1957 AND f.hit_name=?
|
|
1958 $anal_where
|
|
1959 );
|
|
1960
|
|
1961 my $sth = $self->prepare($sql);
|
|
1962
|
|
1963 $sth->bind_param(1, $feature_type, SQL_VARCHAR);
|
|
1964 $sth->bind_param(2, $hit_name, SQL_VARCHAR);
|
|
1965 $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis);
|
|
1966
|
|
1967 $sth->execute();
|
|
1968
|
|
1969 my @genes;
|
|
1970
|
|
1971 while ( my $id = $sth->fetchrow_array ) {
|
|
1972 my $gene = $self->fetch_by_dbID($id);
|
|
1973 push(@genes, $gene) if $gene;
|
|
1974 }
|
|
1975
|
|
1976 return \@genes;
|
|
1977 }
|
|
1978
|
|
1979
|
|
1980 =head2 fetch_all_by_transcript_supporting_evidence
|
|
1981
|
|
1982 Arg [1] : String $hit_name
|
|
1983 Name of supporting feature
|
|
1984 Arg [2] : String $feature_type
|
|
1985 one of "dna_align_feature" or "protein_align_feature"
|
|
1986 Arg [3] : (optional) Bio::Ensembl::Analysis
|
|
1987 Example : $genes = $gene_adaptor->fetch_all_by_transcript_supporting_evidence('XYZ', 'dna_align_feature');
|
|
1988 Description: Gets all the genes with transcripts with evidence for a
|
|
1989 specified hit on a particular type of feature. Optionally filter
|
|
1990 by analysis.
|
|
1991 Returntype : Listref of Bio::EnsEMBL::Gene.
|
|
1992 Exceptions : If feature_type is not of correct type.
|
|
1993 Caller : general
|
|
1994 Status : Stable
|
|
1995
|
|
1996 =cut
|
|
1997
|
|
1998 sub fetch_all_by_transcript_supporting_evidence {
|
|
1999 my ($self, $hit_name, $feature_type, $analysis) = @_;
|
|
2000
|
|
2001 if($feature_type !~ /(dna)|(protein)_align_feature/) {
|
|
2002 throw("feature type must be dna_align_feature or protein_align_feature");
|
|
2003 }
|
|
2004
|
|
2005 my $anal_from = ", analysis a " if ($analysis);
|
|
2006 my $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? " if ($analysis);
|
|
2007
|
|
2008 my $sql = qq(
|
|
2009 SELECT DISTINCT(g.gene_id)
|
|
2010 FROM gene g,
|
|
2011 transcript t,
|
|
2012 transcript_supporting_feature sf,
|
|
2013 $feature_type f
|
|
2014 $anal_from
|
|
2015 WHERE g.gene_id = t.gene_id
|
|
2016 AND g.is_current = 1
|
|
2017 AND t.transcript_id = sf.transcript_id
|
|
2018 AND sf.feature_id = f.${feature_type}_id
|
|
2019 AND sf.feature_type = ?
|
|
2020 AND f.hit_name=?
|
|
2021 $anal_where
|
|
2022 );
|
|
2023
|
|
2024 my $sth = $self->prepare($sql);
|
|
2025
|
|
2026 $sth->bind_param(1, $feature_type, SQL_VARCHAR);
|
|
2027 $sth->bind_param(2, $hit_name, SQL_VARCHAR);
|
|
2028 $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis);
|
|
2029
|
|
2030 $sth->execute();
|
|
2031
|
|
2032 my @genes;
|
|
2033
|
|
2034 while( my $id = $sth->fetchrow_array ) {
|
|
2035 my $gene = $self->fetch_by_dbID($id);
|
|
2036 push(@genes, $gene) if $gene;
|
|
2037 }
|
|
2038
|
|
2039 return \@genes;
|
|
2040 }
|
|
2041
|
|
2042 =head2 fetch_nearest_Gene_by_Feature
|
|
2043
|
|
2044 Arg [1] : Feature object
|
|
2045 Example : $genes = $gene_adaptor->fetch_nearest_Gene_by_Feature($feat);
|
|
2046 Description: Gets the nearest gene to the feature
|
|
2047 Returntype : Listref of Bio::EnsEMBL::Gene, EMPTY list if no nearest
|
|
2048 Caller : general
|
|
2049 Status : UnStable
|
|
2050
|
|
2051 =cut
|
|
2052
|
|
2053 sub fetch_nearest_Gene_by_Feature{
|
|
2054 my $self = shift;
|
|
2055 my $feat = shift;
|
|
2056
|
|
2057 my $stranded = shift;
|
|
2058 my $stream = shift; # 1 up stream -1 downstream
|
|
2059 my @genes;
|
|
2060
|
|
2061
|
|
2062 my $strand = $feat->strand;
|
|
2063 if(defined($stream) and !$strand){
|
|
2064 warn("stream specified but feature has no strand so +ve strand will be used");
|
|
2065 $strand = 1;
|
|
2066 }
|
|
2067 my $min_dist = 999;
|
|
2068 my $gene_id = 0;
|
|
2069
|
|
2070 my $overlapping = $feat->get_overlapping_Genes();
|
|
2071
|
|
2072 return $overlapping if(defined(@{$overlapping}[0]));
|
|
2073
|
|
2074 my $seq_region_id = $feat->slice->adaptor->get_seq_region_id($feat->slice);
|
|
2075 my $start = ($feat->start + $feat->slice->start) -1;
|
|
2076 my $end = ($feat->end + $feat->slice->start) -1;
|
|
2077
|
|
2078
|
|
2079 my @gene_ids;
|
|
2080 if(!defined($stream) or $stream == 0){
|
|
2081
|
|
2082 my $sql1 = "select g.gene_id, (? - g.seq_region_end) as 'dist' from gene g where ";
|
|
2083 if($stranded){
|
|
2084 $sql1 .= "g.seq_region_strand = ".$strand." and ";
|
|
2085 }
|
|
2086 $sql1 .= "seq_region_id = ? and g.seq_region_end < ? order by dist limit 10";
|
|
2087
|
|
2088 #
|
|
2089 # MAYBE set the result of prepare to be static in case lots of calls.
|
|
2090 #
|
|
2091 my $sql1_sth = $self->prepare($sql1) || die "Could not prepare $sql1";
|
|
2092 $sql1_sth->execute($start, $seq_region_id, $start) || die "Could not execute sql";
|
|
2093 $sql1_sth->bind_columns(\$gene_id, \$min_dist) || die "Could mot bin columns";
|
|
2094
|
|
2095 my $last_dist = 99999999999999999;
|
|
2096 while($sql1_sth->fetch()){
|
|
2097 if($min_dist <= $last_dist){
|
|
2098 push @gene_ids, $gene_id;
|
|
2099 $last_dist = $min_dist;
|
|
2100 }
|
|
2101 }
|
|
2102 $sql1_sth->finish();
|
|
2103
|
|
2104
|
|
2105
|
|
2106 my $sql2 = "select g.gene_id, (g.seq_region_start - ?) as 'dist' from gene g where ";
|
|
2107 if($stranded){
|
|
2108 $sql2 .= "g.seq_region_strand = ".$feat->strand." and ";
|
|
2109 }
|
|
2110 $sql2 .= "seq_region_id = ? and g.seq_region_start > ? order by dist limit 10";
|
|
2111
|
|
2112 my $sql2_sth = $self->prepare($sql2) || die "could not prepare $sql2";
|
|
2113
|
|
2114 my ($tmp_min_dist, $tmp_gene_id);
|
|
2115 $sql2_sth->execute($end, $seq_region_id, $end) || die "Could not execute sql";
|
|
2116 $sql2_sth->bind_columns(\$tmp_gene_id, \$tmp_min_dist) || die "Could mot bin columns";
|
|
2117 my $first =1;
|
|
2118 while($sql2_sth->fetch()){
|
|
2119 if( $tmp_min_dist <= $last_dist){
|
|
2120 if($first){
|
|
2121 $first = 0;
|
|
2122 if($tmp_min_dist < $last_dist){
|
|
2123 @gene_ids = (); #reset
|
|
2124 }
|
|
2125 }
|
|
2126 push @gene_ids, $tmp_gene_id;
|
|
2127 $last_dist = $tmp_min_dist;
|
|
2128 }
|
|
2129 }
|
|
2130 $sql2_sth->finish();
|
|
2131
|
|
2132
|
|
2133 }
|
|
2134 elsif(($stream*$strand) == 1){
|
|
2135 my $sql1 = "select g.gene_id, (? - g.seq_region_end) as 'dist' from gene g where ";
|
|
2136 if($stranded){
|
|
2137 $sql1 .= "g.seq_region_strand = ".$strand." and ";
|
|
2138 }
|
|
2139 $sql1 .= "seq_region_id = ? and g.seq_region_end < ? order by dist limit 10";
|
|
2140
|
|
2141 #
|
|
2142 # MAYBE set the result of prepare to be static in case lots of calls.
|
|
2143 #
|
|
2144 my $sql1_sth = $self->prepare($sql1) || die "Could not prepare $sql1";
|
|
2145 $sql1_sth->execute($start, $seq_region_id, $start) || die "Could not execute sql";
|
|
2146 $sql1_sth->bind_columns(\$gene_id, \$min_dist) || die "Could mot bin columns";
|
|
2147
|
|
2148 my $last_dist;
|
|
2149 my $first = 1;
|
|
2150 while($sql1_sth->fetch()){
|
|
2151 if($first){
|
|
2152 $first = 0;
|
|
2153 }
|
|
2154 else{
|
|
2155 next if ($min_dist > $last_dist);
|
|
2156 }
|
|
2157 push @gene_ids, $gene_id;
|
|
2158 $last_dist = $min_dist;
|
|
2159 }
|
|
2160 $sql1_sth->finish();
|
|
2161 }
|
|
2162 elsif(($stream * $strand) == -1){
|
|
2163
|
|
2164 my $sql2 = "select g.gene_id, (g.seq_region_start - ?) as 'dist' from gene g where ";
|
|
2165 if($stranded){
|
|
2166 $sql2 .= "g.seq_region_strand = ".$feat->strand." and ";
|
|
2167 }
|
|
2168 $sql2 .= "seq_region_id = ? and g.seq_region_start > ? order by dist limit 10";
|
|
2169
|
|
2170 my $sql2_sth = $self->prepare($sql2) || die "could not prepare $sql2";
|
|
2171
|
|
2172 my ($tmp_min_dist, $tmp_gene_id);
|
|
2173 $sql2_sth->execute($end, $seq_region_id, $end) || die "Could not execute sql";
|
|
2174 $sql2_sth->bind_columns(\$tmp_gene_id, \$tmp_min_dist) || die "Could mot bin columns";
|
|
2175 my $first =1;
|
|
2176 my $last_dist;
|
|
2177 while($sql2_sth->fetch()){
|
|
2178 if($first){
|
|
2179 $first = 0;
|
|
2180 }
|
|
2181 else{
|
|
2182 next if ($tmp_min_dist > $last_dist);
|
|
2183 }
|
|
2184 push @gene_ids, $tmp_gene_id;
|
|
2185 $last_dist = $tmp_min_dist;
|
|
2186 }
|
|
2187 $sql2_sth->finish();
|
|
2188 }
|
|
2189 else{
|
|
2190 die "Invalid stream or strand must be -1, 0 or 1\n";
|
|
2191 }
|
|
2192
|
|
2193
|
|
2194
|
|
2195 foreach my $gene_id (@gene_ids){
|
|
2196 push @genes, $self->fetch_by_dbID($gene_id);
|
|
2197 }
|
|
2198 return \@genes;
|
|
2199
|
|
2200 }
|
|
2201
|
|
2202 ##########################
|
|
2203 # #
|
|
2204 # DEPRECATED METHODS #
|
|
2205 # #
|
|
2206 ##########################
|
|
2207
|
|
2208
|
|
2209 =head2 fetch_by_maximum_DBLink
|
|
2210
|
|
2211 DEPRECATED - use fetch_all_by_external_name instead
|
|
2212
|
|
2213 =cut
|
|
2214
|
|
2215 sub fetch_by_maximum_DBLink {
|
|
2216 my ($self, $external_id) = @_;
|
|
2217
|
|
2218 deprecate( "use fetch_all_by_external_name instead" );
|
|
2219
|
|
2220 my $genes=$self->fetch_all_by_external_name($external_id);
|
|
2221
|
|
2222 my $biggest;
|
|
2223 my $max = 0;
|
|
2224 my $size = scalar(@$genes);
|
|
2225 if ($size > 0) {
|
|
2226 foreach my $gene (@$genes) {
|
|
2227 my $size = scalar(@{$gene->get_all_Exons});
|
|
2228 if ($size > $max) {
|
|
2229 $biggest = $gene;
|
|
2230 $max = $size;
|
|
2231 }
|
|
2232 }
|
|
2233 return $biggest;
|
|
2234 }
|
|
2235 return;
|
|
2236 }
|
|
2237
|
|
2238
|
|
2239 =head2 get_display_xref
|
|
2240
|
|
2241 DEPRECATED use $gene->display_xref
|
|
2242
|
|
2243 =cut
|
|
2244
|
|
2245 sub get_display_xref {
|
|
2246 my ($self, $gene) = @_;
|
|
2247
|
|
2248 deprecate( "display xref should retrieved from Gene object directly" );
|
|
2249
|
|
2250 if ( !defined $gene ) {
|
|
2251 throw("Must call with a Gene object");
|
|
2252 }
|
|
2253
|
|
2254 my $sth = $self->prepare(qq(
|
|
2255 SELECT e.db_name,
|
|
2256 x.display_label,
|
|
2257 x.xref_id
|
|
2258 FROM gene g,
|
|
2259 xref x,
|
|
2260 external_db e
|
|
2261 WHERE g.gene_id = ?
|
|
2262 AND g.display_xref_id = x.xref_id
|
|
2263 AND x.external_db_id = e.external_db_id
|
|
2264 ));
|
|
2265
|
|
2266 $sth->bind_param(1, $gene->dbID, SQL_INTEGER);
|
|
2267 $sth->execute();
|
|
2268
|
|
2269 my ($db_name, $display_label, $xref_id) = $sth->fetchrow_array();
|
|
2270 if ( !defined $xref_id ) {
|
|
2271 return undef;
|
|
2272 }
|
|
2273
|
|
2274 my $db_entry = Bio::EnsEMBL::DBEntry->new(
|
|
2275 -dbid => $xref_id,
|
|
2276 -adaptor => $self->db->get_DBEntryAdaptor(),
|
|
2277 -dbname => $db_name,
|
|
2278 -display_id => $display_label
|
|
2279 );
|
|
2280
|
|
2281 return $db_entry;
|
|
2282 }
|
|
2283
|
|
2284
|
|
2285 =head2 get_description
|
|
2286
|
|
2287 DEPRECATED, use gene->get_description
|
|
2288
|
|
2289 =cut
|
|
2290
|
|
2291 sub get_description {
|
|
2292 my ($self, $dbID) = @_;
|
|
2293
|
|
2294 deprecate( "Gene description should be loaded on gene retrieval. Use gene->get_description()" );
|
|
2295
|
|
2296 if ( !defined $dbID ) {
|
|
2297 throw("must call with dbID");
|
|
2298 }
|
|
2299
|
|
2300 my $sth = $self->prepare("SELECT description
|
|
2301 FROM gene_description
|
|
2302 WHERE gene_id = ?");
|
|
2303
|
|
2304 $sth->bind_param(1, $dbID, SQL_INTEGER);
|
|
2305 $sth->execute();
|
|
2306
|
|
2307 my @array = $sth->fetchrow_array();
|
|
2308 return $array[0];
|
|
2309 }
|
|
2310
|
|
2311
|
|
2312 =head2 fetch_by_Peptide_id
|
|
2313
|
|
2314 DEPRECATED, use fetch_by_translation_stable_id()
|
|
2315
|
|
2316 =cut
|
|
2317
|
|
2318 sub fetch_by_Peptide_id {
|
|
2319 my ( $self, $translation_stable_id) = @_;
|
|
2320
|
|
2321 deprecate( "Please use better named fetch_by_translation_stable_id \n".
|
|
2322 caller(2) );
|
|
2323
|
|
2324 $self->fetch_by_translation_stable_id($translation_stable_id);
|
|
2325 }
|
|
2326
|
|
2327
|
|
2328 =head2 get_stable_entry_info
|
|
2329
|
|
2330 DEPRECATED use $gene->stable_id instead
|
|
2331
|
|
2332 =cut
|
|
2333
|
|
2334 sub get_stable_entry_info {
|
|
2335 my ($self,$gene) = @_;
|
|
2336
|
|
2337 deprecated("stable id info is loaded on default, no lazy loading necessary");
|
|
2338
|
|
2339 if ( !defined $gene || !ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) {
|
|
2340 throw("Needs a gene object, not a $gene");
|
|
2341 }
|
|
2342
|
|
2343 my $created_date = $self->db->dbc->from_date_to_seconds("created_date");
|
|
2344 my $modified_date = $self->db->dbc->from_date_to_seconds("modified_date");
|
|
2345
|
|
2346 my $sth =
|
|
2347 $self->prepare( "SELECT stable_id, "
|
|
2348 . $created_date . ","
|
|
2349 . $modified_date
|
|
2350 . ", version FROM gene WHERE gene_id = ?" );
|
|
2351
|
|
2352 $sth->bind_param(1, $gene->dbID, SQL_INTEGER);
|
|
2353 $sth->execute();
|
|
2354
|
|
2355 my @array = $sth->fetchrow_array();
|
|
2356 $gene->{'stable_id'} = $array[0];
|
|
2357 $gene->{'created'} = $array[1];
|
|
2358 $gene->{'modified'} = $array[2];
|
|
2359 $gene->{'version'} = $array[3];
|
|
2360
|
|
2361 return 1;
|
|
2362 }
|
|
2363
|
|
2364
|
|
2365 =head2 fetch_all_by_DBEntry
|
|
2366
|
|
2367 DEPRECATED - Use fetch_all_by_external_name instead
|
|
2368
|
|
2369 =cut
|
|
2370
|
|
2371 sub fetch_all_by_DBEntry {
|
|
2372 my $self = shift;
|
|
2373
|
|
2374 deprecate('Use fetch_all_by_external_name instead.');
|
|
2375
|
|
2376 return $self->fetch_all_by_external_name(@_);
|
|
2377 }
|
|
2378
|
|
2379
|
|
2380 1;
|
|
2381
|
|
2382
|