comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::DBSQL::GeneAdaptor - Database adaptor for the retrieval and
24 storage of Gene objects
25
26 =head1 SYNOPSIS
27
28 use Bio::EnsEMBL::Registry;
29
30 Bio::EnsEMBL::Registry->load_registry_from_db(
31 -host => 'ensembldb.ensembl.org',
32 -user => 'anonymous',
33 );
34
35 $gene_adaptor =
36 Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "gene" );
37
38 $gene = $gene_adaptor->fetch_by_dbID(1234);
39
40 $gene = $gene_adaptor->fetch_by_stable_id('ENSG00000184129');
41
42 @genes = @{ $gene_adaptor->fetch_all_by_external_name('BRCA2') };
43
44 $slice_adaptor =
45 Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "slice" );
46
47 $slice =
48 $slice_adaptor->fetch_by_region( 'chromosome', '1', 1, 1000000 );
49
50 @genes = @{ $gene_adaptor->fetch_all_by_Slice($slice) };
51
52 =head1 DESCRIPTION
53
54 This is a database aware adaptor for the retrieval and storage of gene
55 objects.
56
57 =head1 METHODS
58
59 =cut
60
61 package Bio::EnsEMBL::DBSQL::GeneAdaptor;
62
63 use strict;
64
65 use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning );
66 use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
67 use Bio::EnsEMBL::DBSQL::SliceAdaptor;
68 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
69 use Bio::EnsEMBL::DBSQL::DBAdaptor;
70 use Bio::EnsEMBL::Gene;
71
72 use vars '@ISA';
73 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor);
74
75
76 # _tables
77 # Arg [1] : none
78 # Description: PROTECTED implementation of superclass abstract method.
79 # Returns the names, aliases of the tables to use for queries.
80 # Returntype : list of listrefs of strings
81 # Exceptions : none
82 # Caller : internal
83 # Status : Stable
84
85 sub _tables {
86 return (
87 [ 'gene', 'g' ],
88 [ 'xref', 'x' ],
89 [ 'external_db', 'exdb' ] );
90 }
91
92
93 # _columns
94 # Arg [1] : none
95 # Example : none
96 # Description: PROTECTED implementation of superclass abstract method.
97 # Returns a list of columns to use for queries.
98 # Returntype : list of strings
99 # Exceptions : none
100 # Caller : internal
101 # Status : Stable
102
103 sub _columns {
104 my ($self) = @_;
105
106 my $created_date =
107 $self->db()->dbc()->from_date_to_seconds("g.created_date");
108 my $modified_date =
109 $self->db()->dbc()->from_date_to_seconds("g.modified_date");
110
111 return (
112 'g.gene_id', 'g.seq_region_id',
113 'g.seq_region_start', 'g.seq_region_end',
114 'g.seq_region_strand', 'g.analysis_id',
115 'g.biotype', 'g.display_xref_id',
116 'g.description', 'g.status',
117 'g.source', 'g.is_current',
118 'g.canonical_transcript_id', 'g.canonical_annotation',
119 'g.stable_id', 'g.version',
120 $created_date, $modified_date,
121 'x.display_label', 'x.dbprimary_acc',
122 'x.description', 'x.version',
123 'exdb.db_name', 'exdb.status',
124 'exdb.db_release', 'exdb.db_display_name',
125 'x.info_type', 'x.info_text'
126 );
127 } ## end sub _columns
128
129
130 sub _left_join {
131 return (
132 [ 'xref', "x.xref_id = g.display_xref_id" ],
133 [ 'external_db', "exdb.external_db_id = x.external_db_id" ] );
134 }
135
136
137 =head2 list_dbIDs
138
139 Example : @gene_ids = @{$gene_adaptor->list_dbIDs()};
140 Description: Gets an array of internal ids for all genes in the current db
141 Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region.
142 Returntype : Listref of Ints
143 Exceptions : none
144 Caller : general
145 Status : Stable
146
147 =cut
148
149 sub list_dbIDs {
150 my ($self,$ordered) = @_;
151
152 return $self->_list_dbIDs("gene",undef, $ordered);
153 }
154
155
156 =head2 list_stable_ids
157
158 Example : @stable_gene_ids = @{$gene_adaptor->list_stable_ids()};
159 Description: Gets an listref of stable ids for all genes in the current db
160 Returntype : reference to a list of strings
161 Exceptions : none
162 Caller : general
163 Status : Stable
164
165 =cut
166
167 sub list_stable_ids {
168 my ($self) = @_;
169
170 return $self->_list_dbIDs("gene", "stable_id");
171 }
172
173
174 sub list_seq_region_ids {
175 my $self = shift;
176
177 return $self->_list_seq_region_ids('gene');
178 }
179
180 =head2 fetch_by_display_label
181
182 Arg [1] : String $label - display label of gene to fetch
183 Example : my $gene = $geneAdaptor->fetch_by_display_label("BRCA2");
184 Description: Returns the gene which has the given display label or undef if
185 there is none. If there are more than 1, the gene on the
186 reference slice is reported or if none are on the reference,
187 the first one is reported.
188 Returntype : Bio::EnsEMBL::Gene
189 Exceptions : none
190 Caller : general
191 Status : Stable
192
193 =cut
194
195 sub fetch_by_display_label {
196 my $self = shift;
197 my $label = shift;
198
199 my $constraint = "x.display_label = ? AND g.is_current = 1";
200 $self->bind_param_generic_fetch($label,SQL_VARCHAR);
201 my @genes = @{ $self->generic_fetch($constraint) };
202 my $gene;
203 if (scalar(@genes) > 1) {
204 foreach my $gene_tmp (@genes) {
205 if ($gene_tmp->slice->is_reference) {
206 $gene = $gene_tmp;
207 }
208 last if ($gene);
209 }
210 if (!$gene) {
211 $gene = $genes[0];
212 }
213
214 } elsif (scalar(@genes) == 1) {
215 $gene = $genes[0];
216 }
217
218 return $gene;
219 }
220
221 =head2 fetch_all_by_display_label
222
223 Arg [1] : String $label - display label of genes to fetch
224 Example : my @genes = @{$geneAdaptor->fetch_all_by_display_label("PPP1R2P1")};
225 Description: Returns all genes which have the given display label or undef if
226 there are none.
227 Returntype : listref of Bio::EnsEMBL::Gene objects
228 Exceptions : none
229 Caller : general
230 Status : Stable
231
232 =cut
233
234 sub fetch_all_by_display_label {
235 my $self = shift;
236 my $label = shift;
237
238 my $constraint = "x.display_label = ? AND g.is_current = 1";
239 $self->bind_param_generic_fetch($label,SQL_VARCHAR);
240 my $genes = $self->generic_fetch($constraint) ;
241
242 return $genes;
243 }
244
245 =head2 fetch_by_stable_id
246
247 Arg [1] : String $id
248 The stable ID of the gene to retrieve
249 Example : $gene = $gene_adaptor->fetch_by_stable_id('ENSG00000148944');
250 Description: Retrieves a gene object from the database via its stable id.
251 The gene will be retrieved in its native coordinate system (i.e.
252 in the coordinate system it is stored in the database). It may
253 be converted to a different coordinate system through a call to
254 transform() or transfer(). If the gene or exon is not found
255 undef is returned instead.
256 Returntype : Bio::EnsEMBL::Gene or undef
257 Exceptions : if we cant get the gene in given coord system
258 Caller : general
259 Status : Stable
260
261 =cut
262
263 sub fetch_by_stable_id {
264 my ($self, $stable_id) = @_;
265
266 my $constraint = "g.stable_id = ? AND g.is_current = 1";
267 $self->bind_param_generic_fetch($stable_id,SQL_VARCHAR);
268 my ($gene) = @{ $self->generic_fetch($constraint) };
269
270 return $gene;
271 }
272
273
274
275 =head2 fetch_all_by_biotype
276
277 Arg [1] : String $biotype
278 listref of $biotypes
279 The biotype of the gene to retrieve. You can have as an argument a reference
280 to a list of biotypes
281 Example : $gene = $gene_adaptor->fetch_all_by_biotype('protein_coding');
282 $gene = $gene_adaptor->fetch_all_by_biotypes(['protein_coding', 'sRNA', 'miRNA']);
283 Description: Retrieves an array reference of gene objects from the database via its biotype or biotypes.
284 The genes will be retrieved in its native coordinate system (i.e.
285 in the coordinate system it is stored in the database). It may
286 be converted to a different coordinate system through a call to
287 transform() or transfer(). If the gene or exon is not found
288 undef is returned instead.
289 Returntype : listref of Bio::EnsEMBL::Gene
290 Exceptions : if we cant get the gene in given coord system
291 Caller : general
292 Status : Stable
293
294 =cut
295
296 sub fetch_all_by_biotype {
297 my ($self, $biotype) = @_;
298
299 if (!defined $biotype){
300 throw("Biotype or listref of biotypes expected");
301 }
302 my $constraint;
303 if (ref($biotype) eq 'ARRAY'){
304 $constraint = "g.biotype IN (";
305 foreach my $b (@{$biotype}){
306 $constraint .= "?,";
307 $self->bind_param_generic_fetch($b,SQL_VARCHAR);
308 }
309 chop($constraint); #remove last , from expression
310 $constraint .= ") and g.is_current = 1";
311
312 }
313 else{
314 $constraint = "g.biotype = ? and g.is_current = 1";
315 $self->bind_param_generic_fetch($biotype,SQL_VARCHAR);
316 }
317 my @genes = @{ $self->generic_fetch($constraint) };
318 return \@genes ;
319 }
320
321
322 sub fetch_all {
323 my ($self) = @_;
324
325 my $constraint = 'g.biotype != "LRG_gene" and g.is_current = 1';
326 my @genes = @{ $self->generic_fetch($constraint) };
327 return \@genes ;
328 }
329
330
331 =head2 fetch_all_versions_by_stable_id
332
333 Arg [1] : String $stable_id
334 The stable ID of the gene to retrieve
335 Example : $gene = $gene_adaptor->fetch_all_versions_by_stable_id
336 ('ENSG00000148944');
337 Description : Similar to fetch_by_stable_id, but retrieves all versions of a
338 gene stored in the database.
339 Returntype : listref of Bio::EnsEMBL::Gene
340 Exceptions : if we cant get the gene in given coord system
341 Caller : general
342 Status : At Risk
343
344 =cut
345
346 sub fetch_all_versions_by_stable_id {
347 my ($self, $stable_id) = @_;
348
349 my $constraint = "g.stable_id = ?";
350 $self->bind_param_generic_fetch($stable_id,SQL_VARCHAR);
351 return $self->generic_fetch($constraint);
352 }
353
354
355 =head2 fetch_by_exon_stable_id
356
357 Arg [1] : String $id
358 The stable id of an exon of the gene to retrieve
359 Example : $gene = $gene_adptr->fetch_by_exon_stable_id('ENSE00000148944');
360 Description: Retrieves a gene object from the database via an exon stable id.
361 The gene will be retrieved in its native coordinate system (i.e.
362 in the coordinate system it is stored in the database). It may
363 be converted to a different coordinate system through a call to
364 transform() or transfer(). If the gene or exon is not found
365 undef is returned instead.
366 Returntype : Bio::EnsEMBL::Gene or undef
367 Exceptions : none
368 Caller : general
369 Status : Stable
370
371 =cut
372
373 sub fetch_by_exon_stable_id {
374 my ($self, $stable_id, $version) = @_;
375
376 my $sql = qq(
377 SELECT t.gene_id
378 FROM transcript as t,
379 exon_transcript as et,
380 exon as e
381 WHERE t.transcript_id = et.transcript_id
382 AND et.exon_id = e.exon_id
383 AND e.stable_id = ?
384 AND e.is_current = 1
385 );
386
387 my $sth = $self->prepare($sql);
388 $sth->bind_param(1, $stable_id, SQL_VARCHAR);
389 $sth->execute();
390
391 my ($dbID) = $sth->fetchrow_array();
392
393 return undef if(!defined($dbID));
394
395 my $gene = $self->fetch_by_dbID($dbID);
396
397 return $gene;
398 }
399
400
401 =head2 fetch_all_by_domain
402
403 Arg [1] : String $domain
404 The domain to fetch genes from
405 Example : my @genes = @{ $gene_adaptor->fetch_all_by_domain($domain) };
406 Description: Retrieves a listref of genes whose translation contain interpro
407 domain $domain. The genes are returned in their native coord
408 system (i.e. the coord_system they are stored in). If the coord
409 system needs to be changed, then tranform or transfer should be
410 called on the individual objects returned.
411 Returntype : list of Bio::EnsEMBL::Genes
412 Exceptions : none
413 Caller : domainview
414 Status : Stable
415
416 =cut
417
418 sub fetch_all_by_domain {
419 my ($self, $domain) = @_;
420
421 throw("domain argument is required") unless ($domain);
422
423 my $sth = $self->prepare(qq(
424 SELECT tr.gene_id
425 FROM interpro i,
426 protein_feature pf,
427 transcript tr,
428 translation tl,
429 seq_region sr,
430 coord_system cs
431 WHERE cs.species_id = ?
432 AND cs.coord_system_id = sr.coord_system_id
433 AND sr.seq_region_id = tr.seq_region_id
434 AND tr.is_current = 1
435 AND tr.transcript_id = tl.transcript_id
436 AND tl.translation_id = pf.translation_id
437 AND pf.hit_name = i.id
438 AND i.interpro_ac = ?
439 GROUP BY tr.gene_id));
440
441 $sth->bind_param( 1, $self->species_id(), SQL_VARCHAR );
442 $sth->bind_param( 2, $domain, SQL_VARCHAR );
443
444 $sth->execute();
445
446 my @array = @{$sth->fetchall_arrayref()};
447 $sth->finish();
448
449 my @gene_ids = map {$_->[0]} @array;
450
451 return $self->fetch_all_by_dbID_list(\@gene_ids);
452 }
453
454
455
456 =head2 fetch_all_by_Slice_and_external_dbname_link
457
458 Arg [1] : Bio::EnsEMBL::Slice $slice
459 The slice to fetch genes on.
460 Arg [2] : (optional) string $logic_name
461 the logic name of the type of features to obtain
462 Arg [3] : (optional) boolean $load_transcripts
463 if true, transcripts will be loaded immediately
464 rather than lazy loaded later.
465 Arg [4] : Name of the external database
466 Example : @genes = @{
467 $ga->fetch_all_by_Slice_and_external_dbname_link(
468 $slice, undef, undef, "HUGO" ) };
469 Description: Overrides superclass method to optionally load
470 transcripts immediately rather than lazy-loading them
471 later. This is more efficient when there are a lot
472 of genes whose transcripts are going to be used. The
473 genes are then filtered to return only those with
474 external database links of the type specified
475 Returntype : reference to list of genes
476 Exceptions : thrown if exon cannot be placed on transcript slice
477 Caller :
478 Status : Stable
479
480 =cut
481
482 sub fetch_all_by_Slice_and_external_dbname_link {
483 my ( $self, $slice, $logic_name, $load_transcripts, $db_name ) = @_;
484
485 # Get the external_db_id(s) from the name.
486 my $sth = $self->prepare(
487 "SELECT external_db_id FROM external_db WHERE db_name = ?");
488
489 $sth->bind_param( 1, $db_name, SQL_VARCHAR );
490 $sth->execute();
491
492 my $external_db_id;
493 $sth->bind_columns( \$external_db_id );
494
495 my @external_db_ids;
496 while ( $sth->fetch() ) {
497 push( @external_db_ids, $external_db_id );
498 }
499
500 if ( scalar(@external_db_ids) == 0 ) {
501 warn sprintf( "Could not find external database "
502 . "'%s' in the external_db table\n"
503 . "Available are:\n",
504 $db_name );
505
506 $sth = $self->prepare("SELECT DISTINCT db_name FROM external_db");
507
508 $sth->execute();
509 $sth->bind_columns( \$external_db_id );
510
511 while ( $sth->fetch() ) {
512 warn "\t$external_db_id\n";
513 }
514 return [];
515 }
516
517 # Get the gene_ids for those with links.
518 my $dbe_adaptor = $self->db()->get_DBEntryAdaptor();
519
520 my %linked_genes;
521 foreach $external_db_id (@external_db_ids) {
522 my @linked_genes =
523 $dbe_adaptor->list_gene_ids_by_external_db_id($external_db_id);
524
525 foreach my $gene_id (@linked_genes) {
526 $linked_genes{$gene_id} = 1;
527 }
528 }
529
530 # Get all the genes on the slice.
531 my $genes = $self->SUPER::fetch_all_by_Slice_constraint( $slice,
532 'g.is_current = 1', $logic_name );
533
534 # Create a list of those that are in the gene_ids list.
535 my @genes_passed;
536 foreach my $gene (@$genes) {
537 if ( exists( $linked_genes{ $gene->dbID() } ) ) {
538 push( @genes_passed, $gene );
539 }
540 }
541
542 # Return the list of those that passed.
543 return \@genes_passed;
544 } ## end sub fetch_all_by_Slice_and_external_dbname_link
545
546 =head2 fetch_all_by_Slice
547
548 Arg [1] : Bio::EnsEMBL::Slice $slice
549 The slice to fetch genes on.
550 Arg [2] : (optional) string $logic_name
551 the logic name of the type of features to obtain
552 Arg [3] : (optional) boolean $load_transcripts
553 if true, transcripts will be loaded immediately rather than
554 lazy loaded later.
555 Arg [4] : (optional) string $source
556 the source name of the features to obtain.
557 Arg [5] : (optional) string biotype
558 the biotype of the features to obtain.
559 Example : @genes = @{$gene_adaptor->fetch_all_by_Slice()};
560 Description: Overrides superclass method to optionally load transcripts
561 immediately rather than lazy-loading them later. This
562 is more efficient when there are a lot of genes whose
563 transcripts are going to be used.
564 Returntype : reference to list of genes
565 Exceptions : thrown if exon cannot be placed on transcript slice
566 Caller : Slice::get_all_Genes
567 Status : Stable
568
569 =cut
570
571 sub fetch_all_by_Slice {
572 my ( $self, $slice, $logic_name, $load_transcripts, $source,
573 $biotype ) = @_;
574
575 my $constraint = 'g.is_current = 1';
576
577 if ( defined($source) ) {
578 $constraint .= " and g.source = '$source'";
579 }
580 if ( defined($biotype) ) {
581 $constraint .= " and g.biotype = '$biotype'";
582 }
583
584 my $genes =
585 $self->SUPER::fetch_all_by_Slice_constraint( $slice, $constraint,
586 $logic_name );
587
588 # If there are less than two genes, still do lazy-loading.
589 if ( !$load_transcripts || @$genes < 2 ) {
590 return $genes;
591 }
592
593 # Preload all of the transcripts now, instead of lazy loading later,
594 # faster than one query per transcript.
595
596 # First check if transcripts are already preloaded.
597 # FIXME: Should check all transcripts.
598 if ( exists( $genes->[0]->{'_transcript_array'} ) ) {
599 return $genes;
600 }
601
602 # Get extent of region spanned by transcripts.
603 my ( $min_start, $max_end );
604 foreach my $g (@$genes) {
605 if ( !defined($min_start) || $g->seq_region_start() < $min_start ) {
606 $min_start = $g->seq_region_start();
607 }
608 if ( !defined($max_end) || $g->seq_region_end() > $max_end ) {
609 $max_end = $g->seq_region_end();
610 }
611 }
612
613 my $ext_slice;
614
615 if ( $min_start >= $slice->start() && $max_end <= $slice->end() ) {
616 $ext_slice = $slice;
617 } else {
618 my $sa = $self->db()->get_SliceAdaptor();
619 $ext_slice = $sa->fetch_by_region(
620 $slice->coord_system->name(), $slice->seq_region_name(),
621 $min_start, $max_end,
622 $slice->strand(), $slice->coord_system->version() );
623 }
624
625 # Associate transcript identifiers with genes.
626
627 my %g_hash = map { $_->dbID => $_ } @{$genes};
628
629 my $g_id_str = join( ',', keys(%g_hash) );
630
631 my $sth =
632 $self->prepare( "SELECT gene_id, transcript_id "
633 . "FROM transcript "
634 . "WHERE gene_id IN ($g_id_str)" );
635
636 $sth->execute();
637
638 my ( $g_id, $tr_id );
639 $sth->bind_columns( \( $g_id, $tr_id ) );
640
641 my %tr_g_hash;
642
643 while ( $sth->fetch() ) {
644 $tr_g_hash{$tr_id} = $g_hash{$g_id};
645 }
646
647 my $ta = $self->db()->get_TranscriptAdaptor();
648 my $transcripts = $ta->fetch_all_by_Slice(
649 $ext_slice,
650 1, undef,
651 sprintf( "t.transcript_id IN (%s)",
652 join( ',', sort { $a <=> $b } keys(%tr_g_hash) ) ) );
653
654 # Move transcripts onto gene slice, and add them to genes.
655 foreach my $tr ( @{$transcripts} ) {
656 if ( !exists( $tr_g_hash{ $tr->dbID() } ) ) { next }
657
658 my $new_tr;
659 if ( $slice != $ext_slice ) {
660 $new_tr = $tr->transfer($slice);
661 if ( !defined($new_tr) ) {
662 throw("Unexpected. "
663 . "Transcript could not be transfered onto Gene slice." );
664 }
665 } else {
666 $new_tr = $tr;
667 }
668
669 $tr_g_hash{ $tr->dbID() }->add_Transcript($new_tr);
670 }
671
672 return $genes;
673 } ## end sub fetch_all_by_Slice
674
675 =head2 fetch_by_transcript_id
676
677 Arg [1] : Int $trans_id
678 Unique database identifier for the transcript whose gene should
679 be retrieved. The gene is returned in its native coord
680 system (i.e. the coord_system it is stored in). If the coord
681 system needs to be changed, then tranform or transfer should
682 be called on the returned object. undef is returned if the
683 gene or transcript is not found in the database.
684 Example : $gene = $gene_adaptor->fetch_by_transcript_id(1241);
685 Description: Retrieves a gene from the database via the database identifier
686 of one of its transcripts.
687 Returntype : Bio::EnsEMBL::Gene
688 Exceptions : none
689 Caller : general
690 Status : Stable
691
692 =cut
693
694 sub fetch_by_transcript_id {
695 my ($self, $trans_id) = @_;
696
697 # this is a cheap SQL call
698 my $sth = $self->prepare(qq(
699 SELECT tr.gene_id
700 FROM transcript tr
701 WHERE tr.transcript_id = ?
702 ));
703
704 $sth->bind_param(1, $trans_id, SQL_INTEGER);
705 $sth->execute();
706
707 my ($geneid) = $sth->fetchrow_array();
708
709 $sth->finish();
710
711 return undef if( !defined $geneid );
712
713 my $gene = $self->fetch_by_dbID($geneid);
714 return $gene;
715 }
716
717
718 =head2 fetch_by_transcript_stable_id
719
720 Arg [1] : string $trans_stable_id
721 transcript stable ID whose gene should be retrieved
722 Example : my $gene = $gene_adaptor->fetch_by_transcript_stable_id
723 ('ENST0000234');
724 Description: Retrieves a gene from the database via the stable ID of one of
725 its transcripts
726 Returntype : Bio::EnsEMBL::Gene
727 Exceptions : none
728 Caller : general
729 Status : Stable
730
731 =cut
732
733 sub fetch_by_transcript_stable_id {
734 my ($self, $trans_stable_id) = @_;
735
736 my $sth = $self->prepare(qq(
737 SELECT gene_id
738 FROM transcript
739 WHERE stable_id = ?
740 AND is_current = 1
741 ));
742
743 $sth->bind_param(1, $trans_stable_id, SQL_VARCHAR);
744 $sth->execute();
745
746 my ($geneid) = $sth->fetchrow_array();
747 $sth->finish;
748
749 return undef if (!defined $geneid);
750
751 my $gene = $self->fetch_by_dbID($geneid);
752 return $gene;
753 }
754
755
756 =head2 fetch_by_translation_stable_id
757
758 Arg [1] : String $translation_stable_id
759 The stable id of a translation of the gene to be obtained
760 Example : my $gene = $gene_adaptor->fetch_by_translation_stable_id
761 ('ENSP00000278194');
762 Description: Retrieves a gene via the stable id of one of its translations.
763 Returntype : Bio::EnsEMBL::Gene
764 Exceptions : none
765 Caller : general
766 Status : Stable
767
768 =cut
769
770 sub fetch_by_translation_stable_id {
771 my ($self, $translation_stable_id) = @_;
772
773 my $sth = $self->prepare(qq(
774 SELECT tr.gene_id
775 FROM transcript tr,
776 translation tl
777 WHERE tl.stable_id = ?
778 AND tr.transcript_id = tl.transcript_id
779 AND tr.is_current = 1
780 ));
781
782 $sth->bind_param(1, $translation_stable_id, SQL_VARCHAR);
783 $sth->execute();
784
785 my ($geneid) = $sth->fetchrow_array();
786 $sth->finish;
787 if( !defined $geneid ) {
788 return undef;
789 }
790 return $self->fetch_by_dbID($geneid);
791 }
792
793
794
795
796 =head2 fetch_all_by_external_name
797
798 Arg [1] : String $external_name
799 The external identifier for the gene to be obtained
800 Arg [2] : (optional) String $external_db_name
801 The name of the external database from which the
802 identifier originates.
803 Arg [3] : Boolean override. Force SQL regex matching for users
804 who really do want to find all 'NM%'
805 Example : @genes = @{$gene_adaptor->fetch_all_by_external_name('BRCA2')}
806 @many_genes = @{$gene_adaptor->fetch_all_by_external_name('BRCA%')}
807 Description: Retrieves a list of genes with an external database
808 identifier $external_name. The genes returned are in
809 their native coordinate system, i.e. in the coordinate
810 system they are stored in the database in. If another
811 coordinate system is required then the Gene::transfer or
812 Gene::transform method can be used.
813 SQL wildcards % and _ are supported in the $external_name,
814 but their use is somewhat restricted for performance reasons.
815 Users that really do want % and _ in the first three characters
816 should use argument 3 to prevent optimisations
817 Returntype : listref of Bio::EnsEMBL::Gene
818 Exceptions : none
819 Caller : goview, general
820 Status : Stable
821
822 =cut
823
824 sub fetch_all_by_external_name {
825 my ( $self, $external_name, $external_db_name, $override ) = @_;
826
827 my $entryAdaptor = $self->db->get_DBEntryAdaptor();
828
829 my @ids =
830 $entryAdaptor->list_gene_ids_by_extids( $external_name,
831 $external_db_name, $override );
832
833 my %genes_by_dbIDs =
834 map { $_->dbID(), $_ } @{ $self->fetch_all_by_dbID_list( \@ids ) };
835
836 my @result = map { $genes_by_dbIDs{$_} } @ids;
837
838 return \@result;
839 }
840
841 =head2 fetch_all_by_GOTerm
842
843 Arg [1] : Bio::EnsEMBL::OntologyTerm
844 The GO term for which genes should be fetched.
845
846 Example: @genes = @{
847 $gene_adaptor->fetch_all_by_GOTerm(
848 $go_adaptor->fetch_by_accession('GO:0030326') ) };
849
850 Description : Retrieves a list of genes that are associated with
851 the given GO term, or with any of its descendent
852 GO terms. The genes returned are in their native
853 coordinate system, i.e. in the coordinate system
854 in which they are stored in the database. If
855 another coordinate system is required then the
856 Gene::transfer or Gene::transform method can be
857 used.
858
859 Return type : listref of Bio::EnsEMBL::Gene
860 Exceptions : Throws of argument is not a GO term
861 Caller : general
862 Status : Stable
863
864 =cut
865
866 sub fetch_all_by_GOTerm {
867 my ( $self, $term ) = @_;
868
869 assert_ref( $term, 'Bio::EnsEMBL::OntologyTerm' );
870 if ( $term->ontology() ne 'GO' ) {
871 throw('Argument is not a GO term');
872 }
873
874 my $entryAdaptor = $self->db->get_DBEntryAdaptor();
875
876 my %unique_dbIDs;
877 foreach my $accession ( map { $_->accession() }
878 ( $term, @{ $term->descendants() } ) )
879 {
880 my @ids =
881 $entryAdaptor->list_gene_ids_by_extids( $accession, 'GO' );
882 foreach my $dbID (@ids) { $unique_dbIDs{$dbID} = 1 }
883 }
884
885 my @result = @{
886 $self->fetch_all_by_dbID_list(
887 [ sort { $a <=> $b } keys(%unique_dbIDs) ]
888 ) };
889
890 return \@result;
891 } ## end sub fetch_all_by_GOTerm
892
893 =head2 fetch_all_by_GOTerm_accession
894
895 Arg [1] : String
896 The GO term accession for which genes should be
897 fetched.
898
899 Example :
900
901 @genes =
902 @{ $gene_adaptor->fetch_all_by_GOTerm_accession(
903 'GO:0030326') };
904
905 Description : Retrieves a list of genes that are associated with
906 the given GO term, or with any of its descendent
907 GO terms. The genes returned are in their native
908 coordinate system, i.e. in the coordinate system
909 in which they are stored in the database. If
910 another coordinate system is required then the
911 Gene::transfer or Gene::transform method can be
912 used.
913
914 Return type : listref of Bio::EnsEMBL::Gene
915 Exceptions : Throws of argument is not a GO term accession
916 Caller : general
917 Status : Stable
918
919 =cut
920
921 sub fetch_all_by_GOTerm_accession {
922 my ( $self, $accession ) = @_;
923
924 if ( $accession !~ /^GO:/ ) {
925 throw('Argument is not a GO term accession');
926 }
927
928 my $goAdaptor =
929 Bio::EnsEMBL::Registry->get_adaptor( 'Multi', 'Ontology',
930 'OntologyTerm' );
931
932 my $term = $goAdaptor->fetch_by_accession($accession);
933
934 return $self->fetch_all_by_GOTerm($term);
935 }
936
937 =head2 fetch_all_alt_alleles
938
939 Arg [1] : Bio::EnsEMBL::Gene $gene
940 The gene to fetch alternative alleles for
941 Example : my @alt_genes = @{ $gene_adaptor->fetch_all_alt_alleles($gene) };
942 foreach my $alt_gene (@alt_genes) {
943 print "Alternate allele: " . $alt_gene->stable_id() . "\n" ;
944 }
945 Description: Retrieves genes which are alternate alleles to a provided gene.
946 Alternate alleles in Ensembl are genes which are similar and are
947 on an alternative haplotype of the same region. There are not
948 currently very many of these. This method will return a
949 reference to an empty list if no alternative alleles are found.
950 Returntype : listref of Bio::EnsEMBL::Genes
951 Exceptions : throw if incorrect arg provided
952 warning if gene arg does not have dbID
953 Caller : Gene::get_all_alt_alleles
954 Status : Stable
955
956 =cut
957
958 sub fetch_all_alt_alleles {
959 my $self = shift;
960 my $gene = shift;
961
962 if(!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) {
963 throw('Bio::EnsEMBL::Gene argument is required');
964 }
965
966 my $gene_id = $gene->dbID();
967
968 if(!$gene_id) {
969 warning('Cannot retrieve alternate alleles for gene without dbID');
970 return [];
971 }
972
973 my $sth = $self->prepare("SELECT aa1.gene_id " .
974 "FROM alt_allele aa1, alt_allele aa2 " .
975 "WHERE aa1.alt_allele_id = aa2.alt_allele_id " .
976 "AND aa2.gene_id = ? " .
977 "AND aa1.gene_id <> ?");
978
979 $sth->bind_param(1, $gene_id, SQL_INTEGER);
980 $sth->bind_param(2, $gene_id, SQL_INTEGER);
981 $sth->execute();
982
983 my @alt_ids;
984 my $row;
985 while($row = $sth->fetchrow_arrayref()) {
986 push @alt_ids, $row->[0];
987 }
988 $sth->finish();
989
990 if (@alt_ids) {
991 return $self->fetch_all_by_dbID_list(\@alt_ids);
992 }
993
994 return [];
995 }
996
997 sub is_ref{
998 my ( $self, $gene_id) = @_;
999 my $is_not_ref;
1000
1001 # easier to find if it is not an alt_Allele do this and then negate it.
1002 my $sth = $self->prepare("select count(1) from alt_allele where gene_id = $gene_id and is_ref = 0");
1003 $sth->execute();
1004 $sth->bind_columns(\$is_not_ref);
1005 $sth->fetch;
1006
1007 if(defined($is_not_ref) and $is_not_ref){
1008 return 0;
1009 }
1010
1011 return 1;
1012 }
1013
1014
1015 =head2 store_alt_alleles
1016
1017
1018 Arg [1] : reference to list of Bio::EnsEMBL::Genes $genes
1019 Example : $gene_adaptor->store_alt_alleles([$gene1, $gene2, $gene3]);
1020 Description: This method creates a group of alternative alleles (i.e. locus)
1021 from a set of genes. The genes should be genes from alternate
1022 haplotypes which are similar. The genes must already be stored
1023 in this database.
1024 Returntype : int alt_allele_id or undef if no alt_alleles were stored
1025 Exceptions : throw on incorrect arguments
1026 throw on sql error (e.g. duplicate unique id)
1027 Caller : general
1028 Status : Stable
1029
1030 =cut
1031
1032 sub store_alt_alleles {
1033 my $self = shift;
1034 my $genes = shift;
1035
1036 if(!ref($genes) eq 'ARRAY') {
1037 throw('List reference of Bio::EnsEMBL::Gene argument expected.');
1038 }
1039
1040 my @genes = @$genes;
1041 my $num_genes = scalar(@genes);
1042
1043 if($num_genes < 2) {
1044 warning('At least 2 genes must be provided to construct alternative alleles (gene id: '. $genes[0]->dbID() .'). Ignoring.');
1045 return;
1046 }
1047
1048 my @is_ref;
1049 my @ref_genes = ();
1050 my @non_ref_genes = ();
1051 my @gene_ids = ();
1052
1053 foreach my $gene (@genes) {
1054
1055 if(!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) {
1056 throw('List reference of Bio::EnsEMBL::Gene argument expected.');
1057 }
1058
1059 my $gene_id = $gene->dbID();
1060
1061 if (!$gene_id) {
1062 throw('Genes must have dbIDs in order to construct alternative alleles.');
1063 } else {
1064 push @gene_ids, $gene_id;
1065 }
1066
1067 my $is_ref = $gene->slice->is_reference();
1068
1069 push @is_ref, $is_ref;
1070
1071 if ($is_ref) {
1072 push @ref_genes, $gene->dbID();
1073 } else {
1074 push @non_ref_genes, $gene->dbID();
1075 }
1076 }
1077 if (scalar(@ref_genes) > 1) {
1078 warning('More than one alternative allele on the reference sequence (gene ids: ' . join(',',@ref_genes) . '). Ignoring.');
1079 return;
1080 }
1081
1082 #
1083 #insert the first gene seperately in order to get a unique identifier for
1084 #the set of alleles
1085 #
1086
1087 my $sth = $self->prepare("INSERT INTO alt_allele (gene_id, is_ref) VALUES (?,?)");
1088 $sth->bind_param(1, $gene_ids[0], SQL_INTEGER);
1089 $sth->bind_param(2, $is_ref[0], SQL_INTEGER);
1090 eval {
1091 $sth->execute();
1092 };
1093 my $alt_allele_id = $sth->{'mysql_insertid'};
1094
1095 if (!$alt_allele_id || $@) {
1096 throw("An SQL error occured inserting alternative alleles:\n$@");
1097 }
1098 $sth->finish();
1099 #
1100 # Insert all subsequent alt alleles using the alt_allele identifier
1101 # from the first insert
1102 #
1103
1104 $sth = $self->prepare("INSERT INTO alt_allele (alt_allele_id, gene_id, is_ref) " .
1105 "VALUES (?,?,?)");
1106
1107 for (my $i = 1; $i < $num_genes; $i++) {
1108
1109 $sth->bind_param(1, $alt_allele_id, SQL_INTEGER);
1110 $sth->bind_param(2, $gene_ids[$i], SQL_INTEGER);
1111 $sth->bind_param(3, $is_ref[$i], SQL_INTEGER);
1112 eval {
1113 $sth->execute();
1114 };
1115
1116 if ($@) {
1117 # an error occured, revert the db to the previous state
1118 $sth = $self->prepare("DELETE FROM alt_allele WHERE alt_allele_id = ?");
1119 $sth->bind_param(1, $alt_allele_id, SQL_INTEGER);
1120 $sth->execute();
1121 $sth->finish();
1122 throw("An SQL error occured inserting alternative alleles:\n$@");
1123 }
1124 }
1125
1126 $sth->finish();
1127
1128 return $alt_allele_id;
1129 }
1130
1131
1132 =head2 store
1133
1134 Arg [1] : Bio::EnsEMBL::Gene $gene
1135 The gene to store in the database
1136 Arg [2] : ignore_release in xrefs [default 1] set to 0 to use release info
1137 in external database references
1138 Example : $gene_adaptor->store($gene);
1139 Description: Stores a gene in the database.
1140 Returntype : the database identifier (dbID) of the newly stored gene
1141 Exceptions : thrown if the $gene is not a Bio::EnsEMBL::Gene or if
1142 $gene does not have an analysis object
1143 Caller : general
1144 Status : Stable
1145
1146 =cut
1147
1148 sub store {
1149 my ($self, $gene, $ignore_release) = @_;
1150
1151 if (!ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) {
1152 throw("Must store a gene object, not a $gene");
1153 }
1154 if(!defined($ignore_release)){
1155 $ignore_release = 1;
1156 }
1157 my $db = $self->db();
1158
1159 if ($gene->is_stored($db)) {
1160 return $gene->dbID();
1161 }
1162
1163 # ensure coords are correct before storing
1164 $gene->recalculate_coordinates();
1165
1166 my $analysis = $gene->analysis();
1167 throw("Genes must have an analysis object.") if(!defined($analysis));
1168
1169 my $analysis_id;
1170 if ($analysis->is_stored($db)) {
1171 $analysis_id = $analysis->dbID();
1172 } else {
1173 $analysis_id = $db->get_AnalysisAdaptor->store($analysis);
1174 }
1175
1176 my $type = $gene->biotype || "";
1177
1178 # default to is_current = 1 if this attribute is not set
1179 my $is_current = $gene->is_current;
1180 $is_current = 1 unless (defined($is_current));
1181
1182 my $original = $gene;
1183 my $original_transcripts = $gene->get_all_Transcripts();
1184
1185 my $seq_region_id;
1186
1187 ( $gene, $seq_region_id ) = $self->_pre_store($gene);
1188
1189 my $store_gene_sql = qq(
1190 INSERT INTO gene
1191 SET biotype = ?,
1192 analysis_id = ?,
1193 seq_region_id = ?,
1194 seq_region_start = ?,
1195 seq_region_end = ?,
1196 seq_region_strand = ?,
1197 description = ?,
1198 source = ?,
1199 status = ?,
1200 is_current = ?,
1201 canonical_transcript_id = ?,
1202 canonical_annotation = ?
1203 );
1204
1205 if (defined($gene->stable_id)) {
1206 my $created = $self->db->dbc->from_seconds_to_date($gene->created_date());
1207 my $modified = $self->db->dbc->from_seconds_to_date($gene->modified_date());
1208 $store_gene_sql .= ", stable_id = ?, version = ?, created_date = " . $created . " , modified_date = " . $modified;
1209
1210 }
1211
1212 # column status is used from schema version 34 onwards (before it was
1213 # confidence)
1214
1215 my $sth = $self->prepare($store_gene_sql);
1216 $sth->bind_param( 1, $type, SQL_VARCHAR );
1217 $sth->bind_param( 2, $analysis_id, SQL_INTEGER );
1218 $sth->bind_param( 3, $seq_region_id, SQL_INTEGER );
1219 $sth->bind_param( 4, $gene->start(), SQL_INTEGER );
1220 $sth->bind_param( 5, $gene->end(), SQL_INTEGER );
1221 $sth->bind_param( 6, $gene->strand(), SQL_TINYINT );
1222 $sth->bind_param( 7, $gene->description(), SQL_LONGVARCHAR );
1223 $sth->bind_param( 8, $gene->source(), SQL_VARCHAR );
1224 $sth->bind_param( 9, $gene->status(), SQL_VARCHAR );
1225 $sth->bind_param( 10, $is_current, SQL_TINYINT );
1226
1227 # Canonical transcript ID will be updated later.
1228 # Set it to zero for now.
1229 $sth->bind_param( 11, 0, SQL_TINYINT );
1230
1231 $sth->bind_param( 12, $gene->canonical_annotation(), SQL_VARCHAR );
1232
1233 if ( defined($gene->stable_id) ) {
1234
1235 $sth->bind_param( 13, $gene->stable_id, SQL_VARCHAR );
1236 my $version = ($gene->version()) ? $gene->version() : 1;
1237 $sth->bind_param( 14, $version, SQL_INTEGER );
1238 }
1239
1240 $sth->execute();
1241 $sth->finish();
1242
1243 my $gene_dbID = $sth->{'mysql_insertid'};
1244
1245 # store the dbentries associated with this gene
1246 my $dbEntryAdaptor = $db->get_DBEntryAdaptor();
1247
1248 foreach my $dbe ( @{ $gene->get_all_DBEntries } ) {
1249 $dbEntryAdaptor->store( $dbe, $gene_dbID, "Gene", $ignore_release );
1250 }
1251
1252 # We allow transcripts not to share equal exons and instead have
1253 # copies. For the database we still want sharing though, to have
1254 # easier time with stable ids. So we need to have a step to merge
1255 # exons together before store.
1256 my %exons;
1257
1258 foreach my $trans ( @{$gene->get_all_Transcripts} ) {
1259 foreach my $e ( @{$trans->get_all_Exons} ) {
1260 my $key = $e->hashkey();
1261 if( exists $exons{ $key } ) {
1262 $trans->swap_exons( $e, $exons{$key} );
1263 } else {
1264 $exons{$key} = $e;
1265 }
1266 }
1267 }
1268
1269 my $transcript_adaptor = $db->get_TranscriptAdaptor();
1270
1271 my $transcripts = $gene->get_all_Transcripts();
1272
1273 my $new_canonical_transcript_id;
1274 for ( my $i = 0; $i < @$transcripts; $i++ ) {
1275 my $new = $transcripts->[$i];
1276 my $old = $original_transcripts->[$i];
1277
1278 $transcript_adaptor->store( $new, $gene_dbID, $analysis_id );
1279
1280 if ( !defined($new_canonical_transcript_id)
1281 && $new->is_canonical() )
1282 {
1283 $new_canonical_transcript_id = $new->dbID();
1284 }
1285
1286 # update the original transcripts since we may have made copies of
1287 # them by transforming the gene
1288 $old->dbID( $new->dbID() );
1289 $old->adaptor( $new->adaptor() );
1290
1291 if ( $new->translation ) {
1292 $old->translation->dbID( $new->translation()->dbID );
1293 $old->translation->adaptor( $new->translation()->adaptor );
1294 }
1295 }
1296
1297 if ( defined($new_canonical_transcript_id) ) {
1298 # Now the canonical transcript has been stored, so update the
1299 # canonical_transcript_id of this gene with the new dbID.
1300 my $sth = $self->prepare(
1301 q(
1302 UPDATE gene
1303 SET canonical_transcript_id = ?
1304 WHERE gene_id = ?)
1305 );
1306
1307 $sth->bind_param( 1, $new_canonical_transcript_id, SQL_INTEGER );
1308 $sth->bind_param( 2, $gene_dbID, SQL_INTEGER );
1309
1310 $sth->execute();
1311 $sth->finish();
1312 }
1313
1314 # update gene to point to display xref if it is set
1315 if(my $display_xref = $gene->display_xref) {
1316 my $dxref_id;
1317 if($display_xref->is_stored($db)) {
1318 $dxref_id = $display_xref->dbID();
1319 } else {
1320 $dxref_id = $dbEntryAdaptor->exists($display_xref);
1321 }
1322
1323 if(defined($dxref_id)) {
1324 my $sth = $self->prepare
1325 ("UPDATE gene SET display_xref_id = ? WHERE gene_id = ?");
1326 $sth->bind_param(1, $dxref_id, SQL_INTEGER);
1327 $sth->bind_param(2, $gene_dbID, SQL_INTEGER);
1328 $sth->execute();
1329 $sth->finish();
1330 $display_xref->dbID($dxref_id);
1331 $display_xref->adaptor($dbEntryAdaptor);
1332 $display_xref->dbID($dxref_id);
1333 $display_xref->adaptor($dbEntryAdaptor);
1334 } else {
1335 warning("Display_xref ".$display_xref->dbname().":".
1336 $display_xref->display_id() . " is not stored in database.\n".
1337 "Not storing relationship to this gene.");
1338 $display_xref->dbID(undef);
1339 $display_xref->adaptor(undef);
1340 }
1341 }
1342
1343 # store gene attributes if there are any
1344 my $attr_adaptor = $db->get_AttributeAdaptor();
1345 $attr_adaptor->store_on_Gene($gene_dbID, $gene->get_all_Attributes);
1346
1347 # store unconventional transcript associations if there are any
1348 my $utaa = $db->get_UnconventionalTranscriptAssociationAdaptor();
1349 foreach my $uta (@{$gene->get_all_unconventional_transcript_associations()}) {
1350 $utaa->store($uta);
1351 }
1352
1353 # set the adaptor and dbID on the original passed in gene not the
1354 # transfered copy
1355 $original->adaptor($self);
1356 $original->dbID($gene_dbID);
1357
1358 return $gene_dbID;
1359 }
1360
1361
1362 =head2 remove
1363
1364 Arg [1] : Bio::EnsEMBL::Gene $gene
1365 the gene to remove from the database
1366 Example : $gene_adaptor->remove($gene);
1367 Description: Removes a gene completely from the database. All associated
1368 transcripts, exons, stable_identifiers, descriptions, etc.
1369 are removed as well. Use with caution!
1370 Returntype : none
1371 Exceptions : throw on incorrect arguments
1372 warning if gene is not stored in this database
1373 Caller : general
1374 Status : Stable
1375
1376 =cut
1377
1378 sub remove {
1379 my $self = shift;
1380 my $gene = shift;
1381
1382 if (!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) {
1383 throw("Bio::EnsEMBL::Gene argument expected.");
1384 }
1385
1386 if ( !$gene->is_stored($self->db()) ) {
1387 warning("Cannot remove gene " . $gene->dbID() . ". Is not stored in " .
1388 "this database.");
1389 return;
1390 }
1391
1392 # remove all object xrefs associated with this gene
1393
1394 my $dbe_adaptor = $self->db()->get_DBEntryAdaptor();
1395 foreach my $dbe (@{$gene->get_all_DBEntries()}) {
1396 $dbe_adaptor->remove_from_object($dbe, $gene, 'Gene');
1397 }
1398
1399 # remove all alternative allele entries associated with this gene
1400 my $sth = $self->prepare("DELETE FROM alt_allele WHERE gene_id = ?");
1401 $sth->bind_param( 1, $gene->dbID, SQL_INTEGER );
1402 $sth->execute();
1403 $sth->finish();
1404
1405 # remove the attributes associated with this transcript
1406 my $attrib_adaptor = $self->db->get_AttributeAdaptor;
1407 $attrib_adaptor->remove_from_Gene($gene);
1408
1409 # remove all of the transcripts associated with this gene
1410 my $transcriptAdaptor = $self->db->get_TranscriptAdaptor();
1411 foreach my $trans ( @{$gene->get_all_Transcripts()} ) {
1412 $transcriptAdaptor->remove($trans);
1413 }
1414
1415 # remove any unconventional transcript associations involving this gene
1416
1417 $sth =
1418 $self->prepare( "DELETE FROM unconventional_transcript_association "
1419 . "WHERE gene_id = ? " );
1420 $sth->bind_param( 1, $gene->dbID, SQL_INTEGER );
1421 $sth->execute();
1422 $sth->finish();
1423
1424 # remove this gene from the database
1425
1426 $sth = $self->prepare("DELETE FROM gene WHERE gene_id = ? ");
1427 $sth->bind_param( 1, $gene->dbID, SQL_INTEGER );
1428 $sth->execute();
1429 $sth->finish();
1430
1431 # unset the gene identifier and adaptor thereby flagging it as unstored
1432
1433 $gene->dbID(undef);
1434 $gene->adaptor(undef);
1435
1436 return;
1437 }
1438
1439
1440 =head2 get_Interpro_by_geneid
1441
1442 Arg [1] : String $gene_stable_id
1443 The stable ID of the gene to obtain
1444 Example : @i = @{
1445 $gene_adaptor->get_Interpro_by_geneid(
1446 $gene->stable_id() ) };
1447 Description: Gets interpro accession numbers by gene stable id. A hack really
1448 - we should have a much more structured system than this.
1449 Returntype : listref of strings (Interpro_acc:description)
1450 Exceptions : none
1451 Caller : domainview
1452 Status : Stable
1453
1454 =cut
1455
1456 sub get_Interpro_by_geneid {
1457 my ($self, $gene_stable_id) = @_;
1458
1459 my $sql = qq(
1460 SELECT i.interpro_ac,
1461 x.description
1462 FROM transcript t,
1463 translation tl,
1464 protein_feature pf,
1465 interpro i,
1466 xref x,
1467 gene g
1468 WHERE g.stable_id = ?
1469 AND t.gene_id = g.gene_id
1470 AND t.is_current = 1
1471 AND tl.transcript_id = t.transcript_id
1472 AND tl.translation_id = pf.translation_id
1473 AND i.id = pf.hit_name
1474 AND i.interpro_ac = x.dbprimary_acc);
1475
1476 my $sth = $self->prepare($sql);
1477
1478 $sth->bind_param( 1, $gene_stable_id, SQL_VARCHAR );
1479
1480 $sth->execute;
1481
1482 my @out;
1483 my %h;
1484 while( (my $arr = $sth->fetchrow_arrayref()) ) {
1485 if( $h{$arr->[0]} ) { next; }
1486 $h{$arr->[0]}=1;
1487 my $string = $arr->[0] .":".$arr->[1];
1488 push(@out,$string);
1489 }
1490
1491 return \@out;
1492 }
1493
1494
1495 =head2 update
1496
1497 Arg [1] : Bio::EnsEMBL::Gene $gene
1498 The gene to update
1499 Example : $gene_adaptor->update($gene);
1500 Description: Updates the type, analysis, display_xref, status, is_current and
1501 description of a gene in the database.
1502 Returntype : None
1503 Exceptions : thrown if the $gene is not a Bio::EnsEMBL::Gene
1504 Caller : general
1505 Status : Stable
1506
1507 =cut
1508
1509 sub update {
1510 my ($self, $gene) = @_;
1511 my $update = 0;
1512
1513 if ( !defined $gene || !ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) {
1514 throw("Must update a gene object, not a $gene");
1515 }
1516
1517 my $update_gene_sql = qq(
1518 UPDATE gene
1519 SET biotype = ?,
1520 analysis_id = ?,
1521 display_xref_id = ?,
1522 status = ?,
1523 description = ?,
1524 is_current = ?,
1525 canonical_transcript_id = ?,
1526 canonical_annotation = ?
1527 WHERE gene_id = ?
1528 );
1529
1530 my $display_xref = $gene->display_xref();
1531 my $display_xref_id;
1532
1533 if ( $display_xref && $display_xref->dbID() ) {
1534 $display_xref_id = $display_xref->dbID();
1535 } else {
1536 $display_xref_id = undef;
1537 }
1538
1539 my $sth = $self->prepare( $update_gene_sql );
1540
1541 $sth->bind_param( 1, $gene->biotype(), SQL_VARCHAR );
1542 $sth->bind_param( 2, $gene->analysis->dbID(), SQL_INTEGER );
1543 $sth->bind_param( 3, $display_xref_id, SQL_INTEGER );
1544 $sth->bind_param( 4, $gene->status(), SQL_VARCHAR );
1545 $sth->bind_param( 5, $gene->description(), SQL_VARCHAR );
1546 $sth->bind_param( 6, $gene->is_current(), SQL_TINYINT );
1547
1548 if ( defined( $gene->canonical_transcript() ) ) {
1549 $sth->bind_param( 7, $gene->canonical_transcript()->dbID(),
1550 SQL_INTEGER );
1551 } else {
1552 $sth->bind_param( 7, 0, SQL_INTEGER );
1553 }
1554
1555 $sth->bind_param( 8, $gene->canonical_annotation(), SQL_VARCHAR );
1556 $sth->bind_param( 9, $gene->dbID(), SQL_INTEGER );
1557
1558 $sth->execute();
1559
1560 # maybe should update stable id ???
1561 }
1562
1563
1564 # _objs_from_sth
1565
1566 # Arg [1] : StatementHandle $sth
1567 # Arg [2] : Bio::EnsEMBL::AssemblyMapper $mapper
1568 # Arg [3] : Bio::EnsEMBL::Slice $dest_slice
1569 # Description: PROTECTED implementation of abstract superclass method.
1570 # responsible for the creation of Genes
1571 # Returntype : listref of Bio::EnsEMBL::Genes in target coordinate system
1572 # Exceptions : none
1573 # Caller : internal
1574 # Status : Stable
1575
1576 sub _objs_from_sth {
1577 my ($self, $sth, $mapper, $dest_slice) = @_;
1578
1579 #
1580 # This code is ugly because an attempt has been made to remove as many
1581 # function calls as possible for speed purposes. Thus many caches and
1582 # a fair bit of gymnastics is used.
1583 #
1584
1585 my $sa = $self->db()->get_SliceAdaptor();
1586 my $aa = $self->db()->get_AnalysisAdaptor();
1587 my $dbEntryAdaptor = $self->db()->get_DBEntryAdaptor();
1588
1589 my @genes;
1590 my %analysis_hash;
1591 my %slice_hash;
1592 my %sr_name_hash;
1593 my %sr_cs_hash;
1594
1595 my (
1596 $gene_id, $seq_region_id,
1597 $seq_region_start, $seq_region_end,
1598 $seq_region_strand, $analysis_id,
1599 $biotype, $display_xref_id,
1600 $gene_description, $status,
1601 $source, $is_current,
1602 $canonical_transcript_id, $canonical_annotation,
1603 $stable_id, $version,
1604 $created_date, $modified_date,
1605 $xref_display_id, $xref_primary_acc,
1606 $xref_desc, $xref_version,
1607 $external_db, $external_status,
1608 $external_release, $external_db_name,
1609 $info_type, $info_text
1610 );
1611
1612 $sth->bind_columns(
1613 \(
1614 $gene_id, $seq_region_id,
1615 $seq_region_start, $seq_region_end,
1616 $seq_region_strand, $analysis_id,
1617 $biotype, $display_xref_id,
1618 $gene_description, $status,
1619 $source, $is_current,
1620 $canonical_transcript_id, $canonical_annotation,
1621 $stable_id, $version,
1622 $created_date, $modified_date,
1623 $xref_display_id, $xref_primary_acc,
1624 $xref_desc, $xref_version,
1625 $external_db, $external_status,
1626 $external_release, $external_db_name,
1627 $info_type, $info_text
1628 ) );
1629
1630 my $asm_cs;
1631 my $cmp_cs;
1632 my $asm_cs_vers;
1633 my $asm_cs_name;
1634 my $cmp_cs_vers;
1635 my $cmp_cs_name;
1636
1637 if($mapper) {
1638 $asm_cs = $mapper->assembled_CoordSystem();
1639 $cmp_cs = $mapper->component_CoordSystem();
1640 $asm_cs_name = $asm_cs->name();
1641 $asm_cs_vers = $asm_cs->version();
1642 $cmp_cs_name = $cmp_cs->name();
1643 $cmp_cs_vers = $cmp_cs->version();
1644 }
1645
1646 my $dest_slice_start;
1647 my $dest_slice_end;
1648 my $dest_slice_strand;
1649 my $dest_slice_length;
1650 my $dest_slice_sr_name;
1651 my $dest_slice_sr_id;
1652
1653 if($dest_slice) {
1654 $dest_slice_start = $dest_slice->start();
1655 $dest_slice_end = $dest_slice->end();
1656 $dest_slice_strand = $dest_slice->strand();
1657 $dest_slice_length = $dest_slice->length();
1658 $dest_slice_sr_name = $dest_slice->seq_region_name();
1659 $dest_slice_sr_id = $dest_slice->get_seq_region_id();
1660 }
1661
1662 FEATURE: while($sth->fetch()) {
1663 #get the analysis object
1664 my $analysis = $analysis_hash{$analysis_id} ||=
1665 $aa->fetch_by_dbID($analysis_id);
1666
1667 #need to get the internal_seq_region, if present
1668 $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
1669 my $slice = $slice_hash{"ID:".$seq_region_id};
1670
1671 if(!$slice) {
1672 $slice = $sa->fetch_by_seq_region_id($seq_region_id);
1673 $slice_hash{"ID:".$seq_region_id} = $slice;
1674 $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
1675 $sr_cs_hash{$seq_region_id} = $slice->coord_system();
1676 }
1677
1678 my $sr_name = $sr_name_hash{$seq_region_id};
1679 my $sr_cs = $sr_cs_hash{$seq_region_id};
1680
1681 #
1682 # remap the feature coordinates to another coord system
1683 # if a mapper was provided
1684 #
1685 if($mapper) {
1686
1687 if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) {
1688 ( $seq_region_id, $seq_region_start,
1689 $seq_region_end, $seq_region_strand )
1690 =
1691 $mapper->map( $sr_name, $seq_region_start, $seq_region_end,
1692 $seq_region_strand, $sr_cs, 1, $dest_slice);
1693
1694 } else {
1695
1696 ( $seq_region_id, $seq_region_start,
1697 $seq_region_end, $seq_region_strand )
1698 =
1699 $mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end,
1700 $seq_region_strand, $sr_cs );
1701 }
1702
1703 #skip features that map to gaps or coord system boundaries
1704 next FEATURE if(!defined($seq_region_id));
1705
1706 #get a slice in the coord system we just mapped to
1707 # if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
1708 $slice = $slice_hash{"ID:".$seq_region_id} ||=
1709 $sa->fetch_by_seq_region_id($seq_region_id);
1710 # } else {
1711 # $slice = $slice_hash{"NAME:$sr_name:$asm_cs_name:$asm_cs_vers"} ||=
1712 # $sa->fetch_by_region($asm_cs_name, $sr_name, undef, undef, undef,
1713 # $asm_cs_vers);
1714 # }
1715 }
1716
1717 #
1718 # If a destination slice was provided convert the coords.
1719 #
1720 if ( defined($dest_slice) ) {
1721 if ( $dest_slice_strand == 1 ) {
1722 # Positive strand.
1723
1724 $seq_region_start = $seq_region_start - $dest_slice_start + 1;
1725 $seq_region_end = $seq_region_end - $dest_slice_start + 1;
1726
1727 if ( $dest_slice->is_circular() ) {
1728 # Handle cicular chromosomes.
1729
1730 if ( $seq_region_start > $seq_region_end ) {
1731 # Looking at a feature overlapping the chromsome origin.
1732
1733 if ( $seq_region_end > $dest_slice_start ) {
1734 # Looking at the region in the beginning of the
1735 # chromosome.
1736 $seq_region_start -= $dest_slice->seq_region_length();
1737 }
1738
1739 if ( $seq_region_end < 0 ) {
1740 $seq_region_end += $dest_slice->seq_region_length();
1741 }
1742
1743 } else {
1744
1745 if ( $dest_slice_start > $dest_slice_end
1746 && $seq_region_end < 0 )
1747 {
1748 # Looking at the region overlapping the chromosome
1749 # origin and a feature which is at the beginning of the
1750 # chromosome.
1751 $seq_region_start += $dest_slice->seq_region_length();
1752 $seq_region_end += $dest_slice->seq_region_length();
1753 }
1754 }
1755
1756 } ## end if ( $dest_slice->is_circular...)
1757
1758 } else {
1759 # Negative strand.
1760
1761 if ( $dest_slice->is_circular()
1762 && $seq_region_start > $seq_region_end )
1763 {
1764 # Handle cicular chromosomes.
1765
1766 if ( $seq_region_end > $dest_slice_start ) {
1767 # Looking at the region in the beginning of the
1768 # chromosome.
1769 $seq_region_start = $dest_slice_end - $seq_region_end + 1;
1770 $seq_region_end =
1771 $seq_region_end -
1772 $dest_slice->seq_region_length -
1773 $dest_slice_start + 1;
1774 } else {
1775 my $tmp_seq_region_start = $seq_region_start;
1776 $seq_region_start =
1777 $dest_slice_end -
1778 $seq_region_end -
1779 $dest_slice->seq_region_length + 1;
1780 $seq_region_end =
1781 $dest_slice_end - $tmp_seq_region_start + 1;
1782 }
1783
1784 } else {
1785 # Non-circular chromosome.
1786
1787 my $tmp_seq_region_start = $seq_region_start;
1788 $seq_region_start = $dest_slice_end - $seq_region_end + 1;
1789 $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1;
1790 }
1791
1792 $seq_region_strand = -$seq_region_strand;
1793
1794 } ## end else [ if ( $dest_slice_strand...)]
1795
1796 # Throw away features off the end of the requested slice or on
1797 # different seq_region.
1798
1799 if ( $seq_region_end < 1
1800 || $seq_region_start > $dest_slice_length
1801 || ( $dest_slice_sr_id ne $seq_region_id ) )
1802 {
1803 next FEATURE;
1804 }
1805
1806 $slice = $dest_slice;
1807 } ## end if ( defined($dest_slice...))
1808
1809 my $display_xref;
1810
1811 if ($display_xref_id) {
1812 $display_xref = Bio::EnsEMBL::DBEntry->new_fast( {
1813 'dbID' => $display_xref_id,
1814 'adaptor' => $dbEntryAdaptor,
1815 'display_id' => $xref_display_id,
1816 'primary_id' => $xref_primary_acc,
1817 'version' => $xref_version,
1818 'description' => $xref_desc,
1819 'release' => $external_release,
1820 'dbname' => $external_db,
1821 'db_display_name' => $external_db_name,
1822 'info_type' => $info_type,
1823 'info_text' => $info_text
1824 } );
1825 $display_xref->status($external_status);
1826 }
1827
1828 # Finally, create the new Gene.
1829 push(
1830 @genes,
1831 $self->_create_feature_fast(
1832 'Bio::EnsEMBL::Gene',
1833 {
1834 'analysis' => $analysis,
1835 'biotype' => $biotype,
1836 'start' => $seq_region_start,
1837 'end' => $seq_region_end,
1838 'strand' => $seq_region_strand,
1839 'adaptor' => $self,
1840 'slice' => $slice,
1841 'dbID' => $gene_id,
1842 'stable_id' => $stable_id,
1843 'version' => $version,
1844 'created_date' => $created_date || undef,
1845 'modified_date' => $modified_date || undef,
1846 'description' => $gene_description,
1847 'external_name' => undef, # will use display_id
1848 # from display_xref
1849 'external_db' => $external_db,
1850 'external_status' => $external_status,
1851 'display_xref' => $display_xref,
1852 'status' => $status,
1853 'source' => $source,
1854 'is_current' => $is_current,
1855 'canonical_transcript_id' => $canonical_transcript_id,
1856 'canonical_annotation' => $canonical_annotation
1857 } ) );
1858
1859 }
1860
1861 return \@genes;
1862 }
1863
1864
1865 =head2 cache_gene_seq_mappings
1866
1867 Example : $gene_adaptor->cache_gene_seq_mappings();
1868 Description: caches all the assembly mappings needed for genes
1869 Returntype : None
1870 Exceptions : None
1871 Caller : general
1872 Status : At Risk
1873 : New experimental code
1874
1875 =cut
1876
1877 sub cache_gene_seq_mappings {
1878 my ($self) = @_;
1879
1880 # get the sequence level to map too
1881
1882 my $sql =
1883 'SELECT name '
1884 . 'FROM coord_system '
1885 . 'WHERE attrib like "%%sequence_level%%"'
1886 . 'AND species_id = ?';
1887
1888 my $sth = $self->prepare($sql);
1889 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
1890 $sth->execute();
1891
1892 my $sequence_level = $sth->fetchrow_array();
1893
1894 $sth->finish();
1895
1896 my $csa = $self->db->get_CoordSystemAdaptor();
1897 my $ama = $self->db->get_AssemblyMapperAdaptor();
1898
1899 my $cs1 = $csa->fetch_by_name($sequence_level);
1900
1901 # get level to map to two
1902
1903 my $mcc = $self->db->get_MetaCoordContainerAdaptor();
1904 my $csnew = $mcc->fetch_all_CoordSystems_by_feature_type('gene');
1905
1906 foreach my $cs2 (@$csnew) {
1907 my $am = $ama->fetch_by_CoordSystems( $cs1, $cs2 );
1908 $am->register_all();
1909 }
1910
1911 } ## end sub cache_gene_seq_mappings
1912
1913
1914 =head2 fetch_all_by_exon_supporting_evidence
1915
1916 Arg [1] : String $hit_name
1917 Name of supporting feature
1918 Arg [2] : String $feature_type
1919 one of "dna_align_feature" or "protein_align_feature"
1920 Arg [3] : (optional) Bio::Ensembl::Analysis
1921 Example : $genes = $gene_adaptor->fetch_all_by_exon_supporting_evidence(
1922 'XYZ', 'dna_align_feature');
1923 Description: Gets all the genes with transcripts with exons which have a
1924 specified hit on a particular type of feature. Optionally filter
1925 by analysis.
1926 Returntype : Listref of Bio::EnsEMBL::Gene
1927 Exceptions : If feature_type is not of correct type.
1928 Caller : general
1929 Status : Stable
1930
1931 =cut
1932
1933 sub fetch_all_by_exon_supporting_evidence {
1934 my ($self, $hit_name, $feature_type, $analysis) = @_;
1935
1936 if ($feature_type !~ /(dna)|(protein)_align_feature/) {
1937 throw("feature type must be dna_align_feature or protein_align_feature");
1938 }
1939
1940 my $anal_from = ", analysis a " if ($analysis);
1941 my $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? " if ($analysis);
1942
1943 my $sql = qq(
1944 SELECT DISTINCT(g.gene_id)
1945 FROM gene g,
1946 transcript t,
1947 exon_transcript et,
1948 supporting_feature sf,
1949 $feature_type f
1950 $anal_from
1951 WHERE g.gene_id = t.gene_id
1952 AND g.is_current = 1
1953 AND t.transcript_id = et.transcript_id
1954 AND et.exon_id = sf.exon_id
1955 AND sf.feature_id = f.${feature_type}_id
1956 AND sf.feature_type = ?
1957 AND f.hit_name=?
1958 $anal_where
1959 );
1960
1961 my $sth = $self->prepare($sql);
1962
1963 $sth->bind_param(1, $feature_type, SQL_VARCHAR);
1964 $sth->bind_param(2, $hit_name, SQL_VARCHAR);
1965 $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis);
1966
1967 $sth->execute();
1968
1969 my @genes;
1970
1971 while ( my $id = $sth->fetchrow_array ) {
1972 my $gene = $self->fetch_by_dbID($id);
1973 push(@genes, $gene) if $gene;
1974 }
1975
1976 return \@genes;
1977 }
1978
1979
1980 =head2 fetch_all_by_transcript_supporting_evidence
1981
1982 Arg [1] : String $hit_name
1983 Name of supporting feature
1984 Arg [2] : String $feature_type
1985 one of "dna_align_feature" or "protein_align_feature"
1986 Arg [3] : (optional) Bio::Ensembl::Analysis
1987 Example : $genes = $gene_adaptor->fetch_all_by_transcript_supporting_evidence('XYZ', 'dna_align_feature');
1988 Description: Gets all the genes with transcripts with evidence for a
1989 specified hit on a particular type of feature. Optionally filter
1990 by analysis.
1991 Returntype : Listref of Bio::EnsEMBL::Gene.
1992 Exceptions : If feature_type is not of correct type.
1993 Caller : general
1994 Status : Stable
1995
1996 =cut
1997
1998 sub fetch_all_by_transcript_supporting_evidence {
1999 my ($self, $hit_name, $feature_type, $analysis) = @_;
2000
2001 if($feature_type !~ /(dna)|(protein)_align_feature/) {
2002 throw("feature type must be dna_align_feature or protein_align_feature");
2003 }
2004
2005 my $anal_from = ", analysis a " if ($analysis);
2006 my $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? " if ($analysis);
2007
2008 my $sql = qq(
2009 SELECT DISTINCT(g.gene_id)
2010 FROM gene g,
2011 transcript t,
2012 transcript_supporting_feature sf,
2013 $feature_type f
2014 $anal_from
2015 WHERE g.gene_id = t.gene_id
2016 AND g.is_current = 1
2017 AND t.transcript_id = sf.transcript_id
2018 AND sf.feature_id = f.${feature_type}_id
2019 AND sf.feature_type = ?
2020 AND f.hit_name=?
2021 $anal_where
2022 );
2023
2024 my $sth = $self->prepare($sql);
2025
2026 $sth->bind_param(1, $feature_type, SQL_VARCHAR);
2027 $sth->bind_param(2, $hit_name, SQL_VARCHAR);
2028 $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis);
2029
2030 $sth->execute();
2031
2032 my @genes;
2033
2034 while( my $id = $sth->fetchrow_array ) {
2035 my $gene = $self->fetch_by_dbID($id);
2036 push(@genes, $gene) if $gene;
2037 }
2038
2039 return \@genes;
2040 }
2041
2042 =head2 fetch_nearest_Gene_by_Feature
2043
2044 Arg [1] : Feature object
2045 Example : $genes = $gene_adaptor->fetch_nearest_Gene_by_Feature($feat);
2046 Description: Gets the nearest gene to the feature
2047 Returntype : Listref of Bio::EnsEMBL::Gene, EMPTY list if no nearest
2048 Caller : general
2049 Status : UnStable
2050
2051 =cut
2052
2053 sub fetch_nearest_Gene_by_Feature{
2054 my $self = shift;
2055 my $feat = shift;
2056
2057 my $stranded = shift;
2058 my $stream = shift; # 1 up stream -1 downstream
2059 my @genes;
2060
2061
2062 my $strand = $feat->strand;
2063 if(defined($stream) and !$strand){
2064 warn("stream specified but feature has no strand so +ve strand will be used");
2065 $strand = 1;
2066 }
2067 my $min_dist = 999;
2068 my $gene_id = 0;
2069
2070 my $overlapping = $feat->get_overlapping_Genes();
2071
2072 return $overlapping if(defined(@{$overlapping}[0]));
2073
2074 my $seq_region_id = $feat->slice->adaptor->get_seq_region_id($feat->slice);
2075 my $start = ($feat->start + $feat->slice->start) -1;
2076 my $end = ($feat->end + $feat->slice->start) -1;
2077
2078
2079 my @gene_ids;
2080 if(!defined($stream) or $stream == 0){
2081
2082 my $sql1 = "select g.gene_id, (? - g.seq_region_end) as 'dist' from gene g where ";
2083 if($stranded){
2084 $sql1 .= "g.seq_region_strand = ".$strand." and ";
2085 }
2086 $sql1 .= "seq_region_id = ? and g.seq_region_end < ? order by dist limit 10";
2087
2088 #
2089 # MAYBE set the result of prepare to be static in case lots of calls.
2090 #
2091 my $sql1_sth = $self->prepare($sql1) || die "Could not prepare $sql1";
2092 $sql1_sth->execute($start, $seq_region_id, $start) || die "Could not execute sql";
2093 $sql1_sth->bind_columns(\$gene_id, \$min_dist) || die "Could mot bin columns";
2094
2095 my $last_dist = 99999999999999999;
2096 while($sql1_sth->fetch()){
2097 if($min_dist <= $last_dist){
2098 push @gene_ids, $gene_id;
2099 $last_dist = $min_dist;
2100 }
2101 }
2102 $sql1_sth->finish();
2103
2104
2105
2106 my $sql2 = "select g.gene_id, (g.seq_region_start - ?) as 'dist' from gene g where ";
2107 if($stranded){
2108 $sql2 .= "g.seq_region_strand = ".$feat->strand." and ";
2109 }
2110 $sql2 .= "seq_region_id = ? and g.seq_region_start > ? order by dist limit 10";
2111
2112 my $sql2_sth = $self->prepare($sql2) || die "could not prepare $sql2";
2113
2114 my ($tmp_min_dist, $tmp_gene_id);
2115 $sql2_sth->execute($end, $seq_region_id, $end) || die "Could not execute sql";
2116 $sql2_sth->bind_columns(\$tmp_gene_id, \$tmp_min_dist) || die "Could mot bin columns";
2117 my $first =1;
2118 while($sql2_sth->fetch()){
2119 if( $tmp_min_dist <= $last_dist){
2120 if($first){
2121 $first = 0;
2122 if($tmp_min_dist < $last_dist){
2123 @gene_ids = (); #reset
2124 }
2125 }
2126 push @gene_ids, $tmp_gene_id;
2127 $last_dist = $tmp_min_dist;
2128 }
2129 }
2130 $sql2_sth->finish();
2131
2132
2133 }
2134 elsif(($stream*$strand) == 1){
2135 my $sql1 = "select g.gene_id, (? - g.seq_region_end) as 'dist' from gene g where ";
2136 if($stranded){
2137 $sql1 .= "g.seq_region_strand = ".$strand." and ";
2138 }
2139 $sql1 .= "seq_region_id = ? and g.seq_region_end < ? order by dist limit 10";
2140
2141 #
2142 # MAYBE set the result of prepare to be static in case lots of calls.
2143 #
2144 my $sql1_sth = $self->prepare($sql1) || die "Could not prepare $sql1";
2145 $sql1_sth->execute($start, $seq_region_id, $start) || die "Could not execute sql";
2146 $sql1_sth->bind_columns(\$gene_id, \$min_dist) || die "Could mot bin columns";
2147
2148 my $last_dist;
2149 my $first = 1;
2150 while($sql1_sth->fetch()){
2151 if($first){
2152 $first = 0;
2153 }
2154 else{
2155 next if ($min_dist > $last_dist);
2156 }
2157 push @gene_ids, $gene_id;
2158 $last_dist = $min_dist;
2159 }
2160 $sql1_sth->finish();
2161 }
2162 elsif(($stream * $strand) == -1){
2163
2164 my $sql2 = "select g.gene_id, (g.seq_region_start - ?) as 'dist' from gene g where ";
2165 if($stranded){
2166 $sql2 .= "g.seq_region_strand = ".$feat->strand." and ";
2167 }
2168 $sql2 .= "seq_region_id = ? and g.seq_region_start > ? order by dist limit 10";
2169
2170 my $sql2_sth = $self->prepare($sql2) || die "could not prepare $sql2";
2171
2172 my ($tmp_min_dist, $tmp_gene_id);
2173 $sql2_sth->execute($end, $seq_region_id, $end) || die "Could not execute sql";
2174 $sql2_sth->bind_columns(\$tmp_gene_id, \$tmp_min_dist) || die "Could mot bin columns";
2175 my $first =1;
2176 my $last_dist;
2177 while($sql2_sth->fetch()){
2178 if($first){
2179 $first = 0;
2180 }
2181 else{
2182 next if ($tmp_min_dist > $last_dist);
2183 }
2184 push @gene_ids, $tmp_gene_id;
2185 $last_dist = $tmp_min_dist;
2186 }
2187 $sql2_sth->finish();
2188 }
2189 else{
2190 die "Invalid stream or strand must be -1, 0 or 1\n";
2191 }
2192
2193
2194
2195 foreach my $gene_id (@gene_ids){
2196 push @genes, $self->fetch_by_dbID($gene_id);
2197 }
2198 return \@genes;
2199
2200 }
2201
2202 ##########################
2203 # #
2204 # DEPRECATED METHODS #
2205 # #
2206 ##########################
2207
2208
2209 =head2 fetch_by_maximum_DBLink
2210
2211 DEPRECATED - use fetch_all_by_external_name instead
2212
2213 =cut
2214
2215 sub fetch_by_maximum_DBLink {
2216 my ($self, $external_id) = @_;
2217
2218 deprecate( "use fetch_all_by_external_name instead" );
2219
2220 my $genes=$self->fetch_all_by_external_name($external_id);
2221
2222 my $biggest;
2223 my $max = 0;
2224 my $size = scalar(@$genes);
2225 if ($size > 0) {
2226 foreach my $gene (@$genes) {
2227 my $size = scalar(@{$gene->get_all_Exons});
2228 if ($size > $max) {
2229 $biggest = $gene;
2230 $max = $size;
2231 }
2232 }
2233 return $biggest;
2234 }
2235 return;
2236 }
2237
2238
2239 =head2 get_display_xref
2240
2241 DEPRECATED use $gene->display_xref
2242
2243 =cut
2244
2245 sub get_display_xref {
2246 my ($self, $gene) = @_;
2247
2248 deprecate( "display xref should retrieved from Gene object directly" );
2249
2250 if ( !defined $gene ) {
2251 throw("Must call with a Gene object");
2252 }
2253
2254 my $sth = $self->prepare(qq(
2255 SELECT e.db_name,
2256 x.display_label,
2257 x.xref_id
2258 FROM gene g,
2259 xref x,
2260 external_db e
2261 WHERE g.gene_id = ?
2262 AND g.display_xref_id = x.xref_id
2263 AND x.external_db_id = e.external_db_id
2264 ));
2265
2266 $sth->bind_param(1, $gene->dbID, SQL_INTEGER);
2267 $sth->execute();
2268
2269 my ($db_name, $display_label, $xref_id) = $sth->fetchrow_array();
2270 if ( !defined $xref_id ) {
2271 return undef;
2272 }
2273
2274 my $db_entry = Bio::EnsEMBL::DBEntry->new(
2275 -dbid => $xref_id,
2276 -adaptor => $self->db->get_DBEntryAdaptor(),
2277 -dbname => $db_name,
2278 -display_id => $display_label
2279 );
2280
2281 return $db_entry;
2282 }
2283
2284
2285 =head2 get_description
2286
2287 DEPRECATED, use gene->get_description
2288
2289 =cut
2290
2291 sub get_description {
2292 my ($self, $dbID) = @_;
2293
2294 deprecate( "Gene description should be loaded on gene retrieval. Use gene->get_description()" );
2295
2296 if ( !defined $dbID ) {
2297 throw("must call with dbID");
2298 }
2299
2300 my $sth = $self->prepare("SELECT description
2301 FROM gene_description
2302 WHERE gene_id = ?");
2303
2304 $sth->bind_param(1, $dbID, SQL_INTEGER);
2305 $sth->execute();
2306
2307 my @array = $sth->fetchrow_array();
2308 return $array[0];
2309 }
2310
2311
2312 =head2 fetch_by_Peptide_id
2313
2314 DEPRECATED, use fetch_by_translation_stable_id()
2315
2316 =cut
2317
2318 sub fetch_by_Peptide_id {
2319 my ( $self, $translation_stable_id) = @_;
2320
2321 deprecate( "Please use better named fetch_by_translation_stable_id \n".
2322 caller(2) );
2323
2324 $self->fetch_by_translation_stable_id($translation_stable_id);
2325 }
2326
2327
2328 =head2 get_stable_entry_info
2329
2330 DEPRECATED use $gene->stable_id instead
2331
2332 =cut
2333
2334 sub get_stable_entry_info {
2335 my ($self,$gene) = @_;
2336
2337 deprecated("stable id info is loaded on default, no lazy loading necessary");
2338
2339 if ( !defined $gene || !ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) {
2340 throw("Needs a gene object, not a $gene");
2341 }
2342
2343 my $created_date = $self->db->dbc->from_date_to_seconds("created_date");
2344 my $modified_date = $self->db->dbc->from_date_to_seconds("modified_date");
2345
2346 my $sth =
2347 $self->prepare( "SELECT stable_id, "
2348 . $created_date . ","
2349 . $modified_date
2350 . ", version FROM gene WHERE gene_id = ?" );
2351
2352 $sth->bind_param(1, $gene->dbID, SQL_INTEGER);
2353 $sth->execute();
2354
2355 my @array = $sth->fetchrow_array();
2356 $gene->{'stable_id'} = $array[0];
2357 $gene->{'created'} = $array[1];
2358 $gene->{'modified'} = $array[2];
2359 $gene->{'version'} = $array[3];
2360
2361 return 1;
2362 }
2363
2364
2365 =head2 fetch_all_by_DBEntry
2366
2367 DEPRECATED - Use fetch_all_by_external_name instead
2368
2369 =cut
2370
2371 sub fetch_all_by_DBEntry {
2372 my $self = shift;
2373
2374 deprecate('Use fetch_all_by_external_name instead.');
2375
2376 return $self->fetch_all_by_external_name(@_);
2377 }
2378
2379
2380 1;
2381
2382