Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::DBSQL::GeneAdaptor - Database adaptor for the retrieval and | |
24 storage of Gene objects | |
25 | |
26 =head1 SYNOPSIS | |
27 | |
28 use Bio::EnsEMBL::Registry; | |
29 | |
30 Bio::EnsEMBL::Registry->load_registry_from_db( | |
31 -host => 'ensembldb.ensembl.org', | |
32 -user => 'anonymous', | |
33 ); | |
34 | |
35 $gene_adaptor = | |
36 Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "gene" ); | |
37 | |
38 $gene = $gene_adaptor->fetch_by_dbID(1234); | |
39 | |
40 $gene = $gene_adaptor->fetch_by_stable_id('ENSG00000184129'); | |
41 | |
42 @genes = @{ $gene_adaptor->fetch_all_by_external_name('BRCA2') }; | |
43 | |
44 $slice_adaptor = | |
45 Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "slice" ); | |
46 | |
47 $slice = | |
48 $slice_adaptor->fetch_by_region( 'chromosome', '1', 1, 1000000 ); | |
49 | |
50 @genes = @{ $gene_adaptor->fetch_all_by_Slice($slice) }; | |
51 | |
52 =head1 DESCRIPTION | |
53 | |
54 This is a database aware adaptor for the retrieval and storage of gene | |
55 objects. | |
56 | |
57 =head1 METHODS | |
58 | |
59 =cut | |
60 | |
61 package Bio::EnsEMBL::DBSQL::GeneAdaptor; | |
62 | |
63 use strict; | |
64 | |
65 use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning ); | |
66 use Bio::EnsEMBL::Utils::Scalar qw( assert_ref ); | |
67 use Bio::EnsEMBL::DBSQL::SliceAdaptor; | |
68 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor; | |
69 use Bio::EnsEMBL::DBSQL::DBAdaptor; | |
70 use Bio::EnsEMBL::Gene; | |
71 | |
72 use vars '@ISA'; | |
73 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor); | |
74 | |
75 | |
76 # _tables | |
77 # Arg [1] : none | |
78 # Description: PROTECTED implementation of superclass abstract method. | |
79 # Returns the names, aliases of the tables to use for queries. | |
80 # Returntype : list of listrefs of strings | |
81 # Exceptions : none | |
82 # Caller : internal | |
83 # Status : Stable | |
84 | |
85 sub _tables { | |
86 return ( | |
87 [ 'gene', 'g' ], | |
88 [ 'xref', 'x' ], | |
89 [ 'external_db', 'exdb' ] ); | |
90 } | |
91 | |
92 | |
93 # _columns | |
94 # Arg [1] : none | |
95 # Example : none | |
96 # Description: PROTECTED implementation of superclass abstract method. | |
97 # Returns a list of columns to use for queries. | |
98 # Returntype : list of strings | |
99 # Exceptions : none | |
100 # Caller : internal | |
101 # Status : Stable | |
102 | |
103 sub _columns { | |
104 my ($self) = @_; | |
105 | |
106 my $created_date = | |
107 $self->db()->dbc()->from_date_to_seconds("g.created_date"); | |
108 my $modified_date = | |
109 $self->db()->dbc()->from_date_to_seconds("g.modified_date"); | |
110 | |
111 return ( | |
112 'g.gene_id', 'g.seq_region_id', | |
113 'g.seq_region_start', 'g.seq_region_end', | |
114 'g.seq_region_strand', 'g.analysis_id', | |
115 'g.biotype', 'g.display_xref_id', | |
116 'g.description', 'g.status', | |
117 'g.source', 'g.is_current', | |
118 'g.canonical_transcript_id', 'g.canonical_annotation', | |
119 'g.stable_id', 'g.version', | |
120 $created_date, $modified_date, | |
121 'x.display_label', 'x.dbprimary_acc', | |
122 'x.description', 'x.version', | |
123 'exdb.db_name', 'exdb.status', | |
124 'exdb.db_release', 'exdb.db_display_name', | |
125 'x.info_type', 'x.info_text' | |
126 ); | |
127 } ## end sub _columns | |
128 | |
129 | |
130 sub _left_join { | |
131 return ( | |
132 [ 'xref', "x.xref_id = g.display_xref_id" ], | |
133 [ 'external_db', "exdb.external_db_id = x.external_db_id" ] ); | |
134 } | |
135 | |
136 | |
137 =head2 list_dbIDs | |
138 | |
139 Example : @gene_ids = @{$gene_adaptor->list_dbIDs()}; | |
140 Description: Gets an array of internal ids for all genes in the current db | |
141 Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region. | |
142 Returntype : Listref of Ints | |
143 Exceptions : none | |
144 Caller : general | |
145 Status : Stable | |
146 | |
147 =cut | |
148 | |
149 sub list_dbIDs { | |
150 my ($self,$ordered) = @_; | |
151 | |
152 return $self->_list_dbIDs("gene",undef, $ordered); | |
153 } | |
154 | |
155 | |
156 =head2 list_stable_ids | |
157 | |
158 Example : @stable_gene_ids = @{$gene_adaptor->list_stable_ids()}; | |
159 Description: Gets an listref of stable ids for all genes in the current db | |
160 Returntype : reference to a list of strings | |
161 Exceptions : none | |
162 Caller : general | |
163 Status : Stable | |
164 | |
165 =cut | |
166 | |
167 sub list_stable_ids { | |
168 my ($self) = @_; | |
169 | |
170 return $self->_list_dbIDs("gene", "stable_id"); | |
171 } | |
172 | |
173 | |
174 sub list_seq_region_ids { | |
175 my $self = shift; | |
176 | |
177 return $self->_list_seq_region_ids('gene'); | |
178 } | |
179 | |
180 =head2 fetch_by_display_label | |
181 | |
182 Arg [1] : String $label - display label of gene to fetch | |
183 Example : my $gene = $geneAdaptor->fetch_by_display_label("BRCA2"); | |
184 Description: Returns the gene which has the given display label or undef if | |
185 there is none. If there are more than 1, the gene on the | |
186 reference slice is reported or if none are on the reference, | |
187 the first one is reported. | |
188 Returntype : Bio::EnsEMBL::Gene | |
189 Exceptions : none | |
190 Caller : general | |
191 Status : Stable | |
192 | |
193 =cut | |
194 | |
195 sub fetch_by_display_label { | |
196 my $self = shift; | |
197 my $label = shift; | |
198 | |
199 my $constraint = "x.display_label = ? AND g.is_current = 1"; | |
200 $self->bind_param_generic_fetch($label,SQL_VARCHAR); | |
201 my @genes = @{ $self->generic_fetch($constraint) }; | |
202 my $gene; | |
203 if (scalar(@genes) > 1) { | |
204 foreach my $gene_tmp (@genes) { | |
205 if ($gene_tmp->slice->is_reference) { | |
206 $gene = $gene_tmp; | |
207 } | |
208 last if ($gene); | |
209 } | |
210 if (!$gene) { | |
211 $gene = $genes[0]; | |
212 } | |
213 | |
214 } elsif (scalar(@genes) == 1) { | |
215 $gene = $genes[0]; | |
216 } | |
217 | |
218 return $gene; | |
219 } | |
220 | |
221 =head2 fetch_all_by_display_label | |
222 | |
223 Arg [1] : String $label - display label of genes to fetch | |
224 Example : my @genes = @{$geneAdaptor->fetch_all_by_display_label("PPP1R2P1")}; | |
225 Description: Returns all genes which have the given display label or undef if | |
226 there are none. | |
227 Returntype : listref of Bio::EnsEMBL::Gene objects | |
228 Exceptions : none | |
229 Caller : general | |
230 Status : Stable | |
231 | |
232 =cut | |
233 | |
234 sub fetch_all_by_display_label { | |
235 my $self = shift; | |
236 my $label = shift; | |
237 | |
238 my $constraint = "x.display_label = ? AND g.is_current = 1"; | |
239 $self->bind_param_generic_fetch($label,SQL_VARCHAR); | |
240 my $genes = $self->generic_fetch($constraint) ; | |
241 | |
242 return $genes; | |
243 } | |
244 | |
245 =head2 fetch_by_stable_id | |
246 | |
247 Arg [1] : String $id | |
248 The stable ID of the gene to retrieve | |
249 Example : $gene = $gene_adaptor->fetch_by_stable_id('ENSG00000148944'); | |
250 Description: Retrieves a gene object from the database via its stable id. | |
251 The gene will be retrieved in its native coordinate system (i.e. | |
252 in the coordinate system it is stored in the database). It may | |
253 be converted to a different coordinate system through a call to | |
254 transform() or transfer(). If the gene or exon is not found | |
255 undef is returned instead. | |
256 Returntype : Bio::EnsEMBL::Gene or undef | |
257 Exceptions : if we cant get the gene in given coord system | |
258 Caller : general | |
259 Status : Stable | |
260 | |
261 =cut | |
262 | |
263 sub fetch_by_stable_id { | |
264 my ($self, $stable_id) = @_; | |
265 | |
266 my $constraint = "g.stable_id = ? AND g.is_current = 1"; | |
267 $self->bind_param_generic_fetch($stable_id,SQL_VARCHAR); | |
268 my ($gene) = @{ $self->generic_fetch($constraint) }; | |
269 | |
270 return $gene; | |
271 } | |
272 | |
273 | |
274 | |
275 =head2 fetch_all_by_biotype | |
276 | |
277 Arg [1] : String $biotype | |
278 listref of $biotypes | |
279 The biotype of the gene to retrieve. You can have as an argument a reference | |
280 to a list of biotypes | |
281 Example : $gene = $gene_adaptor->fetch_all_by_biotype('protein_coding'); | |
282 $gene = $gene_adaptor->fetch_all_by_biotypes(['protein_coding', 'sRNA', 'miRNA']); | |
283 Description: Retrieves an array reference of gene objects from the database via its biotype or biotypes. | |
284 The genes will be retrieved in its native coordinate system (i.e. | |
285 in the coordinate system it is stored in the database). It may | |
286 be converted to a different coordinate system through a call to | |
287 transform() or transfer(). If the gene or exon is not found | |
288 undef is returned instead. | |
289 Returntype : listref of Bio::EnsEMBL::Gene | |
290 Exceptions : if we cant get the gene in given coord system | |
291 Caller : general | |
292 Status : Stable | |
293 | |
294 =cut | |
295 | |
296 sub fetch_all_by_biotype { | |
297 my ($self, $biotype) = @_; | |
298 | |
299 if (!defined $biotype){ | |
300 throw("Biotype or listref of biotypes expected"); | |
301 } | |
302 my $constraint; | |
303 if (ref($biotype) eq 'ARRAY'){ | |
304 $constraint = "g.biotype IN ("; | |
305 foreach my $b (@{$biotype}){ | |
306 $constraint .= "?,"; | |
307 $self->bind_param_generic_fetch($b,SQL_VARCHAR); | |
308 } | |
309 chop($constraint); #remove last , from expression | |
310 $constraint .= ") and g.is_current = 1"; | |
311 | |
312 } | |
313 else{ | |
314 $constraint = "g.biotype = ? and g.is_current = 1"; | |
315 $self->bind_param_generic_fetch($biotype,SQL_VARCHAR); | |
316 } | |
317 my @genes = @{ $self->generic_fetch($constraint) }; | |
318 return \@genes ; | |
319 } | |
320 | |
321 | |
322 sub fetch_all { | |
323 my ($self) = @_; | |
324 | |
325 my $constraint = 'g.biotype != "LRG_gene" and g.is_current = 1'; | |
326 my @genes = @{ $self->generic_fetch($constraint) }; | |
327 return \@genes ; | |
328 } | |
329 | |
330 | |
331 =head2 fetch_all_versions_by_stable_id | |
332 | |
333 Arg [1] : String $stable_id | |
334 The stable ID of the gene to retrieve | |
335 Example : $gene = $gene_adaptor->fetch_all_versions_by_stable_id | |
336 ('ENSG00000148944'); | |
337 Description : Similar to fetch_by_stable_id, but retrieves all versions of a | |
338 gene stored in the database. | |
339 Returntype : listref of Bio::EnsEMBL::Gene | |
340 Exceptions : if we cant get the gene in given coord system | |
341 Caller : general | |
342 Status : At Risk | |
343 | |
344 =cut | |
345 | |
346 sub fetch_all_versions_by_stable_id { | |
347 my ($self, $stable_id) = @_; | |
348 | |
349 my $constraint = "g.stable_id = ?"; | |
350 $self->bind_param_generic_fetch($stable_id,SQL_VARCHAR); | |
351 return $self->generic_fetch($constraint); | |
352 } | |
353 | |
354 | |
355 =head2 fetch_by_exon_stable_id | |
356 | |
357 Arg [1] : String $id | |
358 The stable id of an exon of the gene to retrieve | |
359 Example : $gene = $gene_adptr->fetch_by_exon_stable_id('ENSE00000148944'); | |
360 Description: Retrieves a gene object from the database via an exon stable id. | |
361 The gene will be retrieved in its native coordinate system (i.e. | |
362 in the coordinate system it is stored in the database). It may | |
363 be converted to a different coordinate system through a call to | |
364 transform() or transfer(). If the gene or exon is not found | |
365 undef is returned instead. | |
366 Returntype : Bio::EnsEMBL::Gene or undef | |
367 Exceptions : none | |
368 Caller : general | |
369 Status : Stable | |
370 | |
371 =cut | |
372 | |
373 sub fetch_by_exon_stable_id { | |
374 my ($self, $stable_id, $version) = @_; | |
375 | |
376 my $sql = qq( | |
377 SELECT t.gene_id | |
378 FROM transcript as t, | |
379 exon_transcript as et, | |
380 exon as e | |
381 WHERE t.transcript_id = et.transcript_id | |
382 AND et.exon_id = e.exon_id | |
383 AND e.stable_id = ? | |
384 AND e.is_current = 1 | |
385 ); | |
386 | |
387 my $sth = $self->prepare($sql); | |
388 $sth->bind_param(1, $stable_id, SQL_VARCHAR); | |
389 $sth->execute(); | |
390 | |
391 my ($dbID) = $sth->fetchrow_array(); | |
392 | |
393 return undef if(!defined($dbID)); | |
394 | |
395 my $gene = $self->fetch_by_dbID($dbID); | |
396 | |
397 return $gene; | |
398 } | |
399 | |
400 | |
401 =head2 fetch_all_by_domain | |
402 | |
403 Arg [1] : String $domain | |
404 The domain to fetch genes from | |
405 Example : my @genes = @{ $gene_adaptor->fetch_all_by_domain($domain) }; | |
406 Description: Retrieves a listref of genes whose translation contain interpro | |
407 domain $domain. The genes are returned in their native coord | |
408 system (i.e. the coord_system they are stored in). If the coord | |
409 system needs to be changed, then tranform or transfer should be | |
410 called on the individual objects returned. | |
411 Returntype : list of Bio::EnsEMBL::Genes | |
412 Exceptions : none | |
413 Caller : domainview | |
414 Status : Stable | |
415 | |
416 =cut | |
417 | |
418 sub fetch_all_by_domain { | |
419 my ($self, $domain) = @_; | |
420 | |
421 throw("domain argument is required") unless ($domain); | |
422 | |
423 my $sth = $self->prepare(qq( | |
424 SELECT tr.gene_id | |
425 FROM interpro i, | |
426 protein_feature pf, | |
427 transcript tr, | |
428 translation tl, | |
429 seq_region sr, | |
430 coord_system cs | |
431 WHERE cs.species_id = ? | |
432 AND cs.coord_system_id = sr.coord_system_id | |
433 AND sr.seq_region_id = tr.seq_region_id | |
434 AND tr.is_current = 1 | |
435 AND tr.transcript_id = tl.transcript_id | |
436 AND tl.translation_id = pf.translation_id | |
437 AND pf.hit_name = i.id | |
438 AND i.interpro_ac = ? | |
439 GROUP BY tr.gene_id)); | |
440 | |
441 $sth->bind_param( 1, $self->species_id(), SQL_VARCHAR ); | |
442 $sth->bind_param( 2, $domain, SQL_VARCHAR ); | |
443 | |
444 $sth->execute(); | |
445 | |
446 my @array = @{$sth->fetchall_arrayref()}; | |
447 $sth->finish(); | |
448 | |
449 my @gene_ids = map {$_->[0]} @array; | |
450 | |
451 return $self->fetch_all_by_dbID_list(\@gene_ids); | |
452 } | |
453 | |
454 | |
455 | |
456 =head2 fetch_all_by_Slice_and_external_dbname_link | |
457 | |
458 Arg [1] : Bio::EnsEMBL::Slice $slice | |
459 The slice to fetch genes on. | |
460 Arg [2] : (optional) string $logic_name | |
461 the logic name of the type of features to obtain | |
462 Arg [3] : (optional) boolean $load_transcripts | |
463 if true, transcripts will be loaded immediately | |
464 rather than lazy loaded later. | |
465 Arg [4] : Name of the external database | |
466 Example : @genes = @{ | |
467 $ga->fetch_all_by_Slice_and_external_dbname_link( | |
468 $slice, undef, undef, "HUGO" ) }; | |
469 Description: Overrides superclass method to optionally load | |
470 transcripts immediately rather than lazy-loading them | |
471 later. This is more efficient when there are a lot | |
472 of genes whose transcripts are going to be used. The | |
473 genes are then filtered to return only those with | |
474 external database links of the type specified | |
475 Returntype : reference to list of genes | |
476 Exceptions : thrown if exon cannot be placed on transcript slice | |
477 Caller : | |
478 Status : Stable | |
479 | |
480 =cut | |
481 | |
482 sub fetch_all_by_Slice_and_external_dbname_link { | |
483 my ( $self, $slice, $logic_name, $load_transcripts, $db_name ) = @_; | |
484 | |
485 # Get the external_db_id(s) from the name. | |
486 my $sth = $self->prepare( | |
487 "SELECT external_db_id FROM external_db WHERE db_name = ?"); | |
488 | |
489 $sth->bind_param( 1, $db_name, SQL_VARCHAR ); | |
490 $sth->execute(); | |
491 | |
492 my $external_db_id; | |
493 $sth->bind_columns( \$external_db_id ); | |
494 | |
495 my @external_db_ids; | |
496 while ( $sth->fetch() ) { | |
497 push( @external_db_ids, $external_db_id ); | |
498 } | |
499 | |
500 if ( scalar(@external_db_ids) == 0 ) { | |
501 warn sprintf( "Could not find external database " | |
502 . "'%s' in the external_db table\n" | |
503 . "Available are:\n", | |
504 $db_name ); | |
505 | |
506 $sth = $self->prepare("SELECT DISTINCT db_name FROM external_db"); | |
507 | |
508 $sth->execute(); | |
509 $sth->bind_columns( \$external_db_id ); | |
510 | |
511 while ( $sth->fetch() ) { | |
512 warn "\t$external_db_id\n"; | |
513 } | |
514 return []; | |
515 } | |
516 | |
517 # Get the gene_ids for those with links. | |
518 my $dbe_adaptor = $self->db()->get_DBEntryAdaptor(); | |
519 | |
520 my %linked_genes; | |
521 foreach $external_db_id (@external_db_ids) { | |
522 my @linked_genes = | |
523 $dbe_adaptor->list_gene_ids_by_external_db_id($external_db_id); | |
524 | |
525 foreach my $gene_id (@linked_genes) { | |
526 $linked_genes{$gene_id} = 1; | |
527 } | |
528 } | |
529 | |
530 # Get all the genes on the slice. | |
531 my $genes = $self->SUPER::fetch_all_by_Slice_constraint( $slice, | |
532 'g.is_current = 1', $logic_name ); | |
533 | |
534 # Create a list of those that are in the gene_ids list. | |
535 my @genes_passed; | |
536 foreach my $gene (@$genes) { | |
537 if ( exists( $linked_genes{ $gene->dbID() } ) ) { | |
538 push( @genes_passed, $gene ); | |
539 } | |
540 } | |
541 | |
542 # Return the list of those that passed. | |
543 return \@genes_passed; | |
544 } ## end sub fetch_all_by_Slice_and_external_dbname_link | |
545 | |
546 =head2 fetch_all_by_Slice | |
547 | |
548 Arg [1] : Bio::EnsEMBL::Slice $slice | |
549 The slice to fetch genes on. | |
550 Arg [2] : (optional) string $logic_name | |
551 the logic name of the type of features to obtain | |
552 Arg [3] : (optional) boolean $load_transcripts | |
553 if true, transcripts will be loaded immediately rather than | |
554 lazy loaded later. | |
555 Arg [4] : (optional) string $source | |
556 the source name of the features to obtain. | |
557 Arg [5] : (optional) string biotype | |
558 the biotype of the features to obtain. | |
559 Example : @genes = @{$gene_adaptor->fetch_all_by_Slice()}; | |
560 Description: Overrides superclass method to optionally load transcripts | |
561 immediately rather than lazy-loading them later. This | |
562 is more efficient when there are a lot of genes whose | |
563 transcripts are going to be used. | |
564 Returntype : reference to list of genes | |
565 Exceptions : thrown if exon cannot be placed on transcript slice | |
566 Caller : Slice::get_all_Genes | |
567 Status : Stable | |
568 | |
569 =cut | |
570 | |
571 sub fetch_all_by_Slice { | |
572 my ( $self, $slice, $logic_name, $load_transcripts, $source, | |
573 $biotype ) = @_; | |
574 | |
575 my $constraint = 'g.is_current = 1'; | |
576 | |
577 if ( defined($source) ) { | |
578 $constraint .= " and g.source = '$source'"; | |
579 } | |
580 if ( defined($biotype) ) { | |
581 $constraint .= " and g.biotype = '$biotype'"; | |
582 } | |
583 | |
584 my $genes = | |
585 $self->SUPER::fetch_all_by_Slice_constraint( $slice, $constraint, | |
586 $logic_name ); | |
587 | |
588 # If there are less than two genes, still do lazy-loading. | |
589 if ( !$load_transcripts || @$genes < 2 ) { | |
590 return $genes; | |
591 } | |
592 | |
593 # Preload all of the transcripts now, instead of lazy loading later, | |
594 # faster than one query per transcript. | |
595 | |
596 # First check if transcripts are already preloaded. | |
597 # FIXME: Should check all transcripts. | |
598 if ( exists( $genes->[0]->{'_transcript_array'} ) ) { | |
599 return $genes; | |
600 } | |
601 | |
602 # Get extent of region spanned by transcripts. | |
603 my ( $min_start, $max_end ); | |
604 foreach my $g (@$genes) { | |
605 if ( !defined($min_start) || $g->seq_region_start() < $min_start ) { | |
606 $min_start = $g->seq_region_start(); | |
607 } | |
608 if ( !defined($max_end) || $g->seq_region_end() > $max_end ) { | |
609 $max_end = $g->seq_region_end(); | |
610 } | |
611 } | |
612 | |
613 my $ext_slice; | |
614 | |
615 if ( $min_start >= $slice->start() && $max_end <= $slice->end() ) { | |
616 $ext_slice = $slice; | |
617 } else { | |
618 my $sa = $self->db()->get_SliceAdaptor(); | |
619 $ext_slice = $sa->fetch_by_region( | |
620 $slice->coord_system->name(), $slice->seq_region_name(), | |
621 $min_start, $max_end, | |
622 $slice->strand(), $slice->coord_system->version() ); | |
623 } | |
624 | |
625 # Associate transcript identifiers with genes. | |
626 | |
627 my %g_hash = map { $_->dbID => $_ } @{$genes}; | |
628 | |
629 my $g_id_str = join( ',', keys(%g_hash) ); | |
630 | |
631 my $sth = | |
632 $self->prepare( "SELECT gene_id, transcript_id " | |
633 . "FROM transcript " | |
634 . "WHERE gene_id IN ($g_id_str)" ); | |
635 | |
636 $sth->execute(); | |
637 | |
638 my ( $g_id, $tr_id ); | |
639 $sth->bind_columns( \( $g_id, $tr_id ) ); | |
640 | |
641 my %tr_g_hash; | |
642 | |
643 while ( $sth->fetch() ) { | |
644 $tr_g_hash{$tr_id} = $g_hash{$g_id}; | |
645 } | |
646 | |
647 my $ta = $self->db()->get_TranscriptAdaptor(); | |
648 my $transcripts = $ta->fetch_all_by_Slice( | |
649 $ext_slice, | |
650 1, undef, | |
651 sprintf( "t.transcript_id IN (%s)", | |
652 join( ',', sort { $a <=> $b } keys(%tr_g_hash) ) ) ); | |
653 | |
654 # Move transcripts onto gene slice, and add them to genes. | |
655 foreach my $tr ( @{$transcripts} ) { | |
656 if ( !exists( $tr_g_hash{ $tr->dbID() } ) ) { next } | |
657 | |
658 my $new_tr; | |
659 if ( $slice != $ext_slice ) { | |
660 $new_tr = $tr->transfer($slice); | |
661 if ( !defined($new_tr) ) { | |
662 throw("Unexpected. " | |
663 . "Transcript could not be transfered onto Gene slice." ); | |
664 } | |
665 } else { | |
666 $new_tr = $tr; | |
667 } | |
668 | |
669 $tr_g_hash{ $tr->dbID() }->add_Transcript($new_tr); | |
670 } | |
671 | |
672 return $genes; | |
673 } ## end sub fetch_all_by_Slice | |
674 | |
675 =head2 fetch_by_transcript_id | |
676 | |
677 Arg [1] : Int $trans_id | |
678 Unique database identifier for the transcript whose gene should | |
679 be retrieved. The gene is returned in its native coord | |
680 system (i.e. the coord_system it is stored in). If the coord | |
681 system needs to be changed, then tranform or transfer should | |
682 be called on the returned object. undef is returned if the | |
683 gene or transcript is not found in the database. | |
684 Example : $gene = $gene_adaptor->fetch_by_transcript_id(1241); | |
685 Description: Retrieves a gene from the database via the database identifier | |
686 of one of its transcripts. | |
687 Returntype : Bio::EnsEMBL::Gene | |
688 Exceptions : none | |
689 Caller : general | |
690 Status : Stable | |
691 | |
692 =cut | |
693 | |
694 sub fetch_by_transcript_id { | |
695 my ($self, $trans_id) = @_; | |
696 | |
697 # this is a cheap SQL call | |
698 my $sth = $self->prepare(qq( | |
699 SELECT tr.gene_id | |
700 FROM transcript tr | |
701 WHERE tr.transcript_id = ? | |
702 )); | |
703 | |
704 $sth->bind_param(1, $trans_id, SQL_INTEGER); | |
705 $sth->execute(); | |
706 | |
707 my ($geneid) = $sth->fetchrow_array(); | |
708 | |
709 $sth->finish(); | |
710 | |
711 return undef if( !defined $geneid ); | |
712 | |
713 my $gene = $self->fetch_by_dbID($geneid); | |
714 return $gene; | |
715 } | |
716 | |
717 | |
718 =head2 fetch_by_transcript_stable_id | |
719 | |
720 Arg [1] : string $trans_stable_id | |
721 transcript stable ID whose gene should be retrieved | |
722 Example : my $gene = $gene_adaptor->fetch_by_transcript_stable_id | |
723 ('ENST0000234'); | |
724 Description: Retrieves a gene from the database via the stable ID of one of | |
725 its transcripts | |
726 Returntype : Bio::EnsEMBL::Gene | |
727 Exceptions : none | |
728 Caller : general | |
729 Status : Stable | |
730 | |
731 =cut | |
732 | |
733 sub fetch_by_transcript_stable_id { | |
734 my ($self, $trans_stable_id) = @_; | |
735 | |
736 my $sth = $self->prepare(qq( | |
737 SELECT gene_id | |
738 FROM transcript | |
739 WHERE stable_id = ? | |
740 AND is_current = 1 | |
741 )); | |
742 | |
743 $sth->bind_param(1, $trans_stable_id, SQL_VARCHAR); | |
744 $sth->execute(); | |
745 | |
746 my ($geneid) = $sth->fetchrow_array(); | |
747 $sth->finish; | |
748 | |
749 return undef if (!defined $geneid); | |
750 | |
751 my $gene = $self->fetch_by_dbID($geneid); | |
752 return $gene; | |
753 } | |
754 | |
755 | |
756 =head2 fetch_by_translation_stable_id | |
757 | |
758 Arg [1] : String $translation_stable_id | |
759 The stable id of a translation of the gene to be obtained | |
760 Example : my $gene = $gene_adaptor->fetch_by_translation_stable_id | |
761 ('ENSP00000278194'); | |
762 Description: Retrieves a gene via the stable id of one of its translations. | |
763 Returntype : Bio::EnsEMBL::Gene | |
764 Exceptions : none | |
765 Caller : general | |
766 Status : Stable | |
767 | |
768 =cut | |
769 | |
770 sub fetch_by_translation_stable_id { | |
771 my ($self, $translation_stable_id) = @_; | |
772 | |
773 my $sth = $self->prepare(qq( | |
774 SELECT tr.gene_id | |
775 FROM transcript tr, | |
776 translation tl | |
777 WHERE tl.stable_id = ? | |
778 AND tr.transcript_id = tl.transcript_id | |
779 AND tr.is_current = 1 | |
780 )); | |
781 | |
782 $sth->bind_param(1, $translation_stable_id, SQL_VARCHAR); | |
783 $sth->execute(); | |
784 | |
785 my ($geneid) = $sth->fetchrow_array(); | |
786 $sth->finish; | |
787 if( !defined $geneid ) { | |
788 return undef; | |
789 } | |
790 return $self->fetch_by_dbID($geneid); | |
791 } | |
792 | |
793 | |
794 | |
795 | |
796 =head2 fetch_all_by_external_name | |
797 | |
798 Arg [1] : String $external_name | |
799 The external identifier for the gene to be obtained | |
800 Arg [2] : (optional) String $external_db_name | |
801 The name of the external database from which the | |
802 identifier originates. | |
803 Arg [3] : Boolean override. Force SQL regex matching for users | |
804 who really do want to find all 'NM%' | |
805 Example : @genes = @{$gene_adaptor->fetch_all_by_external_name('BRCA2')} | |
806 @many_genes = @{$gene_adaptor->fetch_all_by_external_name('BRCA%')} | |
807 Description: Retrieves a list of genes with an external database | |
808 identifier $external_name. The genes returned are in | |
809 their native coordinate system, i.e. in the coordinate | |
810 system they are stored in the database in. If another | |
811 coordinate system is required then the Gene::transfer or | |
812 Gene::transform method can be used. | |
813 SQL wildcards % and _ are supported in the $external_name, | |
814 but their use is somewhat restricted for performance reasons. | |
815 Users that really do want % and _ in the first three characters | |
816 should use argument 3 to prevent optimisations | |
817 Returntype : listref of Bio::EnsEMBL::Gene | |
818 Exceptions : none | |
819 Caller : goview, general | |
820 Status : Stable | |
821 | |
822 =cut | |
823 | |
824 sub fetch_all_by_external_name { | |
825 my ( $self, $external_name, $external_db_name, $override ) = @_; | |
826 | |
827 my $entryAdaptor = $self->db->get_DBEntryAdaptor(); | |
828 | |
829 my @ids = | |
830 $entryAdaptor->list_gene_ids_by_extids( $external_name, | |
831 $external_db_name, $override ); | |
832 | |
833 my %genes_by_dbIDs = | |
834 map { $_->dbID(), $_ } @{ $self->fetch_all_by_dbID_list( \@ids ) }; | |
835 | |
836 my @result = map { $genes_by_dbIDs{$_} } @ids; | |
837 | |
838 return \@result; | |
839 } | |
840 | |
841 =head2 fetch_all_by_GOTerm | |
842 | |
843 Arg [1] : Bio::EnsEMBL::OntologyTerm | |
844 The GO term for which genes should be fetched. | |
845 | |
846 Example: @genes = @{ | |
847 $gene_adaptor->fetch_all_by_GOTerm( | |
848 $go_adaptor->fetch_by_accession('GO:0030326') ) }; | |
849 | |
850 Description : Retrieves a list of genes that are associated with | |
851 the given GO term, or with any of its descendent | |
852 GO terms. The genes returned are in their native | |
853 coordinate system, i.e. in the coordinate system | |
854 in which they are stored in the database. If | |
855 another coordinate system is required then the | |
856 Gene::transfer or Gene::transform method can be | |
857 used. | |
858 | |
859 Return type : listref of Bio::EnsEMBL::Gene | |
860 Exceptions : Throws of argument is not a GO term | |
861 Caller : general | |
862 Status : Stable | |
863 | |
864 =cut | |
865 | |
866 sub fetch_all_by_GOTerm { | |
867 my ( $self, $term ) = @_; | |
868 | |
869 assert_ref( $term, 'Bio::EnsEMBL::OntologyTerm' ); | |
870 if ( $term->ontology() ne 'GO' ) { | |
871 throw('Argument is not a GO term'); | |
872 } | |
873 | |
874 my $entryAdaptor = $self->db->get_DBEntryAdaptor(); | |
875 | |
876 my %unique_dbIDs; | |
877 foreach my $accession ( map { $_->accession() } | |
878 ( $term, @{ $term->descendants() } ) ) | |
879 { | |
880 my @ids = | |
881 $entryAdaptor->list_gene_ids_by_extids( $accession, 'GO' ); | |
882 foreach my $dbID (@ids) { $unique_dbIDs{$dbID} = 1 } | |
883 } | |
884 | |
885 my @result = @{ | |
886 $self->fetch_all_by_dbID_list( | |
887 [ sort { $a <=> $b } keys(%unique_dbIDs) ] | |
888 ) }; | |
889 | |
890 return \@result; | |
891 } ## end sub fetch_all_by_GOTerm | |
892 | |
893 =head2 fetch_all_by_GOTerm_accession | |
894 | |
895 Arg [1] : String | |
896 The GO term accession for which genes should be | |
897 fetched. | |
898 | |
899 Example : | |
900 | |
901 @genes = | |
902 @{ $gene_adaptor->fetch_all_by_GOTerm_accession( | |
903 'GO:0030326') }; | |
904 | |
905 Description : Retrieves a list of genes that are associated with | |
906 the given GO term, or with any of its descendent | |
907 GO terms. The genes returned are in their native | |
908 coordinate system, i.e. in the coordinate system | |
909 in which they are stored in the database. If | |
910 another coordinate system is required then the | |
911 Gene::transfer or Gene::transform method can be | |
912 used. | |
913 | |
914 Return type : listref of Bio::EnsEMBL::Gene | |
915 Exceptions : Throws of argument is not a GO term accession | |
916 Caller : general | |
917 Status : Stable | |
918 | |
919 =cut | |
920 | |
921 sub fetch_all_by_GOTerm_accession { | |
922 my ( $self, $accession ) = @_; | |
923 | |
924 if ( $accession !~ /^GO:/ ) { | |
925 throw('Argument is not a GO term accession'); | |
926 } | |
927 | |
928 my $goAdaptor = | |
929 Bio::EnsEMBL::Registry->get_adaptor( 'Multi', 'Ontology', | |
930 'OntologyTerm' ); | |
931 | |
932 my $term = $goAdaptor->fetch_by_accession($accession); | |
933 | |
934 return $self->fetch_all_by_GOTerm($term); | |
935 } | |
936 | |
937 =head2 fetch_all_alt_alleles | |
938 | |
939 Arg [1] : Bio::EnsEMBL::Gene $gene | |
940 The gene to fetch alternative alleles for | |
941 Example : my @alt_genes = @{ $gene_adaptor->fetch_all_alt_alleles($gene) }; | |
942 foreach my $alt_gene (@alt_genes) { | |
943 print "Alternate allele: " . $alt_gene->stable_id() . "\n" ; | |
944 } | |
945 Description: Retrieves genes which are alternate alleles to a provided gene. | |
946 Alternate alleles in Ensembl are genes which are similar and are | |
947 on an alternative haplotype of the same region. There are not | |
948 currently very many of these. This method will return a | |
949 reference to an empty list if no alternative alleles are found. | |
950 Returntype : listref of Bio::EnsEMBL::Genes | |
951 Exceptions : throw if incorrect arg provided | |
952 warning if gene arg does not have dbID | |
953 Caller : Gene::get_all_alt_alleles | |
954 Status : Stable | |
955 | |
956 =cut | |
957 | |
958 sub fetch_all_alt_alleles { | |
959 my $self = shift; | |
960 my $gene = shift; | |
961 | |
962 if(!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) { | |
963 throw('Bio::EnsEMBL::Gene argument is required'); | |
964 } | |
965 | |
966 my $gene_id = $gene->dbID(); | |
967 | |
968 if(!$gene_id) { | |
969 warning('Cannot retrieve alternate alleles for gene without dbID'); | |
970 return []; | |
971 } | |
972 | |
973 my $sth = $self->prepare("SELECT aa1.gene_id " . | |
974 "FROM alt_allele aa1, alt_allele aa2 " . | |
975 "WHERE aa1.alt_allele_id = aa2.alt_allele_id " . | |
976 "AND aa2.gene_id = ? " . | |
977 "AND aa1.gene_id <> ?"); | |
978 | |
979 $sth->bind_param(1, $gene_id, SQL_INTEGER); | |
980 $sth->bind_param(2, $gene_id, SQL_INTEGER); | |
981 $sth->execute(); | |
982 | |
983 my @alt_ids; | |
984 my $row; | |
985 while($row = $sth->fetchrow_arrayref()) { | |
986 push @alt_ids, $row->[0]; | |
987 } | |
988 $sth->finish(); | |
989 | |
990 if (@alt_ids) { | |
991 return $self->fetch_all_by_dbID_list(\@alt_ids); | |
992 } | |
993 | |
994 return []; | |
995 } | |
996 | |
997 sub is_ref{ | |
998 my ( $self, $gene_id) = @_; | |
999 my $is_not_ref; | |
1000 | |
1001 # easier to find if it is not an alt_Allele do this and then negate it. | |
1002 my $sth = $self->prepare("select count(1) from alt_allele where gene_id = $gene_id and is_ref = 0"); | |
1003 $sth->execute(); | |
1004 $sth->bind_columns(\$is_not_ref); | |
1005 $sth->fetch; | |
1006 | |
1007 if(defined($is_not_ref) and $is_not_ref){ | |
1008 return 0; | |
1009 } | |
1010 | |
1011 return 1; | |
1012 } | |
1013 | |
1014 | |
1015 =head2 store_alt_alleles | |
1016 | |
1017 | |
1018 Arg [1] : reference to list of Bio::EnsEMBL::Genes $genes | |
1019 Example : $gene_adaptor->store_alt_alleles([$gene1, $gene2, $gene3]); | |
1020 Description: This method creates a group of alternative alleles (i.e. locus) | |
1021 from a set of genes. The genes should be genes from alternate | |
1022 haplotypes which are similar. The genes must already be stored | |
1023 in this database. | |
1024 Returntype : int alt_allele_id or undef if no alt_alleles were stored | |
1025 Exceptions : throw on incorrect arguments | |
1026 throw on sql error (e.g. duplicate unique id) | |
1027 Caller : general | |
1028 Status : Stable | |
1029 | |
1030 =cut | |
1031 | |
1032 sub store_alt_alleles { | |
1033 my $self = shift; | |
1034 my $genes = shift; | |
1035 | |
1036 if(!ref($genes) eq 'ARRAY') { | |
1037 throw('List reference of Bio::EnsEMBL::Gene argument expected.'); | |
1038 } | |
1039 | |
1040 my @genes = @$genes; | |
1041 my $num_genes = scalar(@genes); | |
1042 | |
1043 if($num_genes < 2) { | |
1044 warning('At least 2 genes must be provided to construct alternative alleles (gene id: '. $genes[0]->dbID() .'). Ignoring.'); | |
1045 return; | |
1046 } | |
1047 | |
1048 my @is_ref; | |
1049 my @ref_genes = (); | |
1050 my @non_ref_genes = (); | |
1051 my @gene_ids = (); | |
1052 | |
1053 foreach my $gene (@genes) { | |
1054 | |
1055 if(!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) { | |
1056 throw('List reference of Bio::EnsEMBL::Gene argument expected.'); | |
1057 } | |
1058 | |
1059 my $gene_id = $gene->dbID(); | |
1060 | |
1061 if (!$gene_id) { | |
1062 throw('Genes must have dbIDs in order to construct alternative alleles.'); | |
1063 } else { | |
1064 push @gene_ids, $gene_id; | |
1065 } | |
1066 | |
1067 my $is_ref = $gene->slice->is_reference(); | |
1068 | |
1069 push @is_ref, $is_ref; | |
1070 | |
1071 if ($is_ref) { | |
1072 push @ref_genes, $gene->dbID(); | |
1073 } else { | |
1074 push @non_ref_genes, $gene->dbID(); | |
1075 } | |
1076 } | |
1077 if (scalar(@ref_genes) > 1) { | |
1078 warning('More than one alternative allele on the reference sequence (gene ids: ' . join(',',@ref_genes) . '). Ignoring.'); | |
1079 return; | |
1080 } | |
1081 | |
1082 # | |
1083 #insert the first gene seperately in order to get a unique identifier for | |
1084 #the set of alleles | |
1085 # | |
1086 | |
1087 my $sth = $self->prepare("INSERT INTO alt_allele (gene_id, is_ref) VALUES (?,?)"); | |
1088 $sth->bind_param(1, $gene_ids[0], SQL_INTEGER); | |
1089 $sth->bind_param(2, $is_ref[0], SQL_INTEGER); | |
1090 eval { | |
1091 $sth->execute(); | |
1092 }; | |
1093 my $alt_allele_id = $sth->{'mysql_insertid'}; | |
1094 | |
1095 if (!$alt_allele_id || $@) { | |
1096 throw("An SQL error occured inserting alternative alleles:\n$@"); | |
1097 } | |
1098 $sth->finish(); | |
1099 # | |
1100 # Insert all subsequent alt alleles using the alt_allele identifier | |
1101 # from the first insert | |
1102 # | |
1103 | |
1104 $sth = $self->prepare("INSERT INTO alt_allele (alt_allele_id, gene_id, is_ref) " . | |
1105 "VALUES (?,?,?)"); | |
1106 | |
1107 for (my $i = 1; $i < $num_genes; $i++) { | |
1108 | |
1109 $sth->bind_param(1, $alt_allele_id, SQL_INTEGER); | |
1110 $sth->bind_param(2, $gene_ids[$i], SQL_INTEGER); | |
1111 $sth->bind_param(3, $is_ref[$i], SQL_INTEGER); | |
1112 eval { | |
1113 $sth->execute(); | |
1114 }; | |
1115 | |
1116 if ($@) { | |
1117 # an error occured, revert the db to the previous state | |
1118 $sth = $self->prepare("DELETE FROM alt_allele WHERE alt_allele_id = ?"); | |
1119 $sth->bind_param(1, $alt_allele_id, SQL_INTEGER); | |
1120 $sth->execute(); | |
1121 $sth->finish(); | |
1122 throw("An SQL error occured inserting alternative alleles:\n$@"); | |
1123 } | |
1124 } | |
1125 | |
1126 $sth->finish(); | |
1127 | |
1128 return $alt_allele_id; | |
1129 } | |
1130 | |
1131 | |
1132 =head2 store | |
1133 | |
1134 Arg [1] : Bio::EnsEMBL::Gene $gene | |
1135 The gene to store in the database | |
1136 Arg [2] : ignore_release in xrefs [default 1] set to 0 to use release info | |
1137 in external database references | |
1138 Example : $gene_adaptor->store($gene); | |
1139 Description: Stores a gene in the database. | |
1140 Returntype : the database identifier (dbID) of the newly stored gene | |
1141 Exceptions : thrown if the $gene is not a Bio::EnsEMBL::Gene or if | |
1142 $gene does not have an analysis object | |
1143 Caller : general | |
1144 Status : Stable | |
1145 | |
1146 =cut | |
1147 | |
1148 sub store { | |
1149 my ($self, $gene, $ignore_release) = @_; | |
1150 | |
1151 if (!ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) { | |
1152 throw("Must store a gene object, not a $gene"); | |
1153 } | |
1154 if(!defined($ignore_release)){ | |
1155 $ignore_release = 1; | |
1156 } | |
1157 my $db = $self->db(); | |
1158 | |
1159 if ($gene->is_stored($db)) { | |
1160 return $gene->dbID(); | |
1161 } | |
1162 | |
1163 # ensure coords are correct before storing | |
1164 $gene->recalculate_coordinates(); | |
1165 | |
1166 my $analysis = $gene->analysis(); | |
1167 throw("Genes must have an analysis object.") if(!defined($analysis)); | |
1168 | |
1169 my $analysis_id; | |
1170 if ($analysis->is_stored($db)) { | |
1171 $analysis_id = $analysis->dbID(); | |
1172 } else { | |
1173 $analysis_id = $db->get_AnalysisAdaptor->store($analysis); | |
1174 } | |
1175 | |
1176 my $type = $gene->biotype || ""; | |
1177 | |
1178 # default to is_current = 1 if this attribute is not set | |
1179 my $is_current = $gene->is_current; | |
1180 $is_current = 1 unless (defined($is_current)); | |
1181 | |
1182 my $original = $gene; | |
1183 my $original_transcripts = $gene->get_all_Transcripts(); | |
1184 | |
1185 my $seq_region_id; | |
1186 | |
1187 ( $gene, $seq_region_id ) = $self->_pre_store($gene); | |
1188 | |
1189 my $store_gene_sql = qq( | |
1190 INSERT INTO gene | |
1191 SET biotype = ?, | |
1192 analysis_id = ?, | |
1193 seq_region_id = ?, | |
1194 seq_region_start = ?, | |
1195 seq_region_end = ?, | |
1196 seq_region_strand = ?, | |
1197 description = ?, | |
1198 source = ?, | |
1199 status = ?, | |
1200 is_current = ?, | |
1201 canonical_transcript_id = ?, | |
1202 canonical_annotation = ? | |
1203 ); | |
1204 | |
1205 if (defined($gene->stable_id)) { | |
1206 my $created = $self->db->dbc->from_seconds_to_date($gene->created_date()); | |
1207 my $modified = $self->db->dbc->from_seconds_to_date($gene->modified_date()); | |
1208 $store_gene_sql .= ", stable_id = ?, version = ?, created_date = " . $created . " , modified_date = " . $modified; | |
1209 | |
1210 } | |
1211 | |
1212 # column status is used from schema version 34 onwards (before it was | |
1213 # confidence) | |
1214 | |
1215 my $sth = $self->prepare($store_gene_sql); | |
1216 $sth->bind_param( 1, $type, SQL_VARCHAR ); | |
1217 $sth->bind_param( 2, $analysis_id, SQL_INTEGER ); | |
1218 $sth->bind_param( 3, $seq_region_id, SQL_INTEGER ); | |
1219 $sth->bind_param( 4, $gene->start(), SQL_INTEGER ); | |
1220 $sth->bind_param( 5, $gene->end(), SQL_INTEGER ); | |
1221 $sth->bind_param( 6, $gene->strand(), SQL_TINYINT ); | |
1222 $sth->bind_param( 7, $gene->description(), SQL_LONGVARCHAR ); | |
1223 $sth->bind_param( 8, $gene->source(), SQL_VARCHAR ); | |
1224 $sth->bind_param( 9, $gene->status(), SQL_VARCHAR ); | |
1225 $sth->bind_param( 10, $is_current, SQL_TINYINT ); | |
1226 | |
1227 # Canonical transcript ID will be updated later. | |
1228 # Set it to zero for now. | |
1229 $sth->bind_param( 11, 0, SQL_TINYINT ); | |
1230 | |
1231 $sth->bind_param( 12, $gene->canonical_annotation(), SQL_VARCHAR ); | |
1232 | |
1233 if ( defined($gene->stable_id) ) { | |
1234 | |
1235 $sth->bind_param( 13, $gene->stable_id, SQL_VARCHAR ); | |
1236 my $version = ($gene->version()) ? $gene->version() : 1; | |
1237 $sth->bind_param( 14, $version, SQL_INTEGER ); | |
1238 } | |
1239 | |
1240 $sth->execute(); | |
1241 $sth->finish(); | |
1242 | |
1243 my $gene_dbID = $sth->{'mysql_insertid'}; | |
1244 | |
1245 # store the dbentries associated with this gene | |
1246 my $dbEntryAdaptor = $db->get_DBEntryAdaptor(); | |
1247 | |
1248 foreach my $dbe ( @{ $gene->get_all_DBEntries } ) { | |
1249 $dbEntryAdaptor->store( $dbe, $gene_dbID, "Gene", $ignore_release ); | |
1250 } | |
1251 | |
1252 # We allow transcripts not to share equal exons and instead have | |
1253 # copies. For the database we still want sharing though, to have | |
1254 # easier time with stable ids. So we need to have a step to merge | |
1255 # exons together before store. | |
1256 my %exons; | |
1257 | |
1258 foreach my $trans ( @{$gene->get_all_Transcripts} ) { | |
1259 foreach my $e ( @{$trans->get_all_Exons} ) { | |
1260 my $key = $e->hashkey(); | |
1261 if( exists $exons{ $key } ) { | |
1262 $trans->swap_exons( $e, $exons{$key} ); | |
1263 } else { | |
1264 $exons{$key} = $e; | |
1265 } | |
1266 } | |
1267 } | |
1268 | |
1269 my $transcript_adaptor = $db->get_TranscriptAdaptor(); | |
1270 | |
1271 my $transcripts = $gene->get_all_Transcripts(); | |
1272 | |
1273 my $new_canonical_transcript_id; | |
1274 for ( my $i = 0; $i < @$transcripts; $i++ ) { | |
1275 my $new = $transcripts->[$i]; | |
1276 my $old = $original_transcripts->[$i]; | |
1277 | |
1278 $transcript_adaptor->store( $new, $gene_dbID, $analysis_id ); | |
1279 | |
1280 if ( !defined($new_canonical_transcript_id) | |
1281 && $new->is_canonical() ) | |
1282 { | |
1283 $new_canonical_transcript_id = $new->dbID(); | |
1284 } | |
1285 | |
1286 # update the original transcripts since we may have made copies of | |
1287 # them by transforming the gene | |
1288 $old->dbID( $new->dbID() ); | |
1289 $old->adaptor( $new->adaptor() ); | |
1290 | |
1291 if ( $new->translation ) { | |
1292 $old->translation->dbID( $new->translation()->dbID ); | |
1293 $old->translation->adaptor( $new->translation()->adaptor ); | |
1294 } | |
1295 } | |
1296 | |
1297 if ( defined($new_canonical_transcript_id) ) { | |
1298 # Now the canonical transcript has been stored, so update the | |
1299 # canonical_transcript_id of this gene with the new dbID. | |
1300 my $sth = $self->prepare( | |
1301 q( | |
1302 UPDATE gene | |
1303 SET canonical_transcript_id = ? | |
1304 WHERE gene_id = ?) | |
1305 ); | |
1306 | |
1307 $sth->bind_param( 1, $new_canonical_transcript_id, SQL_INTEGER ); | |
1308 $sth->bind_param( 2, $gene_dbID, SQL_INTEGER ); | |
1309 | |
1310 $sth->execute(); | |
1311 $sth->finish(); | |
1312 } | |
1313 | |
1314 # update gene to point to display xref if it is set | |
1315 if(my $display_xref = $gene->display_xref) { | |
1316 my $dxref_id; | |
1317 if($display_xref->is_stored($db)) { | |
1318 $dxref_id = $display_xref->dbID(); | |
1319 } else { | |
1320 $dxref_id = $dbEntryAdaptor->exists($display_xref); | |
1321 } | |
1322 | |
1323 if(defined($dxref_id)) { | |
1324 my $sth = $self->prepare | |
1325 ("UPDATE gene SET display_xref_id = ? WHERE gene_id = ?"); | |
1326 $sth->bind_param(1, $dxref_id, SQL_INTEGER); | |
1327 $sth->bind_param(2, $gene_dbID, SQL_INTEGER); | |
1328 $sth->execute(); | |
1329 $sth->finish(); | |
1330 $display_xref->dbID($dxref_id); | |
1331 $display_xref->adaptor($dbEntryAdaptor); | |
1332 $display_xref->dbID($dxref_id); | |
1333 $display_xref->adaptor($dbEntryAdaptor); | |
1334 } else { | |
1335 warning("Display_xref ".$display_xref->dbname().":". | |
1336 $display_xref->display_id() . " is not stored in database.\n". | |
1337 "Not storing relationship to this gene."); | |
1338 $display_xref->dbID(undef); | |
1339 $display_xref->adaptor(undef); | |
1340 } | |
1341 } | |
1342 | |
1343 # store gene attributes if there are any | |
1344 my $attr_adaptor = $db->get_AttributeAdaptor(); | |
1345 $attr_adaptor->store_on_Gene($gene_dbID, $gene->get_all_Attributes); | |
1346 | |
1347 # store unconventional transcript associations if there are any | |
1348 my $utaa = $db->get_UnconventionalTranscriptAssociationAdaptor(); | |
1349 foreach my $uta (@{$gene->get_all_unconventional_transcript_associations()}) { | |
1350 $utaa->store($uta); | |
1351 } | |
1352 | |
1353 # set the adaptor and dbID on the original passed in gene not the | |
1354 # transfered copy | |
1355 $original->adaptor($self); | |
1356 $original->dbID($gene_dbID); | |
1357 | |
1358 return $gene_dbID; | |
1359 } | |
1360 | |
1361 | |
1362 =head2 remove | |
1363 | |
1364 Arg [1] : Bio::EnsEMBL::Gene $gene | |
1365 the gene to remove from the database | |
1366 Example : $gene_adaptor->remove($gene); | |
1367 Description: Removes a gene completely from the database. All associated | |
1368 transcripts, exons, stable_identifiers, descriptions, etc. | |
1369 are removed as well. Use with caution! | |
1370 Returntype : none | |
1371 Exceptions : throw on incorrect arguments | |
1372 warning if gene is not stored in this database | |
1373 Caller : general | |
1374 Status : Stable | |
1375 | |
1376 =cut | |
1377 | |
1378 sub remove { | |
1379 my $self = shift; | |
1380 my $gene = shift; | |
1381 | |
1382 if (!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) { | |
1383 throw("Bio::EnsEMBL::Gene argument expected."); | |
1384 } | |
1385 | |
1386 if ( !$gene->is_stored($self->db()) ) { | |
1387 warning("Cannot remove gene " . $gene->dbID() . ". Is not stored in " . | |
1388 "this database."); | |
1389 return; | |
1390 } | |
1391 | |
1392 # remove all object xrefs associated with this gene | |
1393 | |
1394 my $dbe_adaptor = $self->db()->get_DBEntryAdaptor(); | |
1395 foreach my $dbe (@{$gene->get_all_DBEntries()}) { | |
1396 $dbe_adaptor->remove_from_object($dbe, $gene, 'Gene'); | |
1397 } | |
1398 | |
1399 # remove all alternative allele entries associated with this gene | |
1400 my $sth = $self->prepare("DELETE FROM alt_allele WHERE gene_id = ?"); | |
1401 $sth->bind_param( 1, $gene->dbID, SQL_INTEGER ); | |
1402 $sth->execute(); | |
1403 $sth->finish(); | |
1404 | |
1405 # remove the attributes associated with this transcript | |
1406 my $attrib_adaptor = $self->db->get_AttributeAdaptor; | |
1407 $attrib_adaptor->remove_from_Gene($gene); | |
1408 | |
1409 # remove all of the transcripts associated with this gene | |
1410 my $transcriptAdaptor = $self->db->get_TranscriptAdaptor(); | |
1411 foreach my $trans ( @{$gene->get_all_Transcripts()} ) { | |
1412 $transcriptAdaptor->remove($trans); | |
1413 } | |
1414 | |
1415 # remove any unconventional transcript associations involving this gene | |
1416 | |
1417 $sth = | |
1418 $self->prepare( "DELETE FROM unconventional_transcript_association " | |
1419 . "WHERE gene_id = ? " ); | |
1420 $sth->bind_param( 1, $gene->dbID, SQL_INTEGER ); | |
1421 $sth->execute(); | |
1422 $sth->finish(); | |
1423 | |
1424 # remove this gene from the database | |
1425 | |
1426 $sth = $self->prepare("DELETE FROM gene WHERE gene_id = ? "); | |
1427 $sth->bind_param( 1, $gene->dbID, SQL_INTEGER ); | |
1428 $sth->execute(); | |
1429 $sth->finish(); | |
1430 | |
1431 # unset the gene identifier and adaptor thereby flagging it as unstored | |
1432 | |
1433 $gene->dbID(undef); | |
1434 $gene->adaptor(undef); | |
1435 | |
1436 return; | |
1437 } | |
1438 | |
1439 | |
1440 =head2 get_Interpro_by_geneid | |
1441 | |
1442 Arg [1] : String $gene_stable_id | |
1443 The stable ID of the gene to obtain | |
1444 Example : @i = @{ | |
1445 $gene_adaptor->get_Interpro_by_geneid( | |
1446 $gene->stable_id() ) }; | |
1447 Description: Gets interpro accession numbers by gene stable id. A hack really | |
1448 - we should have a much more structured system than this. | |
1449 Returntype : listref of strings (Interpro_acc:description) | |
1450 Exceptions : none | |
1451 Caller : domainview | |
1452 Status : Stable | |
1453 | |
1454 =cut | |
1455 | |
1456 sub get_Interpro_by_geneid { | |
1457 my ($self, $gene_stable_id) = @_; | |
1458 | |
1459 my $sql = qq( | |
1460 SELECT i.interpro_ac, | |
1461 x.description | |
1462 FROM transcript t, | |
1463 translation tl, | |
1464 protein_feature pf, | |
1465 interpro i, | |
1466 xref x, | |
1467 gene g | |
1468 WHERE g.stable_id = ? | |
1469 AND t.gene_id = g.gene_id | |
1470 AND t.is_current = 1 | |
1471 AND tl.transcript_id = t.transcript_id | |
1472 AND tl.translation_id = pf.translation_id | |
1473 AND i.id = pf.hit_name | |
1474 AND i.interpro_ac = x.dbprimary_acc); | |
1475 | |
1476 my $sth = $self->prepare($sql); | |
1477 | |
1478 $sth->bind_param( 1, $gene_stable_id, SQL_VARCHAR ); | |
1479 | |
1480 $sth->execute; | |
1481 | |
1482 my @out; | |
1483 my %h; | |
1484 while( (my $arr = $sth->fetchrow_arrayref()) ) { | |
1485 if( $h{$arr->[0]} ) { next; } | |
1486 $h{$arr->[0]}=1; | |
1487 my $string = $arr->[0] .":".$arr->[1]; | |
1488 push(@out,$string); | |
1489 } | |
1490 | |
1491 return \@out; | |
1492 } | |
1493 | |
1494 | |
1495 =head2 update | |
1496 | |
1497 Arg [1] : Bio::EnsEMBL::Gene $gene | |
1498 The gene to update | |
1499 Example : $gene_adaptor->update($gene); | |
1500 Description: Updates the type, analysis, display_xref, status, is_current and | |
1501 description of a gene in the database. | |
1502 Returntype : None | |
1503 Exceptions : thrown if the $gene is not a Bio::EnsEMBL::Gene | |
1504 Caller : general | |
1505 Status : Stable | |
1506 | |
1507 =cut | |
1508 | |
1509 sub update { | |
1510 my ($self, $gene) = @_; | |
1511 my $update = 0; | |
1512 | |
1513 if ( !defined $gene || !ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) { | |
1514 throw("Must update a gene object, not a $gene"); | |
1515 } | |
1516 | |
1517 my $update_gene_sql = qq( | |
1518 UPDATE gene | |
1519 SET biotype = ?, | |
1520 analysis_id = ?, | |
1521 display_xref_id = ?, | |
1522 status = ?, | |
1523 description = ?, | |
1524 is_current = ?, | |
1525 canonical_transcript_id = ?, | |
1526 canonical_annotation = ? | |
1527 WHERE gene_id = ? | |
1528 ); | |
1529 | |
1530 my $display_xref = $gene->display_xref(); | |
1531 my $display_xref_id; | |
1532 | |
1533 if ( $display_xref && $display_xref->dbID() ) { | |
1534 $display_xref_id = $display_xref->dbID(); | |
1535 } else { | |
1536 $display_xref_id = undef; | |
1537 } | |
1538 | |
1539 my $sth = $self->prepare( $update_gene_sql ); | |
1540 | |
1541 $sth->bind_param( 1, $gene->biotype(), SQL_VARCHAR ); | |
1542 $sth->bind_param( 2, $gene->analysis->dbID(), SQL_INTEGER ); | |
1543 $sth->bind_param( 3, $display_xref_id, SQL_INTEGER ); | |
1544 $sth->bind_param( 4, $gene->status(), SQL_VARCHAR ); | |
1545 $sth->bind_param( 5, $gene->description(), SQL_VARCHAR ); | |
1546 $sth->bind_param( 6, $gene->is_current(), SQL_TINYINT ); | |
1547 | |
1548 if ( defined( $gene->canonical_transcript() ) ) { | |
1549 $sth->bind_param( 7, $gene->canonical_transcript()->dbID(), | |
1550 SQL_INTEGER ); | |
1551 } else { | |
1552 $sth->bind_param( 7, 0, SQL_INTEGER ); | |
1553 } | |
1554 | |
1555 $sth->bind_param( 8, $gene->canonical_annotation(), SQL_VARCHAR ); | |
1556 $sth->bind_param( 9, $gene->dbID(), SQL_INTEGER ); | |
1557 | |
1558 $sth->execute(); | |
1559 | |
1560 # maybe should update stable id ??? | |
1561 } | |
1562 | |
1563 | |
1564 # _objs_from_sth | |
1565 | |
1566 # Arg [1] : StatementHandle $sth | |
1567 # Arg [2] : Bio::EnsEMBL::AssemblyMapper $mapper | |
1568 # Arg [3] : Bio::EnsEMBL::Slice $dest_slice | |
1569 # Description: PROTECTED implementation of abstract superclass method. | |
1570 # responsible for the creation of Genes | |
1571 # Returntype : listref of Bio::EnsEMBL::Genes in target coordinate system | |
1572 # Exceptions : none | |
1573 # Caller : internal | |
1574 # Status : Stable | |
1575 | |
1576 sub _objs_from_sth { | |
1577 my ($self, $sth, $mapper, $dest_slice) = @_; | |
1578 | |
1579 # | |
1580 # This code is ugly because an attempt has been made to remove as many | |
1581 # function calls as possible for speed purposes. Thus many caches and | |
1582 # a fair bit of gymnastics is used. | |
1583 # | |
1584 | |
1585 my $sa = $self->db()->get_SliceAdaptor(); | |
1586 my $aa = $self->db()->get_AnalysisAdaptor(); | |
1587 my $dbEntryAdaptor = $self->db()->get_DBEntryAdaptor(); | |
1588 | |
1589 my @genes; | |
1590 my %analysis_hash; | |
1591 my %slice_hash; | |
1592 my %sr_name_hash; | |
1593 my %sr_cs_hash; | |
1594 | |
1595 my ( | |
1596 $gene_id, $seq_region_id, | |
1597 $seq_region_start, $seq_region_end, | |
1598 $seq_region_strand, $analysis_id, | |
1599 $biotype, $display_xref_id, | |
1600 $gene_description, $status, | |
1601 $source, $is_current, | |
1602 $canonical_transcript_id, $canonical_annotation, | |
1603 $stable_id, $version, | |
1604 $created_date, $modified_date, | |
1605 $xref_display_id, $xref_primary_acc, | |
1606 $xref_desc, $xref_version, | |
1607 $external_db, $external_status, | |
1608 $external_release, $external_db_name, | |
1609 $info_type, $info_text | |
1610 ); | |
1611 | |
1612 $sth->bind_columns( | |
1613 \( | |
1614 $gene_id, $seq_region_id, | |
1615 $seq_region_start, $seq_region_end, | |
1616 $seq_region_strand, $analysis_id, | |
1617 $biotype, $display_xref_id, | |
1618 $gene_description, $status, | |
1619 $source, $is_current, | |
1620 $canonical_transcript_id, $canonical_annotation, | |
1621 $stable_id, $version, | |
1622 $created_date, $modified_date, | |
1623 $xref_display_id, $xref_primary_acc, | |
1624 $xref_desc, $xref_version, | |
1625 $external_db, $external_status, | |
1626 $external_release, $external_db_name, | |
1627 $info_type, $info_text | |
1628 ) ); | |
1629 | |
1630 my $asm_cs; | |
1631 my $cmp_cs; | |
1632 my $asm_cs_vers; | |
1633 my $asm_cs_name; | |
1634 my $cmp_cs_vers; | |
1635 my $cmp_cs_name; | |
1636 | |
1637 if($mapper) { | |
1638 $asm_cs = $mapper->assembled_CoordSystem(); | |
1639 $cmp_cs = $mapper->component_CoordSystem(); | |
1640 $asm_cs_name = $asm_cs->name(); | |
1641 $asm_cs_vers = $asm_cs->version(); | |
1642 $cmp_cs_name = $cmp_cs->name(); | |
1643 $cmp_cs_vers = $cmp_cs->version(); | |
1644 } | |
1645 | |
1646 my $dest_slice_start; | |
1647 my $dest_slice_end; | |
1648 my $dest_slice_strand; | |
1649 my $dest_slice_length; | |
1650 my $dest_slice_sr_name; | |
1651 my $dest_slice_sr_id; | |
1652 | |
1653 if($dest_slice) { | |
1654 $dest_slice_start = $dest_slice->start(); | |
1655 $dest_slice_end = $dest_slice->end(); | |
1656 $dest_slice_strand = $dest_slice->strand(); | |
1657 $dest_slice_length = $dest_slice->length(); | |
1658 $dest_slice_sr_name = $dest_slice->seq_region_name(); | |
1659 $dest_slice_sr_id = $dest_slice->get_seq_region_id(); | |
1660 } | |
1661 | |
1662 FEATURE: while($sth->fetch()) { | |
1663 #get the analysis object | |
1664 my $analysis = $analysis_hash{$analysis_id} ||= | |
1665 $aa->fetch_by_dbID($analysis_id); | |
1666 | |
1667 #need to get the internal_seq_region, if present | |
1668 $seq_region_id = $self->get_seq_region_id_internal($seq_region_id); | |
1669 my $slice = $slice_hash{"ID:".$seq_region_id}; | |
1670 | |
1671 if(!$slice) { | |
1672 $slice = $sa->fetch_by_seq_region_id($seq_region_id); | |
1673 $slice_hash{"ID:".$seq_region_id} = $slice; | |
1674 $sr_name_hash{$seq_region_id} = $slice->seq_region_name(); | |
1675 $sr_cs_hash{$seq_region_id} = $slice->coord_system(); | |
1676 } | |
1677 | |
1678 my $sr_name = $sr_name_hash{$seq_region_id}; | |
1679 my $sr_cs = $sr_cs_hash{$seq_region_id}; | |
1680 | |
1681 # | |
1682 # remap the feature coordinates to another coord system | |
1683 # if a mapper was provided | |
1684 # | |
1685 if($mapper) { | |
1686 | |
1687 if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) { | |
1688 ( $seq_region_id, $seq_region_start, | |
1689 $seq_region_end, $seq_region_strand ) | |
1690 = | |
1691 $mapper->map( $sr_name, $seq_region_start, $seq_region_end, | |
1692 $seq_region_strand, $sr_cs, 1, $dest_slice); | |
1693 | |
1694 } else { | |
1695 | |
1696 ( $seq_region_id, $seq_region_start, | |
1697 $seq_region_end, $seq_region_strand ) | |
1698 = | |
1699 $mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end, | |
1700 $seq_region_strand, $sr_cs ); | |
1701 } | |
1702 | |
1703 #skip features that map to gaps or coord system boundaries | |
1704 next FEATURE if(!defined($seq_region_id)); | |
1705 | |
1706 #get a slice in the coord system we just mapped to | |
1707 # if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) { | |
1708 $slice = $slice_hash{"ID:".$seq_region_id} ||= | |
1709 $sa->fetch_by_seq_region_id($seq_region_id); | |
1710 # } else { | |
1711 # $slice = $slice_hash{"NAME:$sr_name:$asm_cs_name:$asm_cs_vers"} ||= | |
1712 # $sa->fetch_by_region($asm_cs_name, $sr_name, undef, undef, undef, | |
1713 # $asm_cs_vers); | |
1714 # } | |
1715 } | |
1716 | |
1717 # | |
1718 # If a destination slice was provided convert the coords. | |
1719 # | |
1720 if ( defined($dest_slice) ) { | |
1721 if ( $dest_slice_strand == 1 ) { | |
1722 # Positive strand. | |
1723 | |
1724 $seq_region_start = $seq_region_start - $dest_slice_start + 1; | |
1725 $seq_region_end = $seq_region_end - $dest_slice_start + 1; | |
1726 | |
1727 if ( $dest_slice->is_circular() ) { | |
1728 # Handle cicular chromosomes. | |
1729 | |
1730 if ( $seq_region_start > $seq_region_end ) { | |
1731 # Looking at a feature overlapping the chromsome origin. | |
1732 | |
1733 if ( $seq_region_end > $dest_slice_start ) { | |
1734 # Looking at the region in the beginning of the | |
1735 # chromosome. | |
1736 $seq_region_start -= $dest_slice->seq_region_length(); | |
1737 } | |
1738 | |
1739 if ( $seq_region_end < 0 ) { | |
1740 $seq_region_end += $dest_slice->seq_region_length(); | |
1741 } | |
1742 | |
1743 } else { | |
1744 | |
1745 if ( $dest_slice_start > $dest_slice_end | |
1746 && $seq_region_end < 0 ) | |
1747 { | |
1748 # Looking at the region overlapping the chromosome | |
1749 # origin and a feature which is at the beginning of the | |
1750 # chromosome. | |
1751 $seq_region_start += $dest_slice->seq_region_length(); | |
1752 $seq_region_end += $dest_slice->seq_region_length(); | |
1753 } | |
1754 } | |
1755 | |
1756 } ## end if ( $dest_slice->is_circular...) | |
1757 | |
1758 } else { | |
1759 # Negative strand. | |
1760 | |
1761 if ( $dest_slice->is_circular() | |
1762 && $seq_region_start > $seq_region_end ) | |
1763 { | |
1764 # Handle cicular chromosomes. | |
1765 | |
1766 if ( $seq_region_end > $dest_slice_start ) { | |
1767 # Looking at the region in the beginning of the | |
1768 # chromosome. | |
1769 $seq_region_start = $dest_slice_end - $seq_region_end + 1; | |
1770 $seq_region_end = | |
1771 $seq_region_end - | |
1772 $dest_slice->seq_region_length - | |
1773 $dest_slice_start + 1; | |
1774 } else { | |
1775 my $tmp_seq_region_start = $seq_region_start; | |
1776 $seq_region_start = | |
1777 $dest_slice_end - | |
1778 $seq_region_end - | |
1779 $dest_slice->seq_region_length + 1; | |
1780 $seq_region_end = | |
1781 $dest_slice_end - $tmp_seq_region_start + 1; | |
1782 } | |
1783 | |
1784 } else { | |
1785 # Non-circular chromosome. | |
1786 | |
1787 my $tmp_seq_region_start = $seq_region_start; | |
1788 $seq_region_start = $dest_slice_end - $seq_region_end + 1; | |
1789 $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1; | |
1790 } | |
1791 | |
1792 $seq_region_strand = -$seq_region_strand; | |
1793 | |
1794 } ## end else [ if ( $dest_slice_strand...)] | |
1795 | |
1796 # Throw away features off the end of the requested slice or on | |
1797 # different seq_region. | |
1798 | |
1799 if ( $seq_region_end < 1 | |
1800 || $seq_region_start > $dest_slice_length | |
1801 || ( $dest_slice_sr_id ne $seq_region_id ) ) | |
1802 { | |
1803 next FEATURE; | |
1804 } | |
1805 | |
1806 $slice = $dest_slice; | |
1807 } ## end if ( defined($dest_slice...)) | |
1808 | |
1809 my $display_xref; | |
1810 | |
1811 if ($display_xref_id) { | |
1812 $display_xref = Bio::EnsEMBL::DBEntry->new_fast( { | |
1813 'dbID' => $display_xref_id, | |
1814 'adaptor' => $dbEntryAdaptor, | |
1815 'display_id' => $xref_display_id, | |
1816 'primary_id' => $xref_primary_acc, | |
1817 'version' => $xref_version, | |
1818 'description' => $xref_desc, | |
1819 'release' => $external_release, | |
1820 'dbname' => $external_db, | |
1821 'db_display_name' => $external_db_name, | |
1822 'info_type' => $info_type, | |
1823 'info_text' => $info_text | |
1824 } ); | |
1825 $display_xref->status($external_status); | |
1826 } | |
1827 | |
1828 # Finally, create the new Gene. | |
1829 push( | |
1830 @genes, | |
1831 $self->_create_feature_fast( | |
1832 'Bio::EnsEMBL::Gene', | |
1833 { | |
1834 'analysis' => $analysis, | |
1835 'biotype' => $biotype, | |
1836 'start' => $seq_region_start, | |
1837 'end' => $seq_region_end, | |
1838 'strand' => $seq_region_strand, | |
1839 'adaptor' => $self, | |
1840 'slice' => $slice, | |
1841 'dbID' => $gene_id, | |
1842 'stable_id' => $stable_id, | |
1843 'version' => $version, | |
1844 'created_date' => $created_date || undef, | |
1845 'modified_date' => $modified_date || undef, | |
1846 'description' => $gene_description, | |
1847 'external_name' => undef, # will use display_id | |
1848 # from display_xref | |
1849 'external_db' => $external_db, | |
1850 'external_status' => $external_status, | |
1851 'display_xref' => $display_xref, | |
1852 'status' => $status, | |
1853 'source' => $source, | |
1854 'is_current' => $is_current, | |
1855 'canonical_transcript_id' => $canonical_transcript_id, | |
1856 'canonical_annotation' => $canonical_annotation | |
1857 } ) ); | |
1858 | |
1859 } | |
1860 | |
1861 return \@genes; | |
1862 } | |
1863 | |
1864 | |
1865 =head2 cache_gene_seq_mappings | |
1866 | |
1867 Example : $gene_adaptor->cache_gene_seq_mappings(); | |
1868 Description: caches all the assembly mappings needed for genes | |
1869 Returntype : None | |
1870 Exceptions : None | |
1871 Caller : general | |
1872 Status : At Risk | |
1873 : New experimental code | |
1874 | |
1875 =cut | |
1876 | |
1877 sub cache_gene_seq_mappings { | |
1878 my ($self) = @_; | |
1879 | |
1880 # get the sequence level to map too | |
1881 | |
1882 my $sql = | |
1883 'SELECT name ' | |
1884 . 'FROM coord_system ' | |
1885 . 'WHERE attrib like "%%sequence_level%%"' | |
1886 . 'AND species_id = ?'; | |
1887 | |
1888 my $sth = $self->prepare($sql); | |
1889 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER ); | |
1890 $sth->execute(); | |
1891 | |
1892 my $sequence_level = $sth->fetchrow_array(); | |
1893 | |
1894 $sth->finish(); | |
1895 | |
1896 my $csa = $self->db->get_CoordSystemAdaptor(); | |
1897 my $ama = $self->db->get_AssemblyMapperAdaptor(); | |
1898 | |
1899 my $cs1 = $csa->fetch_by_name($sequence_level); | |
1900 | |
1901 # get level to map to two | |
1902 | |
1903 my $mcc = $self->db->get_MetaCoordContainerAdaptor(); | |
1904 my $csnew = $mcc->fetch_all_CoordSystems_by_feature_type('gene'); | |
1905 | |
1906 foreach my $cs2 (@$csnew) { | |
1907 my $am = $ama->fetch_by_CoordSystems( $cs1, $cs2 ); | |
1908 $am->register_all(); | |
1909 } | |
1910 | |
1911 } ## end sub cache_gene_seq_mappings | |
1912 | |
1913 | |
1914 =head2 fetch_all_by_exon_supporting_evidence | |
1915 | |
1916 Arg [1] : String $hit_name | |
1917 Name of supporting feature | |
1918 Arg [2] : String $feature_type | |
1919 one of "dna_align_feature" or "protein_align_feature" | |
1920 Arg [3] : (optional) Bio::Ensembl::Analysis | |
1921 Example : $genes = $gene_adaptor->fetch_all_by_exon_supporting_evidence( | |
1922 'XYZ', 'dna_align_feature'); | |
1923 Description: Gets all the genes with transcripts with exons which have a | |
1924 specified hit on a particular type of feature. Optionally filter | |
1925 by analysis. | |
1926 Returntype : Listref of Bio::EnsEMBL::Gene | |
1927 Exceptions : If feature_type is not of correct type. | |
1928 Caller : general | |
1929 Status : Stable | |
1930 | |
1931 =cut | |
1932 | |
1933 sub fetch_all_by_exon_supporting_evidence { | |
1934 my ($self, $hit_name, $feature_type, $analysis) = @_; | |
1935 | |
1936 if ($feature_type !~ /(dna)|(protein)_align_feature/) { | |
1937 throw("feature type must be dna_align_feature or protein_align_feature"); | |
1938 } | |
1939 | |
1940 my $anal_from = ", analysis a " if ($analysis); | |
1941 my $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? " if ($analysis); | |
1942 | |
1943 my $sql = qq( | |
1944 SELECT DISTINCT(g.gene_id) | |
1945 FROM gene g, | |
1946 transcript t, | |
1947 exon_transcript et, | |
1948 supporting_feature sf, | |
1949 $feature_type f | |
1950 $anal_from | |
1951 WHERE g.gene_id = t.gene_id | |
1952 AND g.is_current = 1 | |
1953 AND t.transcript_id = et.transcript_id | |
1954 AND et.exon_id = sf.exon_id | |
1955 AND sf.feature_id = f.${feature_type}_id | |
1956 AND sf.feature_type = ? | |
1957 AND f.hit_name=? | |
1958 $anal_where | |
1959 ); | |
1960 | |
1961 my $sth = $self->prepare($sql); | |
1962 | |
1963 $sth->bind_param(1, $feature_type, SQL_VARCHAR); | |
1964 $sth->bind_param(2, $hit_name, SQL_VARCHAR); | |
1965 $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis); | |
1966 | |
1967 $sth->execute(); | |
1968 | |
1969 my @genes; | |
1970 | |
1971 while ( my $id = $sth->fetchrow_array ) { | |
1972 my $gene = $self->fetch_by_dbID($id); | |
1973 push(@genes, $gene) if $gene; | |
1974 } | |
1975 | |
1976 return \@genes; | |
1977 } | |
1978 | |
1979 | |
1980 =head2 fetch_all_by_transcript_supporting_evidence | |
1981 | |
1982 Arg [1] : String $hit_name | |
1983 Name of supporting feature | |
1984 Arg [2] : String $feature_type | |
1985 one of "dna_align_feature" or "protein_align_feature" | |
1986 Arg [3] : (optional) Bio::Ensembl::Analysis | |
1987 Example : $genes = $gene_adaptor->fetch_all_by_transcript_supporting_evidence('XYZ', 'dna_align_feature'); | |
1988 Description: Gets all the genes with transcripts with evidence for a | |
1989 specified hit on a particular type of feature. Optionally filter | |
1990 by analysis. | |
1991 Returntype : Listref of Bio::EnsEMBL::Gene. | |
1992 Exceptions : If feature_type is not of correct type. | |
1993 Caller : general | |
1994 Status : Stable | |
1995 | |
1996 =cut | |
1997 | |
1998 sub fetch_all_by_transcript_supporting_evidence { | |
1999 my ($self, $hit_name, $feature_type, $analysis) = @_; | |
2000 | |
2001 if($feature_type !~ /(dna)|(protein)_align_feature/) { | |
2002 throw("feature type must be dna_align_feature or protein_align_feature"); | |
2003 } | |
2004 | |
2005 my $anal_from = ", analysis a " if ($analysis); | |
2006 my $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? " if ($analysis); | |
2007 | |
2008 my $sql = qq( | |
2009 SELECT DISTINCT(g.gene_id) | |
2010 FROM gene g, | |
2011 transcript t, | |
2012 transcript_supporting_feature sf, | |
2013 $feature_type f | |
2014 $anal_from | |
2015 WHERE g.gene_id = t.gene_id | |
2016 AND g.is_current = 1 | |
2017 AND t.transcript_id = sf.transcript_id | |
2018 AND sf.feature_id = f.${feature_type}_id | |
2019 AND sf.feature_type = ? | |
2020 AND f.hit_name=? | |
2021 $anal_where | |
2022 ); | |
2023 | |
2024 my $sth = $self->prepare($sql); | |
2025 | |
2026 $sth->bind_param(1, $feature_type, SQL_VARCHAR); | |
2027 $sth->bind_param(2, $hit_name, SQL_VARCHAR); | |
2028 $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis); | |
2029 | |
2030 $sth->execute(); | |
2031 | |
2032 my @genes; | |
2033 | |
2034 while( my $id = $sth->fetchrow_array ) { | |
2035 my $gene = $self->fetch_by_dbID($id); | |
2036 push(@genes, $gene) if $gene; | |
2037 } | |
2038 | |
2039 return \@genes; | |
2040 } | |
2041 | |
2042 =head2 fetch_nearest_Gene_by_Feature | |
2043 | |
2044 Arg [1] : Feature object | |
2045 Example : $genes = $gene_adaptor->fetch_nearest_Gene_by_Feature($feat); | |
2046 Description: Gets the nearest gene to the feature | |
2047 Returntype : Listref of Bio::EnsEMBL::Gene, EMPTY list if no nearest | |
2048 Caller : general | |
2049 Status : UnStable | |
2050 | |
2051 =cut | |
2052 | |
2053 sub fetch_nearest_Gene_by_Feature{ | |
2054 my $self = shift; | |
2055 my $feat = shift; | |
2056 | |
2057 my $stranded = shift; | |
2058 my $stream = shift; # 1 up stream -1 downstream | |
2059 my @genes; | |
2060 | |
2061 | |
2062 my $strand = $feat->strand; | |
2063 if(defined($stream) and !$strand){ | |
2064 warn("stream specified but feature has no strand so +ve strand will be used"); | |
2065 $strand = 1; | |
2066 } | |
2067 my $min_dist = 999; | |
2068 my $gene_id = 0; | |
2069 | |
2070 my $overlapping = $feat->get_overlapping_Genes(); | |
2071 | |
2072 return $overlapping if(defined(@{$overlapping}[0])); | |
2073 | |
2074 my $seq_region_id = $feat->slice->adaptor->get_seq_region_id($feat->slice); | |
2075 my $start = ($feat->start + $feat->slice->start) -1; | |
2076 my $end = ($feat->end + $feat->slice->start) -1; | |
2077 | |
2078 | |
2079 my @gene_ids; | |
2080 if(!defined($stream) or $stream == 0){ | |
2081 | |
2082 my $sql1 = "select g.gene_id, (? - g.seq_region_end) as 'dist' from gene g where "; | |
2083 if($stranded){ | |
2084 $sql1 .= "g.seq_region_strand = ".$strand." and "; | |
2085 } | |
2086 $sql1 .= "seq_region_id = ? and g.seq_region_end < ? order by dist limit 10"; | |
2087 | |
2088 # | |
2089 # MAYBE set the result of prepare to be static in case lots of calls. | |
2090 # | |
2091 my $sql1_sth = $self->prepare($sql1) || die "Could not prepare $sql1"; | |
2092 $sql1_sth->execute($start, $seq_region_id, $start) || die "Could not execute sql"; | |
2093 $sql1_sth->bind_columns(\$gene_id, \$min_dist) || die "Could mot bin columns"; | |
2094 | |
2095 my $last_dist = 99999999999999999; | |
2096 while($sql1_sth->fetch()){ | |
2097 if($min_dist <= $last_dist){ | |
2098 push @gene_ids, $gene_id; | |
2099 $last_dist = $min_dist; | |
2100 } | |
2101 } | |
2102 $sql1_sth->finish(); | |
2103 | |
2104 | |
2105 | |
2106 my $sql2 = "select g.gene_id, (g.seq_region_start - ?) as 'dist' from gene g where "; | |
2107 if($stranded){ | |
2108 $sql2 .= "g.seq_region_strand = ".$feat->strand." and "; | |
2109 } | |
2110 $sql2 .= "seq_region_id = ? and g.seq_region_start > ? order by dist limit 10"; | |
2111 | |
2112 my $sql2_sth = $self->prepare($sql2) || die "could not prepare $sql2"; | |
2113 | |
2114 my ($tmp_min_dist, $tmp_gene_id); | |
2115 $sql2_sth->execute($end, $seq_region_id, $end) || die "Could not execute sql"; | |
2116 $sql2_sth->bind_columns(\$tmp_gene_id, \$tmp_min_dist) || die "Could mot bin columns"; | |
2117 my $first =1; | |
2118 while($sql2_sth->fetch()){ | |
2119 if( $tmp_min_dist <= $last_dist){ | |
2120 if($first){ | |
2121 $first = 0; | |
2122 if($tmp_min_dist < $last_dist){ | |
2123 @gene_ids = (); #reset | |
2124 } | |
2125 } | |
2126 push @gene_ids, $tmp_gene_id; | |
2127 $last_dist = $tmp_min_dist; | |
2128 } | |
2129 } | |
2130 $sql2_sth->finish(); | |
2131 | |
2132 | |
2133 } | |
2134 elsif(($stream*$strand) == 1){ | |
2135 my $sql1 = "select g.gene_id, (? - g.seq_region_end) as 'dist' from gene g where "; | |
2136 if($stranded){ | |
2137 $sql1 .= "g.seq_region_strand = ".$strand." and "; | |
2138 } | |
2139 $sql1 .= "seq_region_id = ? and g.seq_region_end < ? order by dist limit 10"; | |
2140 | |
2141 # | |
2142 # MAYBE set the result of prepare to be static in case lots of calls. | |
2143 # | |
2144 my $sql1_sth = $self->prepare($sql1) || die "Could not prepare $sql1"; | |
2145 $sql1_sth->execute($start, $seq_region_id, $start) || die "Could not execute sql"; | |
2146 $sql1_sth->bind_columns(\$gene_id, \$min_dist) || die "Could mot bin columns"; | |
2147 | |
2148 my $last_dist; | |
2149 my $first = 1; | |
2150 while($sql1_sth->fetch()){ | |
2151 if($first){ | |
2152 $first = 0; | |
2153 } | |
2154 else{ | |
2155 next if ($min_dist > $last_dist); | |
2156 } | |
2157 push @gene_ids, $gene_id; | |
2158 $last_dist = $min_dist; | |
2159 } | |
2160 $sql1_sth->finish(); | |
2161 } | |
2162 elsif(($stream * $strand) == -1){ | |
2163 | |
2164 my $sql2 = "select g.gene_id, (g.seq_region_start - ?) as 'dist' from gene g where "; | |
2165 if($stranded){ | |
2166 $sql2 .= "g.seq_region_strand = ".$feat->strand." and "; | |
2167 } | |
2168 $sql2 .= "seq_region_id = ? and g.seq_region_start > ? order by dist limit 10"; | |
2169 | |
2170 my $sql2_sth = $self->prepare($sql2) || die "could not prepare $sql2"; | |
2171 | |
2172 my ($tmp_min_dist, $tmp_gene_id); | |
2173 $sql2_sth->execute($end, $seq_region_id, $end) || die "Could not execute sql"; | |
2174 $sql2_sth->bind_columns(\$tmp_gene_id, \$tmp_min_dist) || die "Could mot bin columns"; | |
2175 my $first =1; | |
2176 my $last_dist; | |
2177 while($sql2_sth->fetch()){ | |
2178 if($first){ | |
2179 $first = 0; | |
2180 } | |
2181 else{ | |
2182 next if ($tmp_min_dist > $last_dist); | |
2183 } | |
2184 push @gene_ids, $tmp_gene_id; | |
2185 $last_dist = $tmp_min_dist; | |
2186 } | |
2187 $sql2_sth->finish(); | |
2188 } | |
2189 else{ | |
2190 die "Invalid stream or strand must be -1, 0 or 1\n"; | |
2191 } | |
2192 | |
2193 | |
2194 | |
2195 foreach my $gene_id (@gene_ids){ | |
2196 push @genes, $self->fetch_by_dbID($gene_id); | |
2197 } | |
2198 return \@genes; | |
2199 | |
2200 } | |
2201 | |
2202 ########################## | |
2203 # # | |
2204 # DEPRECATED METHODS # | |
2205 # # | |
2206 ########################## | |
2207 | |
2208 | |
2209 =head2 fetch_by_maximum_DBLink | |
2210 | |
2211 DEPRECATED - use fetch_all_by_external_name instead | |
2212 | |
2213 =cut | |
2214 | |
2215 sub fetch_by_maximum_DBLink { | |
2216 my ($self, $external_id) = @_; | |
2217 | |
2218 deprecate( "use fetch_all_by_external_name instead" ); | |
2219 | |
2220 my $genes=$self->fetch_all_by_external_name($external_id); | |
2221 | |
2222 my $biggest; | |
2223 my $max = 0; | |
2224 my $size = scalar(@$genes); | |
2225 if ($size > 0) { | |
2226 foreach my $gene (@$genes) { | |
2227 my $size = scalar(@{$gene->get_all_Exons}); | |
2228 if ($size > $max) { | |
2229 $biggest = $gene; | |
2230 $max = $size; | |
2231 } | |
2232 } | |
2233 return $biggest; | |
2234 } | |
2235 return; | |
2236 } | |
2237 | |
2238 | |
2239 =head2 get_display_xref | |
2240 | |
2241 DEPRECATED use $gene->display_xref | |
2242 | |
2243 =cut | |
2244 | |
2245 sub get_display_xref { | |
2246 my ($self, $gene) = @_; | |
2247 | |
2248 deprecate( "display xref should retrieved from Gene object directly" ); | |
2249 | |
2250 if ( !defined $gene ) { | |
2251 throw("Must call with a Gene object"); | |
2252 } | |
2253 | |
2254 my $sth = $self->prepare(qq( | |
2255 SELECT e.db_name, | |
2256 x.display_label, | |
2257 x.xref_id | |
2258 FROM gene g, | |
2259 xref x, | |
2260 external_db e | |
2261 WHERE g.gene_id = ? | |
2262 AND g.display_xref_id = x.xref_id | |
2263 AND x.external_db_id = e.external_db_id | |
2264 )); | |
2265 | |
2266 $sth->bind_param(1, $gene->dbID, SQL_INTEGER); | |
2267 $sth->execute(); | |
2268 | |
2269 my ($db_name, $display_label, $xref_id) = $sth->fetchrow_array(); | |
2270 if ( !defined $xref_id ) { | |
2271 return undef; | |
2272 } | |
2273 | |
2274 my $db_entry = Bio::EnsEMBL::DBEntry->new( | |
2275 -dbid => $xref_id, | |
2276 -adaptor => $self->db->get_DBEntryAdaptor(), | |
2277 -dbname => $db_name, | |
2278 -display_id => $display_label | |
2279 ); | |
2280 | |
2281 return $db_entry; | |
2282 } | |
2283 | |
2284 | |
2285 =head2 get_description | |
2286 | |
2287 DEPRECATED, use gene->get_description | |
2288 | |
2289 =cut | |
2290 | |
2291 sub get_description { | |
2292 my ($self, $dbID) = @_; | |
2293 | |
2294 deprecate( "Gene description should be loaded on gene retrieval. Use gene->get_description()" ); | |
2295 | |
2296 if ( !defined $dbID ) { | |
2297 throw("must call with dbID"); | |
2298 } | |
2299 | |
2300 my $sth = $self->prepare("SELECT description | |
2301 FROM gene_description | |
2302 WHERE gene_id = ?"); | |
2303 | |
2304 $sth->bind_param(1, $dbID, SQL_INTEGER); | |
2305 $sth->execute(); | |
2306 | |
2307 my @array = $sth->fetchrow_array(); | |
2308 return $array[0]; | |
2309 } | |
2310 | |
2311 | |
2312 =head2 fetch_by_Peptide_id | |
2313 | |
2314 DEPRECATED, use fetch_by_translation_stable_id() | |
2315 | |
2316 =cut | |
2317 | |
2318 sub fetch_by_Peptide_id { | |
2319 my ( $self, $translation_stable_id) = @_; | |
2320 | |
2321 deprecate( "Please use better named fetch_by_translation_stable_id \n". | |
2322 caller(2) ); | |
2323 | |
2324 $self->fetch_by_translation_stable_id($translation_stable_id); | |
2325 } | |
2326 | |
2327 | |
2328 =head2 get_stable_entry_info | |
2329 | |
2330 DEPRECATED use $gene->stable_id instead | |
2331 | |
2332 =cut | |
2333 | |
2334 sub get_stable_entry_info { | |
2335 my ($self,$gene) = @_; | |
2336 | |
2337 deprecated("stable id info is loaded on default, no lazy loading necessary"); | |
2338 | |
2339 if ( !defined $gene || !ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) { | |
2340 throw("Needs a gene object, not a $gene"); | |
2341 } | |
2342 | |
2343 my $created_date = $self->db->dbc->from_date_to_seconds("created_date"); | |
2344 my $modified_date = $self->db->dbc->from_date_to_seconds("modified_date"); | |
2345 | |
2346 my $sth = | |
2347 $self->prepare( "SELECT stable_id, " | |
2348 . $created_date . "," | |
2349 . $modified_date | |
2350 . ", version FROM gene WHERE gene_id = ?" ); | |
2351 | |
2352 $sth->bind_param(1, $gene->dbID, SQL_INTEGER); | |
2353 $sth->execute(); | |
2354 | |
2355 my @array = $sth->fetchrow_array(); | |
2356 $gene->{'stable_id'} = $array[0]; | |
2357 $gene->{'created'} = $array[1]; | |
2358 $gene->{'modified'} = $array[2]; | |
2359 $gene->{'version'} = $array[3]; | |
2360 | |
2361 return 1; | |
2362 } | |
2363 | |
2364 | |
2365 =head2 fetch_all_by_DBEntry | |
2366 | |
2367 DEPRECATED - Use fetch_all_by_external_name instead | |
2368 | |
2369 =cut | |
2370 | |
2371 sub fetch_all_by_DBEntry { | |
2372 my $self = shift; | |
2373 | |
2374 deprecate('Use fetch_all_by_external_name instead.'); | |
2375 | |
2376 return $self->fetch_all_by_external_name(@_); | |
2377 } | |
2378 | |
2379 | |
2380 1; | |
2381 | |
2382 |