0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::IdMapping::Cache - a cache to hold data objects used by the
|
|
24 IdMapping application
|
|
25
|
|
26 =head1 DESCRIPTION
|
|
27
|
|
28 =head1 METHODS
|
|
29
|
|
30 =cut
|
|
31
|
|
32
|
|
33 package Bio::EnsEMBL::IdMapping::Cache;
|
|
34
|
|
35 use strict;
|
|
36 use warnings;
|
|
37 no warnings 'uninitialized';
|
|
38
|
|
39 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
40 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
|
|
41 use Bio::EnsEMBL::Utils::ScriptUtils qw(parse_bytes path_append);
|
|
42 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
|
|
43 use Bio::EnsEMBL::IdMapping::TinyGene;
|
|
44 use Bio::EnsEMBL::IdMapping::TinyTranscript;
|
|
45 use Bio::EnsEMBL::IdMapping::TinyTranslation;
|
|
46 use Bio::EnsEMBL::IdMapping::TinyExon;
|
|
47 use Bio::EnsEMBL::DBSQL::DBAdaptor;
|
|
48 use Storable qw(nstore retrieve);
|
|
49 use Digest::MD5 qw(md5_hex);
|
|
50
|
|
51 # define available cache names here
|
|
# List of the available cache names.
my @cache_names = (
  'exons_by_id',
  'transcripts_by_id',
  'transcripts_by_exon_id',
  'translations_by_id',
  'genes_by_id',
  'genes_by_transcript_id',
);
|
|
60
|
|
61
|
|
62 =head2 new
|
|
63
|
|
64 Arg [LOGGER]: Bio::EnsEMBL::Utils::Logger $logger - a logger object
|
|
65 Arg [CONF] : Bio::EnsEMBL::Utils::ConfParser $conf - a configuration object
|
|
66 Example : my $cache = Bio::EnsEMBL::IdMapping::Cache->new(
|
|
67 -LOGGER => $logger,
|
|
68 -CONF => $conf,
|
|
69 );
|
|
70 Description : constructor
|
|
71 Return type : Bio::EnsEMBL::IdMapping::Cache object
|
|
72 Exceptions : thrown on wrong or missing arguments
|
|
73 Caller : general
|
|
74 Status : At Risk
|
|
75 : under development
|
|
76
|
|
77 =cut
|
|
78
|
|
# Constructor.
#
# Named arguments (parsed with rearrange()):
#   -LOGGER        : Bio::EnsEMBL::Utils::Logger object (required)
#   -CONF          : Bio::EnsEMBL::Utils::ConfParser object (required)
#   -LOAD_INSTANCE : optional flag; when true, read_instance_from_file() is
#                    called on the freshly created object
#
# Returns the new object. Throws if LOGGER or CONF is missing or is not an
# instance of the expected class.
sub new {
  my $caller = shift;
  my $class  = ref($caller) || $caller;

  my ( $logger, $conf, $load_instance ) =
    rearrange( [ 'LOGGER', 'CONF', 'LOAD_INSTANCE' ], @_ );

  # The isa() calls are wrapped in eval so that a missing (undef) or
  # unblessed argument produces the descriptive exception below rather than
  # Perl's own "Can't call method "isa"" error.
  unless ( eval { $logger->isa('Bio::EnsEMBL::Utils::Logger') } ) {
    throw("You must provide a Bio::EnsEMBL::Utils::Logger for logging.");
  }

  unless ( eval { $conf->isa('Bio::EnsEMBL::Utils::ConfParser') } ) {
    throw("You must provide configuration as a Bio::EnsEMBL::Utils::ConfParser object.");
  }

  my $self = {};
  bless( $self, $class );

  # initialise
  $self->logger($logger);
  $self->conf($conf);

  if ($load_instance) {
    $self->read_instance_from_file;
  }

  return $self;
}
|
|
107
|
|
108
|
|
109 =head2 build_cache_by_slice
|
|
110
|
|
111 Arg[1] : String $dbtype - db type (source|target)
|
|
112 Arg[2] : String $slice_name - the name of a slice (format as returned by
|
|
113 Bio::EnsEMBL::Slice->name)
|
|
114 Example : my ($num_genes, $filesize) = $cache->build_cache_by_slice(
|
|
115 'source', 'chromosome:NCBI36:X:1:1000000:-1');
|
|
116 Description : Builds a cache of genes, transcripts, translations and exons
|
|
117 needed by the IdMapping application and serialises the resulting
|
|
118 cache object to a file, one slice at a time.
|
|
119 Return type : list of the number of genes processed and the size of the
|
|
120 serialised cache file
|
|
121 Exceptions : thrown on invalid slice name
|
|
122 Caller : general
|
|
123 Status : At Risk
|
|
124 : under development
|
|
125
|
|
126 =cut
|
|
127
|
|
# Build the cache for the genes on one slice and write it to file.
#
# Arguments: $dbtype (source|target) and $slice_name (a slice name as
# understood by the slice adaptor's fetch_by_name()).
# Returns the list ($num_genes, $size): the gene count reported by
# build_cache_from_genes() and the value returned by write_all_to_file().
# Throws if the slice cannot be fetched.
sub build_cache_by_slice {
  my ( $self, $dbtype, $slice_name ) = @_;

  # set cache method (required for loading cache later)
  $self->cache_method('BY_SEQ_REGION');

  my $slice =
    $self->get_DBAdaptor($dbtype)->get_SliceAdaptor->fetch_by_name($slice_name);
  throw("Could not retrieve slice $slice_name.") unless ($slice);

  my $genes = $slice->get_all_Genes( undef, undef, 1 );

  # find common coord_system (the result is stored on $self by the call)
  $self->find_common_coord_systems;

  # Projection is only needed when the slice's native coord_system is not
  # a common one, and never when there is no common coord_system at all.
  my $csid = join( ':',
                   $slice->coord_system_name,
                   $slice->coord_system->version );
  my $need_project =
    ( $self->is_common_cs($csid) or !$self->highest_common_cs ) ? 0 : 1;

  # build cache
  my $type      = "$dbtype.$slice_name";
  my $num_genes = $self->build_cache_from_genes( $type, $genes, $need_project );
  undef $genes;

  # write cache to file
  my $size = $self->write_all_to_file($type);

  return ( $num_genes, $size );
} ## end sub build_cache_by_slice
|
|
173
|
|
174
|
|
175 =head2 build_cache_all
|
|
176
|
|
177 Arg[1] : String $dbtype - db type (source|target)
|
|
178 Example : my ($num_genes, $filesize) = $cache->build_cache_all('source');
|
|
179 Description : Builds a cache of genes, transcripts, translations and exons
|
|
180 needed by the IdMapping application and serialises the
|
|
181 resulting cache object to a file. All genes across the genome
|
|
182 are processed in one go. This method should be used when
|
|
183 build_cache_by_seq_region can't be used due to a large number
|
|
184 of toplevel seq_regions (e.g. 2x genomes).
|
|
185 Return type : list of the number of genes processed and the size of the
|
|
186 serialised cache file
|
|
187 Exceptions : thrown on invalid slice name
|
|
188 Caller : general
|
|
189 Status : At Risk
|
|
190 : under development
|
|
191
|
|
192 =cut
|
|
193
|
|
# Build the cache for all genes of a database in one go and write it to
# file.
#
# Argument: $dbtype (source|target).
# Returns the list ($num_genes, $size): the gene count reported by
# build_cache_from_genes() and the value returned by write_all_to_file().
sub build_cache_all {
  my ( $self, $dbtype ) = @_;

  # set cache method (required for loading cache later)
  $self->cache_method('ALL');

  my $genes = $self->get_DBAdaptor($dbtype)->get_GeneAdaptor->fetch_all;

  # find common coord_system (the result is stored on $self by the call)
  $self->find_common_coord_systems;

  # Passing 'CHECK' as the projection flag makes build_cache_from_genes()
  # look at the coordinate system of each gene.
  my $type      = "$dbtype.ALL";
  my $num_genes = $self->build_cache_from_genes( $type, $genes, 'CHECK' );

  undef $genes;

  # write cache to file
  my $size = $self->write_all_to_file($type);

  return ( $num_genes, $size );
}
|
|
224
|
|
225
|
|
226 =head2 build_cache_from_genes
|
|
227
|
|
228 Arg[1] : String $type - cache type
|
|
229 Arg[2] : Listref of Bio::EnsEMBL::Genes $genes - genes to build cache
|
|
230 from
|
|
231 Arg[3] : Boolean $need_project - indicate if we need to project exons to
|
|
232 common coordinate system
|
|
233 Example : $cache->build_cache_from_genes(
|
|
234 'source.chromosome:NCBI36:X:1:100000:1', \@genes);
|
|
235 Description : Builds the cache by fetching transcripts, translations and exons
|
|
236 for a list of genes from the database, and creating lightweight
|
|
237 Bio::EnsEMBL::IdMapping::TinyFeature objects containing only the
|
|
238 data needed by the IdMapping application. These objects are
|
|
239 attached to a name cache in this cache object. Exons only need
|
|
240 to be projected to a common coordinate system if their native
|
|
241 coordinate system isn't common to source and target assembly
|
|
242 itself.
|
|
243 Return type : int - number of genes after filtering
|
|
244 Exceptions : thrown on wrong or missing arguments
|
|
245 Caller : internal
|
|
246 Status : At Risk
|
|
247 : under development
|
|
248
|
|
249 =cut
|
|
250
|
|
# Build the in-memory caches for one cache type from a list of genes.
#
# Arguments:
#   $type         - cache type (e.g. "source.<slice name>"), required
#   $genes        - listref of gene objects, required
#   $need_project - true/false to force/skip projection of exons to the
#                   highest common coordinate system, or the string 'CHECK'
#                   to decide separately for every gene
#
# For every gene, transcript, translation and exon a lightweight Tiny*
# object is created and registered in the named caches of $type.
#
# Returns the number of genes after biotype filtering.
# Throws if $type is missing or $genes is not an array reference.
sub build_cache_from_genes {
  my ( $self, $type, $genes, $need_project ) = @_;

  throw("You must provide a type.") unless $type;
  throw("You must provide a listref of genes.")
    unless ( ref($genes) eq 'ARRAY' );

  # biotype filter
  if ( $self->conf()->param('biotypes') ||
       $self->conf()->param('biotypes_include') ||
       $self->conf()->param('biotypes_exclude') )
  {
    $genes = $self->filter_biotypes($genes);
  }
  my $num_genes = scalar(@$genes);

  # initialise cache for the given type.
  $self->{'cache'}->{$type} = {};

  # loop over genes sorted by gene location.
  # the sort will hopefully improve assembly mapper cache performance and
  # therefore speed up exon sequence retrieval
  foreach my $gene ( sort { $a->start <=> $b->start } @$genes ) {

    # Projection flag for this gene only. In 'CHECK' mode it is worked
    # out per gene and kept in its own variable, so that one gene on a
    # common coord_system does not switch projection off for the genes
    # that follow.
    my $project_gene = $need_project;

    if ( $need_project eq 'CHECK' ) {
      # find out whether native coord_system is a common coord_system.
      # if so, you don't need to project.
      # also don't project if no common coord_system present
      if ( $self->highest_common_cs ) {
        my $csid = join( ':',
                         $gene->slice->coord_system_name,
                         $gene->slice->coord_system->version );
        $project_gene = $self->is_common_cs($csid) ? 0 : 1;
      }
      else {
        $project_gene = 0;
      }
    }

    # create lightweight gene
    my $lgene =
      Bio::EnsEMBL::IdMapping::TinyGene->new_fast( [
                         $gene->dbID,          $gene->stable_id,
                         $gene->version,       $gene->created_date,
                         $gene->modified_date, $gene->start,
                         $gene->end,           $gene->strand,
                         $gene->slice->seq_region_name, $gene->biotype,
                         $gene->status, $gene->analysis->logic_name,
                         ( $gene->is_known ? 1 : 0 ), ] );

    # build gene caches
    $self->add( 'genes_by_id', $type, $gene->dbID, $lgene );

    # transcripts
    foreach my $tr ( @{ $gene->get_all_Transcripts } ) {
      my $ltr =
        Bio::EnsEMBL::IdMapping::TinyTranscript->new_fast( [
                               $tr->dbID,          $tr->stable_id,
                               $tr->version,       $tr->created_date,
                               $tr->modified_date, $tr->start,
                               $tr->end,           $tr->strand,
                               $tr->length, md5_hex( $tr->spliced_seq ),
                               ( $tr->is_known ? 1 : 0 ) ] );

      $ltr->biotype( $tr->biotype() );
      $lgene->add_Transcript($ltr);

      # build transcript caches
      $self->add( 'transcripts_by_id',      $type, $tr->dbID, $ltr );
      $self->add( 'genes_by_transcript_id', $type, $tr->dbID, $lgene );

      # translation (if there is one)
      if ( my $tl = $tr->translation ) {
        my $ltl =
          Bio::EnsEMBL::IdMapping::TinyTranslation->new_fast( [
                                $tl->dbID,          $tl->stable_id,
                                $tl->version,       $tl->created_date,
                                $tl->modified_date, $tr->dbID,
                                $tr->translate->seq, ( $tr->is_known ? 1 : 0 ),
                              ] );

        $ltr->add_Translation($ltl);

        $self->add( 'translations_by_id', $type, $tl->dbID, $ltl );

        undef $tl;
      }

      # exons
      foreach my $exon ( @{ $tr->get_all_Exons } ) {
        my $lexon =
          Bio::EnsEMBL::IdMapping::TinyExon->new_fast( [
                           $exon->dbID,
                           $exon->stable_id,
                           $exon->version,
                           $exon->created_date,
                           $exon->modified_date,
                           $exon->start,
                           $exon->end,
                           $exon->strand,
                           $exon->slice->seq_region_name,
                           $exon->slice->coord_system_name,
                           $exon->slice->coord_system->version,
                           $exon->slice->subseq( $exon->start, $exon->end,
                                                 $exon->strand ),
                           $exon->phase,
                           $project_gene, ] );

        # get coordinates in common coordinate system if needed
        if ($project_gene) {
          my @seg = @{
            $exon->project( $self->highest_common_cs,
                            $self->highest_common_cs_version ) };

          # common coordinates are only set when the exon projects to
          # exactly one segment
          if ( scalar(@seg) == 1 ) {
            my $sl = $seg[0]->to_Slice;
            $lexon->common_start( $sl->start );
            $lexon->common_end( $sl->end );
            $lexon->common_strand( $sl->strand );
            $lexon->common_sr_name( $sl->seq_region_name );
          }
        }

        $ltr->add_Exon($lexon);

        $self->add( 'exons_by_id', $type, $exon->dbID, $lexon );
        $self->add_list( 'transcripts_by_exon_id',
                         $type, $exon->dbID, $ltr );

        # drop the full object once its Tiny copy exists (presumably to
        # reclaim memory; same for $tr and $gene below)
        undef $exon;
      } ## end foreach my $exon ( @{ $tr->get_all_Exons...})

      undef $tr;
    } ## end foreach my $tr ( @{ $gene->get_all_Transcripts...})

    undef $gene;
  } ## end foreach my $gene ( sort { $a...})

  return $num_genes;
} ## end sub build_cache_from_genes
|
|
402
|
|
403
|
|
404 =head2 filter_biotypes
|
|
405
|
|
406 Arg[1] : Listref of Bio::EnsEMBL::Genes $genes - the genes to filter
|
|
407 Example : my @filtered = @{ $cache->filter_biotypes(\@genes) };
|
|
408
|
|
409 Description : Filters a list of genes by biotype. Biotypes are
|
|
410 taken from the IdMapping configuration parameter
|
|
411 'biotypes_include' or 'biotypes_exclude'.
|
|
412
|
|
413 If the configuration parameter 'biotypes_exclude' is
|
|
414 defined, then rather than returning the genes whose
|
|
415 biotype is listed in the configuration parameter
|
|
416 'biotypes_include' the method will return the genes
|
|
417 whose biotype is *not* listed in the 'biotypes_exclude'
|
|
418 configuration parameter.
|
|
419
|
|
420 It is an error to define both these configuration
|
|
421 parameters.
|
|
422
|
|
423 The old parameter 'biotypes' is equivalent to
|
|
424 'biotypes_include'.
|
|
425
|
|
426 Return type : Listref of Bio::EnsEMBL::Genes (or empty list)
|
|
427 Exceptions : none
|
|
428 Caller : internal
|
|
429 Status : At Risk
|
|
430 : under development
|
|
431
|
|
432 =cut
|
|
433
|
|
# Filter a list of genes by biotype according to the configuration.
#
# If 'biotypes_include' (or the older 'biotypes') is defined, only genes
# whose biotype is listed there are kept. Otherwise 'biotypes_exclude' is
# used and genes whose biotype is listed there are dropped. Defining both
# an include and an exclude list is reported through the logger's error().
#
# Argument: $genes - listref of gene objects.
# Returns a listref of the genes that passed the filter, in input order.
sub filter_biotypes {
  my ( $self, $genes ) = @_;

  my @biotypes;
  my $opt_reverse;

  my $have_include = defined( $self->conf()->param('biotypes_include') );

  if ( $have_include || defined( $self->conf()->param('biotypes') ) ) {
    if ( defined( $self->conf()->param('biotypes_exclude') ) ) {
      $self->logger()
        ->error( "You may not use both " .
                 "'biotypes_include' and 'biotypes_exclude' " .
                 "in the configuration" );
    }

    # 'biotypes_include' takes precedence over the old 'biotypes'
    @biotypes =
      $have_include
      ? $self->conf()->param('biotypes_include')
      : $self->conf()->param('biotypes');
    $opt_reverse = 0;
  }
  else {
    @biotypes    = $self->conf()->param('biotypes_exclude');
    $opt_reverse = 1;
  }

  my %is_listed = map { ( $_ => 1 ) } @biotypes;

  # include mode keeps listed biotypes, exclude mode keeps unlisted ones
  my @filtered = grep {
    my $listed = exists( $is_listed{ $_->biotype() } );
    $opt_reverse ? !$listed : $listed;
  } @{$genes};

  return \@filtered;
} ## end sub filter_biotypes
|
|
487
|
|
488
|
|
489 =head2 add
|
|
490
|
|
491 Arg[1] : String $name - a cache name (e.g. 'genes_by_id')
|
|
492 Arg[2] : String type - a cache type (e.g. "source.$slice_name")
|
|
493 Arg[3] : String $key - key of this entry (e.g. a gene dbID)
|
|
494 Arg[4] : Bio::EnsEMBL::IdMapping::TinyFeature $val - value to cache
|
|
495 Example : $cache->add('genes_by_id',
|
|
496 'source.chromosome:NCBI36:X:1:1000000:1', '1234', $tiny_gene);
|
|
497 Description : Adds a TinyFeature object to a named cache.
|
|
498 Return type : Bio::EnsEMBL::IdMapping::TinyFeature
|
|
499 Exceptions : thrown on wrong or missing arguments
|
|
500 Caller : internal
|
|
501 Status : At Risk
|
|
502 : under development
|
|
503
|
|
504 =cut
|
|
505
|
|
# Store a single value in a named cache.
#
# Arguments: $name (cache name), $type (cache type), $key (entry key) and
# $val (the value to store). $name, $type and $key must all be true.
# Returns the stored value. Throws when $name, $type or $key is missing.
sub add {
  my ( $self, $name, $type, $key, $val ) = @_;

  if ( !$name ) {
    throw("You must provide a cache name (e.g. genes_by_id.");
  }
  if ( !$type ) {
    throw("You must provide a cache type.");
  }
  if ( !$key ) {
    throw("You must provide a cache key (e.g. a gene dbID).");
  }

  # intermediate hash levels are autovivified as needed
  my $named_cache = ( $self->{'cache'}->{$type}->{$name} ||= {} );
  $named_cache->{$key} = $val;

  return $named_cache->{$key};
}
|
|
521
|
|
522 =head2 add_list
|
|
523
|
|
524 Arg[1] : String $name - a cache name (e.g. 'genes_by_id')
|
|
525 Arg[2] : String type - a cache type (e.g. "source.$slice_name")
|
|
526 Arg[3] : String $key - key of this entry (e.g. a gene dbID)
|
|
527 Arg[4] : List of Bio::EnsEMBL::IdMapping::TinyFeature @val - values
|
|
528 to cache
|
|
529 Example : $cache->add_list('transcripts_by_exon_id',
|
|
530 'source.chromosome:NCBI36:X:1:1000000:1', '1234',
|
|
531 $tiny_transcript1, $tiny_transcript2);
|
|
532 Description : Adds a list of TinyFeature objects to a named cache.
|
|
533 Return type : Listref of Bio::EnsEMBL::IdMapping::TinyFeature objects
|
|
534 Exceptions : thrown on wrong or missing arguments
|
|
535 Caller : internal
|
|
536 Status : At Risk
|
|
537 : under development
|
|
538
|
|
539 =cut
|
|
540
|
|
# Append one or more values to the list stored under a key of a named
# cache.
#
# Arguments: $name (cache name), $type (cache type), $key (entry key),
# followed by the values to append. $name, $type and $key must be true.
# Returns the array reference stored under the key.
# Throws when $name, $type or $key is missing.
sub add_list {
  my ( $self, $name, $type, $key, @vals ) = @_;

  if ( !$name ) {
    throw("You must provide a cache name (e.g. genes_by_id.");
  }
  if ( !$type ) {
    throw("You must provide a cache type.");
  }
  if ( !$key ) {
    throw("You must provide a cache key (e.g. a gene dbID).");
  }

  # the list (and any missing hash level) is autovivified by the push
  my $named_cache = ( $self->{'cache'}->{$type}->{$name} ||= {} );
  push( @{ $named_cache->{$key} }, @vals );

  return $named_cache->{$key};
}
|
|
556
|
|
# Look up a single entry of a named cache.
#
# Arguments: $name (cache name), $type (cache type), $key (entry key); all
# three must be true. If the cache type is not flagged as loaded,
# read_and_merge($type) is called first.
# Returns whatever is stored under the key (undef if there is nothing).
# Throws when $name, $type or $key is missing.
sub get_by_key {
  my ( $self, $name, $type, $key ) = @_;

  if ( !$name ) {
    throw("You must provide a cache name (e.g. genes_by_id.");
  }
  if ( !$type ) {
    throw("You must provide a cache type.");
  }
  if ( !$key ) {
    throw("You must provide a cache key (e.g. a gene dbID).");
  }

  # transparently load cache from file unless already loaded
  $self->read_and_merge($type)
    unless ( $self->{'instance'}->{'loaded'}->{"$type"} );

  return $self->{'cache'}->{$type}->{$name}->{$key};
}
|
|
574
|
|
# Return a whole named cache of a cache type.
#
# Arguments: $name (cache name) and $type (cache type); both must be true.
# If the cache type is not flagged as loaded, read_and_merge($type) is
# called first.
# Returns the hash reference stored for $name, or a new empty hash
# reference if there is none.
# Throws when $name or $type is missing.
sub get_by_name {
  my ( $self, $name, $type ) = @_;

  if ( !$name ) {
    throw("You must provide a cache name (e.g. genes_by_id.");
  }
  if ( !$type ) {
    throw("You must provide a cache type.");
  }

  # transparently load cache from file unless already loaded
  $self->read_and_merge($type)
    unless ( $self->{'instance'}->{'loaded'}->{$type} );

  my $named_cache = $self->{'cache'}->{$type}->{$name};

  return $named_cache || {};
}
|
|
590
|
|
591
|
|
# Count the entries of a named cache of a cache type.
#
# Arguments: $name (cache name) and $type (cache type); both must be true.
# If the cache type is not flagged as loaded, read_and_merge($type) is
# called first.
# Returns the number of keys in the hash returned by get_by_name().
# Throws when $name or $type is missing.
sub get_count_by_name {
  my ( $self, $name, $type ) = @_;

  if ( !$name ) {
    throw("You must provide a cache name (e.g. genes_by_id.");
  }
  if ( !$type ) {
    throw("You must provide a cache type.");
  }

  # transparently load cache from file unless already loaded
  $self->read_and_merge($type)
    unless ( $self->{'instance'}->{'loaded'}->{$type} );

  my $named_cache = $self->get_by_name( $name, $type );

  return scalar( keys %{$named_cache} );
}
|
|
607
|
|
608
|
|
# Determine which coordinate systems are shared by the source and target
# databases.
#
# Only default coordinate systems are considered. A source and a target
# coord_system match when they have the same name, the source version (if
# any) equals the target version, and seq_regions_compatible() accepts
# them. Every match is registered with add_common_cs(); the first match
# found is additionally recorded via highest_common_cs() and
# highest_common_cs_version().
#
# Returns 1 if at least one common coord_system was found, 0 otherwise.
sub find_common_coord_systems {
  my ($self) = @_;

  # get adaptors for source db
  my $s_dba = $self->get_DBAdaptor('source');
  my $s_csa = $s_dba->get_CoordSystemAdaptor;
  my $s_sa  = $s_dba->get_SliceAdaptor;

  # get adaptors for target db
  my $t_dba = $self->get_DBAdaptor('target');
  my $t_csa = $t_dba->get_CoordSystemAdaptor;
  my $t_sa  = $t_dba->get_SliceAdaptor;

  my $source_systems = $s_csa->fetch_all;
  my $target_systems = $t_csa->fetch_all;
  my $found_highest  = 0;

SOURCE:
  foreach my $s_cs ( @{$source_systems} ) {
    next SOURCE unless ( $s_cs->is_default() );

  TARGET:
    foreach my $t_cs ( @{$target_systems} ) {
      next TARGET unless ( $t_cs->is_default() );
      next TARGET unless ( $s_cs->name eq $t_cs->name );

      # test for identical coord_system version
      next TARGET
        if ( $s_cs->version and ( $s_cs->version ne $t_cs->version ) );

      # test for at least 50% identical seq_regions
      next TARGET
        unless ( $self->seq_regions_compatible( $s_cs, $s_sa, $t_sa ) );

      $self->add_common_cs($s_cs);

      # only the first common coord_system found becomes the "highest"
      unless ($found_highest) {
        $self->highest_common_cs( $s_cs->name );
        $self->highest_common_cs_version( $s_cs->version );
        $found_highest = 1;
      }

      next SOURCE;
    } ## end foreach my $t_cs
  } ## end foreach my $s_cs

  return $found_highest;
} ## end sub find_common_coord_systems
|
|
661
|
|
662
|
|
# Decide whether a coordinate system has compatible seq_regions in the
# source and target databases.
#
# Arguments: $cs (a Bio::EnsEMBL::CoordSystem), $s_sa and $t_sa (the
# source and target Bio::EnsEMBL::DBSQL::SliceAdaptor objects).
#
# Returns 1 when more than half of the source regions and more than half
# of the target regions share a name, and no shared name has differing
# lengths. Returns 0 otherwise, including when either side has no
# regions at all.
# Throws when an argument is missing or of the wrong class.
sub seq_regions_compatible {
  my ( $self, $cs, $s_sa, $t_sa ) = @_;

  unless ( $cs and $cs->isa('Bio::EnsEMBL::CoordSystem') ) {
    throw('You must provide a CoordSystem');
  }

  unless (     $s_sa
           and $t_sa
           and $s_sa->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')
           and $t_sa->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor') )
  {
    throw('You must provide a source and target SliceAdaptor');
  }

  my $s_seq_regions = $s_sa->fetch_all( $cs->name, $cs->version );
  my $t_seq_regions = $t_sa->fetch_all( $cs->name, $cs->version );

  # sanity check to prevent division by zero
  my $s_count = scalar( @{$s_seq_regions} );
  my $t_count = scalar( @{$t_seq_regions} );
  return 0 if ( $s_count == 0 or $t_count == 0 );

  # source region name => length
  my %sr_match =
    map { ( $_->seq_region_name, $_->length ) } @{$s_seq_regions};

  my $equal = 0;
  foreach my $t_sr ( @{$t_seq_regions} ) {
    my $sr_name = $t_sr->seq_region_name;
    next unless ( exists( $sr_match{$sr_name} ) );

    $equal++;

    # return false if we have a region with same name but different length
    return 0 if ( $sr_match{$sr_name} != $t_sr->length );
  }

  if ( $equal / $s_count > 0.5 and $equal / $t_count > 0.5 ) {
    return 1;
  }

  $self->logger->info("Only $equal seq_regions identical for ".$cs->name." ".$cs->version."\n");
  return 0;
}
|
|
710
|
|
711
|
|
# Check that a connection to the given database can be established.
#
# Argument: $dbtype - passed to get_DBAdaptor() (e.g. source or target).
# On success the database is flagged in $self->{'_db_conn_ok'}, which the
# other check_* methods consult before doing any work.
# Returns the number of errors: 0 on success, 1 if connecting failed.
sub check_db_connection {
  my ( $self, $dbtype ) = @_;

  eval { $self->get_DBAdaptor($dbtype)->dbc->connect; };

  if ($@) {
    $self->logger->warning("Can't connect to $dbtype db: $@\n");
    return 1;
  }

  $self->logger->debug("Connection to $dbtype db ok.\n");
  $self->{'_db_conn_ok'}->{$dbtype} = 1;

  return 0;
}
|
|
733
|
|
734
|
|
# Check that the database user has read permission.
#
# Argument: $dbtype - database type as used by get_db_privs().
# Returns 1 without checking if the connection check did not succeed for
# this database; otherwise 0 when the SELECT or ALL PRIVILEGES privilege
# is present and 1 when it is not.
sub check_db_read_permissions {
  my ( $self, $dbtype ) = @_;

  # skip this check if db connection failed (this prevents re-throwing
  # exceptions).
  return 1 unless ( $self->{'_db_conn_ok'}->{$dbtype} );

  my $privs = $self->get_db_privs($dbtype);

  if ( $privs->{'SELECT'} or $privs->{'ALL PRIVILEGES'} ) {
    $self->logger->debug("Read permission on $dbtype db ok.\n");
    return 0;
  }

  $self->logger->warning("User doesn't have read permission on $dbtype db.\n");
  return 1;
}
|
|
755
|
|
756
|
|
# Check that the database user has write permission, if uploads are
# going to happen.
#
# Argument: $dbtype - database type as used by get_db_privs().
# Returns 1 without checking if the connection check did not succeed for
# this database; 0 when do_upload() says nothing will be uploaded or when
# the INSERT or ALL PRIVILEGES privilege is present; 1 otherwise.
sub check_db_write_permissions {
  my ( $self, $dbtype ) = @_;

  # skip this check if db connection failed (this prevents re-throwing
  # exceptions).
  return 1 unless ( $self->{'_db_conn_ok'}->{$dbtype} );

  if ( !$self->do_upload ) {
    $self->logger->debug("No uploads, so write permission on $dbtype db not required.\n");
    return 0;
  }

  my $privs = $self->get_db_privs($dbtype);

  if ( $privs->{'INSERT'} or $privs->{'ALL PRIVILEGES'} ) {
    $self->logger->debug("Write permission on $dbtype db ok.\n");
    return 0;
  }

  $self->logger->warning("User doesn't have write permission on $dbtype db.\n");
  return 1;
}
|
|
783
|
|
784
|
|
# Tell whether anything will be uploaded to the database.
#
# Returns 0 when the 'dry_run' parameter is set, or when none of
# 'upload_events', 'upload_stable_ids' and 'upload_archive' is set.
# Returns 1 otherwise.
sub do_upload {
  my ($self) = @_;

  return 0 if ( $self->conf->param('dry_run') );

  my $upload_requested = (
         $self->conf->param('upload_events')
      or $self->conf->param('upload_stable_ids')
      or $self->conf->param('upload_archive') ) ? 1 : 0;

  return $upload_requested;
}
|
|
797
|
|
798
|
|
# Fetch the privileges of the database user.
#
# Runs "SHOW GRANTS FOR <username>" on the connection of the given
# database type and parses each returned row: the text between GRANT and
# ON is split on commas and every trimmed, upper-cased item becomes a key
# of the result.
#
# Argument: $dbtype - passed to get_DBAdaptor().
# Returns a hash reference (privilege name => 1). If the query fails, a
# warning is logged and an empty hash reference is returned.
sub get_db_privs {
  my $self   = shift;
  my $dbtype = shift;

  my $rows;

  # get privileges from mysql db
  eval {
    my $dbc = $self->get_DBAdaptor($dbtype)->dbc();
    my $sth = $dbc->prepare( qq(SHOW GRANTS FOR ) . $dbc->username() );
    $sth->execute();
    $rows = $sth->fetchall_arrayref();
  };

  if ($@) {
    $self->logger->warning(
                        "Error obtaining privileges from $dbtype db: $@\n");
    return {};
  }

  # parse the output; only the first column of every row is looked at
  my %privs;
  foreach my $grant ( map { $_->[0] } @{$rows} ) {
    $grant =~ s/GRANT (.*) ON .*/$1/i;

    foreach my $priv ( split( ',', $grant ) ) {
      # trim leading and trailing whitespace
      $priv =~ s/^\s+//;
      $priv =~ s/\s+$//;
      $privs{ uc($priv) } = 1;
    }
  }

  return \%privs;
} ## end sub get_db_privs
|
|
834
|
|
835
|
|
# Check that the stable ID and archive tables of a database are empty.
#
# For the four *_stable_id entries of the table list, the prefix before
# "_stable_id" (gene, transcript, translation, exon) is used as the table
# name and rows with a non-NULL stable_id are counted. For the remaining
# tables all rows are counted. Every non-zero count is logged as a
# warning and counted as an error.
#
# Argument: $dbtype - passed to get_DBAdaptor().
# Returns 1 without checking if the connection check did not succeed for
# this database, 0 if the check is disabled by 'no_check_empty_tables' or
# because nothing will be uploaded, otherwise the number of errors found.
sub check_empty_tables {
  my ( $self, $dbtype ) = @_;

  # skip this check if db connection failed (this prevents re-throwing
  # exceptions).
  return 1 unless ( $self->{'_db_conn_ok'}->{$dbtype} );

  my $err = 0;
  my $c   = 0;

  if ( $self->conf->param('no_check_empty_tables') or !$self->do_upload ) {
    $self->logger->debug("Won't check for empty stable ID and archive tables in $dbtype db.\n");
    return $err;
  }

  my @tables = (
    'gene_stable_id',        'transcript_stable_id',
    'translation_stable_id', 'exon_stable_id',
    'stable_id_event',       'mapping_session',
    'gene_archive',          'peptide_archive',
  );

  eval {
    my $dba = $self->get_DBAdaptor($dbtype);

    foreach my $table (@tables) {
      if ( $table =~ /^([^_]+)_stable_id/ ) {
        # stable IDs are counted in the table named by the prefix
        $table = $1;
        $c = $self->fetch_value_from_db(
                 $dba,
                 "SELECT COUNT(*) FROM $table WHERE stable_id IS NOT NULL"
        );
        next unless ($c);

        $self->logger->warning(
                        "$table table in $dbtype db has $c stable IDs.\n");
        $err++;
      }
      else {
        $c = $self->fetch_value_from_db(
                 $dba, "SELECT COUNT(*) FROM $table"
        );
        next unless ($c);

        $self->logger->warning(
                           "$table table in $dbtype db has $c entries.\n");
        $err++;
      }
    } ## end foreach my $table (@tables)
  };

  if ($@) {
    $self->logger->warning(
"Error retrieving stable ID and archive table row counts from $dbtype db: $@\n"
    );
    $err++;
  }
  elsif ( !$err ) {
    $self->logger->debug(
            "All stable ID and archive tables in $dbtype db are empty.\n");
  }

  return $err;
}
|
|
906
|
|
907
|
|
# Check that the dna database attached to a database contains sequence.
#
# Counts the rows of the dna table through the adaptor's dnadb().
#
# Argument: $dbtype - passed to get_DBAdaptor().
# Returns 1 without checking if the connection check did not succeed for
# this database, 1 if the query failed or the table is empty, and 0 if
# sequence was found.
sub check_sequence {
  my $self   = shift;
  my $dbtype = shift;

  # skip this check if db connection failed (this prevents re-throwing
  # exceptions).
  return 1 unless ( $self->{'_db_conn_ok'}->{$dbtype} );

  my $c = 0;

  eval {
    my $dnadb = $self->get_DBAdaptor($dbtype)->dnadb();
    $c = $self->fetch_value_from_db( $dnadb, "SELECT COUNT(*) FROM dna" );
  };

  if ($@) {
    $self->logger->warning(   "Error retrieving dna table row count "
                            . "from $dbtype database: $@\n" );
    return 1;
  }

  unless ($c) {
    $self->logger->warning("No sequence found in $dbtype database.\n");
    return 1;
  }

  $self->logger->debug(
                     ucfirst($dbtype) . " db has sequence ($c entries).\n" );

  return 0;
} ## end sub check_sequence
|
|
942
|
|
943
|
|
# Check that the meta table of a database has the 'assembly.default' and
# 'schema_version' entries.
#
# Argument: $dbtype - passed to get_DBAdaptor().
# Returns 1 without checking if the connection check did not succeed for
# this database, 1 if the queries failed, otherwise the number of
# missing (or false) meta values (0, 1 or 2).
sub check_meta_entries {
  my $self   = shift;
  my $dbtype = shift;

  # skip this check if db connection failed (this prevents re-throwing
  # exceptions).
  return 1 unless ( $self->{'_db_conn_ok'}->{$dbtype} );

  my $err = 0;
  my $assembly_default;
  my $schema_version;

  eval {
    my $dba = $self->get_DBAdaptor($dbtype);
    $assembly_default = $self->fetch_value_from_db( $dba,
      qq(SELECT meta_value FROM meta WHERE meta_key = 'assembly.default') );
    $schema_version = $self->fetch_value_from_db( $dba,
      qq(SELECT meta_value FROM meta WHERE meta_key = 'schema_version') );
  };

  if ($@) {
    # the message names the meta entries these queries actually read
    $self->logger->warning("Error retrieving meta entries from $dbtype db: $@\n");
    return ++$err;
  }

  unless ($assembly_default) {
    $self->logger->warning("No meta.assembly.default value found in $dbtype db.\n");
    $err++;
  } else {
    $self->logger->debug("meta.assembly.default value found ($assembly_default).\n");
  }

  unless ($schema_version) {
    $self->logger->warning("No meta.schema_version value found in $dbtype db.\n");
    $err++;
  } else {
    $self->logger->debug("meta.schema_version value found ($schema_version).\n");
  }

  return $err;
}
|
|
985
|
|
986
|
|
# Run an SQL statement and return the first column of its first row.
#
# Arguments:
#   $dba - Bio::EnsEMBL::DBSQL::DBAdaptor to run the statement on
#          (checked with assert_ref())
#   $sql - the SQL statement, required
# Returns the first column of the first result row (undef if there is no
# row). Throws if $sql is not defined.
sub fetch_value_from_db {
  my ( $self, $dba, $sql ) = @_;

  assert_ref( $dba, 'Bio::EnsEMBL::DBSQL::DBAdaptor' );

  if ( !defined($sql) ) {
    throw("Need an SQL statement to execute.\n");
  }

  my $sth = $dba->dbc->prepare($sql);
  $sth->execute();

  my ($c) = $sth->fetchrow_array;

  # Only the first row is read, so release the statement handle
  # explicitly instead of leaving it active with unfetched rows.
  $sth->finish();

  return $c;
}
|
|
1002
|
|
# Return the (cached) DBAdaptor for a database type.
#
# On first use for a given $prefix a Bio::EnsEMBL::DBSQL::DBAdaptor is built
# from the configuration parameters "${prefix}host", "${prefix}port",
# "${prefix}user", "${prefix}pass" and "${prefix}dbname", and stored on the
# object. If "${prefix}host_dna" is configured, a second adaptor is built from
# the corresponding "*_dna" parameters and attached as dnadb; otherwise the
# adaptor is used as its own dnadb.
#
# Arg[1]  : String $prefix - configuration prefix, also used as -group
# Returns : the DBAdaptor stored for $prefix
sub get_DBAdaptor {
  my ( $self, $prefix ) = @_;

  unless ( $self->{'_dba'}->{$prefix} ) {
    # connect to database
    # (Class->new instead of the indirect "new Class" form, which Perl
    # resolves by heuristics and which is ambiguous in a package that has
    # its own new().)
    my $dba =
      Bio::EnsEMBL::DBSQL::DBAdaptor->new(
                       -host   => $self->conf->param("${prefix}host"),
                       -port   => $self->conf->param("${prefix}port"),
                       -user   => $self->conf->param("${prefix}user"),
                       -pass   => $self->conf->param("${prefix}pass"),
                       -dbname => $self->conf->param("${prefix}dbname"),
                       -group  => $prefix, );

    if ( !defined( $self->conf->param("${prefix}host_dna") ) ) {
      # explicitly set the dnadb to itself - by default the Registry
      # assumes a group 'core' for this now
      $dba->dnadb($dba);
    }
    else {
      my $dna_dba =
        Bio::EnsEMBL::DBSQL::DBAdaptor->new(
                   -host   => $self->conf->param("${prefix}host_dna"),
                   -port   => $self->conf->param("${prefix}port_dna"),
                   -user   => $self->conf->param("${prefix}user_dna"),
                   -pass   => $self->conf->param("${prefix}pass_dna"),
                   -dbname => $self->conf->param("${prefix}dbname_dna"),
                   -group  => $prefix, );
      $dba->dnadb($dna_dba);
    }

    $self->{'_dba'}->{$prefix} = $dba;
  }

  return $self->{'_dba'}->{$prefix};
}
|
|
1038
|
|
1039
|
|
# Tell whether the cache file for a cache type exists on disk, logging the
# outcome.
#
# Arg[1]  : String $type - cache type, passed on to cache_file()
# Returns : 1 if the file exists, 0 otherwise
# Throws  : if no cache type is given
sub cache_file_exists {
  my ( $self, $type ) = @_;

  if ( !$type ) {
    throw("You must provide a cache type.");
  }

  my $cache_file = $self->cache_file($type);

  unless ( -e $cache_file ) {
    $self->logger->info("No cache file found for $type.\n", 2);
    $self->logger->info("Will build cache from db.\n", 2);
    return 0;
  }

  $self->logger->info("Cache file found for $type.\n", 2);
  $self->logger->debug("Will read from $cache_file.\n", 2);
  return 1;
}
|
|
1058
|
|
1059
|
|
# Build the path of the cache file for a cache type:
# "<dump_path>/<type>.object_cache.ser".
#
# Arg[1]  : String $type - cache type
# Returns : String - the cache file path
# Throws  : if no cache type is given
sub cache_file {
  my ( $self, $type ) = @_;

  if ( !$type ) {
    throw("You must provide a cache type.");
  }

  my $dir = $self->dump_path;
  return "$dir/$type.object_cache.ser";
}
|
|
1068
|
|
1069
|
|
# Build the path of the file holding the serialised cache instance data:
# "<dump_path>/cache_instance.ser".
#
# Returns : String - the instance file path
sub instance_file {
  my ($self) = @_;

  my $dir = $self->dump_path;
  return "$dir/cache_instance.ser";
}
|
|
1075
|
|
1076
|
|
# Return the directory used for cache files.
#
# The value is computed once, by appending 'cache' to the configured
# 'basedir' with path_append(), and then remembered on the object.
#
# Returns : the stored dump path
sub dump_path {
  my ($self) = @_;

  unless ( $self->{'dump_path'} ) {
    $self->{'dump_path'} =
      path_append( $self->conf->param('basedir'), 'cache' );
  }

  return $self->{'dump_path'};
}
|
|
1084
|
|
1085
|
|
# Write both the cache of the given type and the cache instance data to disk.
#
# Arg[1]  : String $type - cache type, passed on to write_to_file()
# Returns : the result of parse_bytes() for the summed sizes reported by
#           write_to_file() and write_instance_to_file()
# Throws  : if no cache type is given
sub write_all_to_file {
  my ( $self, $type ) = @_;

  if ( !$type ) {
    throw("You must provide a cache type.");
  }

  my $cache_bytes    = $self->write_to_file($type);
  my $instance_bytes = $self->write_instance_to_file;

  return parse_bytes( 0 + $cache_bytes + $instance_bytes );
}
|
|
1098
|
|
1099
|
|
# Serialise the cache of one type to its cache file using Storable::nstore.
#
# Arg[1]  : String $type - cache type
# Returns : size of the written file as reported by -s; nothing (after
#           logging a warning) if there is no cache data for $type
# Throws  : if no cache type is given or if storing fails
sub write_to_file {
  my ( $self, $type ) = @_;

  if ( !$type ) {
    throw("You must provide a cache type.");
  }

  my $features = $self->{'cache'}->{$type};

  if ( !$features ) {
    $self->logger->warning("No features found in $type. Won't write cache file.\n");
    return;
  }

  my $cache_file = $self->cache_file($type);

  eval { nstore( $features, $cache_file ) };
  throw("Unable to store $cache_file: $@\n") if ($@);

  my $bytes = -s $cache_file;
  return $bytes;
}
|
|
1121
|
|
1122
|
|
# Serialise the cache instance data ($self->{'instance'}) to the instance
# file using Storable::nstore.
#
# Returns : size of the written file as reported by -s
# Throws  : if storing fails
sub write_instance_to_file {
  my ($self) = @_;

  my $instance_file = $self->instance_file;

  eval { nstore( $self->{'instance'}, $instance_file ) };
  throw("Unable to store $instance_file: $@\n") if ($@);

  my $bytes = -s $instance_file;
  return $bytes;
}
|
|
1136
|
|
1137
|
|
# Load the cache of one type from its cache file using Storable::retrieve.
#
# If the file is missing or empty, a warning is logged and the cache is left
# as it is.
#
# Arg[1]  : String $type - cache type
# Returns : the cache stored under $type after the attempt
# Throws  : if no cache type is given or if retrieving fails
sub read_from_file {
  my ( $self, $type ) = @_;

  if ( !$type ) {
    throw("You must provide a cache type.");
  }

  my $cache_file = $self->cache_file($type);

  unless ( -s $cache_file ) {
    $self->logger->warning("Cache file $cache_file not found or empty.\n");
  }
  else {
    eval { $self->{'cache'}->{$type} = retrieve($cache_file); };
    throw("Unable to retrieve cache: $@") if ($@);
  }

  return $self->{'cache'}->{$type};
}
|
|
1163
|
|
1164
|
|
# Read all cache files of a database type and merge them into one cache.
#
# Which files are read depends on cache_method(): a single "$dbtype.ALL"
# file for 'ALL', or one "$dbtype.<slice name>" file per entry of
# slice_names() for 'BY_SEQ_REGION'. Afterwards merge() is called and the
# database type is flagged as loaded in the instance data.
#
# Arg[1]  : String $dbtype - 'source' or 'target'
# Throws  : on any other db type or an unknown cache method
sub read_and_merge {
  my ( $self, $dbtype ) = @_;

  if ( $dbtype ne 'source' and $dbtype ne 'target' ) {
    throw("Db type must be 'source' or 'target'.");
  }

  # read cache from single or multiple files, depending on caching strategy
  my $cache_method = $self->cache_method;
  my @cache_types;

  if ( $cache_method eq 'ALL' ) {
    @cache_types = ("$dbtype.ALL");
  }
  elsif ( $cache_method eq 'BY_SEQ_REGION' ) {
    @cache_types = map { "$dbtype.$_" } @{ $self->slice_names($dbtype) };
  }
  else {
    throw("Unknown cache method: $cache_method.");
  }

  foreach my $cache_type (@cache_types) {
    $self->read_from_file($cache_type);
  }

  $self->merge($dbtype);

  # flag as being loaded
  $self->{'instance'}->{'loaded'}->{$dbtype} = 1;
}
|
|
1190
|
|
1191
|
|
# Merge all partial caches of a database type into one cache.
#
# Every entry of $self->{'cache'} whose key starts with $dbtype is folded,
# name by name and key by key, into $self->{'cache'}->{$dbtype}. A value
# that is already defined in the merged cache is kept, so later duplicates
# are dropped silently. Partial caches are deleted while they are merged.
#
# Arg[1]  : String $dbtype - 'source' or 'target'
# Returns : nothing
# Throws  : on any other db type
sub merge {
  my ( $self, $dbtype ) = @_;

  unless ( $dbtype eq 'source' or $dbtype eq 'target' ) {
    throw("Db type must be 'source' or 'target'.");
  }

  foreach my $type ( keys %{ $self->{'cache'} || {} } ) {
    next unless ( $type =~ /^$dbtype/ );

    # The merged cache is stored under the bare $dbtype key, which matches
    # the pattern above as well. It must not be merged into itself: each of
    # its entries would count as a duplicate and be deleted, and finally the
    # whole merged cache would be removed.
    next if ( $type eq $dbtype );

    my $partial = $self->{'cache'}->{$type};

    foreach my $name ( keys %{ $partial || {} } ) {

      foreach my $key ( keys %{ $partial->{$name} || {} } ) {
        # keep the value that is already there, if any
        unless ( defined $self->{'cache'}->{$dbtype}->{$name}->{$key} ) {
          $self->{'cache'}->{$dbtype}->{$name}->{$key} =
            $partial->{$name}->{$key};
        }

        delete $partial->{$name}->{$key};
      }

      delete $partial->{$name};
    }

    delete $self->{'cache'}->{$type};
  }

  return;
}
|
|
1223
|
|
1224
|
|
# Load the cache instance data from the instance file using
# Storable::retrieve and store it in $self->{'instance'}.
#
# Returns : the retrieved instance data
# Throws  : if the instance file is missing or empty, or if retrieving fails
sub read_instance_from_file {
  my ($self) = @_;

  my $instance_file = $self->instance_file;

  if ( !-s $instance_file ) {
    throw("No valid cache instance file found at $instance_file.");
  }

  eval { $self->{'instance'} = retrieve($instance_file); };
  throw("Unable to retrieve cache instance: $@") if ($@);

  return $self->{'instance'};
}
|
|
1241
|
|
1242
|
|
# Collect the names of the slices to work on for a database type.
#
# Depending on the configuration this is, in order of precedence:
#   - one slice per entry of the 'chromosomes' parameter,
#   - the single slice described by the 'region' parameter,
#   - otherwise all seq regions listed by the GeneAdaptor's
#     list_seq_region_ids(); for a slice that is not a reference slice the
#     slices returned by fetch_by_region_unique() are used instead.
#
# Arg[1]  : String $dbtype - database type, passed on to get_DBAdaptor()
# Returns : Arrayref of slice names
# Throws  : if no db type is given
sub slice_names {
  my ( $self, $dbtype ) = @_;

  if ( !$dbtype ) {
    throw("You must provide a db type (source|target).");
  }

  my $dba = $self->get_DBAdaptor($dbtype);
  my $sa  = $dba->get_SliceAdaptor;

  if ( $self->conf->param('chromosomes') ) {
    # Fetch the specified chromosomes.
    my @chromosome_names =
      map { $sa->fetch_by_region( 'chromosome', $_ )->name }
      $self->conf->param('chromosomes');

    return \@chromosome_names;
  }

  if ( $self->conf->param('region') ) {
    # Fetch the slices on the specified regions. Don't use
    # SliceAdaptor->fetch_by_name() since this will fail if assembly
    # versions are different for source and target db.
    # The region string has the form cs:version:name:start:end:strand;
    # version and strand are not needed here.
    my ( $cs, undef, $name, $start, $end ) =
      split( /:/, $self->conf->param('region') );

    return [ $sa->fetch_by_region( $cs, $name, $start, $end )->name ];
  }

  # Fetch all slices that have genes on them.
  my @slice_names;
  my $ga = $dba->get_GeneAdaptor;

  foreach my $srid ( @{ $ga->list_seq_region_ids } ) {
    my $slice = $sa->fetch_by_seq_region_id($srid);

    if ( $slice->is_reference() ) {
      push @slice_names, $slice->name();
      next;
    }

    my $unique_slices =
      $slice->adaptor()
      ->fetch_by_region_unique( $slice->coord_system_name(),
                                $slice->seq_region_name() );

    push( @slice_names, map { $_->name() } @{$unique_slices} );
  }

  return \@slice_names;
}
|
|
1297
|
|
1298
|
|
# Getter/setter for the logger object.
#
# Arg[1]  : (optional) the new logger
# Returns : the stored logger
sub logger {
  my ( $self, @args ) = @_;

  if (@args) {
    $self->{'logger'} = $args[0];
  }

  return $self->{'logger'};
}
|
|
1304
|
|
# Getter/setter for the configuration object.
#
# Arg[1]  : (optional) the new configuration object
# Returns : the stored configuration object
sub conf {
  my ( $self, @args ) = @_;

  if (@args) {
    $self->{'conf'} = $args[0];
  }

  return $self->{'conf'};
}
|
|
1310
|
|
1311
|
|
# Getter/setter for the cache method kept in the instance data.
#
# Arg[1]  : (optional) the new cache method
# Returns : the stored cache method
sub cache_method {
  my ( $self, @args ) = @_;

  if (@args) {
    $self->{'instance'}->{'cache_method'} = $args[0];
  }

  return $self->{'instance'}->{'cache_method'};
}
|
|
1317
|
|
1318
|
|
# Getter/setter for the highest common coordinate system kept in the
# instance data (key 'hccs').
#
# Arg[1]  : (optional) the new value
# Returns : the stored value
sub highest_common_cs {
  my ( $self, @args ) = @_;

  if (@args) {
    $self->{'instance'}->{'hccs'} = $args[0];
  }

  return $self->{'instance'}->{'hccs'};
}
|
|
1324
|
|
1325
|
|
# Getter/setter for the version of the highest common coordinate system kept
# in the instance data (key 'hccsv').
#
# Arg[1]  : (optional) the new value
# Returns : the stored value
sub highest_common_cs_version {
  my ( $self, @args ) = @_;

  if (@args) {
    $self->{'instance'}->{'hccsv'} = $args[0];
  }

  return $self->{'instance'}->{'hccsv'};
}
|
|
1331
|
|
1332
|
|
# Register a coordinate system as common in the instance data.
#
# The coordinate system is recorded under the id "<name>:<version>".
#
# Arg[1]  : Bio::EnsEMBL::CoordSystem $cs
# Throws  : if $cs is false or is not a Bio::EnsEMBL::CoordSystem
sub add_common_cs {
  my ( $self, $cs ) = @_;

  if ( !$cs or !$cs->isa('Bio::EnsEMBL::CoordSystem') ) {
    throw('You must provide a CoordSystem');
  }

  my $csid = join( ':', $cs->name, $cs->version );

  $self->{'instance'}->{'ccs'}->{$csid} = 1;
}
|
|
1345
|
|
1346
|
|
# Look up whether a coordinate system id was registered as common.
#
# Arg[1]  : String $csid - key into the instance data's 'ccs' hash
# Returns : the value stored for $csid (undef if there is none)
sub is_common_cs {
  my ( $self, $csid ) = @_;

  return $self->{'instance'}->{'ccs'}->{$csid};
}
|
|
1353
|
|
1354
|
|
1355 1;
|
|
1356
|