annotate variant_effect_predictor/Bio/EnsEMBL/IdMapping/Cache.pm @ 1:d6778b5d8382 draft default tip

Deleted selected files
author willmclaren
date Fri, 03 Aug 2012 10:05:43 -0400
parents 21066c0abaf5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 =head1 LICENSE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 This software is distributed under a modified Apache license.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 For license details, please see
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 =head1 CONTACT
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17 <helpdesk@ensembl.org>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23 Bio::EnsEMBL::IdMapping::Cache - a cache to hold data objects used by the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24 IdMapping application
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26 =head1 DESCRIPTION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28 =head1 METHODS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33 package Bio::EnsEMBL::IdMapping::Cache;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36 use warnings;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37 no warnings 'uninitialized';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41 use Bio::EnsEMBL::Utils::ScriptUtils qw(parse_bytes path_append);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43 use Bio::EnsEMBL::IdMapping::TinyGene;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44 use Bio::EnsEMBL::IdMapping::TinyTranscript;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45 use Bio::EnsEMBL::IdMapping::TinyTranslation;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46 use Bio::EnsEMBL::IdMapping::TinyExon;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47 use Bio::EnsEMBL::DBSQL::DBAdaptor;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48 use Storable qw(nstore retrieve);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49 use Digest::MD5 qw(md5_hex);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51 # define available cache names here
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52 my @cache_names = qw(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53 exons_by_id
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54 transcripts_by_id
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55 transcripts_by_exon_id
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56 translations_by_id
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
57 genes_by_id
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
58 genes_by_transcript_id
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
59 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
60
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
61
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
62 =head2 new
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
63
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
64 Arg [LOGGER]: Bio::EnsEMBL::Utils::Logger $logger - a logger object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
65 Arg [CONF] : Bio::EnsEMBL::Utils::ConfParser $conf - a configuration object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
66 Example : my $cache = Bio::EnsEMBL::IdMapping::Cache->new(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
67 -LOGGER => $logger,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
68 -CONF => $conf,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
69 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
70 Description : constructor
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
71 Return type : Bio::EnsEMBL::IdMapping::Cache object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
72 Exceptions : thrown on wrong or missing arguments
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
73 Caller : general
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
74 Status : At Risk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
75 : under development
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
76
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
77 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
78
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
79 sub new {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
80 my $caller = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
81 my $class = ref($caller) || $caller;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
82
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
83 my ($logger, $conf, $load_instance) =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
84 rearrange(['LOGGER', 'CONF', 'LOAD_INSTANCE'], @_);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
85
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
86 unless ($logger->isa('Bio::EnsEMBL::Utils::Logger')) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
87 throw("You must provide a Bio::EnsEMBL::Utils::Logger for logging.");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
88 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
89
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
90 unless ($conf->isa('Bio::EnsEMBL::Utils::ConfParser')) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
91 throw("You must provide configuration as a Bio::EnsEMBL::Utils::ConfParser object.");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
92 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
93
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
94 my $self = {};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
95 bless ($self, $class);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
96
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
97 # initialise
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
98 $self->logger($logger);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
99 $self->conf($conf);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
100
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
101 if ($load_instance) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
102 $self->read_instance_from_file;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
103 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
104
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
105 return $self;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
106 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
107
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
108
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
109 =head2 build_cache_by_slice
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
110
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
111 Arg[1] : String $dbtype - db type (source|target)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
112 Arg[2] : String $slice_name - the name of a slice (format as returned by
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
113 Bio::EnsEMBL::Slice->name)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
114 Example : my ($num_genes, $filesize) = $cache->build_cache_by_slice(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
115 'source', 'chromosome:NCBI36:X:1:1000000:-1');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
116 Description : Builds a cache of genes, transcripts, translations and exons
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
117 needed by the IdMapping application and serialises the resulting
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
118 cache object to a file, one slice at a time.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
119 Return type : list of the number of genes processed and the size of the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
120 serialised cache file
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
121 Exceptions : thrown on invalid slice name
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
122 Caller : general
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
123 Status : At Risk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
124 : under development
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
125
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
126 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
127
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
128 sub build_cache_by_slice {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
129 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
130 my $dbtype = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
131 my $slice_name = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
132
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
133 # set cache method (required for loading cache later)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
134 $self->cache_method('BY_SEQ_REGION');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
135
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
136 my $dba = $self->get_DBAdaptor($dbtype);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
137 my $sa = $dba->get_SliceAdaptor;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
138
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
139 my $slice = $sa->fetch_by_name($slice_name);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
140 unless ($slice) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
141 throw("Could not retrieve slice $slice_name.");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
142 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
143
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
144 my $genes = $slice->get_all_Genes( undef, undef, 1 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
145
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
146 # find common coord_system
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
147 my $common_cs_found = $self->find_common_coord_systems;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
148
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
149 # find out whether native coord_system is a common coord_system.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
150 # if so, you don't need to project.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
151 # also don't project if no common coord_system present
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
152 my $need_project = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
153
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
154 my $csid = join( ':',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
155 $slice->coord_system_name,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
156 $slice->coord_system->version );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
157
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
158 if ( $self->is_common_cs($csid) or !$self->highest_common_cs ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
159 $need_project = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
160 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
161
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
162 # build cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
163 my $type = "$dbtype.$slice_name";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
164 my $num_genes =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
165 $self->build_cache_from_genes( $type, $genes, $need_project );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
166 undef $genes;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
167
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
168 # write cache to file, then flush cache to reclaim memory
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
169 my $size = $self->write_all_to_file($type);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
170
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
171 return $num_genes, $size;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
172 } ## end sub build_cache_by_slice
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
173
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
174
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
175 =head2 build_cache_all
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
176
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
177 Arg[1] : String $dbtype - db type (source|target)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
178 Example : my ($num_genes, $filesize) = $cache->build_cache_all('source');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
179 Description : Builds a cache of genes, transcripts, translations and exons
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
180 needed by the IdMapping application and serialises the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
181 resulting cache object to a file. All genes across the genome
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
182 are processed in one go. This method should be used when
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
183 build_cache_by_seq_region can't be used due to a large number
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
184 of toplevel seq_regions (e.g. 2x genomes).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
185 Return type : list of the number of genes processed and the size of the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
186 serialised cache file
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
187 Exceptions : thrown on invalid slice name
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
188 Caller : general
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
189 Status : At Risk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
190 : under development
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
191
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
192 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
193
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
194 sub build_cache_all {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
195 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
196 my $dbtype = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
197
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
198 # set cache method (required for loading cache later)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
199 $self->cache_method('ALL');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
200
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
201 my $dba = $self->get_DBAdaptor($dbtype);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
202 my $ga = $dba->get_GeneAdaptor;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
203
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
204 my $genes = $ga->fetch_all;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
205
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
206 # find common coord_system
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
207 my $common_cs_found = $self->find_common_coord_systems;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
208
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
209 # Build cache. Setting $need_project to 'CHECK' will cause
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
210 # build_cache_from_genes() to check the coordinate system for each
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
211 # gene.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
212 my $type = "$dbtype.ALL";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
213 my $need_project = 'CHECK';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
214 my $num_genes =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
215 $self->build_cache_from_genes( $type, $genes, $need_project );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
216
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
217 undef $genes;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
218
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
219 # write cache to file, then flush cache to reclaim memory
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
220 my $size = $self->write_all_to_file($type);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
221
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
222 return $num_genes, $size;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
223 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
224
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
225
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
226 =head2 build_cache_from_genes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
227
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
228 Arg[1] : String $type - cache type
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
229 Arg[2] : Listref of Bio::EnsEMBL::Genes $genes - genes to build cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
230 from
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
231 Arg[3] : Boolean $need_project - indicate if we need to project exons to
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
232 common coordinate system
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
233 Example : $cache->build_cache_from_genes(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
234 'source.chromosome:NCBI36:X:1:100000:1', \@genes);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
235 Description : Builds the cache by fetching transcripts, translations and exons
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
236 for a list of genes from the database, and creating lightweight
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
237 Bio::EnsEMBL::IdMapping::TinyFeature objects containing only the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
238 data needed by the IdMapping application. These objects are
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
239 attached to a name cache in this cache object. Exons only need
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
240 to be projected to a commond coordinate system if their native
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
241 coordinate system isn't common to source and target assembly
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
242 itself.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
243 Return type : int - number of genes after filtering
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
244 Exceptions : thrown on wrong or missing arguments
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
245 Caller : internal
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
246 Status : At Risk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
247 : under development
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
248
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
249 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
250
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
251 sub build_cache_from_genes {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
252 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
253 my $type = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
254 my $genes = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
255 my $need_project = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
256
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
257 throw("You must provide a type.") unless $type;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
258 throw("You must provide a listref of genes.")
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
259 unless ( ref($genes) eq 'ARRAY' );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
260
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
261 # biotype filter
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
262 if ( $self->conf()->param('biotypes') ||
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
263 $self->conf()->param('biotypes_include') ||
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
264 $self->conf()->param('biotypes_exclude') )
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
265 {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
266 $genes = $self->filter_biotypes($genes);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
267 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
268 my $num_genes = scalar(@$genes);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
269
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
270 # initialise cache for the given type.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
271 $self->{'cache'}->{$type} = {};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
272
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
273 #my $i = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
274 #my $num_genes = scalar(@$genes);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
275 #my $progress_id = $self->logger->init_progress($num_genes);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
276
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
277 # loop over genes sorted by gene location.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
278 # the sort will hopefully improve assembly mapper cache performance and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
279 # therefore speed up exon sequence retrieval
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
280 foreach my $gene ( sort { $a->start <=> $b->start } @$genes ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
281 #$self->logger->log_progressbar($progress_id, ++$i, 2);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
282 #$self->logger->log_progress($num_genes, ++$i, 20, 3, 1);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
283
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
284 if ( $need_project eq 'CHECK' ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
285 # find out whether native coord_system is a common coord_system.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
286 # if so, you don't need to project.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
287 # also don't project if no common coord_system present
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
288 if ( $self->highest_common_cs ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
289 my $csid = join( ':',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
290 $gene->slice->coord_system_name,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
291 $gene->slice->coord_system->version );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
292 if ( $self->is_common_cs($csid) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
293 $need_project = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
294 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
295 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
296 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
297 $need_project = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
298 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
299 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
300
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
301 # create lightweigt gene
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
302 my $lgene =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
303 Bio::EnsEMBL::IdMapping::TinyGene->new_fast( [
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
304 $gene->dbID, $gene->stable_id,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
305 $gene->version, $gene->created_date,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
306 $gene->modified_date, $gene->start,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
307 $gene->end, $gene->strand,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
308 $gene->slice->seq_region_name, $gene->biotype,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
309 $gene->status, $gene->analysis->logic_name,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
310 ( $gene->is_known ? 1 : 0 ), ] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
311
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
312 # build gene caches
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
313 $self->add( 'genes_by_id', $type, $gene->dbID, $lgene );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
314
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
315 # transcripts
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
316 foreach my $tr ( @{ $gene->get_all_Transcripts } ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
317 my $ltr =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
318 Bio::EnsEMBL::IdMapping::TinyTranscript->new_fast( [
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
319 $tr->dbID, $tr->stable_id,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
320 $tr->version, $tr->created_date,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
321 $tr->modified_date, $tr->start,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
322 $tr->end, $tr->strand,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
323 $tr->length, md5_hex( $tr->spliced_seq ),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
324 ( $tr->is_known ? 1 : 0 ) ] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
325
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
326 $ltr->biotype( $tr->biotype() );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
327 $lgene->add_Transcript($ltr);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
328
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
329 # build transcript caches
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
330 $self->add( 'transcripts_by_id', $type, $tr->dbID, $ltr );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
331 $self->add( 'genes_by_transcript_id', $type, $tr->dbID, $lgene );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
332
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
333 # translation (if there is one)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
334 if ( my $tl = $tr->translation ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
335 my $ltl =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
336 Bio::EnsEMBL::IdMapping::TinyTranslation->new_fast( [
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
337 $tl->dbID, $tl->stable_id,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
338 $tl->version, $tl->created_date,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
339 $tl->modified_date, $tr->dbID,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
340 $tr->translate->seq, ( $tr->is_known ? 1 : 0 ),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
341 ] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
342
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
343 $ltr->add_Translation($ltl);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
344
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
345 $self->add( 'translations_by_id', $type, $tl->dbID, $ltl );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
346
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
347 undef $tl;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
348 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
349
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
350 # exons
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
351 foreach my $exon ( @{ $tr->get_all_Exons } ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
352 my $lexon =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
353 Bio::EnsEMBL::IdMapping::TinyExon->new_fast( [
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
354 $exon->dbID,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
355 $exon->stable_id,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
356 $exon->version,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
357 $exon->created_date,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
358 $exon->modified_date,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
359 $exon->start,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
360 $exon->end,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
361 $exon->strand,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
362 $exon->slice->seq_region_name,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
363 $exon->slice->coord_system_name,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
364 $exon->slice->coord_system->version,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
365 $exon->slice->subseq( $exon->start, $exon->end,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
366 $exon->strand ),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
367 $exon->phase,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
368 $need_project, ] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
369
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
370 # get coordinates in common coordinate system if needed
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
371 if ($need_project) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
372 my @seg = @{
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
373 $exon->project( $self->highest_common_cs,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
374 $self->highest_common_cs_version ) };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
375
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
376 if ( scalar(@seg) == 1 ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
377 my $sl = $seg[0]->to_Slice;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
378 $lexon->common_start( $sl->start );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
379 $lexon->common_end( $sl->end );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
380 $lexon->common_strand( $sl->strand );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
381 $lexon->common_sr_name( $sl->seq_region_name );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
382 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
383 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
384
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
385 $ltr->add_Exon($lexon);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
386
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
387 $self->add( 'exons_by_id', $type, $exon->dbID, $lexon );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
388 $self->add_list( 'transcripts_by_exon_id',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
389 $type, $exon->dbID, $ltr );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
390
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
391 undef $exon;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
392 } ## end foreach my $exon ( @{ $tr->get_all_Exons...})
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
393
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
394 undef $tr;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
395 } ## end foreach my $tr ( @{ $gene->get_all_Transcripts...})
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
396
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
397 undef $gene;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
398 } ## end foreach my $gene ( sort { $a...})
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
399
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
400 return $num_genes;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
401 } ## end sub build_cache_from_genes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
402
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
403
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
404 =head2 filter_biotypes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
405
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
406 Arg[1] : Listref of Bio::EnsEMBL::Genes $genes - the genes to filter
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
407 Example : my @filtered = @{ $cache->filter_biotypes(\@genes) };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
408
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
409 Description : Filters a list of genes by biotype. Biotypes are
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
410 taken from the IdMapping configuration parameter
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
411 'biotypes_include' or 'biotypes_exclude'.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
412
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
413 If the configuration parameter 'biotypes_exclude' is
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
414 defined, then rather than returning the genes whose
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
415 biotype is listed in the configuration parameter
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
416 'biotypes_include' the method will return the genes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
417 whose biotype is *not* listed in the 'biotypes_exclude'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
418 configuration parameter.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
419
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
420 It is an error to define both these configuration
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
421 parameters.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
422
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
423 The old parameter 'biotypes' is equivalent to
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
424 'biotypes_include'.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
425
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
426 Return type : Listref of Bio::EnsEMBL::Genes (or empty list)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
427 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
428 Caller : internal
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
429 Status : At Risk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
430 : under development
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
431
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
432 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
433
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
434 sub filter_biotypes {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
435 my ( $self, $genes ) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
436
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
437 my @filtered;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
438 my @biotypes;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
439 my $opt_reverse;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
440
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
441 if ( defined( $self->conf()->param('biotypes_include') ) ||
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
442 defined( $self->conf()->param('biotypes') ) )
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
443 {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
444 if ( defined( $self->conf()->param('biotypes_exclude') ) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
445 $self->logger()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
446 ->error( "You may not use both " .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
447 "'biotypes_include' and 'biotypes_exclude' " .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
448 "in the configuration" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
449 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
450
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
451 if ( defined( $self->conf()->param('biotypes_include') ) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
452 @biotypes = $self->conf()->param('biotypes_include');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
453 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
454 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
455 @biotypes = $self->conf()->param('biotypes');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
456 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
457 $opt_reverse = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
458 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
459 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
460 @biotypes = $self->conf()->param('biotypes_exclude');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
461 $opt_reverse = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
462 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
463
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
464 foreach my $gene ( @{$genes} ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
465 my $keep_gene;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
466
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
467 foreach my $biotype (@biotypes) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
468 if ( $gene->biotype() eq $biotype ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
469 if ($opt_reverse) { $keep_gene = 0 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
470 else { $keep_gene = 1 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
471 last;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
472 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
473 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
474
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
475 if ( defined($keep_gene) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
476 if ($keep_gene) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
477 push( @filtered, $gene );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
478 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
479 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
480 elsif ($opt_reverse) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
481 push( @filtered, $gene );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
482 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
483 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
484
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
485 return \@filtered;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
486 } ## end sub filter_biotypes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
487
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
488
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
489 =head2 add
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
490
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
491 Arg[1] : String $name - a cache name (e.g. 'genes_by_id')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
492 Arg[2] : String type - a cache type (e.g. "source.$slice_name")
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
493 Arg[3] : String $key - key of this entry (e.g. a gene dbID)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
494 Arg[4] : Bio::EnsEMBL::IdMappping::TinyFeature $val - value to cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
495 Example : $cache->add('genes_by_id',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
496 'source.chromosome:NCBI36:X:1:1000000:1', '1234', $tiny_gene);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
497 Description : Adds a TinyFeature object to a named cache.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
498 Return type : Bio::EnsEMBL::IdMapping::TinyFeature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
499 Exceptions : thrown on wrong or missing arguments
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
500 Caller : internal
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
501 Status : At Risk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
502 : under development
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
503
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
504 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
505
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
506 sub add {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
507 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
508 my $name = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
509 my $type = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
510 my $key = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
511 my $val = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
512
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
513 throw("You must provide a cache name (e.g. genes_by_id.") unless $name;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
514 throw("You must provide a cache type.") unless $type;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
515 throw("You must provide a cache key (e.g. a gene dbID).") unless $key;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
516
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
517 $self->{'cache'}->{$type}->{$name}->{$key} = $val;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
518
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
519 return $self->{'cache'}->{$type}->{$name}->{$key};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
520 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
521
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
522 =head2 add_list
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
523
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
524 Arg[1] : String $name - a cache name (e.g. 'genes_by_id')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
525 Arg[2] : String type - a cache type (e.g. "source.$slice_name")
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
526 Arg[3] : String $key - key of this entry (e.g. a gene dbID)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
527 Arg[4] : List of Bio::EnsEMBL::IdMappping::TinyFeature @val - values
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
528 to cache
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
529 Example : $cache->add_list('transcripts_by_exon_id',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
530 'source.chromosome:NCBI36:X:1:1000000:1', '1234',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
531 $tiny_transcript1, $tiny_transcript2);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
532 Description : Adds a list of TinyFeature objects to a named cache.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
533 Return type : Listref of Bio::EnsEMBL::IdMapping::TinyFeature objects
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
534 Exceptions : thrown on wrong or missing arguments
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
535 Caller : internal
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
536 Status : At Risk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
537 : under development
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
538
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
539 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
540
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
541 sub add_list {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
542 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
543 my $name = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
544 my $type = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
545 my $key = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
546 my @vals = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
547
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
548 throw("You must provide a cache name (e.g. genes_by_id.") unless $name;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
549 throw("You must provide a cache type.") unless $type;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
550 throw("You must provide a cache key (e.g. a gene dbID).") unless $key;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
551
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
552 push @{ $self->{'cache'}->{$type}->{$name}->{$key} }, @vals;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
553
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
554 return $self->{'cache'}->{$type}->{$name}->{$key};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
555 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
556
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
557 sub get_by_key {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
558 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
559 my $name = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
560 my $type = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
561 my $key = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
562
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
563 throw("You must provide a cache name (e.g. genes_by_id.") unless $name;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
564 throw("You must provide a cache type.") unless $type;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
565 throw("You must provide a cache key (e.g. a gene dbID).") unless $key;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
566
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
567 # transparently load cache from file unless already loaded
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
568 unless ($self->{'instance'}->{'loaded'}->{"$type"}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
569 $self->read_and_merge($type);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
570 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
571
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
572 return $self->{'cache'}->{$type}->{$name}->{$key};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
573 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
574
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
575 sub get_by_name {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
576 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
577 my $name = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
578 my $type = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
579
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
580 throw("You must provide a cache name (e.g. genes_by_id.") unless $name;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
581 throw("You must provide a cache type.") unless $type;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
582
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
583 # transparently load cache from file unless already loaded
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
584 unless ($self->{'instance'}->{'loaded'}->{$type}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
585 $self->read_and_merge($type);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
586 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
587
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
588 return $self->{'cache'}->{$type}->{$name} || {};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
589 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
590
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
591
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
592 sub get_count_by_name {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
593 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
594 my $name = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
595 my $type = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
596
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
597 throw("You must provide a cache name (e.g. genes_by_id.") unless $name;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
598 throw("You must provide a cache type.") unless $type;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
599
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
600 # transparently load cache from file unless already loaded
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
601 unless ($self->{'instance'}->{'loaded'}->{$type}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
602 $self->read_and_merge($type);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
603 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
604
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
605 return scalar(keys %{ $self->get_by_name($name, $type) });
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
606 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
607
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
608
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
609 sub find_common_coord_systems {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
610 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
611
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
612 # get adaptors for source db
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
613 my $s_dba = $self->get_DBAdaptor('source');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
614 my $s_csa = $s_dba->get_CoordSystemAdaptor;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
615 my $s_sa = $s_dba->get_SliceAdaptor;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
616
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
617 # get adaptors for target db
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
618 my $t_dba = $self->get_DBAdaptor('target');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
619 my $t_csa = $t_dba->get_CoordSystemAdaptor;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
620 my $t_sa = $t_dba->get_SliceAdaptor;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
621
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
622 # find common coord_systems
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
623 my @s_coord_systems = @{ $s_csa->fetch_all };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
624 my @t_coord_systems = @{ $t_csa->fetch_all };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
625 my $found_highest = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
626
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
627 SOURCE:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
628 foreach my $s_cs (@s_coord_systems) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
629 if ( !$s_cs->is_default() ) { next SOURCE }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
630
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
631 TARGET:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
632 foreach my $t_cs (@t_coord_systems) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
633 if ( !$t_cs->is_default() ) { next TARGET }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
634
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
635 if ( $s_cs->name eq $t_cs->name ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
636
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
637 # test for identical coord_system version
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
638 if ( $s_cs->version and ( $s_cs->version ne $t_cs->version ) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
639 next TARGET;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
640 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
641
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
642 # test for at least 50% identical seq_regions
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
643 if ( $self->seq_regions_compatible( $s_cs, $s_sa, $t_sa ) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
644 $self->add_common_cs($s_cs);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
645
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
646 unless ($found_highest) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
647 $self->highest_common_cs( $s_cs->name );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
648 $self->highest_common_cs_version( $s_cs->version );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
649 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
650
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
651 $found_highest = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
652
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
653 next SOURCE;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
654 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
655 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
656 } ## end foreach my $t_cs (@t_coord_systems)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
657 } ## end foreach my $s_cs (@s_coord_systems)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
658
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
659 return $found_highest;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
660 } ## end sub find_common_coord_systems
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
661
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
662
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
663 sub seq_regions_compatible {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
664 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
665 my $cs = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
666 my $s_sa = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
667 my $t_sa = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
668
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
669 unless ($cs and $cs->isa('Bio::EnsEMBL::CoordSystem')) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
670 throw('You must provide a CoordSystem');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
671 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
672
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
673 unless ($s_sa and $t_sa and $s_sa->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
674 and $t_sa->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
675 throw('You must provide a source and target SliceAdaptor');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
676 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
677
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
678 my %sr_match;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
679 my $equal = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
680
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
681 my $s_seq_regions = $s_sa->fetch_all($cs->name, $cs->version);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
682 my $t_seq_regions = $t_sa->fetch_all($cs->name, $cs->version);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
683
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
684 # sanity check to prevent divison by zero
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
685 my $s_count = scalar(@$s_seq_regions);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
686 my $t_count = scalar(@$t_seq_regions);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
687 return(0) if ($s_count == 0 or $t_count == 0);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
688
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
689 foreach my $s_sr (@$s_seq_regions) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
690 $sr_match{$s_sr->seq_region_name} = $s_sr->length;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
691 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
692
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
693 foreach my $t_sr (@$t_seq_regions) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
694 if (exists($sr_match{$t_sr->seq_region_name})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
695 $equal++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
696
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
697 # return false if we have a region with same name but different length
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
698 return(0) unless ($sr_match{$t_sr->seq_region_name} == $t_sr->length);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
699 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
700 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
701
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
702 if ($equal/$s_count > 0.5 and $equal/$t_count > 0.5) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
703 return(1);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
704 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
705 $self->logger->info("Only $equal seq_regions identical for ".$cs->name." ".$cs->version."\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
706 return(0);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
707 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
708
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
709 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
710
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
711
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
712 sub check_db_connection {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
713 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
714 my $dbtype = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
715
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
716 my $err = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
717
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
718 eval {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
719 my $dba = $self->get_DBAdaptor($dbtype);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
720 $dba->dbc->connect;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
721 };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
722
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
723 if ($@) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
724 $self->logger->warning("Can't connect to $dbtype db: $@\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
725 $err++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
726 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
727 $self->logger->debug("Connection to $dbtype db ok.\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
728 $self->{'_db_conn_ok'}->{$dbtype} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
729 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
730
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
731 return $err;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
732 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
733
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
734
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
735 sub check_db_read_permissions {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
736 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
737 my $dbtype = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
738
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
739 # skip this check if db connection failed (this prevents re-throwing
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
740 # exceptions).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
741 return 1 unless ($self->{'_db_conn_ok'}->{$dbtype});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
742
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
743 my $err = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
744 my %privs = %{ $self->get_db_privs($dbtype) };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
745
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
746 unless ($privs{'SELECT'} or $privs{'ALL PRIVILEGES'}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
747 $self->logger->warning("User doesn't have read permission on $dbtype db.\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
748 $err++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
749 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
750 $self->logger->debug("Read permission on $dbtype db ok.\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
751 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
752
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
753 return $err;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
754 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
755
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
756
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
757 sub check_db_write_permissions {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
758 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
759 my $dbtype = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
760
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
761 # skip this check if db connection failed (this prevents re-throwing
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
762 # exceptions).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
763 return 1 unless ($self->{'_db_conn_ok'}->{$dbtype});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
764
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
765 my $err = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
766
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
767 unless ($self->do_upload) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
768 $self->logger->debug("No uploads, so write permission on $dbtype db not required.\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
769 return $err;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
770 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
771
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
772 my %privs = %{ $self->get_db_privs($dbtype) };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
773
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
774 unless ($privs{'INSERT'} or $privs{'ALL PRIVILEGES'}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
775 $self->logger->warning("User doesn't have write permission on $dbtype db.\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
776 $err++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
777 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
778 $self->logger->debug("Write permission on $dbtype db ok.\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
779 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
780
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
781 return $err;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
782 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
783
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
784
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
785 sub do_upload {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
786 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
787
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
788 if ($self->conf->param('dry_run') or
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
789 ! ($self->conf->param('upload_events') or
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
790 $self->conf->param('upload_stable_ids') or
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
791 $self->conf->param('upload_archive'))) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
792 return 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
793 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
794 return 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
795 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
796 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
797
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
798
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
799 sub get_db_privs {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
800 my ( $self, $dbtype ) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
801
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
802 my %privs = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
803 my $rs;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
804
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
805 # get privileges from mysql db
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
806 eval {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
807 my $dbc = $self->get_DBAdaptor($dbtype)->dbc();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
808 my $sql = qq(SHOW GRANTS FOR ) . $dbc->username();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
809 my $sth = $dbc->prepare($sql);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
810 $sth->execute();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
811 $rs = $sth->fetchall_arrayref();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
812 #$sth->finish();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
813 };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
814
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
815 if ($@) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
816 $self->logger->warning(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
817 "Error obtaining privileges from $dbtype db: $@\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
818 return {};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
819 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
820
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
821 # parse the output
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
822 foreach my $r ( map { $_->[0] } @{$rs} ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
823 $r =~ s/GRANT (.*) ON .*/$1/i;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
824 foreach my $p ( split( ',', $r ) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
825 # trim leading and trailing whitespace
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
826 $p =~ s/^\s+//;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
827 $p =~ s/\s+$//;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
828 $privs{ uc($p) } = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
829 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
830 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
831
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
832 return \%privs;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
833 } ## end sub get_db_privs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
834
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
835
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
836 sub check_empty_tables {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
837 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
838 my $dbtype = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
839
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
840 # skip this check if db connection failed (this prevents re-throwing
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
841 # exceptions).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
842 return 1 unless ($self->{'_db_conn_ok'}->{$dbtype});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
843
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
844 my $err = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
845 my $c = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
846
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
847 if ($self->conf->param('no_check_empty_tables') or !$self->do_upload) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
848 $self->logger->debug("Won't check for empty stable ID and archive tables in $dbtype db.\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
849 return $err;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
850 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
851
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
852 eval {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
853 my @tables =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
854 qw(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
855 gene_stable_id
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
856 transcript_stable_id
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
857 translation_stable_id
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
858 exon_stable_id
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
859 stable_id_event
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
860 mapping_session
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
861 gene_archive
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
862 peptide_archive
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
863 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
864
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
865 my $dba = $self->get_DBAdaptor($dbtype);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
866 foreach my $table (@tables) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
867 if ( $table =~ /^([^_]+)_stable_id/ ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
868 $table = $1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
869 if ( $c =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
870 $self->fetch_value_from_db(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
871 $dba,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
872 "SELECT COUNT(*) FROM $table WHERE stable_id IS NOT NULL"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
873 ) )
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
874 {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
875 $self->logger->warning(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
876 "$table table in $dbtype db has $c stable IDs.\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
877 $err++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
878 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
879 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
880 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
881 if ( $c =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
882 $self->fetch_value_from_db(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
883 $dba, "SELECT COUNT(*) FROM $table"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
884 ) )
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
885 {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
886 $self->logger->warning(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
887 "$table table in $dbtype db has $c entries.\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
888 $err++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
889 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
890 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
891 } ## end foreach my $table (@tables)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
892 };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
893
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
894 if ($@) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
895 $self->logger->warning(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
896 "Error retrieving stable ID and archive table row counts from $dbtype db: $@\n"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
897 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
898 $err++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
899 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
900 elsif ( !$err ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
901 $self->logger->debug(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
902 "All stable ID and archive tables in $dbtype db are empty.\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
903 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
904 return $err;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
905 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
906
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
907
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
908 sub check_sequence {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
909 my ( $self, $dbtype ) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
910
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
911 # skip this check if db connection failed (this prevents re-throwing
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
912 # exceptions).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
913 return 1 unless ( $self->{'_db_conn_ok'}->{$dbtype} );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
914
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
915 my $err = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
916 my $c = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
917
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
918 eval {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
919 my $dba = $self->get_DBAdaptor($dbtype);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
920 unless ( $c =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
921 $self->fetch_value_from_db(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
922 $dba->dnadb(), "SELECT COUNT(*) FROM dna"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
923 ) )
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
924 {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
925 $err++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
926 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
927 };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
928
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
929 if ($@) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
930 $self->logger->warning( "Error retrieving dna table row count "
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
931 . "from $dbtype database: $@\n" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
932 $err++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
933 } elsif ($err) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
934 $self->logger->warning("No sequence found in $dbtype database.\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
935 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
936 $self->logger->debug(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
937 ucfirst($dbtype) . " db has sequence ($c entries).\n" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
938 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
939
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
940 return $err;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
941 } ## end sub check_sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
942
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
943
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
944 sub check_meta_entries {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
945 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
946 my $dbtype = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
947
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
948 # skip this check if db connection failed (this prevents re-throwing
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
949 # exceptions).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
950 return 1 unless ($self->{'_db_conn_ok'}->{$dbtype});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
951
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
952 my $err = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
953 my $assembly_default;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
954 my $schema_version;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
955
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
956 eval {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
957 my $dba = $self->get_DBAdaptor($dbtype);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
958 $assembly_default = $self->fetch_value_from_db($dba,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
959 qq(SELECT meta_value FROM meta WHERE meta_key = 'assembly.default'));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
960 $schema_version = $self->fetch_value_from_db($dba,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
961 qq(SELECT meta_value FROM meta WHERE meta_key = 'schema_version'));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
962 };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
963
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
964 if ($@) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
965 $self->logger->warning("Error retrieving dna table row count from $dbtype db: $@\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
966 return ++$err;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
967 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
968
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
969 unless ($assembly_default) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
970 $self->logger->warning("No meta.assembly.default value found in $dbtype db.\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
971 $err++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
972 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
973 $self->logger->debug("meta.assembly.default value found ($assembly_default).\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
974 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
975
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
976 unless ($schema_version) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
977 $self->logger->warning("No meta.schema_version value found in $dbtype db.\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
978 $err++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
979 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
980 $self->logger->debug("meta.schema_version value found ($schema_version).\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
981 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
982
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
983 return $err;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
984 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
985
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
986
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
987 sub fetch_value_from_db {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
988 my ( $self, $dba, $sql ) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
989
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
990 assert_ref( $dba, 'Bio::EnsEMBL::DBSQL::DBAdaptor' );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
991
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
992 if ( !defined($sql) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
993 throw("Need an SQL statement to execute.\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
994 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
995
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
996 my $sth = $dba->dbc->prepare($sql);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
997 $sth->execute();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
998
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
999 my ($c) = $sth->fetchrow_array;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1000 return $c;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1001 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1002
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1003 sub get_DBAdaptor {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1004 my ( $self, $prefix ) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1005
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1006 unless ( $self->{'_dba'}->{$prefix} ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1007 # connect to database
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1008 my $dba =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1009 new Bio::EnsEMBL::DBSQL::DBAdaptor(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1010 -host => $self->conf->param("${prefix}host"),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1011 -port => $self->conf->param("${prefix}port"),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1012 -user => $self->conf->param("${prefix}user"),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1013 -pass => $self->conf->param("${prefix}pass"),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1014 -dbname => $self->conf->param("${prefix}dbname"),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1015 -group => $prefix, );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1016
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1017 if ( !defined( $self->conf->param("${prefix}host_dna") ) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1018 # explicitely set the dnadb to itself - by default the Registry
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1019 # assumes a group 'core' for this now
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1020 $dba->dnadb($dba);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1021 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1022 my $dna_dba =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1023 new Bio::EnsEMBL::DBSQL::DBAdaptor(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1024 -host => $self->conf->param("${prefix}host_dna"),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1025 -port => $self->conf->param("${prefix}port_dna"),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1026 -user => $self->conf->param("${prefix}user_dna"),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1027 -pass => $self->conf->param("${prefix}pass_dna"),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1028 -dbname => $self->conf->param("${prefix}dbname_dna"),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1029 -group => $prefix, );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1030 $dba->dnadb($dna_dba);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1031 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1032
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1033 $self->{'_dba'}->{$prefix} = $dba;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1034 } ## end unless ( $self->{'_dba'}->...)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1035
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1036 return $self->{'_dba'}->{$prefix};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1037 } ## end sub get_DBAdaptor
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1038
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1039
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1040 sub cache_file_exists {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1041 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1042 my $type = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1043
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1044 throw("You must provide a cache type.") unless $type;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1045
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1046 my $cache_file = $self->cache_file($type);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1047
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1048 if (-e $cache_file) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1049 $self->logger->info("Cache file found for $type.\n", 2);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1050 $self->logger->debug("Will read from $cache_file.\n", 2);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1051 return 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1052 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1053 $self->logger->info("No cache file found for $type.\n", 2);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1054 $self->logger->info("Will build cache from db.\n", 2);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1055 return 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1056 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1057 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1058
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1059
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1060 sub cache_file {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1061 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1062 my $type = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1063
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1064 throw("You must provide a cache type.") unless $type;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1065
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1066 return $self->dump_path."/$type.object_cache.ser";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1067 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1068
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1069
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1070 sub instance_file {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1071 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1072
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1073 return $self->dump_path."/cache_instance.ser";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1074 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1075
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1076
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1077 sub dump_path {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1078 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1079
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1080 $self->{'dump_path'} ||= path_append($self->conf->param('basedir'), 'cache');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1081
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1082 return $self->{'dump_path'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1083 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1084
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1085
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1086 sub write_all_to_file {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1087 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1088 my $type = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1089
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1090 throw("You must provide a cache type.") unless $type;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1091
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1092 my $size = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1093 $size += $self->write_to_file($type);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1094 $size += $self->write_instance_to_file;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1095
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1096 return parse_bytes($size);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1097 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1098
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1099
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1100 sub write_to_file {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1101 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1102 my $type = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1103
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1104 throw("You must provide a cache type.") unless $type;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1105
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1106 unless ($self->{'cache'}->{$type}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1107 $self->logger->warning("No features found in $type. Won't write cache file.\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1108 return;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1109 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1110
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1111 my $cache_file = $self->cache_file($type);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1112
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1113 eval { nstore($self->{'cache'}->{$type}, $cache_file) };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1114 if ($@) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1115 throw("Unable to store $cache_file: $@\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1116 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1117
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1118 my $size = -s $cache_file;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1119 return $size;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1120 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1121
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1122
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1123 sub write_instance_to_file {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1124 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1125
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1126 my $instance_file = $self->instance_file;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1127
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1128 eval { nstore($self->{'instance'}, $instance_file) };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1129 if ($@) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1130 throw("Unable to store $instance_file: $@\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1131 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1132
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1133 my $size = -s $instance_file;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1134 return $size;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1135 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1136
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1137
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1138 sub read_from_file {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1139 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1140 my $type = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1141
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1142 throw("You must provide a cache type.") unless $type;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1143
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1144 my $cache_file = $self->cache_file($type);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1145
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1146 if (-s $cache_file) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1147
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1148 #$self->logger->info("Reading cache from file...\n", 0, 'stamped');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1149 #$self->logger->info("Cache file $cache_file.\n", 1);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1150 eval { $self->{'cache'}->{$type} = retrieve($cache_file); };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1151 if ($@) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1152 throw("Unable to retrieve cache: $@");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1153 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1154 #$self->logger->info("Done.\n", 0, 'stamped');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1155
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1156 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1157 $self->logger->warning("Cache file $cache_file not found or empty.\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1158 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1159
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1160
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1161 return $self->{'cache'}->{$type};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1162 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1163
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1164
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1165 sub read_and_merge {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1166 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1167 my $dbtype = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1168
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1169 unless ($dbtype eq 'source' or $dbtype eq 'target') {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1170 throw("Db type must be 'source' or 'target'.");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1171 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1172
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1173 # read cache from single or multiple files, depending on caching strategy
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1174 my $cache_method = $self->cache_method;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1175 if ($cache_method eq 'ALL') {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1176 $self->read_from_file("$dbtype.ALL");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1177 } elsif ($cache_method eq 'BY_SEQ_REGION') {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1178 foreach my $slice_name (@{ $self->slice_names($dbtype) }) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1179 $self->read_from_file("$dbtype.$slice_name");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1180 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1181 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1182 throw("Unknown cache method: $cache_method.");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1183 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1184
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1185 $self->merge($dbtype);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1186
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1187 # flag as being loaded
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1188 $self->{'instance'}->{'loaded'}->{$dbtype} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1189 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1190
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1191
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1192 sub merge {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1193 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1194 my $dbtype = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1195
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1196 unless ($dbtype eq 'source' or $dbtype eq 'target') {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1197 throw("Db type must be 'source' or 'target'.");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1198 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1199
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1200 foreach my $type (keys %{ $self->{'cache'} || {} }) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1201 next unless ($type =~ /^$dbtype/);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1202
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1203 foreach my $name (keys %{ $self->{'cache'}->{$type} || {} }) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1204
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1205 foreach my $key (keys %{ $self->{'cache'}->{$type}->{$name} || {} }) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1206 if (defined $self->{'cache'}->{$dbtype}->{$name}->{$key}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1207 # warning("Duplicate key in cache: $name|$dbtype|$key. Skipping.\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1208 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1209 $self->{'cache'}->{$dbtype}->{$name}->{$key} =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1210 $self->{'cache'}->{$type}->{$name}->{$key};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1211 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1212
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1213 delete $self->{'cache'}->{$type}->{$name}->{$key};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1214 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1215
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1216 delete $self->{'cache'}->{$type}->{$name};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1217 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1218
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1219 delete $self->{'cache'}->{$type};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1220
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1221 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1222 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1223
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1224
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1225 sub read_instance_from_file {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1226 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1227
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1228 my $instance_file = $self->instance_file;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1229
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1230 unless (-s $instance_file) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1231 throw("No valid cache instance file found at $instance_file.");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1232 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1233
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1234 eval { $self->{'instance'} = retrieve($instance_file); };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1235 if ($@) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1236 throw("Unable to retrieve cache instance: $@");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1237 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1238
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1239 return $self->{'instance'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1240 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1241
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1242
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1243 sub slice_names {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1244 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1245 my $dbtype = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1246
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1247 throw("You must provide a db type (source|target).") unless $dbtype;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1248
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1249 my $dba = $self->get_DBAdaptor($dbtype);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1250 my $sa = $dba->get_SliceAdaptor;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1251
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1252 my @slice_names = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1253
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1254 if ( $self->conf->param('chromosomes') ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1255 # Fetch the specified chromosomes.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1256 foreach my $chr ( $self->conf->param('chromosomes') ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1257 my $slice = $sa->fetch_by_region( 'chromosome', $chr );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1258 push @slice_names, $slice->name;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1259 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1260
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1261 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1262 elsif ( $self->conf->param('region') ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1263 # Fetch the slices on the specified regions. Don't use
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1264 # SliceAdaptor->fetch_by_name() since this will fail if assembly
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1265 # versions are different for source and target db.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1266 my ( $cs, $version, $name, $start, $end, $strand ) =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1267 split( /:/, $self->conf->param('region') );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1268
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1269 my $slice = $sa->fetch_by_region( $cs, $name, $start, $end );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1270
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1271 push @slice_names, $slice->name;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1272
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1273 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1274 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1275 # Fetch all slices that have genes on them.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1276 my $ga = $dba->get_GeneAdaptor;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1277
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1278 foreach my $srid ( @{ $ga->list_seq_region_ids } ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1279 my $slice = $sa->fetch_by_seq_region_id($srid);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1280
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1281 if ( !$slice->is_reference() ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1282 my $slices =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1283 $slice->adaptor()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1284 ->fetch_by_region_unique( $slice->coord_system_name(),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1285 $slice->seq_region_name() );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1286
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1287 push( @slice_names, map { $_->name() } @{$slices} );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1288 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1289 else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1290 push @slice_names, $slice->name();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1291 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1292 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1293 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1294
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1295 return \@slice_names;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1296 } ## end sub slice_names
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1297
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1298
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1299 sub logger {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1300 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1301 $self->{'logger'} = shift if (@_);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1302 return $self->{'logger'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1303 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1304
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1305 sub conf {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1306 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1307 $self->{'conf'} = shift if (@_);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1308 return $self->{'conf'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1309 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1310
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1311
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1312 sub cache_method {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1313 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1314 $self->{'instance'}->{'cache_method'} = shift if (@_);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1315 return $self->{'instance'}->{'cache_method'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1316 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1317
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1318
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1319 sub highest_common_cs {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1320 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1321 $self->{'instance'}->{'hccs'} = shift if (@_);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1322 return $self->{'instance'}->{'hccs'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1323 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1324
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1325
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1326 sub highest_common_cs_version {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1327 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1328 $self->{'instance'}->{'hccsv'} = shift if (@_);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1329 return $self->{'instance'}->{'hccsv'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1330 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1331
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1332
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1333 sub add_common_cs {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1334 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1335 my $cs = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1336
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1337 unless ($cs and $cs->isa('Bio::EnsEMBL::CoordSystem')) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1338 throw('You must provide a CoordSystem');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1339 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1340
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1341 my $csid = join(':', $cs->name, $cs->version);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1342
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1343 $self->{'instance'}->{'ccs'}->{$csid} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1344 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1345
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1346
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1347 sub is_common_cs {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1348 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1349 my $csid = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1350
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1351 return $self->{'instance'}->{'ccs'}->{$csid};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1352 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1353
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1354
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1355 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1356