0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =head1 NAME
|
|
20
|
|
21 Bio::EnsEMBL::Compara::GenomeDB - Describes a genome (species name, assembly, genebuild, taxon) and the location of its core database
|
|
22
|
|
23 =head1 SYNOPSIS
|
|
24 use Bio::EnsEMBL::Compara::GenomeDB;
|
|
25 my $genome_db = new Bio::EnsEMBL::Compara::GenomeDB();
|
|
26
|
|
27 SET VALUES
|
|
28 $genome_db->dbID(22);
|
|
29 $genome_db->db_adaptor($dba);
|
|
30 $genome_db->name("Homo sapiens");
|
|
31 $genome_db->assembly("NCBI36");
|
|
32 $genome_db->taxon_id(9606);
|
|
33 $genome_db->taxon($taxon);
|
|
34 $genome_db->genebuild("2006-12-Ensembl");
|
|
35 $genome_db->assembly_default(1);
|
|
36 $genome_db->locator("Bio::EnsEMBL::DBSQL::DBAdaptor/host=???;port=???;user=???;dbname=homo_sapiens_core_51_36m;species=Homo sapiens;disconnect_when_inactive=1");
|
|
37
|
|
38 GET VALUES
|
|
39 $dbID = $genome_db->dbID;
|
|
40 $genome_db_adaptor = $genome_db->adaptor;
|
|
41 $name = $genome_db->name;
|
|
42 $assembly = $genome_db->assembly;
|
|
43 $taxon_id = $genome_db->taxon_id;
|
|
44 $taxon = $genome_db->taxon;
|
|
45 $genebuild = $genome_db->genebuild;
|
|
46 $assembly_default = $genome_db->assembly_default;
|
|
47 $locator = $genome_db->locator;
|
|
48
|
|
49
|
|
50 =head1 DESCRIPTION
|
|
51
|
|
52 The GenomeDB object stores information about each species including the taxon_id, species name, assembly, genebuild and the location of the core database.
|
|
53
|
|
54 =head1 APPENDIX
|
|
55
|
|
56 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
|
|
57
|
|
58 =cut
|
|
59
|
|
60
|
|
61 # Let the code begin...
|
|
62
|
|
63
|
|
64 package Bio::EnsEMBL::Compara::GenomeDB;
|
|
65
|
|
66 use strict;
|
|
67
|
|
68 use Bio::EnsEMBL::Utils::Exception qw(warning deprecate throw);
|
|
69 use Bio::EnsEMBL::DBLoader;
|
|
70
|
|
71 =head2 new
|
|
72
|
|
73 Example :
|
|
74 my $genome_db = new Bio::EnsEMBL::Compara::GenomeDB();
|
|
75 $genome_db->db_adaptor($dba);
|
|
76 $genome_db->name("Homo sapiens");
|
|
77 $genome_db->assembly("NCBI36");
|
|
78 $genome_db->taxon_id(9606);
|
|
79 $genome_db->dbID(22);
|
|
80 $genome_db->genebuild("2006-12-Ensembl");
|
|
81
|
|
82 Description: Creates a new GenomeDB object
|
|
83 Returntype : Bio::EnsEMBL::Compara::GenomeDB
|
|
84 Exceptions : none
|
|
85 Caller : general
|
|
86 Status : Stable
|
|
87
|
|
88 =cut
|
|
89
|
|
sub new {
    # Positional constructor: every argument after the invocant is optional.
    my ($caller, $dba, $name, $assembly, $taxon_id, $dbID, $genebuild) = @_;

    # The invocant may be a class name or an existing instance.
    my $self = bless {}, ref($caller) || $caller;

    # Only true values are forwarded to their accessors; false or missing
    # arguments leave the corresponding attribute untouched.
    $self->db_adaptor($dba)     if $dba;
    $self->name($name)          if $name;
    $self->assembly($assembly)  if $assembly;
    $self->taxon_id($taxon_id)  if $taxon_id;
    $self->dbID($dbID)          if $dbID;
    $self->genebuild($genebuild) if $genebuild;

    return $self;
}
|
|
105
|
|
106 =head2 new_fast
|
|
107
|
|
108 Arg [1] : hash reference $hashref
|
|
109 Example :
|
|
110 Description: This is an ultra fast constructor which requires knowledge of
|
|
111 the objects internals to be used.
|
|
112 Returntype : Bio::EnsEMBL::Compara::GenomeDB
|
|
113 Exceptions : none
|
|
114 Caller : Bio::EnsEMBL::Compara::DBSQL::GenomeDBAdaptor
|
|
115 Status : Stable
|
|
116
|
|
117 =cut
|
|
118
|
|
sub new_fast {
    # Blesses the caller's hash reference directly into the class: no copy
    # is made and none of the accessors are invoked.
    my ($class, $hashref) = @_;
    return bless $hashref, $class;
}
|
|
125
|
|
126
|
|
127 =head2 db_adaptor
|
|
128
|
|
129 Arg [1] : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba
|
|
130 The DBAdaptor containing sequence information for the genome
|
|
131 represented by this object.
|
|
132 Example : $gdb->db_adaptor($dba);
|
|
133 Description: Getter/Setter for the DBAdaptor containing sequence
|
|
134 information for the genome represented by this object.
|
|
135 Returntype : Bio::EnsEMBL::DBSQL::DBAdaptor
|
|
136 Caller : general
|
|
137 Status : Stable
|
|
138
|
|
139 =cut
|
|
140
|
|
sub db_adaptor {
    my $self    = shift;
    my $adaptor = shift;

    # A true argument replaces the cached adaptor. Only objects derived from
    # Bio::EnsEMBL::DBSQL::DBAdaptor are kept; any other value clears the
    # cache so that the locator fallback below is attempted.
    if ($adaptor) {
        if ($adaptor->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')) {
            $self->{'_db_adaptor'} = $adaptor;
        }
        else {
            $self->{'_db_adaptor'} = undef;
        }
    }

    # Nothing usable cached: try to connect through the locator string.
    $self->{'_db_adaptor'} ||= $self->connect_to_genome_locator;

    return $self->{'_db_adaptor'};
}
|
|
156
|
|
157
|
|
158
|
|
159 =head2 name
|
|
160
|
|
161 Arg [1] : (optional) string $value
|
|
162 Example : $gdb->name('Homo sapiens');
|
|
163 Description: Getter setter for the name of this genome database, usually
|
|
164 just the species name.
|
|
165 Returntype : string
|
|
166 Exceptions : none
|
|
167 Caller : general
|
|
168 Status : Stable
|
|
169
|
|
170 =cut
|
|
171
|
|
sub name {
    my $self     = shift;
    my $new_name = shift;

    # Acts as a setter only when a defined value is supplied.
    $self->{'name'} = $new_name if defined $new_name;

    return $self->{'name'};
}
|
|
180
|
|
181
|
|
182 =head2 short_name
|
|
183
|
|
184 Example : $gdb->short_name;
|
|
185 Description: The name of this genome in the Gspe ('G'enera
|
|
186 'spe'cies) format. Can also handle 'G'enera 's'pecies
|
|
187 's'ub 's'pecies (Gsss)
|
|
188 Returntype : string
|
|
189 Exceptions : none
|
|
190 Caller : general
|
|
191 Status : Stable
|
|
192
|
|
193 =cut
|
|
194
|
|
sub short_name {
    my ($self) = @_;
    my $label = $self->name;

    # Upper-case the first character of every word, then turn underscores
    # into spaces. The order matters: '_' is a word character, so letters
    # that follow an underscore are not upper-cased.
    $label =~ s/\b(\w)/\U$1/g;
    $label =~ s/\_/\ /g;

    # Four or more words: first character of each of the first four words.
    return $label if $label =~ s/(\S)\S*\s(\S)\S*\s(\S)\S*\s(\S).*/$1$2$3$4/;

    # Three words: first character of the first two, two of the third.
    return $label if $label =~ s/(\S)\S*\s(\S)\S*\s(\S{2,2}).*/$1$2$3/;

    # Two words: first character of the first, three of the second.
    return $label if $label =~ s/(\S)\S*\s(\S{3,3}).*/$1$2/;

    # No pattern applied: fall back to the first four characters.
    return substr( $label, 0, 4 );
}
|
|
209
|
|
210 =head2 get_short_name
|
|
211
|
|
212 Example : $gdb->get_short_name;
|
|
213 Description: The name of this genome in the Gspe ('G'enera
|
|
214 'spe'cies) format. Can also handle 'G'enera 's'pecies
|
|
215 's'ub 's'pecies (Gsss)
|
|
216 Returntype : string
|
|
217 Exceptions : none
|
|
218 Caller : general
|
|
219 Status : Stable
|
|
220
|
|
221 =cut
|
|
222
|
|
sub get_short_name {
    # Alias that simply delegates to short_name().
    my ($self) = @_;
    return $self->short_name;
}
|
|
227
|
|
228
|
|
229 =head2 dbID
|
|
230
|
|
231 Arg [1] : (optional) int $value the new value of this objects database
|
|
232 identifier
|
|
233 Example : $dbID = $genome_db->dbID;
|
|
234 Description: Getter/Setter for the internal identifier of this GenomeDB
|
|
235 Returntype : int
|
|
236 Exceptions : none
|
|
237 Caller : general
|
|
238 Status : Stable
|
|
239
|
|
240 =cut
|
|
241
|
|
sub dbID {
    my $self   = shift;
    my $new_id = shift;

    # A defined argument (including 0) replaces the stored identifier.
    if (defined $new_id) {
        $self->{'dbID'} = $new_id;
    }

    return $self->{'dbID'};
}
|
|
249
|
|
250
|
|
251 =head2 adaptor
|
|
252
|
|
253 Arg [1] : (optional) Bio::EnsEMBL::Compara::GenomeDBAdaptor $adaptor
|
|
254 Example : $adaptor = $GenomeDB->adaptor();
|
|
255 Description: Getter/Setter for the GenomeDB object adaptor used
|
|
256 by this GenomeDB for database interaction.
|
|
257 Returntype : Bio::EnsEMBL::Compara::GenomeDBAdaptor
|
|
258 Exceptions : none
|
|
259 Caller : general
|
|
260 Status : Stable
|
|
261
|
|
262 =cut
|
|
263
|
|
sub adaptor {
    my $self        = shift;
    my $new_adaptor = shift;

    # Store the adaptor only when one is actually supplied.
    $self->{'adaptor'} = $new_adaptor if defined $new_adaptor;

    return $self->{'adaptor'};
}
|
|
271
|
|
272
|
|
273 =head2 assembly
|
|
274
|
|
275 Arg [1] : (optional) string
|
|
276 Example : $gdb->assembly('NCBI36');
|
|
277 Description: Getter/Setter for the assembly type of this genome db.
|
|
278 Returntype : string
|
|
279 Exceptions : none
|
|
280 Caller : general
|
|
281 Status : Stable
|
|
282
|
|
283 =cut
|
|
284
|
|
sub assembly {
    # Getter/setter for the assembly name of this genome db.
    #   Arg [1]    : (optional) string $assembly
    #   Returntype : string (undef if never set)
    my ($self, $assembly) = @_;

    # Test for definedness rather than truth, as the name, dbID and taxon_id
    # accessors do, so that defined-but-false values such as '0' are stored
    # instead of being silently ignored. Passing undef still acts as a
    # plain getter.
    if (defined $assembly) {
        $self->{'assembly'} = $assembly;
    }

    return $self->{'assembly'};
}
|
|
294
|
|
295 =head2 assembly_default
|
|
296
|
|
297 Arg [1] : (optional) int
|
|
298 Example : $gdb->assembly_default(1);
|
|
299 Description: Getter/Setter for the assembly_default of this genome db.
|
|
300 Returntype : int
|
|
301 Exceptions : none
|
|
302 Caller : general
|
|
303 Status : Stable
|
|
304
|
|
305 =cut
|
|
306
|
|
sub assembly_default {
    my ($self, $flag) = @_;

    # A defined argument (0 included) replaces the stored flag.
    if (defined $flag) {
        $self->{'assembly_default'} = $flag;
    }

    # The flag defaults to '1' when nothing has been stored yet; the
    # default is written into the object.
    unless (defined $self->{'assembly_default'}) {
        $self->{'assembly_default'} = '1';
    }

    return $self->{'assembly_default'};
}
|
|
317
|
|
318 =head2 genebuild
|
|
319
|
|
320 Arg [1] : (optional) string
|
|
321 Example : $gdb->genebuild('2006-12-Ensembl');
|
|
322 Description: Getter/Setter for the genebuild type of this genome db.
|
|
323 Returntype : string
|
|
324 Exceptions : none
|
|
325 Caller : general
|
|
326 Status : Stable
|
|
327
|
|
328 =cut
|
|
329
|
|
sub genebuild {
    my ($self, @args) = @_;

    # Any supplied argument, even undef, overwrites the stored value.
    if (@args) {
        $self->{'genebuild'} = $args[0];
    }

    # An undefined value is normalised to the empty string and stored.
    unless (defined $self->{'genebuild'}) {
        $self->{'genebuild'} = '';
    }

    return $self->{'genebuild'};
}
|
|
336
|
|
337
|
|
338 =head2 taxon_id
|
|
339
|
|
340 Arg [1] : (optional) int
|
|
341 Example : $gdb->taxon_id(9606);
|
|
342 Description: Getter/Setter for the taxon id of the contained genome db
|
|
343 Returntype : int
|
|
344 Exceptions : none
|
|
345 Caller : general
|
|
346 Status : Stable
|
|
347
|
|
348 =cut
|
|
349
|
|
sub taxon_id {
    my ($self, $new_taxon_id) = @_;

    # Only a defined argument updates the stored taxon id.
    $self->{'taxon_id'} = $new_taxon_id if defined $new_taxon_id;

    return $self->{'taxon_id'};
}
|
|
359
|
|
360 =head2 taxon
|
|
361
|
|
362 Description: uses taxon_id to fetch the NCBITaxon object
|
|
363 Returntype : Bio::EnsEMBL::Compara::NCBITaxon object
|
|
364 Exceptions : if taxon_id or adaptor not defined
|
|
365 Caller : general
|
|
366 Status : Stable
|
|
367
|
|
368 =cut
|
|
369
|
|
sub taxon {
    my ($self) = @_;

    # Serve the cached taxon object when one has already been fetched.
    if (defined $self->{'_taxon'}) {
        return $self->{'_taxon'};
    }

    # Both a taxon_id and an adaptor are required for the lookup.
    if (!defined $self->taxon_id or !$self->adaptor) {
        throw("can't fetch Taxon without a taxon_id and an adaptor");
    }

    # Fetch through the NCBITaxon adaptor of the attached database and
    # cache the result on the object.
    my $taxon_adaptor = $self->adaptor->db->get_NCBITaxonAdaptor;
    my $node = $taxon_adaptor->fetch_node_by_taxon_id($self->{'taxon_id'});
    $self->{'_taxon'} = $node;

    return $self->{'_taxon'};
}
|
|
382
|
|
383
|
|
384 =head2 locator
|
|
385
|
|
386 Arg [1] : string
|
|
387 Description: Returns a string which describes where the external genome (ensembl core)
|
|
388 database is located. Locator format is:
|
|
389 "Bio::EnsEMBL::DBSQL::DBAdaptor/host=ecs4;port=3351;user=ensro;dbname=mus_musculus_core_20_32"
|
|
390 Returntype : string
|
|
391 Exceptions : none
|
|
392 Caller : general
|
|
393 Status : Stable
|
|
394
|
|
395 =cut
|
|
396
|
|
sub locator {
    my ($self, @args) = @_;

    # Any supplied argument, even undef, overwrites the stored locator.
    if (@args) {
        $self->{'locator'} = $args[0];
    }

    # An undefined locator is normalised to the empty string and stored.
    unless (defined $self->{'locator'}) {
        $self->{'locator'} = '';
    }

    return $self->{'locator'};
}
|
|
403
|
|
404 =head2 connect_to_genome_locator
|
|
405
|
|
406 Arg [1] : string
|
|
407 Description: uses the locator string to connect to the external genome database
|
|
408 Returntype : DBConnection/DBAdaptor defined in locator string
|
|
409 (usually a Bio::EnsEMBL::DBSQL::DBAdaptor)
|
|
410 return undef if locator undefined or unable to connect
|
|
411 Exceptions : none
|
|
412 Caller : internal private method
|
|
413 Status : Stable
|
|
414
|
|
415 =cut
|
|
416
|
|
sub connect_to_genome_locator {
    my ($self) = @_;

    # Without a locator string there is nothing to connect to.
    if ($self->locator eq '') {
        return undef;
    }

    # Let the DBLoader build the adaptor; a failure is reported as a warning
    # and the method then returns undef instead of dying.
    my $genome_dba;
    eval { $genome_dba = Bio::EnsEMBL::DBLoader->new($self->locator); };
    if ($@) {
        warn "The locator could not be loaded because: $@";
    }

    return $genome_dba;
}
|
|
427
|
|
428
|
|
429 =head2 toString
|
|
430
|
|
431 Args : (none)
|
|
432 Example : print $genome_db->toString()."\n";
|
|
433 Description: returns a stringified representation of the object
|
|
434 Returntype : string
|
|
435
|
|
436 =cut
|
|
437
|
|
sub toString {
    my ($self) = @_;

    # Collect every field first, in the order it is printed; a missing or
    # false dbID is shown as '?'.
    my $class     = ref($self);
    my $dbID      = $self->dbID || '?';
    my $name      = $self->name;
    my $assembly  = $self->assembly;
    my $genebuild = $self->genebuild;
    my $taxon_id  = $self->taxon_id;
    my $locator   = $self->locator;

    return join('',
        $class,     ": dbID=",
        $dbID,      ", name='",
        $name,      "', assembly='",
        $assembly,  "', genebuild='",
        $genebuild, "', taxon_id='",
        $taxon_id,  "', locator='",
        $locator,   "'",
    );
}
|
|
449
|
|
450
|
|
451 1;
|