0
|
1 #
|
|
2 # EnsEMBL module for Bio::EnsEMBL::Funcgen::CoordSystem
|
|
3 #
|
|
4
|
|
5
|
|
6 =head1 LICENSE
|
|
7
|
|
8 Copyright (c) 1999-2011 The European Bioinformatics Institute and
|
|
9 Genome Research Limited. All rights reserved.
|
|
10
|
|
11 This software is distributed under a modified Apache license.
|
|
12 For license details, please see
|
|
13
|
|
14 http://www.ensembl.org/info/about/code_licence.html
|
|
15
|
|
16 =head1 CONTACT
|
|
17
|
|
18 Please email comments or questions to the public Ensembl
|
|
19 developers list at <ensembl-dev@ebi.ac.uk>.
|
|
20
|
|
21 Questions may also be sent to the Ensembl help desk at
|
|
22 <helpdesk@ensembl.org>.
|
|
23
|
|
24
|
|
25 =head1 NAME
|
|
26
|
|
27 Bio::EnsEMBL::Funcgen::CoordSystem
|
|
28
|
|
29 =head1 SYNOPSIS
|
|
30
|
|
31 my $db = Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor->new(...);
|
|
32
|
|
33 my $csa = $db->get_CoordSystemAdaptor();
|
|
34
|
|
35 #
|
|
36 # Get default chromosome coord system for the 39_36a DB:
|
|
37 #
|
|
38 my $cs = $csa->fetch_by_name_schema_build_version('chromosome', '39_36a');
|
|
39 my $str = join ':', $cs->name(),$cs->version(),$cs->dbID();
|
|
40 print "$str\n";
|
|
41
|
|
42
|
|
43 =head1 DESCRIPTION
|
|
44
|
|
45 This has been adapted from the core CoordSystem object to accommodate the multi-assembly
|
|
46 aspects of the eFG schema, namely handling the schema_build of the referenced core DB.
|
|
47
|
|
48 This is a simple object which contains a few coordinate system attributes:
|
|
49 name, internal identifier, version and schema_build. A coordinate system is
|
|
50 uniquely defined by its name and version and which DB it came from i.e. schema_build.
|
|
51 A version of a coordinate system applies to all sequences within a coordinate system.
|
|
52 This should not be confused with individual sequence versions.
|
|
53
|
|
54 Take for example the Human assembly. The version 'NCBI33' applies to
|
|
55 all chromosomes in the NCBI33 assembly (that is the entire 'chromosome'
|
|
56 coordinate system). The 'clone' coordinate system in the same database would
|
|
57 have no version however. Although the clone sequences have their own sequence
|
|
58 versions, there is no version which applies to the entire set of clones.
|
|
59
|
|
60 Coordinate system objects are immutable. Their name and version, and other
|
|
61 attributes may not be altered after they are created.
|
|
62
|
|
63 =cut
|
|
64
|
|
65
|
|
66 use strict;
|
|
67 use warnings;
|
|
68
|
|
69 package Bio::EnsEMBL::Funcgen::CoordSystem;
|
|
70
|
|
71 use Bio::EnsEMBL::Storable;
|
|
72
|
|
73 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
74 use Bio::EnsEMBL::Utils::Exception qw(throw);
|
|
75
|
|
76 use vars qw(@ISA);
|
|
77
|
|
78 @ISA = qw(Bio::EnsEMBL::Storable);
|
|
79
|
|
80 my %warnings;
|
|
81
|
|
82
|
|
83 =head2 new
|
|
84
|
|
85 Arg [..] : List of named arguments:
|
|
86 -NAME - The name of the coordinate system
|
|
87 -VERSION - (optional) The version of the coordinate system.
|
|
88 Note that if the version passed in is undefined,
|
|
89 it will be set to the empty string in the
|
|
90 resulting CoordSystem object.
|
|
91 -RANK - The rank of the coordinate system. The highest
|
|
92 level coordinate system should have rank 1, the
|
|
93 second highest rank 2 and so on. An example of
|
|
94 a high level coordinate system is 'chromosome' an
|
|
95 example of a lower level coordinate system is
|
|
96 'clone'.
|
|
97 -SCHEMA_BUILD - The schema and data build version of the DB of
|
|
98 origin.
|
|
99 -TOP_LEVEL - (optional) Sets whether this is a top-level coord
|
|
100 system. Default = 0. This should only be set to
|
|
101 true if you are creating an artificial toplevel
|
|
102 coordsystem by the name of 'toplevel'
|
|
103 -SEQUENCE_LEVEL - (optional) Sets whether this is a sequence
|
|
104 level coordinate system. Default = 0
|
|
105 -DEFAULT - (optional)
|
|
106 Whether this is the default version of the
|
|
107 coordinate systems of this name. Default = 0
|
|
108 -DBID - (optional) The internal identifier of this
|
|
109 coordinate system
|
|
110 -ADAPTOR - (optional) The adaptor which provides database
|
|
111 interaction for this object
|
|
112 Example : $cs = Bio::EnsEMBL::Funcgen::CoordSystem->new(-NAME => 'chromosome',
|
|
113 -VERSION => 'NCBI33',
|
|
114 -RANK => 1,
|
|
115 -DBID => 1,
|
|
116 -SCHEMA_BUILD => '39_36a',
|
|
117 -ADAPTOR => adaptor,
|
|
118 -DEFAULT => 1,
|
|
119 -SEQUENCE_LEVEL => 0);
|
|
120 Description: Creates a new CoordSystem object representing a coordinate
|
|
121 system.
|
|
122 Returntype : Bio::EnsEMBL::Funcgen::CoordSystem
|
|
123 Exceptions : Throws if no NAME argument is supplied
|
|
124 Caller : general
|
|
125 Status : Stable
|
|
126
|
|
127 =cut
|
|
128
|
|
# Constructor. Delegates to the parent constructor and then records the
# coordinate system name and version on the object.
#
# Named arguments read here:
#   -NAME    (required) name of the coordinate system
#   -VERSION (optional) version; stored as the empty string when undefined
#
# All arguments are also forwarded untouched to SUPER::new
# (presumably Bio::EnsEMBL::Storable handles -ADAPTOR/-DBID there - confirm).
#
# Throws if no true NAME argument is supplied.
#
# NOTE: rank, schema_build, top level, sequence level and default are NOT
# handled here any more; the per-schema_build values are registered via
# add_core_coord_system_info() and kept in the 'core_cache' hash.
sub new {
  my ($caller, @args) = @_;
  my $class = ref($caller) || $caller;

  my $self = $class->SUPER::new(@args);

  my ($name, $version) = rearrange([ 'NAME', 'VERSION' ], @args);

  if (! $name) {
    throw('A name argument is required');
  }

  # Per schema_build core coord system details, filled in later.
  $self->{'core_cache'} = {};
  $self->{'name'}       = $name;
  # An undefined version is normalised to the empty string.
  $self->{'version'}    = defined($version) ? $version : '';

  return $self;
}
|
|
205
|
|
206
|
|
207 =head2 add_core_coord_system_info
|
|
208
|
|
209 Arg [1] : mandatory hash:
|
|
210
|
|
211 -RANK => $rank,
|
|
212 -SEQUENCE_LEVEL => $seq_lvl,
|
|
213 -DEFAULT => $default,
|
|
214 -SCHEMA_BUILD => $sbuild,
|
|
215 -CORE_COORD_SYSTEM_ID => $ccs_id,
|
|
216 -IS_STORED => $stored_status,
|
|
217
|
|
218 Example : $cs->add_core_coord_system_info(
|
|
219 -RANK => $rank,
|
|
220 -SEQUENCE_LEVEL => $seq_lvl,
|
|
221 -DEFAULT => $default,
|
|
222 -SCHEMA_BUILD => $sbuild,
|
|
223 -CORE_COORD_SYSTEM_ID => $ccs_id,
|
|
224 -IS_STORED => 1,
|
|
225 );
|
|
226
|
|
227 Description: Setter for core coord system information
|
|
228 Returntype : none
|
|
229 Exceptions : throws if:
|
|
230 rank not 0 when toplevel
|
|
231 name not 'toplevel' when toplevel
|
|
232 sequence level and top level
|
|
233 no schema_build defined
|
|
234 no rank
|
|
235 rank 0 when not toplevel
|
|
236 name 'toplevel' when not toplevel
|
|
237
|
|
238 Caller : Bio::EnsEMBL::Funcgen::DBSQL::CoordSystemAdaptor and ?
|
|
239 Status : at risk - replace with add_core_CoordSystem? implement top level?
|
|
240
|
|
241 #this does not check name and version!
|
|
242
|
|
243
|
|
244 =cut
|
|
245
|
|
# Registers the core coord system details for one schema_build in this
# object's 'core_cache'.
#
# Named arguments: -SCHEMA_BUILD and -CORE_COORD_SYSTEM_ID are mandatory;
# -RANK, -SEQUENCE_LEVEL, -DEFAULT, -TOP_LEVEL and -IS_STORED are normalised
# below. Returns nothing. Any existing cache entry for the same schema_build
# is overwritten.
#
# Throws when schema_build or core_coord_system_id is missing, when the
# top level / rank / name / sequence level combination is inconsistent, or
# when rank is not made up of digits only.
sub add_core_coord_system_info {
  my ($self, @args) = @_;

  my ($sbuild, $top_level, $sequence_level, $default, $rank, $stored, $ccs_id) =
    rearrange([ 'SCHEMA_BUILD', 'TOP_LEVEL', 'SEQUENCE_LEVEL', 'DEFAULT',
                'RANK', 'IS_STORED', 'CORE_COORD_SYSTEM_ID' ], @args);

  if (! $sbuild) {
    throw('Must provide a schema_build');
  }

  if (! $ccs_id) {
    throw('Must provide a core_coord_system_id');
  }

  # Collapse the flags to strict 0/1 values and default the rest to 0.
  $sequence_level = $sequence_level ? 1 : 0;
  $default        = $default        ? 1 : 0;
  $stored         = 0 if ! $stored;
  $rank           = 0 if ! $rank;

  if ($top_level) {
    throw('RANK argument must be 0 if TOP_LEVEL is 1') if $rank;

    my $name = $self->name();

    # A coord system without a name would need to be named 'toplevel'
    # here, which is not supported yet.
    throw('toplevel not yet implemented') if ! $name;
    throw('The NAME argument must be "toplevel" if TOP_LEVEL is 1')
      if $name ne 'toplevel';

    throw("SEQUENCE_LEVEL argument must be 0 if TOP_LEVEL is 1")
      if $sequence_level;

    # A top level coord system is never flagged as the default version.
    $default = 0;
  }
  else {
    throw("RANK argument must be non-zero if not toplevel CoordSystem")
      if ! $rank;
    throw("Cannot name coord system 'toplevel' unless TOP_LEVEL is 1")
      if $self->name() eq 'toplevel';
  }

  if ($rank !~ /^\d+$/) {
    throw('The RANK argument must be a positive integer');
  }

  # Unstored coord systems may be added here too. Initialising this cache
  # key is what silences the "nearest coord_system" warning in equals().
  my %core_info = (
    RANK                 => $rank,
    SEQUENCE_LEVEL       => $sequence_level,
    DEFAULT              => $default,
    CORE_COORD_SYSTEM_ID => $ccs_id,
    IS_STORED            => $stored,
  );

  $self->{'core_cache'}{$sbuild} = \%core_info;

  return;
}
|
|
317
|
|
318
|
|
319 #remove all but schema_build and equals?
|
|
320 #depends on how we handle levels
|
|
321
|
|
322 =head2 name
|
|
323
|
|
324 Arg [1] : none
|
|
325 Example : print $coord_system->name();
|
|
326 Description: Getter for the name of this coordinate system
|
|
327 Returntype : string
|
|
328 Exceptions : none
|
|
329 Caller : general
|
|
330 Status : Stable
|
|
331
|
|
332 =cut
|
|
333
|
|
# Read-only accessor: returns the coordinate system name stored on the
# object. Any extra arguments are ignored.
sub name {
  my ($self) = @_;

  my $name = $self->{'name'};
  return $name;
}
|
|
338
|
|
339
|
|
340 =head2 get_latest_schema_build
|
|
341
|
|
342 Example : my $db_schema_build = $coord_system->get_latest_schema_build();
|
|
343 Description: Getter for the most recent schema_build of this coordinate system
|
|
344 Returntype : string
|
|
345 Exceptions : none
|
|
346 Caller : general
|
|
347 Status : at risk
|
|
348
|
|
349 =cut
|
|
350
|
|
# Returns the most recent schema_build registered in the core cache.
#
# schema_build strings look like '<release>_<assembly build>' (e.g. '43_36e').
# They are ordered by their leading release number compared numerically,
# with a plain string comparison as the tie-break, and the highest one is
# returned. A purely lexical sort is not sufficient: it would rank '9_36'
# above '43_36e', and taking its first element yields the oldest build.
#
# Returns undef (an empty list in list context) when no schema_build has
# been registered.
sub get_latest_schema_build {
  my $self = shift;

  my @ranked =
    sort { $a->[0] <=> $b->[0] || $a->[1] cmp $b->[1] }
    # Pair every key with its numeric release; keys without a leading
    # number sort first (release 0) rather than triggering warnings.
    map  { [ ($_ =~ /\A(\d+)/ ? $1 : 0), $_ ] }
    keys %{ $self->{'core_cache'} };

  return if ! @ranked;

  return $ranked[-1][1];
}
|
|
356
|
|
357
|
|
358 =head2 contains_schema_build
|
|
359
|
|
360 Example : if ($coord_system->contains_schema_build('43_36e')){..do some coord system things ..};
|
|
361 Description: Returns true if the CoordSystem maps to the corresponding core CoordSystem
|
|
362 Returntype : Boolean
|
|
363 Exceptions : throws if schema_build not defined
|
|
364 Caller : general
|
|
365 Status : at risk
|
|
366
|
|
367 =cut
|
|
368
|
|
# Reports whether core coord system details have been registered for the
# given schema_build. Returns 1 when the core cache has an entry for it,
# 0 otherwise. Throws when no (true) schema_build is passed.
sub contains_schema_build {
  my $self         = shift;
  my $schema_build = shift;

  if (! $schema_build) {
    throw('Must pass a schema_build');
  }

  # exists() on the final key does not create a cache entry as a side effect.
  return 1 if exists $self->{'core_cache'}{$schema_build};

  return 0;
}
|
|
376
|
|
377 =head2 version
|
|
378
|
|
379 Arg [1] : none
|
|
380 Example : print $coord->version();
|
|
381 Description: Getter for the version of this coordinate system. This
|
|
382 will return an empty string if no version is defined for this
|
|
383 coordinate system.
|
|
384 Returntype : string
|
|
385 Exceptions : none
|
|
386 Caller : general
|
|
387 Status : Stable
|
|
388
|
|
389 =cut
|
|
390
|
|
# Read-only accessor: returns the coordinate system version stored on the
# object. Any extra arguments are ignored.
sub version {
  my ($self) = @_;

  my $version = $self->{'version'};
  return $version;
}
|
|
396
|
|
397
|
|
398
|
|
399
|
|
400 =head2 equals
|
|
401
|
|
402 Arg [1] : Bio::EnsEMBL::Funcgen::CoordSystem $cs
|
|
403 The coord system to compare to for equality.
|
|
404 Example : if($coord_sys->equals($other_coord_sys)) { ... }
|
|
405 Description: Compares 2 coordinate systems and returns true if they are
|
|
406 equivalent. The definition of equivalent is sharing the same
|
|
407 name and version.
|
|
408 Returntype : 0 or 1
|
|
409 Exceptions : Throws if the argument is not a CoordSystem object
|
|
410 Caller : general
|
|
411 Status : At risk
|
|
412
|
|
413 =cut
|
|
414
|
|
# Compares this coordinate system with another one.
#
# Arg [1] : a Bio::EnsEMBL::Funcgen::CoordSystem or Bio::EnsEMBL::CoordSystem
# Returns : 1 when name and version are both identical, 0 otherwise
# Throws  : when the argument is not an object of one of those two classes
#
# When the two match but this object has no core cache entry for the
# schema_build of the other coord system's adaptor, a warning is printed
# once per schema_build/name/version combination and 1 is still returned.
sub equals {
  my ($self, $cs) = @_;

  # UNIVERSAL::can() used as a function is false for unblessed references,
  # so a plain hash/array ref is rejected with the intended exception
  # rather than dying inside the ->isa() call.
  my $is_coord_system =
       $cs
    && ref($cs)
    && UNIVERSAL::can($cs, 'isa')
    && (   $cs->isa('Bio::EnsEMBL::Funcgen::CoordSystem')
        || $cs->isa('Bio::EnsEMBL::CoordSystem'));

  if (! $is_coord_system) {
    throw('Argument must be a Bio::EnsEMBL::Funcgen::CoordSystem');
  }

  # Equivalence is defined on version and name only.
  return 0 if $self->version() ne $cs->version();
  return 0 if $self->name()    ne $cs->name();

  # We need to make sure these are default coord systems, otherwise we can
  # get into trouble with re-used or mismatched seq_region_ids between DBs
  # with different default assemblies.
  # The schema_build is resolved once and reused for the cache check, the
  # warning key and the warning text.
  my $schema_build = $self->adaptor->db->_get_schema_build($cs->adaptor());

  if (! $self->contains_schema_build($schema_build)) {

    # Only warn the first time this combination is seen.
    my $warning_key = join(':', $schema_build, $self->name(), $self->version());

    if (! exists $warnings{$warning_key}) {
      warn 'You are using a schema_build('.$schema_build.') which has no CoordSystem stored for '.$cs->version.". Defaulting to closest name version match.\n";
      $warnings{$warning_key} = 1;
    }
  }

  return 1;
}
|
|
451
|
|
452
|
|
453
|
|
454
|
|
455 =head2 is_top_level
|
|
456
|
|
457 Arg [1] : none
|
|
458 Example : if($coord_sys->is_top_level()) { ... }
|
|
459 Description: Returns true if this is the toplevel pseudo coordinate system.
|
|
460 The toplevel coordinate system is not a real coordinate system
|
|
461 which is stored in the database, but it is a placeholder that
|
|
462 can be used to request transformations or retrievals to/from
|
|
463 the highest defined coordinate system in a given region.
|
|
464 Returntype : 0 or 1
|
|
465 Exceptions : Always throws; not yet implemented
|
|
466 Caller : general
|
|
467 Status : at risk - not implemented yet
|
|
468
|
|
469 =cut
|
|
470
|
|
# Placeholder for the top level test: currently raises an exception on
# every call because the check against the core cache is not implemented.
sub is_top_level {
  my ($self) = @_;

  throw('Not yet implmented, need to test against the core cache using dnadb/schema_build');

  # Not reached while throw() raises an exception; kept for when the
  # check above is replaced by a real implementation.
  return $self->{'top_level'};
}
|
|
478
|
|
479
|
|
480 #These attribute methods are largely redundant
|
|
481 #is_default is used by Feature Adaptors to restrict features to
|
|
482 #current default assembly for non slice based methods
|
|
483 #Especially redundant now we have implemented this in fetch_all
|
|
484
|
|
485 =head2 is_sequence_level
|
|
486
|
|
487 Arg [1] : Bio::EnsEMBL::DBSQL::DBAdaptor
|
|
488 Example : if($coord_sys->is_sequence_level($dnadb)) { ... }
|
|
489 Description: Returns true if this is a sequence level coordinate system
|
|
490 for a given dnadb
|
|
491 Returntype : 0 or 1
|
|
492 Exceptions : none
|
|
493 Caller : general
|
|
494 Status : at risk
|
|
495
|
|
496 =cut
|
|
497
|
|
# Returns the SEQUENCE_LEVEL value recorded for the schema_build of the
# given dnadb, by delegating to get_coord_system_attribute().
sub is_sequence_level {
  my $self  = shift;
  my $dnadb = shift;

  return $self->get_coord_system_attribute('sequence_level', $dnadb);
}
|
|
503
|
|
504
|
|
505 =head2 is_default
|
|
506
|
|
507 Arg [1] : Bio::EnsEMBL::DBSQL::DBAdaptor
|
|
508 Example : if($coord_sys->is_default($dnadb)) { ... }
|
|
509 Description: Returns true if this coordinate system is the default
|
|
510 version of the coordinate system of this name for a given dnadb.
|
|
511 Returntype : 0 or 1
|
|
512 Exceptions : none
|
|
513 Caller : general - Used
|
|
514 Status : at risk
|
|
515
|
|
516 =cut
|
|
517
|
|
# Returns the DEFAULT value recorded for the schema_build of the given
# dnadb, by delegating to get_coord_system_attribute().
sub is_default {
  my $self  = shift;
  my $dnadb = shift;

  return $self->get_coord_system_attribute('default', $dnadb);
}
|
|
523
|
|
# Looks up one core coord system attribute for the schema_build that
# corresponds to the given dnadb.
#
# Arg [1] : attribute name; it is upper-cased before the lookup, so
#           'rank', 'default' and 'sequence_level' map to the RANK,
#           DEFAULT and SEQUENCE_LEVEL cache keys
# Arg [2] : Bio::EnsEMBL::DBSQL::DBAdaptor
# Returns : the cached value, or undef if that key was never stored
# Throws  : if the dnadb is missing or of the wrong class, or if this
#           coord system has no entry for the dnadb's schema_build
sub get_coord_system_attribute {
  my ($self, $attr_name, $dnadb) = @_;

  my $is_dnadb = $dnadb
    && ref($dnadb)
    && $dnadb->isa('Bio::EnsEMBL::DBSQL::DBAdaptor');

  if (! $is_dnadb) {
    throw("You must pass a dnadb to access the CoordSystem attribute:\t $attr_name");
  }

  my $schema_build = $self->adaptor->db->_get_schema_build($dnadb);

  throw("CoordSystem does not contain the schema_build:\t$schema_build")
    if ! $self->contains_schema_build($schema_build);

  my $cache_key = uc($attr_name);

  return $self->{'core_cache'}{$schema_build}{$cache_key};
}
|
|
540
|
|
541
|
|
542 =head2 rank
|
|
543
|
|
544 Arg [1] : Bio::EnsEMBL::DBSQL::DBAdaptor
|
|
545 Example : if($cs1->rank($dnadb) < $cs2->rank($dnadb)) {
|
|
546 print $cs1->name(), " is a higher level coord system than",
|
|
547 $cs2->name(), "\n";
|
|
548 }
|
|
549 Description: Returns the rank of this coordinate system for a given dnadb.
|
|
550 A lower number is a higher coordinate system. The highest level coordinate
|
|
551 system has a rank of 1 (e.g. 'chromosome'). The toplevel
|
|
552 pseudo coordinate system has a rank of 0.
|
|
553 Returntype : int
|
|
554 Exceptions : none
|
|
555 Caller : general
|
|
556 Status : at risk - not yet implemented
|
|
557
|
|
558 =cut
|
|
559
|
|
# Returns the RANK value recorded for the schema_build of the given dnadb,
# by delegating to get_coord_system_attribute().
sub rank {
  my $self  = shift;
  my $dnadb = shift;

  return $self->get_coord_system_attribute('rank', $dnadb);
}
|
|
565
|
|
566 1;
|