comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/CoordSystem.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 #
2 # EnsEMBL module for Bio::EnsEMBL::Funcgen::CoordSystem
3 #
4
5
6 =head1 LICENSE
7
8 Copyright (c) 1999-2011 The European Bioinformatics Institute and
9 Genome Research Limited. All rights reserved.
10
11 This software is distributed under a modified Apache license.
12 For license details, please see
13
14 http://www.ensembl.org/info/about/code_licence.html
15
16 =head1 CONTACT
17
18 Please email comments or questions to the public Ensembl
19 developers list at <ensembl-dev@ebi.ac.uk>.
20
21 Questions may also be sent to the Ensembl help desk at
22 <helpdesk@ensembl.org>.
23
24
25 =head1 NAME
26
27 Bio::EnsEMBL::Funcgen::CoordSystem
28
29 =head1 SYNOPSIS
30
31 my $db = Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor->new(...);
32
33 my $csa = $db->get_CoordSystemAdaptor();
34
35 #
36 # Get default chromosome coord system for the 39_36a DB:
37 #
38 my $cs = $csa->fetch_by_name_schema_build_version('chromosome', '39_36a');
39 my $str = join ':', $cs->name(),$cs->version(),$cs->dbID();
40 print "$str\n";
41
42
43 =head1 DESCRIPTION
44
45 This has been adapted from the core CoordSystem object to accommodate the multi-assembly
46 aspects of the eFG schema, namely handling the schema_build of the referenced core DB.
47
48 This is a simple object which contains a few coordinate system attributes:
49 name, internal identifier, version and schema_build. A coordinate system is
50 uniquely defined by its name and version and which DB it came from i.e. schema_build.
51 A version of a coordinate system applies to all sequences within a coordinate system.
52 This should not be confused with individual sequence versions.
53
54 Take for example the Human assembly. The version 'NCBI33' applies
55 to all chromosomes in the NCBI33 assembly (that is the entire 'chromosome'
56 coordinate system). The 'clone' coordinate system in the same database would
57 have no version however. Although the clone sequences have their own sequence
58 versions, there is no version which applies to the entire set of clones.
59
60 Coordinate system objects are immutable. Their name and version, and other
61 attributes may not be altered after they are created.
62
63 =cut
64
65
66 use strict;
67 use warnings;
68
69 package Bio::EnsEMBL::Funcgen::CoordSystem;
70
71 use Bio::EnsEMBL::Storable;
72
73 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
74 use Bio::EnsEMBL::Utils::Exception qw(throw);
75
# Base class: new() chains to its constructor via SUPER::new, and the
# adaptor() accessor used by equals()/get_coord_system_attribute() is not
# defined in this file, so it is presumably inherited from here.
# Declared with 'our' rather than the obsolete 'use vars' pragma.
our @ISA = qw(Bio::EnsEMBL::Storable);

# Keys of the form schema_build:name:version for which equals() has already
# emitted its "no CoordSystem stored" warning, so it is only printed once.
my %warnings;
81
82
=head2 new

  Arg [..]   : List of named arguments:
               -NAME    - The name of the coordinate system (mandatory).
               -VERSION - (optional) The version of the coordinate system.
                          An undefined version is stored as the empty
                          string in the resulting CoordSystem object.
               The complete argument list is also handed unchanged to the
               parent constructor (SUPER::new); -DBID and -ADAPTOR are
               presumably consumed there, they are not read in this method.
               -RANK, -SCHEMA_BUILD, -TOP_LEVEL, -SEQUENCE_LEVEL and
               -DEFAULT are NOT read by this constructor. The per
               schema_build core information is registered afterwards with
               add_core_coord_system_info.
  Example    : my $cs = Bio::EnsEMBL::Funcgen::CoordSystem->new(
                 -NAME    => 'chromosome',
                 -VERSION => 'NCBI33',
                 -DBID    => 1,
                 -ADAPTOR => $adaptor,
               );
  Description: Creates a new CoordSystem object representing a coordinate
               system. The object starts with an empty core cache.
  Returntype : Bio::EnsEMBL::Funcgen::CoordSystem
  Exceptions : throws if no name is passed
  Caller     : general
  Status     : Stable

=cut

sub new {
  my $caller = shift;
  my $class  = ref($caller) || $caller;

  # Let the parent class build the object from the full argument list.
  my $self = $class->SUPER::new(@_);

  # Only name and version are handled here. Rank, level and default flags
  # are specific to a schema_build and are validated and stored by
  # add_core_coord_system_info instead.
  my ($name, $version) = rearrange([ 'NAME', 'VERSION' ], @_);

  if (! $name) {
    throw('A name argument is required');
  }

  # schema_build => core coord system attributes, filled in later
  $self->{'core_cache'} = {};
  $self->{'name'}       = $name;
  $self->{'version'}    = defined($version) ? $version : '';

  return $self;
}
205
206
=head2 add_core_coord_system_info

  Arg [1]    : mandatory hash:

                -RANK                 => $rank,
                -SEQUENCE_LEVEL       => $seq_lvl,
                -DEFAULT              => $default,
                -SCHEMA_BUILD         => $sbuild,
                -CORE_COORD_SYSTEM_ID => $ccs_id,
                -IS_STORED            => $stored_status,

               -TOP_LEVEL is also accepted and validated, but it is not
               written to the cache entry.

  Example    : $cs->add_core_coord_system_info(
                 -RANK                 => $rank,
                 -SEQUENCE_LEVEL       => $seq_lvl,
                 -DEFAULT              => $default,
                 -SCHEMA_BUILD         => $sbuild,
                 -CORE_COORD_SYSTEM_ID => $ccs_id,
                 -IS_STORED            => 1,
               );

  Description: Setter for core coord system information. Stores (or
               overwrites) the cache entry for the given schema_build.
               This does not check name and version!
  Returntype : none
  Exceptions : throws if:
                no schema_build defined
                no core_coord_system_id defined
                rank not 0 when toplevel
                name not 'toplevel' when toplevel
                sequence level and top level
                no rank / rank 0 when not toplevel
                name 'toplevel' when not toplevel
                rank is not a string of digits
  Caller     : Bio::EnsEMBL::Funcgen::DBSQL::CoordSystemAdaptor and ?
  Status     : at risk - replace with add_core_CoordSystem? implement top level?

=cut

sub add_core_coord_system_info {
  my $self = shift;

  my @arg_names = qw(
    SCHEMA_BUILD TOP_LEVEL SEQUENCE_LEVEL DEFAULT
    RANK IS_STORED CORE_COORD_SYSTEM_ID
  );
  my ($sbuild, $top_level, $sequence_level, $default, $rank, $stored, $ccs_id) =
    rearrange(\@arg_names, @_);

  if (! $sbuild) {
    throw('Must provide a schema_build');
  }
  if (! $ccs_id) {
    throw('Must provide a core_coord_system_id');
  }

  # Normalise the flags to 0/1 and give the optional values a 0 default.
  $sequence_level = $sequence_level ? 1 : 0;
  $default        = $default        ? 1 : 0;
  $stored         = 0 if ! $stored;
  $rank           = 0 if ! $rank;

  if (! $top_level) {
    throw("RANK argument must be non-zero if not toplevel CoordSystem")
      if ! $rank;
    throw("Cannot name coord system 'toplevel' unless TOP_LEVEL is 1")
      if $self->name() eq 'toplevel';
  }
  else {
    throw('RANK argument must be 0 if TOP_LEVEL is 1') if $rank;

    my $own_name = $self->name();

    # A nameless toplevel coord system would need its name set here,
    # which is not supported yet.
    throw('toplevel not yet implemented') if ! $own_name;
    throw('The NAME argument must be "toplevel" if TOP_LEVEL is 1')
      if $own_name ne 'toplevel';

    throw("SEQUENCE_LEVEL argument must be 0 if TOP_LEVEL is 1")
      if $sequence_level;

    # A toplevel coord system is never flagged as default.
    $default = 0;
  }

  throw('The RANK argument must be a positive integer')
    if $rank !~ /^\d+$/;

  # Unstored coord systems may be added here as well (IS_STORED => 0).
  # Creating the entry makes contains_schema_build() true for $sbuild,
  # which turns off the nearest-match warning issued by equals().
  my %core_info = (
    RANK                 => $rank,
    SEQUENCE_LEVEL       => $sequence_level,
    DEFAULT              => $default,
    CORE_COORD_SYSTEM_ID => $ccs_id,
    IS_STORED            => $stored,
  );
  $self->{'core_cache'}{$sbuild} = \%core_info;

  return;
}
317
318
319 #remove all but schema_build and equals?
320 #depends on how we handle levels
321
=head2 name

  Arg [1]    : none
  Example    : print $coord_system->name();
  Description: Getter for the name of this coordinate system, as set
               by the constructor.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub name {
  my ($self) = @_;

  return $self->{'name'};
}
338
339
=head2 get_latest_schema_build

  Example    : my $db_schema_build = $coord_system->get_latest_schema_build();
  Description: Getter for the most recent schema_build of this coordinate
               system, i.e. the highest schema_build key present in the
               core cache. Keys are ordered by their leading integer
               (assumed to be the schema version, as in '39_36a' or
               '43_36e') and then as plain strings, so that '100_37' is
               considered more recent than '99_37'.
  Returntype : string; undef (empty list in list context) if no core
               coord system information has been added yet
  Exceptions : none
  Caller     : general
  Status     : at risk

=cut

sub get_latest_schema_build {
  my $self = shift;

  # Decorate each key with its leading number so the comparison is numeric
  # on the schema version, falling back to a string comparison for ties
  # and for keys without a leading number.
  my @ordered =
    map  { $_->[1] }
    sort { $a->[0] <=> $b->[0] || $a->[1] cmp $b->[1] }
    map  { [ ($_ =~ /^(\d+)/ ? $1 : 0), $_ ] }
    keys %{ $self->{'core_cache'} };

  # Nothing cached: behave like an empty list slice (undef / empty list).
  return if ! @ordered;

  # The last element is the most recent schema_build.
  return $ordered[-1];
}
356
357
=head2 contains_schema_build

  Arg [1]    : string - schema_build e.g. 43_36e
  Example    : if ($coord_system->contains_schema_build('43_36e')){..do some coord system things ..};
  Description: Returns true if core coord system information has been
               added to this CoordSystem for the given schema_build.
  Returntype : Boolean (1 or 0)
  Exceptions : throws if schema_build not defined
  Caller     : general
  Status     : at risk

=cut

sub contains_schema_build {
  my $self         = shift;
  my $schema_build = shift;

  if (! $schema_build) {
    throw('Must pass a schema_build');
  }

  # Only the presence of the cache key matters, not its content.
  return 1 if exists $self->{'core_cache'}{$schema_build};
  return 0;
}
376
=head2 version

  Arg [1]    : none
  Example    : print $coord->version();
  Description: Getter for the version of this coordinate system. This
               will return an empty string if no version is defined for
               this coordinate system.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub version {
  my ($self) = @_;

  return $self->{'version'};
}
396
397
398
399
=head2 equals

  Arg [1]    : Bio::EnsEMBL::Funcgen::CoordSystem $cs
               The coord system to compare to for equality. A core
               Bio::EnsEMBL::CoordSystem is accepted as well.
  Example    : if($coord_sys->equals($other_coord_sys)) { ... }
  Description: Compares 2 coordinate systems and returns true if they are
               equivalent. The definition of equivalent is sharing the same
               name and version. If they match but this CoordSystem holds
               no entry for the schema_build resolved from the adaptor of
               $cs, a warning is printed once per schema_build, name and
               version, and the match is still reported.
  Returntype : 1 or 0
  Exceptions : throws if the argument is not a CoordSystem object
  Caller     : general
  Status     : At risk

=cut

sub equals {
  my ($self, $cs) = @_;

  # blessed() guards the isa() calls: a plain (unblessed) reference must
  # produce the documented exception rather than a raw Perl method-call
  # failure.
  require Scalar::Util;

  my $is_coord_system =
    Scalar::Util::blessed($cs) &&
    ($cs->isa('Bio::EnsEMBL::Funcgen::CoordSystem') ||
     $cs->isa('Bio::EnsEMBL::CoordSystem'));

  if (! $is_coord_system) {
    throw('Argument must be a Bio::EnsEMBL::Funcgen::CoordSystem');
  }

  return 0 if $self->version() ne $cs->version();
  return 0 if $self->name()    ne $cs->name();

  # Name and version match. We still need to make sure these are default
  # CS, otherwise we can get into trouble with re-used or mismatched
  # seq_region_ids between DBs with different default assemblies.
  # All schema_builds should have been added by BaseFeatureAdaptor during
  # import, so a missing one is only warned about.
  # Resolved once, and only after a match, as the lookup goes through the
  # adaptor and DB objects.
  my $schema_build = $self->adaptor->db->_get_schema_build($cs->adaptor());

  if (! $self->contains_schema_build($schema_build)) {
    # Only warn the first time this combination is seen
    my $warning_key = $schema_build.':'.$self->name().':'.$self->version;

    if (! exists $warnings{$warning_key}) {
      warn 'You are using a schema_build('.$schema_build.') which has no CoordSystem stored for '.$cs->version.". Defaulting to closest name version match.\n";
      $warnings{$warning_key} = 1;
    }
  }

  return 1;
}
451
452
453
454
=head2 is_top_level

  Arg [1]    : none
  Example    : if($coord_sys->is_top_level()) { ... }
  Description: Intended to return true if this is the toplevel pseudo
               coordinate system. The toplevel coordinate system is not a
               real coordinate system which is stored in the database, but
               it is a placeholder that can be used to request
               transformations or retrievals to/from the highest defined
               coordinate system in a given region.
               Currently this method always throws.
  Returntype : 0 or 1 (once implemented)
  Exceptions : always throws - not implemented yet
  Caller     : general
  Status     : at risk - not implemented yet

=cut

sub is_top_level {
  my $self = shift;

  # The previous 'return $self->{top_level}' after this throw could never
  # be reached and the 'top_level' key is never set by this class, so it
  # has been dropped.
  throw('Not yet implmented, need to test against the core cache using dnadb/schema_build');
}
478
479
480 #These attribute methods are largely redundant
481 #is_default is used by Feature Adaptors to restrict features to
482 #current default assembly for non slice based methods
483 #Especially redundant now we have implemented this in fetch_all
484
=head2 is_sequence_level

  Arg [1]    : Bio::EnsEMBL::DBSQL::DBAdaptor
  Example    : if($coord_sys->is_sequence_level($dnadb)) { ... }
  Description: Returns true if this is a sequence level coordinate system
               for a given dnadb. Thin wrapper around
               get_coord_system_attribute('sequence_level', $dnadb).
  Returntype : 0 or 1
  Exceptions : those of get_coord_system_attribute
  Caller     : general
  Status     : at risk

=cut

sub is_sequence_level {
  my $self  = shift;
  my $dnadb = shift;

  return $self->get_coord_system_attribute('sequence_level', $dnadb);
}
503
504
=head2 is_default

  Arg [1]    : Bio::EnsEMBL::DBSQL::DBAdaptor
  Example    : if($coord_sys->is_default($dnadb)) { ... }
  Description: Returns true if this coordinate system is the default
               version of the coordinate system of this name for a given
               dnadb. Thin wrapper around
               get_coord_system_attribute('default', $dnadb).
  Returntype : 0 or 1
  Exceptions : those of get_coord_system_attribute
  Caller     : general - Used
  Status     : at risk

=cut

sub is_default {
  my $self  = shift;
  my $dnadb = shift;

  return $self->get_coord_system_attribute('default', $dnadb);
}
523
=head2 get_coord_system_attribute

  Arg [1]    : string - attribute name, e.g. 'rank', 'default' or
               'sequence_level'. It is upper-cased before the lookup.
  Arg [2]    : Bio::EnsEMBL::DBSQL::DBAdaptor
  Example    : my $rank = $cs->get_coord_system_attribute('rank', $dnadb);
  Description: Returns the cached core coord system attribute for the
               schema_build which this object's adaptor DB resolves
               from the given dnadb.
  Returntype : scalar - the cached value, undef if the attribute is
               not part of the cache entry
  Exceptions : throws if no DBAdaptor is passed
               throws if there is no cache entry for the schema_build
  Caller     : is_sequence_level, is_default, rank
  Status     : at risk

=cut

sub get_coord_system_attribute {
  my $self      = shift;
  my $attr_name = shift;
  my $dnadb     = shift;

  my $have_dnadb =
    $dnadb && ref($dnadb) && $dnadb->isa('Bio::EnsEMBL::DBSQL::DBAdaptor');

  if (! $have_dnadb) {
    throw("You must pass a dnadb to access the CoordSystem attribute:\t $attr_name");
  }

  # The cache is keyed on schema_build, which is resolved via our own DB.
  my $schema_build = $self->adaptor->db->_get_schema_build($dnadb);

  throw("CoordSystem does not contain the schema_build:\t$schema_build")
    if ! $self->contains_schema_build($schema_build);

  # Cache keys are upper case (RANK, DEFAULT, ...)
  my $core_info = $self->{'core_cache'}{$schema_build};

  return $core_info->{ uc($attr_name) };
}
540
541
=head2 rank

  Arg [1]    : Bio::EnsEMBL::DBSQL::DBAdaptor
  Example    : if($cs1->rank($dnadb) < $cs2->rank($dnadb)) {
                 print $cs1->name(), " is a higher level coord system than",
                       $cs2->name(), "\n";
               }
  Description: Returns the rank of this coordinate system for a given
               dnadb. A lower number is a higher coordinate system. The
               highest level coordinate system has a rank of 1 (e.g.
               'chromosome'). The toplevel pseudo coordinate system has a
               rank of 0. Thin wrapper around
               get_coord_system_attribute('rank', $dnadb).
  Returntype : int
  Exceptions : those of get_coord_system_attribute
  Caller     : general
  Status     : at risk - not yet implemented

=cut

sub rank {
  my $self  = shift;
  my $dnadb = shift;

  return $self->get_coord_system_attribute('rank', $dnadb);
}
565
566 1;