Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/CoordSystem.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # | |
2 # EnsEMBL module for Bio::EnsEMBL::Funcgen::CoordSystem | |
3 # | |
4 | |
5 | |
6 =head1 LICENSE | |
7 | |
8 Copyright (c) 1999-2011 The European Bioinformatics Institute and | |
9 Genome Research Limited. All rights reserved. | |
10 | |
11 This software is distributed under a modified Apache license. | |
12 For license details, please see | |
13 | |
14 http://www.ensembl.org/info/about/code_licence.html | |
15 | |
16 =head1 CONTACT | |
17 | |
18 Please email comments or questions to the public Ensembl | |
19 developers list at <ensembl-dev@ebi.ac.uk>. | |
20 | |
21 Questions may also be sent to the Ensembl help desk at | |
22 <helpdesk@ensembl.org>. | |
23 | |
24 | |
25 =head1 NAME | |
26 | |
27 Bio::EnsEMBL::Funcgen::CoordSystem | |
28 | |
29 =head1 SYNOPSIS | |
30 | |
31 my $db = Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor->new(...); | |
32 | |
33 my $csa = $db->get_CoordSystemAdaptor(); | |
34 | |
35 # | |
36 # Get default chromosome coord system for the 39_36a DB: | |
37 # | |
38 my $cs = $csa->fetch_by_name_schema_build_version('chromosome', '39_36a'); | |
39 my $str = join ':', $cs->name(),$cs->version(),$cs->dbID(); | |
40 print "$str\n"; | |
41 | |
42 | |
43 =head1 DESCRIPTION | |
44 | |
45 This has been adapted from the core CoordSystem object to accommodate the multi-assembly | |
46 aspects of the eFG schema, namely handling the schema_build of the referenced core DB. | |
47 | |
48 This is a simple object which contains a few coordinate system attributes: | |
49 name, internal identifier, version and schema_build. A coordinate system is | |
50 uniquely defined by its name and version and which DB it came from i.e. schema_build. | |
51 A version of a coordinate system applies to all sequences within a coordinate system. | |
52 This should not be confused with individual sequence versions. | |
53 | |
54 Take for example the Human assembly. The version 'NCBI33' applies to | |
55 all chromosomes in the NCBI33 assembly (that is the entire 'chromosome' | |
56 coordinate system). The 'clone' coordinate system in the same database would | |
57 have no version however. Although the clone sequences have their own sequence | |
58 versions, there is no version which applies to the entire set of clones. | |
59 | |
60 Coordinate system objects are immutable. Their name and version, and other | |
61 attributes may not be altered after they are created. | |
62 | |
63 =cut | |
64 | |
65 | |
66 use strict; | |
67 use warnings; | |
68 | |
69 package Bio::EnsEMBL::Funcgen::CoordSystem; | |
70 | |
71 use Bio::EnsEMBL::Storable; | |
72 | |
73 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
74 use Bio::EnsEMBL::Utils::Exception qw(throw); | |
75 | |
76 use vars qw(@ISA); | |
77 | |
78 @ISA = qw(Bio::EnsEMBL::Storable); | |
79 | |
80 my %warnings; | |
81 | |
82 | |
83 =head2 new | |
84 | |
85 Arg [..] : List of named arguments: | |
86 -NAME - The name of the coordinate system | |
87 -VERSION - (optional) The version of the coordinate system. | |
88 Note that if the version passed in is undefined, | |
89 it will be set to the empty string in the | |
90 resulting CoordSystem object. | |
91 -RANK - The rank of the coordinate system. The highest | |
92 level coordinate system should have rank 1, the | |
93 second highest rank 2 and so on. An example of | |
94 a high level coordinate system is 'chromosome' an | |
95 example of a lower level coordinate system is | |
96 'clone'. | |
97 -SCHEMA_BUILD - The schema and data build version of the DB of | |
98 origin. | |
99 -TOP_LEVEL - (optional) Sets whether this is a top-level coord | |
100 system. Default = 0. This should only be set to | |
101 true if you are creating an artificial toplevel | |
102 coordsystem by the name of 'toplevel' | |
103 -SEQUENCE_LEVEL - (optional) Sets whether this is a sequence | |
104 level coordinate system. Default = 0 | |
105 -DEFAULT - (optional) | |
106 Whether this is the default version of the | |
107 coordinate systems of this name. Default = 0 | |
108 -DBID - (optional) The internal identifier of this | |
109 coordinate system | |
110 -ADAPTOR - (optional) The adaptor which provides database | |
111 interaction for this object | |
112 Example : $cs = Bio::EnsEMBL::Funcgen::CoordSystem->new(-NAME => 'chromosome', | |
113 -VERSION => 'NCBI33', | |
114 -RANK => 1, | |
115 -DBID => 1, | |
116 -SCHEMA_BUILD => '39_36a', | |
117 -ADAPTOR => adaptor, | |
118 -DEFAULT => 1, | |
119 -SEQUENCE_LEVEL => 0); | |
120 Description: Creates a new CoordSystem object representing a coordinate | |
121 system. | |
122 Returntype : Bio::EnsEMBL::Funcgen::CoordSystem | |
123 Exceptions : none | |
124 Caller : general | |
125 Status : Stable | |
126 | |
127 =cut | |
128 | |
sub new {
  # Constructor. Every argument is first forwarded to the parent
  # (Bio::EnsEMBL::Storable) constructor; of the named arguments only
  # -NAME and -VERSION are read here.
  #
  # The rank / top-level / sequence-level / default validation is not done
  # in the constructor: that information is supplied per schema_build
  # through add_core_coord_system_info().
  # TODO: handle the top/default level issues with multiple DBs.
  my ($caller, @args) = @_;

  my $class = ref($caller) || $caller;
  my $self  = $class->SUPER::new(@args);

  my ($name, $version) = rearrange([ 'NAME', 'VERSION' ], @args);

  if (! $name) {
    throw('A name argument is required');
  }

  # An undefined version is stored as the empty string.
  if (! defined($version)) {
    $version = '';
  }

  # core_cache maps schema_build => hash of core coord system details; it
  # starts out empty.
  $self->{'core_cache'} = {};
  $self->{'version'}    = $version;
  $self->{'name'}       = $name;

  return $self;
}
205 | |
206 | |
207 =head2 add_core_coord_system_info | |
208 | |
209 Arg [1] : mandatory hash: | |
210 | |
211 -RANK => $rank, | |
212 -SEQUENCE_LEVEL => $seq_lvl, | |
213 -DEFAULT => $default, | |
214 -SCHEMA_BUILD => $sbuild, | |
215 -CORE_COORD_SYSTEM_ID => $ccs_id, | |
216 -IS_STORED => $stored_status, | |
217 | |
218 Example : $cs->add_core_coord_system_info( | |
219 -RANK => $rank, | |
220 -SEQUENCE_LEVEL => $seq_lvl, | |
221 -DEFAULT => $default, | |
222 -SCHEMA_BUILD => $sbuild, | |
223 -CORE_COORD_SYSTEM_ID => $ccs_id, | |
224 -IS_STORED => 1, | |
225 ); | |
226 | |
227 Description: Setter for core coord system information | |
228 Returntype : none | |
229 Exceptions : throws if: | |
230 rank not 0 when toplevel | |
231 name not 'toplevel' when toplevel | |
232 sequence level and top level | |
233 no schema_build defined | |
234 no rank | |
235 rank 0 when not toplevel | |
236 name 'toplevel' when not toplevel | |
237 | |
238 Caller : Bio::EnsEMBL::Funcgen::DBSQL::CoordSystemAdaptor and ? | |
239 Status : at risk - replace with add_core_CoordSystem? implement top level? | |
240 | |
241 #this does not check name and version! | |
242 | |
243 | |
244 =cut | |
245 | |
sub add_core_coord_system_info {
  # Records the details of one core coord system under its schema_build in
  # $self->{'core_cache'}. Throws when the schema_build or core coord system
  # id is missing, or when the rank / top-level / sequence-level combination
  # is inconsistent. Returns nothing.
  # Note: this does not check name and version against the core record.
  my ($self, @args) = @_;

  my ($sbuild, $top_level, $sequence_level, $default, $rank, $stored, $ccs_id) =
    rearrange(
      [ qw(SCHEMA_BUILD TOP_LEVEL SEQUENCE_LEVEL DEFAULT RANK IS_STORED CORE_COORD_SYSTEM_ID) ],
      @args
    );

  if (! $sbuild) {
    throw('Must provide a schema_build');
  }

  if (! $ccs_id) {
    throw('Must provide a core_coord_system_id');
  }

  # Normalise the flags to 0/1 and give rank / stored status a 0 default.
  $sequence_level = $sequence_level ? 1 : 0;
  $default        = $default        ? 1 : 0;
  $stored         = $stored || 0;
  $rank           = $rank   || 0;

  if ($top_level) {
    throw('RANK argument must be 0 if TOP_LEVEL is 1') if $rank;

    # A nameless top-level coord system is not supported yet; a named one
    # must be called 'toplevel'.
    my $name = $self->name();
    throw('toplevel not yet implemented') if ! $name;
    throw('The NAME argument must be "toplevel" if TOP_LEVEL is 1') if $name ne 'toplevel';

    throw("SEQUENCE_LEVEL argument must be 0 if TOP_LEVEL is 1") if $sequence_level;

    $default = 0;
  }
  else {
    throw("RANK argument must be non-zero if not toplevel CoordSystem") if ! $rank;
    throw("Cannot name coord system 'toplevel' unless TOP_LEVEL is 1") if $self->name() eq 'toplevel';
  }

  throw('The RANK argument must be a positive integer') if $rank !~ /^\d+$/;

  # Unstored coord systems can be added here too. Creating this cache key
  # also switches off the warning that equals() emits for a schema_build
  # with no cached entry.
  $self->{'core_cache'}{$sbuild} = {
    RANK                 => $rank,
    SEQUENCE_LEVEL       => $sequence_level,
    DEFAULT              => $default,
    CORE_COORD_SYSTEM_ID => $ccs_id,
    IS_STORED            => $stored,
  };

  return;
}
317 | |
318 | |
319 #remove all but schema_build and equals? | |
320 #depends on how we handle levels | |
321 | |
322 =head2 name | |
323 | |
324 Arg [1] : none | |
325 Example : print $coord_system->name(); | |
326 Description: Getter for the name of this coordinate system | |
327 Returntype : string | |
328 Exceptions : none | |
329 Caller : general | |
330 Status : Stable | |
331 | |
332 =cut | |
333 | |
sub name {
  # Read-only accessor for the coordinate system name stored by new().
  my ($self) = @_;

  return $self->{'name'};
}
338 | |
339 | |
340 =head2 get_latest_schema_build | |
341 | |
342 Example : my $db_schema_build = $coord_system->get_latest_schema_build(); | |
343 Description: Getter for the most recent schema_build of this coordinate system | |
344 Returntype : string | |
345 Exceptions : none | |
346 Caller : general | |
347 Status : at risk | |
348 | |
349 =cut | |
350 | |
sub get_latest_schema_build {
  # Returns the most recent schema_build key held in the core cache, or
  # nothing (undef / empty list) when no core coord system has been added.
  #
  # Keys look like '<schema version>_<assembly number><suffix>', e.g.
  # '43_36e'. They are ordered numerically on the schema version, then
  # numerically on the assembly number, then by suffix; a plain string sort
  # would rank '100_38' before '99_37'. Keys that do not follow the pattern
  # sort before all well-formed keys.
  my $self = shift;

  my @ranked =
    sort {
         $a->[1] <=> $b->[1]
      || $a->[2] <=> $b->[2]
      || $a->[3] cmp $b->[3]
      || $a->[0] cmp $b->[0]
    }
    map {
      my ($schema, $assembly, $suffix) = $_ =~ /\A(\d+)_(\d+)(.*)\z/s;
      defined $schema ? [ $_, $schema, $assembly, $suffix ]
                      : [ $_, -1, -1, '' ];
    } keys %{ $self->{'core_cache'} };

  return if ! @ranked;

  # Ascending order, so the latest build is the last element.
  return $ranked[-1][0];
}
356 | |
357 | |
358 =head2 contains_schema_build | |
359 | |
360 Example : if ($coord_system->contains_schema_build('43_36e')){..do some coord system things ..}; | |
361 Description: Returns true if the CoordSystem maps to the corresponding core CoordSystem | |
362 Returntype : Boolean | |
363 Exceptions : throws if schema_build not defined | |
364 Caller : general | |
365 Status : at risk | |
366 | |
367 =cut | |
368 | |
sub contains_schema_build {
  # Returns 1 when the core cache has an entry for the given schema_build,
  # 0 otherwise. Throws when no schema_build is passed.
  my $self         = shift;
  my $schema_build = shift;

  if (! $schema_build) {
    throw('Must pass a schema_build');
  }

  return 0 if ! exists $self->{'core_cache'}{$schema_build};
  return 1;
}
376 | |
377 =head2 version | |
378 | |
379 Arg [1] : none | |
380 Example : print $coord->version(); | |
381 Description: Getter for the version of this coordinate system. This | |
382 will return an empty string if no version is defined for this | |
383 coordinate system. | |
384 Returntype : string | |
385 Exceptions : none | |
386 Caller : general | |
387 Status : Stable | |
388 | |
389 =cut | |
390 | |
sub version {
  # Read-only accessor for the version stored by new(), which stores the
  # empty string when no version was given.
  my ($self) = @_;

  return $self->{'version'};
}
396 | |
397 | |
398 | |
399 | |
400 =head2 equals | |
401 | |
402 Arg [1] : Bio::EnsEMBL::Funcgen::CoordSystem $cs | |
403 The coord system to compare to for equality. | |
404 Example : if($coord_sys->equals($other_coord_sys)) { ... } | |
405 Description: Compares 2 coordinate systems and returns true if they are | |
406 equivalent. The definition of equivalent is sharing the same | |
407 name and version. | |
408 Returntype : 0 or 1 | |
409 Exceptions : none | |
410 Caller : general | |
411 Status : At risk | |
412 | |
413 =cut | |
414 | |
sub equals {
  # Returns 1 when the given coord system has the same name and version as
  # this one, 0 otherwise. Throws unless the argument is a Funcgen or core
  # CoordSystem object.
  #
  # When the two match but this object has no core cache entry for the
  # schema_build reported for the other coord system's adaptor, a warning is
  # emitted once per schema_build/name/version and the match still counts.
  my ($self, $cs) = @_;

  # blessed() keeps an unblessed reference away from ->isa, which would die
  # with a raw Perl error instead of the throw below.
  require Scalar::Util;

  if (   ! $cs
      || ! Scalar::Util::blessed($cs)
      || ! (   $cs->isa('Bio::EnsEMBL::Funcgen::CoordSystem')
            || $cs->isa('Bio::EnsEMBL::CoordSystem'))) {
    throw('Argument must be a Bio::EnsEMBL::Funcgen::CoordSystem');
  }

  # TODO: need to add a check on schema_build here. All schema_builds should
  # have been added by BaseFeatureAdaptor during import; this fails if we
  # are using two different versions with the same coord systems.
  return 0 if $self->version() ne $cs->version() || $self->name() ne $cs->name();

  # TODO: we need to make sure these are default coord systems, otherwise we
  # can get into trouble with re-used or mismatched seq_region_ids between
  # DBs with different default assemblies.

  # Looked up once and reused for the check, the warning key and the message.
  my $schema_build = $self->adaptor->db->_get_schema_build($cs->adaptor());

  if (! $self->contains_schema_build($schema_build)) {
    my $warning_key = $schema_build.':'.$self->name().':'.$self->version;

    # Only warn the first time this combination is seen.
    if (! exists $warnings{$warning_key}) {
      warn 'You are using a schema_build('.$schema_build.') which has no CoordSystem stored for '.$cs->version.". Defaulting to closest name version match.\n";
      $warnings{$warning_key} = 1;
    }
  }

  return 1;
}
451 | |
452 | |
453 | |
454 | |
455 =head2 is_top_level | |
456 | |
457 Arg [1] : none | |
458 Example : if($coord_sys->is_top_level()) { ... } | |
459 Description: Returns true if this is the toplevel pseudo coordinate system. | |
460 The toplevel coordinate system is not a real coordinate system | |
461 which is stored in the database, but it is a placeholder that | |
462 can be used to request transformations or retrievals to/from | |
463 the highest defined coordinate system in a given region. | |
464 Returntype : 0 or 1 | |
465 Exceptions : none | |
466 Caller : general | |
467 Status : at risk - not implemented yet | |
468 | |
469 =cut | |
470 | |
sub is_top_level {
  # Not implemented: always throws. Answering this needs a test against the
  # core cache for a given dnadb/schema_build. The former trailing
  # "return $self->{'top_level'}" was unreachable, and nothing in this
  # class sets that key, so it has been dropped.
  my $self = shift;

  throw('Not yet implmented, need to test against the core cache using dnadb/schema_build');
}
478 | |
479 | |
480 #These attribute methods are largely redundant | |
481 #is_default is used by Feature Adaptors to restrict features to | |
482 #current default assembly for non slice based methods | |
483 #Especially redundant now we have implemented this in fetch_all | |
484 | |
485 =head2 is_sequence_level | |
486 | |
487 Arg [1] : Bio::EnsEMBL::DBSQL::DBAdaptor | |
488 Example : if($coord_sys->is_sequence_level($dnadb)) { ... } | |
489 Description: Returns true if this is a sequence level coordinate system | |
490 for a given dnadb | |
491 Returntype : 0 or 1 | |
492 Exceptions : none | |
493 Caller : general | |
494 Status : at risk | |
495 | |
496 =cut | |
497 | |
sub is_sequence_level {
  # Looks up the SEQUENCE_LEVEL entry cached for the schema_build of the
  # given dnadb; get_coord_system_attribute() does the argument checking.
  my $self  = shift;
  my $dnadb = shift;

  my $flag = $self->get_coord_system_attribute('sequence_level', $dnadb);

  return $flag;
}
503 | |
504 | |
505 =head2 is_default | |
506 | |
507 Arg [1] : Bio::EnsEMBL::DBSQL::DBAdaptor | |
508 Example : if($coord_sys->is_default($dnadb)) { ... } | |
509 Description: Returns true if this coordinate system is the default | |
510 version of the coordinate system of this name for a given dnadb. | |
511 Returntype : 0 or 1 | |
512 Exceptions : none | |
513 Caller : general - Used | |
514 Status : at risk | |
515 | |
516 =cut | |
517 | |
sub is_default {
  # Looks up the DEFAULT entry cached for the schema_build of the given
  # dnadb; get_coord_system_attribute() does the argument checking.
  my $self  = shift;
  my $dnadb = shift;

  my $flag = $self->get_coord_system_attribute('default', $dnadb);

  return $flag;
}
523 | |
sub get_coord_system_attribute {
  # Shared lookup behind is_sequence_level(), is_default() and rank().
  #
  # Arg [1] : attribute name; it is upper-cased to form the cache key
  # Arg [2] : Bio::EnsEMBL::DBSQL::DBAdaptor used to work out the schema_build
  # Returns : the value cached for that attribute under that schema_build
  # Throws  : if no DBAdaptor is passed, or if this coord system has no
  #           cache entry for the resulting schema_build
  my $self      = shift;
  my $attr_name = shift;
  my $dnadb     = shift;

  my $is_dnadb = $dnadb && ref($dnadb) && $dnadb->isa('Bio::EnsEMBL::DBSQL::DBAdaptor');

  if (! $is_dnadb) {
    throw("You must pass a dnadb to access the CoordSystem attribute:\t $attr_name");
  }

  my $schema_build = $self->adaptor->db->_get_schema_build($dnadb);

  throw("CoordSystem does not contain the schema_build:\t$schema_build")
    if ! $self->contains_schema_build($schema_build);

  return $self->{'core_cache'}{$schema_build}{ uc($attr_name) };
}
540 | |
541 | |
542 =head2 rank | |
543 | |
544 Arg [1] : Bio::EnsEMBL::DBSQL::DBAdaptor | |
545 Example : if($cs1->rank($dnadb) < $cs2->rank($dnadb)) { | |
546 print $cs1->name(), " is a higher level coord system than", | |
547 $cs2->name(), "\n"; | |
548 } | |
549 Description: Returns the rank of this coordinate system for a given dnadb. | |
550 A lower number is a higher coordinate system. The highest level coordinate | |
551 system has a rank of 1 (e.g. 'chromosome'). The toplevel | |
552 pseudo coordinate system has a rank of 0. | |
553 Returntype : int | |
554 Exceptions : none | |
555 Caller : general | |
556 Status : at risk | |
557 | |
558 =cut | |
559 | |
sub rank {
  # Looks up the RANK entry cached for the schema_build of the given dnadb;
  # get_coord_system_attribute() does the argument checking.
  my $self  = shift;
  my $dnadb = shift;

  my $rank = $self->get_coord_system_attribute('rank', $dnadb);

  return $rank;
}
565 | |
566 1; |