Mercurial > repos > mahtabm > ensemb_rep_gvl
comparison variant_effect_predictor/Bio/EnsEMBL/AssemblyMapper.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bc9b66ada89 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::AssemblyMapper - | |
24 Handles mapping between two coordinate systems using the information | |
25 stored in the assembly table. | |
26 | |
27 =head1 SYNOPSIS | |
28 | |
29 $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(...); | |
30 $asma = $db->get_AssemblyMapperAdaptor(); | |
31 $csa = $db->get_CoordSystemAdaptor(); | |
32 | |
33 my $chr_cs = $csa->fetch_by_name( 'chromosome', 'NCBI33' ); | |
34 my $ctg_cs = $csa->fetch_by_name('contig'); | |
35 | |
36 $asm_mapper = $asma->fetch_by_CoordSystems( $chr_cs, $ctg_cs ); | |
37 | |
38 # Map to contig coordinate system from chromosomal. | |
39 @ctg_coords = | |
40 $asm_mapper->map( 'X', 1_000_000, 2_000_000, 1, $chr_cs ); | |
41 | |
42 # Map to chromosome coordinate system from contig. | |
43 @chr_coords = | |
44 $asm_mapper->map( 'AL30421.1.200.92341', 100, 10000, -1, | |
45 $ctg_cs ); | |
46 | |
47 # List contig names for a region of chromosome. | |
48 @ctg_ids = $asm_mapper->list_ids( '13', 1_000_000, 1, $chr_cs ); | |
49 | |
50 # List chromosome names for a contig region. | |
51 @chr_ids = | |
52 $asm_mapper->list_ids( 'AL30421.1.200.92341', 1, 1000, | |
53 $ctg_cs ); | |
54 | |
55 =head1 DESCRIPTION | |
56 | |
57 The AssemblyMapper is a database aware mapper which facilitates | |
58 conversion of coordinates between any two coordinate systems with a | |
59 relationship explicitly defined in the assembly table. In the future | |
60 it may be possible to perform multiple step (implicit) mapping between | |
61 coordinate systems. | |
62 | |
63 It is implemented using the Bio::EnsEMBL::Mapper object, which is a | |
64 generic mapper object between disjoint coordinate systems. | |
65 | |
66 =head1 METHODS | |
67 | |
68 =cut | |
69 | |
70 | |
71 package Bio::EnsEMBL::AssemblyMapper; | |
72 | |
73 use strict; | |
74 use warnings; | |
75 | |
76 use Bio::EnsEMBL::Mapper; | |
77 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate); | |
78 use Scalar::Util qw(weaken); | |
79 | |
# Labels for the two sides of the internal Bio::EnsEMBL::Mapper: the
# assembled side is loaded as 'from' and the component side as 'to'.
my ( $ASSEMBLED, $COMPONENT ) = ( 'assembled', 'component' );

# Initial value of max_pair_count for a freshly constructed mapper.
my $DEFAULT_MAX_PAIR_COUNT = 1_000;
84 | |
85 | |
86 =head2 new | |
87 | |
88 Arg [1] : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor | |
89 Arg [2] : Bio::EnsEMBL::CoordSystem $asm_cs | |
90 Arg [3] : Bio::EnsEMBL::CoordSystem $cmp_cs | |
91 Example : Should use AssemblyMapperAdaptor->fetch_by_CoordSystems() | |
92 Description: Creates a new AssemblyMapper | |
93 Returntype : Bio::EnsEMBL::AssemblyMapper | |
94 Exceptions : Throws if multiple coord_systems are provided | |
95 Caller : AssemblyMapperAdaptor | |
96 Status : Stable | |
97 | |
98 =cut | |
99 | |
# Constructor.  Expects the owning adaptor followed by exactly two
# coordinate systems: the assembled one first, the component one second.
# Throws if any other number of coordinate systems is supplied.
sub new {
  my $proto         = shift;
  my $adaptor       = shift;
  my @coord_systems = @_;

  my $self = bless( {}, ref($proto) || $proto );

  # The adaptor is stored (and asked to fill its multi-assembly cache)
  # before the argument count is checked.
  $self->adaptor($adaptor);
  $adaptor->cache_seq_ids_with_mult_assemblys();

  my $cs_count = scalar(@coord_systems);
  if ( $cs_count != 2 ) {
    throw(   'Can only map between two coordinate systems. '
           . $cs_count
           . ' were provided' );
  }

  my ( $asm_cs, $cmp_cs ) = @coord_systems;
  $self->{'asm_cs'} = $asm_cs;
  $self->{'cmp_cs'} = $cmp_cs;

  # The internal mapper treats the assembled side as 'from' and the
  # component side as 'to'.
  $self->{'mapper'} =
    Bio::EnsEMBL::Mapper->new( $ASSEMBLED, $COMPONENT, $asm_cs, $cmp_cs );

  $self->{'max_pair_count'} = $DEFAULT_MAX_PAIR_COUNT;

  return $self;
}
131 | |
132 =head2 max_pair_count | |
133 | |
134 Arg [1] : (optional) int $max_pair_count | |
135 Example : $mapper->max_pair_count(100000) | |
136 Description: Getter/Setter for the number of mapping pairs allowed | |
137 in the internal cache. This can be used to override | |
138 the default value (1000) to tune the performance and | |
139 memory usage for certain scenarios. Higher value | |
140 means bigger cache, more memory used. | |
141 Return type: int | |
142 Exceptions : None | |
143 Caller : General | |
144 Status : Stable | |
145 | |
146 =cut | |
147 | |
# Getter/setter for the maximum number of mapping pairs kept in the
# internal cache.  A defined argument replaces the stored value; the
# (possibly updated) value is always returned.
sub max_pair_count {
  my $self  = shift;
  my $value = shift;

  $self->{'max_pair_count'} = $value if defined $value;

  return $self->{'max_pair_count'};
}
157 | |
158 =head2 register_all | |
159 | |
160 Arg [1] : None | |
161 Example : $mapper->max_pair_count(10e6); | |
162 $mapper->register_all(); | |
163 Description: Pre-registers all assembly information in this | |
164 mapper. The cache size should be set to a | |
165 sufficiently large value so that all of the | |
166 information can be stored. This method is useful | |
167 when *a lot* of mapping will be done in regions | |
168 which are distributed around the genome. After | |
169 registration the mapper will consume a lot of memory | |
170 but will not have to perform any SQL and will be | |
171 faster. | |
172 Return type: None | |
173 Exceptions : None | |
174 Caller : Specialised programs doing a lot of mapping. | |
175 Status : Stable | |
176 | |
177 =cut | |
178 | |
# Asks the adaptor to pre-register all assembly information in this
# mapper.  The adaptor's result is passed straight back to the caller.
sub register_all {
  my $self = shift;

  my $adaptor = $self->adaptor();

  return $adaptor->register_all($self);
}
184 | |
185 =head2 map | |
186 | |
187 Arg [1] : string $frm_seq_region | |
188 The name of the sequence region to transform FROM. | |
189 Arg [2] : int $frm_start | |
190 The start of the region to transform FROM. | |
191 Arg [3] : int $frm_end | |
192 The end of the region to transform FROM. | |
193 Arg [4] : int $strand | |
194 The strand of the region to transform FROM. | |
195 Arg [5] : Bio::EnsEMBL::CoordSystem | |
196 The coordinate system to transform FROM | |
197 Example : @coords = | |
198 $asm_mapper->map( 'X', 1_000_000, 2_000_000, 1, | |
199 $chr_cs ); | |
200 Description: Transforms coordinates from one coordinate system to | |
201 another. | |
202 Return type: List of Bio::EnsEMBL::Mapper::Coordinate and/or | |
203 Bio::EnsEMBL::Mapper::Gap objects. | |
204 Exceptions : Throws if the specified FROM coordinate system is not | |
205 one of the coordinate systems associated with this | |
206 assembly mapper. | |
207 Caller : General | |
208 Status : Stable | |
209 | |
210 =cut | |
211 | |
# Maps a region from one of this mapper's two coordinate systems to the
# other.
#
# Arguments after $self: $frm_seq_region_name, $frm_start, $frm_end,
# $frm_strand, $frm_cs.  Any further argument is accepted but ignored.
# Returns whatever the internal mapper's map_coordinates() returns.
# Throws if too few arguments are given, or if $frm_cs is neither the
# assembled nor the component coordinate system of this mapper.
sub map {
  if ( @_ < 6 ) {
    throw('Incorrect number of arguments.');
  }

  my $self = shift;
  my ( $frm_seq_region_name, $frm_start, $frm_end, $frm_strand, $frm_cs )
    = @_;

  my $asm_cs  = $self->{'asm_cs'};
  my $cmp_cs  = $self->{'cmp_cs'};
  my $adaptor = $self->{'adaptor'};

  # The internal mapper is queried by seq_region ID rather than by name.
  my $seq_region_id =
    $self->adaptor()
    ->seq_regions_to_ids( $frm_cs, [$frm_seq_region_name] )->[0];

  # Speed critical section: plain reference comparison is tried first
  # because it usually succeeds and avoids the equals() method call.
  my $from_component = $frm_cs == $cmp_cs
    || ( $frm_cs != $asm_cs && $frm_cs->equals($cmp_cs) );

  my $frm;
  if ($from_component) {

    # Components only need to be registered once.
    unless ( $self->{'cmp_register'}->{$seq_region_id} ) {
      $adaptor->register_component( $self, $seq_region_id );
    }
    $frm = $COMPONENT;

  }
  elsif ( $frm_cs == $asm_cs || $frm_cs->equals($asm_cs) ) {

    # Registration is requested on every call for the assembled side;
    # this could probably be sped up by only doing so when needed.
    $adaptor->register_assembled( $self, $seq_region_id, $frm_start,
                                  $frm_end );
    $frm = $ASSEMBLED;

  }
  else {

    throw(
      sprintf( 'Coordinate system %s %s is neither the assembled '
                 . 'nor the component coordinate system '
                 . 'of this AssemblyMapper',
               $frm_cs->name(), $frm_cs->version() ) );

  }

  return $mapper_result_for_map->()
    if 0;    # never taken; keeps the single exit point below explicit

  return $self->{'mapper'}
    ->map_coordinates( $seq_region_id, $frm_start, $frm_end, $frm_strand,
                       $frm );
}
265 | |
266 | |
267 =head2 flush | |
268 | |
269 Args : None | |
270 Example : None | |
271 Description: Remove all cached items from this AssemblyMapper. | |
272 Return type: None | |
273 Exceptions : None | |
274 Caller : AssemblyMapperAdaptor | |
275 Status : Stable | |
276 | |
277 =cut | |
278 | |
# Drops everything cached by this AssemblyMapper: the internal mapper is
# flushed and both registration tables are replaced with empty hashes.
sub flush {
  my $self = shift;

  my $mapper = $self->{'mapper'};
  $mapper->flush();

  # Forget which component and assembled regions were registered.
  $self->{'cmp_register'} = {};
  $self->{'asm_register'} = {};
}
286 | |
287 =head2 size | |
288 | |
289 Args : None | |
290 Example : $num_of_pairs = $mapper->size(); | |
291 Description: Returns the number of pairs currently stored. | |
292 Return type: int | |
293 Exceptions : None | |
294 Caller : General | |
295 Status : Stable | |
296 | |
297 =cut | |
298 | |
# Returns the number of pairs currently stored, read directly from the
# internal mapper's 'pair_count' entry.
sub size {
  my $self = shift;

  my $pair_count = $self->{'mapper'}{'pair_count'};

  return $pair_count;
}
304 | |
305 =head2 fastmap | |
306 | |
307 Arg [1] : string $frm_seq_region | |
308 The name of the sequence region to transform FROM. | |
309 Arg [2] : int $frm_start | |
310 The start of the region to transform FROM. | |
311 Arg [3] : int $frm_end | |
312 The end of the region to transform FROM. | |
313 Arg [4] : int $strand | |
314 The strand of the region to transform FROM. | |
315 Arg [5] : Bio::EnsEMBL::CoordSystem | |
316 The coordinate system to transform FROM. | |
317 Example : @coords = | |
318 $asm_mapper->fastmap( 'X', 1_000_000, 2_000_000, 1, | |
319 $chr_cs ); | |
320 Description: Transforms coordinates from one coordinate system to | |
321 another. | |
322 Return type: List of Bio::EnsEMBL::Mapper::Coordinate and/or | |
323 Bio::EnsEMBL::Mapper::Gap objects. | |
324 Exceptions : Throws if the specified FROM coordinate system is not | |
325 one of the coordinate systems associated with this | |
326 assembly mapper. | |
327 Caller : General | |
328 Status : Stable | |
329 | |
330 =cut | |
331 | |
# Same lookup and registration steps as map(), but the final call goes
# to the internal mapper's fastmap() instead of map_coordinates().
#
# Arguments after $self (exactly five): $frm_seq_region_name,
# $frm_start, $frm_end, $frm_strand, $frm_cs.
# Throws on a wrong argument count, or if $frm_cs is neither the
# assembled nor the component coordinate system of this mapper.
sub fastmap {
  throw('Incorrect number of arguments.') if @_ != 6;

  my $self = shift;
  my ( $frm_seq_region_name, $frm_start, $frm_end, $frm_strand, $frm_cs )
    = @_;

  my $asm_cs  = $self->{'asm_cs'};
  my $cmp_cs  = $self->{'cmp_cs'};
  my $adaptor = $self->adaptor();

  # Translate the region name into the ID the internal mapper works with.
  my $seq_region_id =
    $self->adaptor()
    ->seq_regions_to_ids( $frm_cs, [$frm_seq_region_name] )->[0];

  # Speed critical section: compare references before falling back to
  # the equals() method, since identity usually holds and is cheaper.
  my $from_component = $frm_cs == $cmp_cs
    || ( $frm_cs != $asm_cs && $frm_cs->equals($cmp_cs) );

  my $frm;
  if ($from_component) {

    unless ( $self->{'cmp_register'}->{$seq_region_id} ) {
      $adaptor->register_component( $self, $seq_region_id );
    }
    $frm = $COMPONENT;

  }
  elsif ( $frm_cs == $asm_cs || $frm_cs->equals($asm_cs) ) {

    # Requested on every call; could probably be skipped when the
    # region is already registered.
    $adaptor->register_assembled( $self, $seq_region_id, $frm_start,
                                  $frm_end );
    $frm = $ASSEMBLED;

  }
  else {

    throw(
      sprintf( 'Coordinate system %s %s is neither the assembled '
                 . 'nor the component coordinate system '
                 . 'of this AssemblyMapper',
               $frm_cs->name(), $frm_cs->version() ) );

  }

  return $self->{'mapper'}
    ->fastmap( $seq_region_id, $frm_start, $frm_end, $frm_strand, $frm );
}
389 | |
390 =head2 list_ids | |
391 | |
392 Arg [1] : string $frm_seq_region | |
393 The name of the sequence region of interest. | |
394 Arg [2] : int $frm_start | |
395 The start of the region of interest. | |
396 Arg [3] : int $frm_end | |
397 The end of the region of interest. | |
398 Arg [4] : Bio::EnsEMBL::CoordSystem $frm_cs | |
399 The coordinate system to obtain overlapping IDs of. | |
400 Example : foreach my $id ( | |
401 $asm_mapper->list_ids( 'X', 1, 1000, $chr_cs ) ) | |
402 { ... } | |
403 Description: Retrieves a list of overlapping seq_region internal | |
404 identifiers of another coordinate system. This is | |
405 the same as the list_seq_regions method but uses | |
406 internal identifiers rather than seq_region names. | |
407 Return type: List of ints. | |
408 Exceptions : None | |
409 Caller : General | |
410 Status : Stable | |
411 | |
412 =cut | |
413 | |
# Lists the identifiers, on the opposite side of the mapping, of the
# mapper pairs that overlap the given region.
#
# Arguments after $self (exactly four): $frm_seq_region_name,
# $frm_start, $frm_end, $frm_cs.
# Throws on a wrong argument count, or if $frm_cs is neither the
# assembled nor the component coordinate system of this mapper.
sub list_ids {
  throw('Incorrect number of arguments.') if @_ != 5;

  my ( $self, $frm_seq_region_name, $frm_start, $frm_end, $frm_cs ) =
    @_;

  my $seq_region_id =
    $self->adaptor()
    ->seq_regions_to_ids( $frm_cs, [$frm_seq_region_name] )->[0];

  if ( $frm_cs->equals( $self->component_CoordSystem() ) ) {

    unless ( $self->have_registered_component($seq_region_id) ) {
      $self->adaptor->register_component( $self, $seq_region_id );
    }

    # The assembled side was loaded as the 'from' side in the
    # constructor, so the other side's identifiers are the 'from' ones.
    my @pairs =
      $self->mapper()
      ->list_pairs( $seq_region_id, $frm_start, $frm_end, $COMPONENT );

    return map { $_->from()->id() } @pairs;
  }

  if ( $frm_cs->equals( $self->assembled_CoordSystem() ) ) {

    $self->adaptor->register_assembled( $self, $seq_region_id,
                                        $frm_start, $frm_end );

    # The component side was loaded as the 'to' side in the
    # constructor, so the other side's identifiers are the 'to' ones.
    my @pairs =
      $self->mapper()
      ->list_pairs( $seq_region_id, $frm_start, $frm_end, $ASSEMBLED );

    return map { $_->to->id() } @pairs;
  }

  throw(
    sprintf( 'Coordinate system %s %s is neither the assembled '
               . 'nor the component coordinate system '
               . 'of this AssemblyMapper',
             $frm_cs->name(), $frm_cs->version() ) );
}
466 | |
467 #sub list_seq_regions { | |
468 # throw('Incorrect number of arguments.') if(@_ != 5); | |
469 # my($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_cs) = @_; | |
470 | |
471 # if($frm_cs->equals($self->component_CoordSystem())) { | |
472 | |
473 # if(!$self->have_registered_component($seq_region_id)) { | |
474 # $self->adaptor->register_component($self,$seq_region_id); | |
475 # } | |
476 | |
477 # #pull out the 'from' identifiers of the mapper pairs. The | |
478 # #we loaded the assembled side as the 'from' side in the constructor | |
479 # return | |
480 # map {$_->from()->id()} | |
481 # $self->mapper()->list_pairs($seq_region_id, $frm_start, | |
482 # $frm_end, $COMPONENT); | |
483 | |
484 # } elsif($frm_cs->equals($self->assembled_CoordSystem())) { | |
485 | |
486 # $self->adaptor->register_assembled($self, | |
487 # $frm_seq_region,$frm_start,$frm_end); | |
488 | |
489 # #pull out the 'to' identifiers of the mapper pairs | |
490 # #we loaded the component side as the 'to' coord system in the constructor | |
491 # return | |
492 # map {$_->to->id()} | |
493 # $self->mapper()->list_pairs($frm_seq_region, $frm_start, | |
494 # $frm_end, $ASSEMBLED); | |
495 # } else { | |
496 # throw("Coordinate system " . $frm_cs->name . " " . $frm_cs->version . | |
497 # " is neither the assembled nor the component coordinate system " . | |
498 # " of this AssemblyMapper"); | |
499 # } | |
500 #} | |
501 | |
502 | |
503 =head2 list_seq_regions | |
504 | |
505 Arg [1] : string $frm_seq_region | |
506 The name of the sequence region of interest. | |
507 Arg [2] : int $frm_start | |
508 The start of the region of interest. | |
509 Arg [3] : int $frm_end | |
510 The end of the region of interest. | |
511 Arg [4] : Bio::EnsEMBL::CoordSystem $frm_cs | |
512 The coordinate system to obtain overlapping IDs of. | |
513 Example : foreach my $name ( | |
514 $asm_mapper->list_seq_regions( | |
515 'X', 1, 1000, $chr_cs | |
516 ) ) { ... } | |
517 Description: Retrieves a list of overlapping seq_region names of | |
518 another coordinate system. This is the same as the | |
519 list_ids method but uses seq_region names rather than | |
520 internal identifiers. | |
521 Return type: List of strings. | |
522 Exceptions : None | |
523 Caller : General | |
524 Status : Stable | |
525 | |
526 =cut | |
527 | |
# Lists the seq_regions of the other coordinate system that overlap the
# given region.
#
# Arguments after $self (exactly four): $frm_seq_region, $frm_start,
# $frm_end, $frm_cs.
# Returns the dereferenced result of the adaptor's seq_ids_to_regions()
# (presumably seq_region names - confirm against the adaptor).
# Throws on a wrong argument count; list_ids() additionally throws if
# $frm_cs belongs to neither side of this mapper.
sub list_seq_regions {
  if ( @_ != 5 ) {
    throw('Incorrect number of arguments.');
  }

  my ( $self, $frm_seq_region, $frm_start, $frm_end, $frm_cs ) = @_;

  # Retrieve the identifiers of the overlapping regions.  They belong
  # to the 'to' coordinate system, not to $frm_cs.
  my @seq_ids =
    $self->list_ids( $frm_seq_region, $frm_start, $frm_end, $frm_cs );

  # Convert the identifiers to seq_regions.  The conversion takes only
  # the IDs, so the target coordinate system need not be worked out.
  return @{ $self->adaptor()->seq_ids_to_regions( \@seq_ids ) };
}
553 | |
554 #sub list_ids { | |
555 # throw('Incorrect number of arguments.') if(@_ != 5); | |
556 # my($self, $frm_seq_region, $frm_start, $frm_end, $frm_cs) = @_; | |
557 | |
558 # #retrieve the seq_region names | |
559 # my @seq_regs = | |
560 # $self->list_seq_regions($frm_seq_region,$frm_start,$frm_end,$frm_cs); | |
561 | |
562 # #The seq_regions are from the 'to' coordinate system not the | |
563 # #from coordinate system we used to obtain them | |
564 # my $to_cs; | |
565 # if($frm_cs->equals($self->assembled_CoordSystem())) { | |
566 # $to_cs = $self->component_CoordSystem(); | |
567 # } else { | |
568 # $to_cs = $self->assembled_CoordSystem(); | |
569 # } | |
570 | |
571 # #convert them to ids | |
572 # return @{$self->adaptor()->seq_regions_to_ids($to_cs, \@seq_regs)}; | |
573 #} | |
574 | |
575 =head2 have_registered_component | |
576 | |
577 Arg [1] : string $cmp_seq_region | |
578 The name of the sequence region to check for | |
579 registration. | |
580 Example : if ( $asm_mapper->have_registered_component('AL240214.1') ) {} | |
581 Description: Returns true if a given component region has | |
582 been registered with this assembly mapper. This | |
583 should only be called by this class or the | |
584 AssemblyMapperAdaptor. In other words, do not use | |
585 this method unless you really know what you are | |
586 doing. | |
587 Return type: Boolean (0 or 1) | |
588 Exceptions : Throws on incorrect arguments. | |
589 Caller : Internal, AssemblyMapperAdaptor | |
590 Status : Stable | |
591 | |
592 =cut | |
593 | |
# Returns 1 if the given component region has an entry in this mapper's
# component registration table, 0 otherwise.  Only the presence of the
# key is tested, not its value.  Throws if the argument is undefined.
sub have_registered_component {
  my $self           = shift;
  my $cmp_seq_region = shift;

  throw('cmp_seq_region argument is required')
    unless defined $cmp_seq_region;

  return ( exists( $self->{'cmp_register'}->{$cmp_seq_region} ) ? 1 : 0 );
}
607 | |
608 =head2 have_registered_assembled | |
609 | |
610 Arg [1] : string $asm_seq_region | |
611 The name of the sequence region to check for | |
612 registration. | |
613 Arg [2] : int $chunk_id | |
614 The chunk number of the provided seq_region to check | |
615 for registration. | |
616 Example : if ( $asm_mapper->have_registered_assembled( 'X', 9 ) ) { } | |
617 Description: Returns true if a given assembled region chunk | |
618 has been registered with this assembly mapper. | |
619 This should only be called by this class or the | |
620 AssemblyMapperAdaptor. In other words, do not use | |
621 this method unless you really know what you are | |
622 doing. | |
623 Return type: Boolean (0 or 1) | |
624 Exceptions : Throws on incorrect arguments | |
625 Caller : Internal, AssemblyMapperAdaptor | |
626 Status : Stable | |
627 | |
628 =cut | |
629 | |
# Returns 1 if the given chunk of the given assembled region has an
# entry in this mapper's assembled registration table, 0 otherwise.
# Throws if either argument is undefined.
sub have_registered_assembled {
  my $self = shift;
  my ( $asm_seq_region, $chunk_id ) = @_;

  throw('asm_seq_region argument is required')
    unless defined $asm_seq_region;
  throw('chunk_id is required')
    unless defined $chunk_id;

  my $is_registered =
    exists( $self->{'asm_register'}->{$asm_seq_region}->{$chunk_id} );

  return ( $is_registered ? 1 : 0 );
}
648 | |
649 | |
650 =head2 register_component | |
651 | |
652 Arg [1] : integer $cmp_seq_region | |
653 The dbID of the component sequence region to | |
654 register. | |
655 Example : $asm_mapper->register_component('AL312341.1'); | |
656 Description: Flags a given component sequence region as registered | |
657 in this assembly mapper. This should only be called | |
658 by this class or the AssemblyMapperAdaptor. | |
659 Return type: None | |
660 Exceptions : Throws on incorrect arguments | |
661 Caller : Internal, AssemblyMapperAdaptor | |
662 Status : Stable | |
663 | |
664 =cut | |
665 | |
# Marks the given component region as registered in this mapper by
# storing 1 under its key.  Throws if the argument is undefined.
sub register_component {
  my $self = shift;
  my ($cmp_seq_region) = @_;

  defined($cmp_seq_region)
    or throw('cmp_seq_region argument is required');

  $self->{'cmp_register'}->{$cmp_seq_region} = 1;
}
675 | |
676 =head2 register_assembled | |
677 | |
678 Arg [1] : integer $asm_seq_region | |
679 The dbID of the sequence region to register. | |
680 Arg [2] : int $chunk_id | |
681 The chunk number of the provided seq_region to register. | |
682 Example : $asm_mapper->register_assembled( 'X', 4 ); | |
683 Description: Flags a given assembled region as registered in this | |
684 assembly mapper. This should only be called by this | |
685 class or the AssemblyMapperAdaptor. Do not call this | |
686 method unless you really know what you are doing. | |
687 Return type: None | |
688 Exceptions : Throws on incorrect arguments | |
689 Caller : Internal, AssemblyMapperAdaptor | |
690 Status : Stable | |
691 | |
692 =cut | |
693 | |
# Marks one chunk of an assembled region as registered in this mapper
# by storing 1 under {asm_seq_region}{chunk_id}.
#
# Arg [1]    : $asm_seq_region - key of the assembled region
# Arg [2]    : $chunk_id       - chunk number within that region
# Exceptions : Throws if either argument is undefined.
sub register_assembled {
  my ( $self, $asm_seq_region, $chunk_id ) = @_;

  if ( !defined($asm_seq_region) ) {
    throw('asm_seq_region argument is required');
  }
  if ( !defined($chunk_id) ) {
    # Message previously read "srgument".
    throw('chunk_id argument is required');
  }

  $self->{'asm_register'}->{$asm_seq_region}->{$chunk_id} = 1;
}
706 | |
707 =head2 mapper | |
708 | |
709 Arg [1] : None | |
710 Example : $mapper = $asm_mapper->mapper(); | |
711 Description: Retrieves the internal mapper used by this Assembly | |
712 Mapper. This is unlikely to be useful unless you | |
713 _really_ know what you are doing. | |
714 Return type: Bio::EnsEMBL::Mapper | |
715 Exceptions : None | |
716 Caller : Internal, AssemblyMapperAdaptor | |
717 Status : Stable | |
718 | |
719 =cut | |
720 | |
# Read-only accessor for the internal Bio::EnsEMBL::Mapper created in
# the constructor.
sub mapper {
  my $self = shift;

  return $self->{'mapper'};
}
726 | |
727 =head2 assembled_CoordSystem | |
728 | |
729 Arg [1] : None | |
730 Example : $cs = $asm_mapper->assembled_CoordSystem(); | |
731 Description: Retrieves the assembled CoordSystem from this | |
732 assembly mapper. | |
733 Return type: Bio::EnsEMBL::CoordSystem | |
734 Exceptions : None | |
735 Caller : Internal, AssemblyMapperAdaptor | |
736 Status : Stable | |
737 | |
738 =cut | |
739 | |
# Read-only accessor for the assembled coordinate system, i.e. the
# first coordinate system handed to the constructor.
sub assembled_CoordSystem {
  my $self = shift;

  return $self->{'asm_cs'};
}
745 | |
746 =head2 component_CoordSystem | |
747 | |
748 Arg [1] : None | |
749 Example : $cs = $asm_mapper->component_CoordSystem(); | |
750 Description: Retrieves the component CoordSystem from this | |
751 assembly mapper. | |
752 Return type: Bio::EnsEMBL::CoordSystem | |
753 Exceptions : None | |
754 Caller : Internal, AssemblyMapperAdaptor | |
755 Status : Stable | |
756 | |
757 =cut | |
758 | |
# Read-only accessor for the component coordinate system, i.e. the
# second coordinate system handed to the constructor.
sub component_CoordSystem {
  my $self = shift;

  return $self->{'cmp_cs'};
}
764 | |
765 =head2 adaptor | |
766 | |
767 Arg [1] : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor $adaptor | |
768 Description: Getter/setter for this object's database adaptor. | |
769 Returntype : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor | |
770 Exceptions : None | |
771 Caller : General | |
772 Status : Stable | |
773 | |
774 =cut | |
775 | |
# Getter/setter for this object's adaptor.  A defined argument is
# stored and the stored reference is then weakened, so this object does
# not keep the adaptor alive.  Always returns the stored value.
sub adaptor {
  my $self  = shift;
  my $value = shift;

  if ( defined $value ) {
    $self->{'adaptor'} = $value;
    weaken( $self->{'adaptor'} );
  }

  return $self->{'adaptor'};
}
785 | |
786 =head2 in_assembly | |
787 | |
788 Description: DEPRECATED, use map() or list_ids() instead. | |
789 | |
790 =cut | |
791 | |
# DEPRECATED, use map() or list_ids() instead.
#
# Reports whether list_ids() finds anything for $object's region when
# mapping between $object's coordinate system and the top level.
#
# Arg [1]    : $object - must provide coord_system(), seq_region(),
#              start() and end()
# Returntype : Boolean (true if at least one ID was listed)
sub in_assembly {
  my ( $self, $object ) = @_;

  deprecate('Use map() or list_ids() instead.');

  # This class has no db() method of its own, so the database handle is
  # reached through the adaptor.
  # NOTE(review): assumes the adaptor exposes db() - confirm.
  my $csa = $self->adaptor()->db()->get_CoordSystemAdaptor();

  my $top_level = $csa->fetch_top_level();

  my $asm_mapper =
    $self->adaptor()->fetch_by_CoordSystems( $object->coord_system(),
                                             $top_level );

  my @list = $asm_mapper->list_ids( $object->seq_region(),
                                    $object->start(),
                                    $object->end(),
                                    $object->coord_system() );

  return ( @list > 0 );
}
812 | |
813 =head2 map_coordinates_to_assembly | |
814 | |
815 Description: DEPRECATED, use map() instead. | |
816 | |
817 =cut | |
818 | |
# DEPRECATED, use map() instead.
#
# Forwards to map() with the component coordinate system as the FROM
# system.  Arguments after $self: $contig_id, $start, $end, $strand.
sub map_coordinates_to_assembly {
  my ( $self, $contig_id, $start, $end, $strand ) = @_;

  deprecate('Use map() instead.');

  # Not sure if contig_id is seq_region_id or name...
  # The previously called contig_CoordSystem() does not exist in this
  # class; component_CoordSystem() is the accessor it provides.
  return
    $self->map( $contig_id, $start, $end, $strand,
                $self->component_CoordSystem() );

}
830 | |
831 =head2 fast_to_assembly | |
832 | |
833 Description: DEPRECATED, use map() instead. | |
834 | |
835 =cut | |
836 | |
# DEPRECATED, use map() instead.
#
# Forwards to map() with the component coordinate system as the FROM
# system.  Arguments after $self: $contig_id, $start, $end, $strand.
sub fast_to_assembly {
  my ( $self, $contig_id, $start, $end, $strand ) = @_;

  deprecate('Use map() instead.');

  # Not sure if contig_id is seq_region_id or name...
  # The previously called contig_CoordSystem() does not exist in this
  # class; component_CoordSystem() is the accessor it provides.
  return
    $self->map( $contig_id, $start, $end, $strand,
                $self->component_CoordSystem() );
}
847 | |
848 =head2 map_coordinates_to_rawcontig | |
849 | |
850 Description: DEPRECATED, use map() instead. | |
851 | |
852 =cut | |
853 | |
# DEPRECATED, use map() instead.
#
# Forwards to map() with the assembled coordinate system as the FROM
# system.  Arguments after $self: $chr_name, $start, $end, $strand.
sub map_coordinates_to_rawcontig {
  my $self = shift;
  my ( $chr_name, $start, $end, $strand ) = @_;

  deprecate('Use map() instead.');

  my $asm_cs = $self->assembled_CoordSystem();

  return $self->map( $chr_name, $start, $end, $strand, $asm_cs );
}
863 | |
864 =head2 list_contig_ids | |
865 | |
866 Description: DEPRECATED, use list_ids() instead. | |
867 | |
868 =cut | |
869 | |
# DEPRECATED, use list_ids() instead.
#
# Forwards to list_ids() with the assembled coordinate system as the
# FROM system.  Arguments after $self: $chr_name, $start, $end.
sub list_contig_ids {
  my $self = shift;
  my ( $chr_name, $start, $end ) = @_;

  deprecate('Use list_ids() instead.');

  my $asm_cs = $self->assembled_CoordSystem();

  return $self->list_ids( $chr_name, $start, $end, $asm_cs );
}
879 | |
880 1; |