Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/AssemblyMapperAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 use Bio::EnsEMBL::Registry; | |
28 | |
29 Bio::EnsEMBL::Registry->load_registry_from_db( | |
30 -host => 'ensembldb.ensembl.org', | |
31 -user => 'anonymous' | |
32 ); | |
33 | |
34 $asma = Bio::EnsEMBL::Registry->get_adaptor( "human", "core", | |
35 "assemblymapper" ); | |
36 | |
37 $csa = Bio::EnsEMBL::Registry->get_adaptor( "human", "core", | |
38 "coordsystem" ); | |
39 | |
40 my $chr33_cs = $csa->fetch_by_name( 'chromosome', 'NCBI33' ); | |
41 my $chr34_cs = $csa->fetch_by_name( 'chromosome', 'NCBI34' ); | |
42 my $ctg_cs = $csa->fetch_by_name('contig'); | |
43 my $clone_cs = $csa->fetch_by_name('clone'); | |
44 | |
45 my $chr_ctg_mapper = | |
46 $asma->fetch_by_CoordSystems( $chr33_cs, $ctg_cs ); | |
47 | |
48 my $ncbi33_ncbi34_mapper = | |
49 $asm_adptr->fetch_by_CoordSystems( $chr33, $chr34 ); | |
50 | |
51 my $ctg_clone_mapper = | |
52 $asm_adptr->fetch_by_CoordSystems( $ctg_cs, $clone_cs ); | |
53 | |
54 | |
55 =head1 DESCRIPTION | |
56 | |
57 Adaptor for handling Assembly mappers. This is a I<Singleton> class. | |
58 ie: There is only one per database (C<DBAdaptor>). | |
59 | |
60 This is used to retrieve mappers between any two coordinate systems | |
61 whose makeup is described by the assembly table. Currently one step | |
62 (explicit) and two step (implicit) pairwise mapping is supported. In | |
63 one-step mapping an explicit relationship between the coordinate systems | |
64 is defined in the assembly table. In two-step 'chained' mapping no | |
65 explicit mapping is present but the coordinate systems must share a | |
66 common mapping to an intermediate coordinate system. | |
67 | |
68 =head1 METHODS | |
69 | |
70 =cut | |
71 | |
72 package Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor; | |
73 use vars qw(@ISA); | |
74 use strict; | |
75 | |
76 use Bio::EnsEMBL::DBSQL::BaseAdaptor; | |
77 use Bio::EnsEMBL::AssemblyMapper; | |
78 use Bio::EnsEMBL::ChainedAssemblyMapper; | |
79 use Bio::EnsEMBL::TopLevelAssemblyMapper; | |
80 | |
81 use Bio::EnsEMBL::Utils::Cache; #CPAN LRU cache | |
82 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning stack_trace_dump); | |
83 #use Bio::EnsEMBL::Utils::Exception qw(deprecate throw); | |
84 use Bio::EnsEMBL::Utils::SeqRegionCache; | |
85 | |
86 use integer; #do proper arithmetic bitshifts | |
87 | |
88 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor); | |
89 | |
90 | |
91 my $CHUNKFACTOR = 20; # 2^20 = approx. 10^6 | |
92 | |
93 =head2 new | |
94 | |
95 Arg [1] : Bio::EnsEMBL::DBAdaptor $dbadaptor the adaptor for | |
96 the database this assembly mapper is using. | |
97 Example : my $asma = new Bio::EnsEMBL::AssemblyMapperAdaptor($dbadaptor); | |
98 Description: Creates a new AssemblyMapperAdaptor object | |
99 Returntype : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor | |
100 Exceptions : none | |
101 Caller : Bio::EnsEMBL::DBSQL::DBAdaptor | |
102 Status : Stable | |
103 | |
104 =cut | |
105 | |
106 sub new { | |
107 my($class, $dbadaptor) = @_; | |
108 | |
109 my $self = $class->SUPER::new($dbadaptor); | |
110 | |
111 $self->{'_asm_mapper_cache'} = {}; | |
112 | |
113 # use a shared cache (for this database) that contains info about | |
114 # seq regions | |
115 my $seq_region_cache = $self->db->get_SeqRegionCache(); | |
116 $self->{'sr_name_cache'} = $seq_region_cache->{'name_cache'}; | |
117 $self->{'sr_id_cache'} = $seq_region_cache->{'id_cache'}; | |
118 | |
119 return $self; | |
120 } | |
121 | |
122 | |
123 | |
124 =head2 cache_seq_ids_with_mult_assemblys | |
125 | |
126 Example : $self->adaptor->cache_seq_ids_with_mult_assemblys(); | |
127 Description: Creates a hash of the component seq region ids that | |
128 map to more than one assembly from the assembly table. | |
129 Retruntype : none | |
130 Exceptions : none | |
131 Caller : AssemblyMapper, ChainedAssemblyMapper | |
132 Status : At Risk | |
133 | |
134 =cut | |
135 | |
136 sub cache_seq_ids_with_mult_assemblys{ | |
137 my $self = shift; | |
138 my %multis; | |
139 | |
140 return if (defined($self->{'multi_seq_ids'})); | |
141 | |
142 $self->{'multi_seq_ids'} = {}; | |
143 | |
144 my $sql = qq( | |
145 SELECT sra.seq_region_id | |
146 FROM seq_region_attrib sra, | |
147 attrib_type at, | |
148 seq_region sr, | |
149 coord_system cs | |
150 WHERE sra.attrib_type_id = at.attrib_type_id | |
151 AND code = "MultAssem" | |
152 AND sra.seq_region_id = sr.seq_region_id | |
153 AND sr.coord_system_id = cs.coord_system_id | |
154 AND cs.species_id = ?); | |
155 | |
156 my $sth = $self->prepare($sql); | |
157 | |
158 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER ); | |
159 | |
160 $sth->execute(); | |
161 | |
162 my ($seq_region_id); | |
163 | |
164 $sth->bind_columns(\$seq_region_id); | |
165 | |
166 while($sth->fetch()) { | |
167 $self->{'multi_seq_ids'}->{$seq_region_id} = 1; | |
168 } | |
169 $sth->finish; | |
170 } | |
171 | |
172 | |
173 =head2 fetch_by_CoordSystems | |
174 | |
175 Arg [1] : Bio::EnsEMBL::CoordSystem $cs1 | |
176 One of the coordinate systems to retrieve the mapper | |
177 between | |
178 Arg [2] : Bio::EnsEMBL::CoordSystem $cs2 | |
179 The other coordinate system to map between | |
180 Description: Retrieves an Assembly mapper for two coordinate | |
181 systems whose relationship is described in the | |
182 assembly table. | |
183 | |
184 The ordering of the coodinate systems is arbitrary. | |
185 The following two statements are equivalent: | |
186 $mapper = $asma->fetch_by_CoordSystems($cs1,$cs2); | |
187 $mapper = $asma->fetch_by_CoordSystems($cs2,$cs1); | |
188 Returntype : Bio::EnsEMBL::AssemblyMapper | |
189 Exceptions : wrong argument types | |
190 Caller : general | |
191 Status : Stable | |
192 | |
193 =cut | |
194 | |
195 sub fetch_by_CoordSystems { | |
196 my $self = shift; | |
197 my $cs1 = shift; | |
198 my $cs2 = shift; | |
199 | |
200 if(!ref($cs1) || !$cs1->isa('Bio::EnsEMBL::CoordSystem')) { | |
201 throw("cs1 argument must be a Bio::EnsEMBL::CoordSystem."); | |
202 } | |
203 if(!ref($cs2) || !$cs2->isa('Bio::EnsEMBL::CoordSystem')) { | |
204 throw("cs2 argument must be a Bio::EnsEMBL::CoordSystem."); | |
205 } | |
206 | |
207 # if($cs1->equals($cs2)) { | |
208 # throw("Cannot create mapper between same coord systems: " . | |
209 # $cs1->name . " " . $cs1->version); | |
210 # } | |
211 | |
212 if($cs1->is_top_level()) { | |
213 return Bio::EnsEMBL::TopLevelAssemblyMapper->new($self, $cs1, $cs2); | |
214 } | |
215 if($cs2->is_top_level()) { | |
216 return Bio::EnsEMBL::TopLevelAssemblyMapper->new($self, $cs2, $cs1); | |
217 } | |
218 | |
219 my $csa = $self->db->get_CoordSystemAdaptor(); | |
220 | |
221 #retrieve the shortest possible mapping path between these systems | |
222 my @mapping_path = @{$csa->get_mapping_path($cs1,$cs2)}; | |
223 | |
224 if(!@mapping_path) { | |
225 | |
226 # It is perfectly fine not to have a mapping. No warning needed really | |
227 # Just check the return code!! | |
228 | |
229 # warning( | |
230 # "There is no mapping defined between these coord systems:\n" . | |
231 # $cs1->name() . " " . $cs1->version() . " and " . $cs2->name() . " " . | |
232 # $cs2->version() | |
233 # ); | |
234 return undef; | |
235 } | |
236 | |
237 my $key = join(':', map({defined($_)?$_->dbID():"-"} @mapping_path)); | |
238 | |
239 my $asm_mapper = $self->{'_asm_mapper_cache'}->{$key}; | |
240 | |
241 return $asm_mapper if($asm_mapper); | |
242 | |
243 if(@mapping_path == 1) { | |
244 throw("Incorrect mapping path defined in meta table. " . | |
245 "0 step mapping encountered between:\n" . | |
246 $cs1->name() . " " . $cs1->version() . " and " . $cs2->name . " " . | |
247 $cs2->version()); | |
248 } | |
249 | |
250 if(@mapping_path == 2) { | |
251 #1 step regular mapping | |
252 $asm_mapper = Bio::EnsEMBL::AssemblyMapper->new($self, @mapping_path); | |
253 | |
254 # If you want multiple pieces on two seqRegions to map to each other | |
255 # you need to make an assembly.mapping entry that is seperated with a # | |
256 # instead of an |. | |
257 | |
258 $self->{'_asm_mapper_cache'}->{$key} = $asm_mapper; | |
259 return $asm_mapper; | |
260 } | |
261 | |
262 if(@mapping_path == 3) { | |
263 #two step chained mapping | |
264 $asm_mapper = Bio::EnsEMBL::ChainedAssemblyMapper->new($self,@mapping_path); | |
265 #in multi-step mapping it is possible get requests with the | |
266 #coordinate system ordering reversed since both mappings directions | |
267 #cache on both orderings just in case | |
268 #e.g. chr <-> contig <-> clone and clone <-> contig <-> chr | |
269 | |
270 $self->{'_asm_mapper_cache'}->{$key} = $asm_mapper; | |
271 $key = join(':', map({defined($_)?$_->dbID():"-"} reverse(@mapping_path))); | |
272 $self->{'_asm_mapper_cache'}->{$key} = $asm_mapper; | |
273 return $asm_mapper; | |
274 } | |
275 | |
276 throw("Only 1 and 2 step coordinate system mapping is currently\n" . | |
277 "supported. Mapping between " . | |
278 $cs1->name() . " " . $cs1->version() . " and " . | |
279 $cs2->name() . " " . $cs2->version() . | |
280 " requires ". (scalar(@mapping_path)-1) . " steps."); | |
281 } | |
282 | |
283 | |
284 | |
285 =head2 register_assembled | |
286 | |
287 Arg [1] : Bio::EnsEMBL::AssemblyMapper $asm_mapper | |
288 A valid AssemblyMapper object | |
289 Arg [2] : integer $asm_seq_region | |
290 The dbID of the seq_region to be registered | |
291 Arg [3] : int $asm_start | |
292 The start of the region to be registered | |
293 Arg [4] : int $asm_end | |
294 The end of the region to be registered | |
295 Description: Declares an assembled region to the AssemblyMapper. | |
296 This extracts the relevant data from the assembly | |
297 table and stores it in Mapper internal to the $asm_mapper. | |
298 It therefore must be called before any mapping is | |
299 attempted on that region. Otherwise only gaps will | |
300 be returned. Note that the AssemblyMapper automatically | |
301 calls this method when the need arises. | |
302 Returntype : none | |
303 Exceptions : throw if the seq_region to be registered does not exist | |
304 or if it associated with multiple assembled pieces (bad data | |
305 in assembly table) | |
306 Caller : Bio::EnsEMBL::AssemblyMapper | |
307 Status : Stable | |
308 | |
309 =cut | |
310 | |
311 | |
312 sub register_assembled { | |
313 my $self = shift; | |
314 my $asm_mapper = shift; | |
315 my $asm_seq_region = shift; | |
316 my $asm_start = shift; | |
317 my $asm_end = shift; | |
318 | |
319 if(!ref($asm_mapper) || !$asm_mapper->isa('Bio::EnsEMBL::AssemblyMapper')) { | |
320 throw("Bio::EnsEMBL::AssemblyMapper argument expected"); | |
321 } | |
322 | |
323 throw("asm_seq_region argument expected") if(!defined($asm_seq_region)); | |
324 throw("asm_start argument expected") if(!defined($asm_start)); | |
325 throw("asm_end argument expected") if(!defined($asm_end)); | |
326 | |
327 my $asm_cs_id = $asm_mapper->assembled_CoordSystem->dbID(); | |
328 my $cmp_cs_id = $asm_mapper->component_CoordSystem->dbID(); | |
329 | |
330 #split up the region to be registered into fixed chunks | |
331 #this allows us to keep track of regions that have already been | |
332 #registered and also works under the assumption that if a small region | |
333 #is requested it is likely that other requests will be made in the | |
334 #vicinity (the minimum size registered the chunksize (2^chunkfactor) | |
335 | |
336 my @chunk_regions; | |
337 #determine span of chunks | |
338 #bitwise shift right is fast and easy integer division | |
339 | |
340 my($start_chunk, $end_chunk); | |
341 | |
342 $start_chunk = $asm_start >> $CHUNKFACTOR; | |
343 $end_chunk = $asm_end >> $CHUNKFACTOR; | |
344 | |
345 # inserts have start = end + 1, on boundary condition start_chunk | |
346 # could be less than end chunk | |
347 if($asm_start == $asm_end + 1) { | |
348 ($start_chunk, $end_chunk) = ($end_chunk, $start_chunk); | |
349 } | |
350 | |
351 #find regions of continuous unregistered chunks | |
352 my $i; | |
353 my ($begin_chunk_region,$end_chunk_region); | |
354 for ($i = $start_chunk; $i <= $end_chunk; $i++) { | |
355 if($asm_mapper->have_registered_assembled($asm_seq_region, $i)) { | |
356 if(defined($begin_chunk_region)) { | |
357 #this is the end of an unregistered region. | |
358 my $region = [($begin_chunk_region << $CHUNKFACTOR), | |
359 (($end_chunk_region+1) << $CHUNKFACTOR)-1]; | |
360 push @chunk_regions, $region; | |
361 $begin_chunk_region = $end_chunk_region = undef; | |
362 } | |
363 } else { | |
364 $begin_chunk_region = $i if(!defined($begin_chunk_region)); | |
365 $end_chunk_region = $i+1; | |
366 $asm_mapper->register_assembled($asm_seq_region,$i); | |
367 } | |
368 } | |
369 | |
370 #the last part may have been an unregistered region too | |
371 if(defined($begin_chunk_region)) { | |
372 my $region = [($begin_chunk_region << $CHUNKFACTOR), | |
373 (($end_chunk_region+1) << $CHUNKFACTOR) -1]; | |
374 push @chunk_regions, $region; | |
375 } | |
376 | |
377 return if(!@chunk_regions); | |
378 | |
379 # keep the Mapper to a reasonable size | |
380 if( $asm_mapper->size() > $asm_mapper->max_pair_count() ) { | |
381 $asm_mapper->flush(); | |
382 #we now have to go and register the entire requested region since we | |
383 #just flushed everything | |
384 | |
385 @chunk_regions = ( [ ( $start_chunk << $CHUNKFACTOR) | |
386 , (($end_chunk+1) << $CHUNKFACTOR)-1 ] ); | |
387 | |
388 for( my $i = $start_chunk; $i <= $end_chunk; $i++ ) { | |
389 $asm_mapper->register_assembled( $asm_seq_region, $i ); | |
390 } | |
391 } | |
392 | |
393 # my $asm_seq_region_id = | |
394 # $self->_seq_region_name_to_id($asm_seq_region,$asm_cs_id); | |
395 | |
396 # Retrieve the description of how the assembled region is made from | |
397 # component regions for each of the continuous blocks of unregistered, | |
398 # chunked regions | |
399 | |
400 my $q = qq{ | |
401 SELECT | |
402 asm.cmp_start, | |
403 asm.cmp_end, | |
404 asm.cmp_seq_region_id, | |
405 sr.name, | |
406 sr.length, | |
407 asm.ori, | |
408 asm.asm_start, | |
409 asm.asm_end | |
410 FROM | |
411 assembly asm, seq_region sr | |
412 WHERE | |
413 asm.asm_seq_region_id = ? AND | |
414 ? <= asm.asm_end AND | |
415 ? >= asm.asm_start AND | |
416 asm.cmp_seq_region_id = sr.seq_region_id AND | |
417 sr.coord_system_id = ? | |
418 }; | |
419 | |
420 my $sth = $self->prepare($q); | |
421 | |
422 foreach my $region (@chunk_regions) { | |
423 my($region_start, $region_end) = @$region; | |
424 $sth->bind_param(1,$asm_seq_region,SQL_INTEGER); | |
425 $sth->bind_param(2,$region_start,SQL_INTEGER); | |
426 $sth->bind_param(3,$region_end,SQL_INTEGER); | |
427 $sth->bind_param(4,$cmp_cs_id,SQL_INTEGER); | |
428 | |
429 $sth->execute(); | |
430 | |
431 my($cmp_start, $cmp_end, $cmp_seq_region_id, $cmp_seq_region, $ori, | |
432 $asm_start, $asm_end, $cmp_seq_region_length); | |
433 | |
434 $sth->bind_columns(\$cmp_start, \$cmp_end, \$cmp_seq_region_id, | |
435 \$cmp_seq_region, \$cmp_seq_region_length, \$ori, | |
436 \$asm_start, \$asm_end); | |
437 | |
438 # | |
439 # Load the unregistered regions of the mapper | |
440 # | |
441 while($sth->fetch()) { | |
442 next if($asm_mapper->have_registered_component($cmp_seq_region_id) | |
443 and !defined($self->{'multi_seq_ids'}->{$cmp_seq_region_id})); | |
444 $asm_mapper->register_component($cmp_seq_region_id); | |
445 $asm_mapper->mapper->add_map_coordinates( | |
446 $asm_seq_region, $asm_start, $asm_end, | |
447 $ori, | |
448 $cmp_seq_region_id, $cmp_start, $cmp_end); | |
449 | |
450 my $arr = [ $cmp_seq_region_id, $cmp_seq_region, | |
451 $cmp_cs_id, $cmp_seq_region_length ]; | |
452 | |
453 $self->{'sr_name_cache'}->{"$cmp_seq_region:$cmp_cs_id"} = $arr; | |
454 $self->{'sr_id_cache'}->{"$cmp_seq_region_id"} = $arr; | |
455 } | |
456 } | |
457 | |
458 $sth->finish(); | |
459 } | |
460 | |
461 | |
462 | |
463 sub _seq_region_name_to_id { | |
464 my $self = shift; | |
465 my $sr_name = shift; | |
466 my $cs_id = shift; | |
467 | |
468 ($sr_name && $cs_id) || throw('seq_region_name and coord_system_id args ' . | |
469 'are required'); | |
470 | |
471 my $arr = $self->{'sr_name_cache'}->{"$sr_name:$cs_id"}; | |
472 if( $arr ) { | |
473 return $arr->[0]; | |
474 } | |
475 | |
476 # Get the seq_region_id via the name. This would be quicker if we just | |
477 # used internal ids instead but stored but then we lose the ability | |
478 # the transform accross databases with different internal ids | |
479 | |
480 my $sth = $self->prepare("SELECT seq_region_id, length " . | |
481 "FROM seq_region " . | |
482 "WHERE name = ? AND coord_system_id = ?"); | |
483 | |
484 $sth->bind_param(1,$sr_name,SQL_VARCHAR); | |
485 $sth->bind_param(2,$cs_id,SQL_INTEGER); | |
486 $sth->execute(); | |
487 | |
488 if(!$sth->rows() == 1) { | |
489 throw("Ambiguous or non-existant seq_region [$sr_name] " . | |
490 "in coord system $cs_id"); | |
491 } | |
492 | |
493 my ($sr_id, $sr_length) = $sth->fetchrow_array(); | |
494 $sth->finish(); | |
495 | |
496 $arr = [ $sr_id, $sr_name, $cs_id, $sr_length ]; | |
497 | |
498 $self->{'sr_name_cache'}->{"$sr_name:$cs_id"} = $arr; | |
499 $self->{'sr_id_cache'}->{"$sr_id"} = $arr; | |
500 | |
501 return $sr_id; | |
502 } | |
503 | |
504 sub _seq_region_id_to_name { | |
505 my $self = shift; | |
506 my $sr_id = shift; | |
507 | |
508 ($sr_id) || throw('seq_region_is required'); | |
509 | |
510 my $arr = $self->{'sr_id_cache'}->{"$sr_id"}; | |
511 if( $arr ) { | |
512 return $arr->[1]; | |
513 } | |
514 | |
515 # Get the seq_region name via the id. This would be quicker if we just | |
516 # used internal ids instead but stored but then we lose the ability | |
517 # the transform accross databases with different internal ids | |
518 | |
519 my $sth = $self->prepare("SELECT name, length ,coord_system_id " . | |
520 "FROM seq_region " . | |
521 "WHERE seq_region_id = ? "); | |
522 | |
523 $sth->bind_param(1,$sr_id,SQL_INTEGER); | |
524 $sth->execute(); | |
525 | |
526 if(!$sth->rows() == 1) { | |
527 throw("non-existant seq_region [$sr_id]"); | |
528 } | |
529 | |
530 my ($sr_name, $sr_length, $cs_id) = $sth->fetchrow_array(); | |
531 $sth->finish(); | |
532 | |
533 $arr = [ $sr_id, $sr_name, $cs_id, $sr_length ]; | |
534 | |
535 $self->{'sr_name_cache'}->{"$sr_name:$cs_id"} = $arr; | |
536 $self->{'sr_id_cache'}->{"$sr_id"} = $arr; | |
537 | |
538 return $sr_name; | |
539 } | |
540 | |
541 | |
542 =head2 register_component | |
543 | |
544 Arg [1] : Bio::EnsEMBL::AssemblyMapper $asm_mapper | |
545 A valid AssemblyMapper object | |
546 Arg [2] : integer $cmp_seq_region | |
547 The dbID of the seq_region to be registered | |
548 Description: Declares a component region to the AssemblyMapper. | |
549 This extracts the relevant data from the assembly | |
550 table and stores it in Mapper internal to the $asm_mapper. | |
551 It therefore must be called before any mapping is | |
552 attempted on that region. Otherwise only gaps will | |
553 be returned. Note that the AssemblyMapper automatically | |
554 calls this method when the need arises. | |
555 Returntype : none | |
556 Exceptions : throw if the seq_region to be registered does not exist | |
557 or if it associated with multiple assembled pieces (bad data | |
558 in assembly table) | |
559 Caller : Bio::EnsEMBL::AssemblyMapper | |
560 Status : Stable | |
561 | |
562 =cut | |
563 | |
564 sub register_component { | |
565 my $self = shift; | |
566 my $asm_mapper = shift; | |
567 my $cmp_seq_region = shift; | |
568 | |
569 if(!ref($asm_mapper) || !$asm_mapper->isa('Bio::EnsEMBL::AssemblyMapper')) { | |
570 throw("Bio::EnsEMBL::AssemblyMapper argument expected"); | |
571 } | |
572 | |
573 if(!defined($cmp_seq_region)) { | |
574 throw("cmp_seq_region argument expected"); | |
575 } | |
576 | |
577 my $cmp_cs_id = $asm_mapper->component_CoordSystem()->dbID(); | |
578 my $asm_cs_id = $asm_mapper->assembled_CoordSystem()->dbID(); | |
579 | |
580 #do nothing if this region is already registered or special case | |
581 return if($asm_mapper->have_registered_component($cmp_seq_region) | |
582 and !defined($self->{'multi_seq_ids'}->{$cmp_seq_region})); | |
583 | |
584 # my $cmp_seq_region_id = | |
585 # $self->_seq_region_name_to_id($cmp_seq_region, $cmp_cs_id); | |
586 | |
587 # Determine what part of the assembled region this component region makes up | |
588 | |
589 my $q = qq{ | |
590 SELECT | |
591 asm.asm_start, | |
592 asm.asm_end, | |
593 asm.asm_seq_region_id, | |
594 sr.name, | |
595 sr.length | |
596 FROM | |
597 assembly asm, seq_region sr | |
598 WHERE | |
599 asm.cmp_seq_region_id = ? AND | |
600 asm.asm_seq_region_id = sr.seq_region_id AND | |
601 sr.coord_system_id = ? | |
602 }; | |
603 | |
604 my $sth = $self->prepare($q); | |
605 $sth->bind_param(1,$cmp_seq_region,SQL_INTEGER); | |
606 $sth->bind_param(2,$asm_cs_id,SQL_INTEGER); | |
607 $sth->execute(); | |
608 | |
609 if($sth->rows() == 0) { | |
610 #this component is not used in the assembled part i.e. gap | |
611 $asm_mapper->register_component($cmp_seq_region); | |
612 return; | |
613 } | |
614 | |
615 #we do not currently support components mapping to multiple assembled | |
616 # make sure that you've got the correct mapping in the meta-table : | |
617 # chromosome:EquCab2#contig ( use'#' for multiple mappings ) | |
618 # chromosome:EquCab2|contig ( use '|' delimiter for 1-1 mappings ) | |
619 # | |
620 if($sth->rows() != 1) { | |
621 throw("Multiple assembled regions for single " . | |
622 "component region cmp_seq_region_id=[$cmp_seq_region]\n". | |
623 "Remember that multiple mappings use the \#-operaator". | |
624 " in the meta-table (i.e. chromosome:EquCab2\#contig\n"); | |
625 } | |
626 | |
627 my ($asm_start, $asm_end, $asm_seq_region_id, | |
628 $asm_seq_region, $asm_seq_region_length) = $sth->fetchrow_array(); | |
629 | |
630 my $arr = [ $asm_seq_region_id, $asm_seq_region, | |
631 $asm_cs_id, $asm_seq_region_length ]; | |
632 | |
633 $self->{'sr_name_cache'}->{"$asm_seq_region:$asm_cs_id"} = $arr; | |
634 $self->{'sr_id_cache'}->{"$asm_seq_region_id"} = $arr; | |
635 | |
636 $sth->finish(); | |
637 | |
638 # Register the corresponding assembled region. This allows a us to | |
639 # register things in assembled chunks which allows us to: | |
640 # (1) Keep track of what assembled regions are registered | |
641 # (2) Use locality of reference (if they want something in same general | |
642 # region it will already be registered). | |
643 | |
644 $self->register_assembled($asm_mapper,$asm_seq_region_id,$asm_start,$asm_end); | |
645 } | |
646 | |
647 | |
648 | |
649 =head2 register_chained | |
650 | |
651 Arg [1] : Bio::EnsEMBL::ChainedAssemblyMapper $casm_mapper | |
652 The chained assembly mapper to register regions on | |
653 Arg [2] : string $from ('first' or 'last') | |
654 The direction we are registering from, and the name of the | |
655 internal mapper. | |
656 Arg [3] : string $seq_region_name | |
657 The name of the seqregion we are registering on | |
658 Arg [4] : listref $ranges | |
659 A list of ranges to register (in [$start,$end] tuples). | |
660 Arg [5] : (optional) $to_slice | |
661 Only register those on this Slice. | |
662 Description: Registers a set of ranges on a chained assembly mapper. | |
663 This function is at the heart of the chained mapping process. | |
664 It retrieves information from the assembly table and | |
665 dynamically constructs the mappings between two coordinate | |
666 systems which are 2 mapping steps apart. It does this by using | |
667 two internal mappers to load up a third mapper which is | |
668 actually used by the ChainedAssemblyMapper to perform the | |
669 mapping. | |
670 | |
671 This method must be called before any mapping is | |
672 attempted on regions of interest, otherwise only gaps will | |
673 be returned. Note that the ChainedAssemblyMapper automatically | |
674 calls this method when the need arises. | |
675 Returntype : none | |
676 Exceptions : throw if the seq_region to be registered does not exist | |
677 or if it associated with multiple assembled pieces (bad data | |
678 in assembly table) | |
679 | |
680 throw if the mapping between the coordinate systems cannot | |
681 be performed in two steps, which means there is an internal | |
682 error in the data in the meta table or in the code that creates | |
683 the mapping paths. | |
684 Caller : Bio::EnsEMBL::AssemblyMapper | |
685 Status : Stable | |
686 | |
687 =cut | |
688 | |
689 sub register_chained { | |
690 my $self = shift; | |
691 my $casm_mapper = shift; | |
692 my $from = shift; | |
693 my $seq_region_id = shift; | |
694 my $ranges = shift; | |
695 my $to_slice = shift; | |
696 | |
697 my $to_seq_region_id; | |
698 if(defined($to_slice)){ | |
699 if($casm_mapper->first_CoordSystem()->equals($casm_mapper->last_CoordSystem())){ | |
700 return $self->_register_chained_special($casm_mapper, $from, $seq_region_id, $ranges, $to_slice); | |
701 } | |
702 $to_seq_region_id = $to_slice->get_seq_region_id(); | |
703 if(!defined($to_seq_region_id)){ | |
704 die "Could not get seq_region_id for to_slice".$to_slice->seq_region_name."\n"; | |
705 } | |
706 } | |
707 | |
708 my ($start_name, $start_mid_mapper, $start_cs, $start_registry, | |
709 $end_name, $end_mid_mapper, $end_cs, $end_registry); | |
710 | |
711 if($from eq 'first') { | |
712 $start_name = 'first'; | |
713 $start_mid_mapper = $casm_mapper->first_middle_mapper(); | |
714 $start_cs = $casm_mapper->first_CoordSystem(); | |
715 $start_registry = $casm_mapper->first_registry(); | |
716 $end_mid_mapper = $casm_mapper->last_middle_mapper(); | |
717 $end_cs = $casm_mapper->last_CoordSystem(); | |
718 $end_registry = $casm_mapper->last_registry(); | |
719 $end_name = 'last'; | |
720 } elsif($from eq 'last') { | |
721 $start_name = 'last'; | |
722 $start_mid_mapper = $casm_mapper->last_middle_mapper(); | |
723 $start_cs = $casm_mapper->last_CoordSystem(); | |
724 $start_registry = $casm_mapper->last_registry(); | |
725 $end_mid_mapper = $casm_mapper->first_middle_mapper(); | |
726 $end_cs = $casm_mapper->first_CoordSystem(); | |
727 $end_registry = $casm_mapper->first_registry(); | |
728 $end_name = 'first'; | |
729 } else { | |
730 throw("Invalid from argument: [$from], must be 'first' or 'last'"); | |
731 } | |
732 | |
733 my $combined_mapper = $casm_mapper->first_last_mapper(); | |
734 my $mid_cs = $casm_mapper->middle_CoordSystem(); | |
735 my $mid_name = 'middle'; | |
736 my $csa = $self->db->get_CoordSystemAdaptor(); | |
737 | |
738 # Check for the simple case where the ChainedMapper is short | |
739 if( ! defined $mid_cs ) { | |
740 $start_mid_mapper = $combined_mapper; | |
741 } | |
742 | |
743 | |
744 ############## | |
745 # obtain the first half of the mappings and load them into the start mapper | |
746 # | |
747 | |
748 #ascertain which is component and which is actually assembled coord system | |
749 my @path; | |
750 | |
751 # check for the simple case, where the ChainedMapper is short | |
752 if( defined $mid_cs ) { | |
753 @path = @{$csa->get_mapping_path($start_cs, $mid_cs)}; | |
754 } else { | |
755 @path = @{$csa->get_mapping_path( $start_cs, $end_cs )}; | |
756 } | |
757 | |
758 if(@path != 2 && defined( $path[1] )) { | |
759 my $path = join(',', map({$_->name .' '. $_->version} @path)); | |
760 my $len = scalar(@path) - 1; | |
761 throw("Unexpected mapping path between start and intermediate " . | |
762 "coord systems (". $start_cs->name . " " . $start_cs->version . | |
763 " and " . $mid_cs->name . " " . $mid_cs->version . ")." . | |
764 "\nExpected path length 1, got $len. " . | |
765 "(path=$path)"); | |
766 } | |
767 | |
768 my $sth; | |
769 my ($asm_cs,$cmp_cs); | |
770 $asm_cs = $path[0]; | |
771 $cmp_cs = $path[-1]; | |
772 | |
773 #the SQL varies depending on whether we are coming from assembled or | |
774 #component coordinate system | |
775 | |
776 my $asm2cmp = (<<ASMCMP); | |
777 SELECT | |
778 asm.cmp_start, | |
779 asm.cmp_end, | |
780 asm.cmp_seq_region_id, | |
781 sr.name, | |
782 sr.length, | |
783 asm.ori, | |
784 asm.asm_start, | |
785 asm.asm_end | |
786 FROM | |
787 assembly asm, seq_region sr | |
788 WHERE | |
789 asm.asm_seq_region_id = ? AND | |
790 ? <= asm.asm_end AND | |
791 ? >= asm.asm_start AND | |
792 asm.cmp_seq_region_id = sr.seq_region_id AND | |
793 sr.coord_system_id = ? | |
794 ASMCMP | |
795 | |
796 | |
797 my $cmp2asm = (<<CMPASM); | |
798 SELECT | |
799 asm.asm_start, | |
800 asm.asm_end, | |
801 asm.asm_seq_region_id, | |
802 sr.name, | |
803 sr.length, | |
804 asm.ori, | |
805 asm.cmp_start, | |
806 asm.cmp_end | |
807 FROM | |
808 assembly asm, seq_region sr | |
809 WHERE | |
810 asm.cmp_seq_region_id = ? AND | |
811 ? <= asm.cmp_end AND | |
812 ? >= asm.cmp_start AND | |
813 asm.asm_seq_region_id = sr.seq_region_id AND | |
814 sr.coord_system_id = ? | |
815 CMPASM | |
816 | |
817 my $asm2cmp_sth; | |
818 my $cmp2asm_sth; | |
819 if(defined($to_slice)){ | |
820 my $to_cs = $to_slice->coord_system; | |
821 if($asm_cs->equals($to_cs)){ | |
822 $asm2cmp_sth = $self->prepare($asm2cmp); | |
823 $cmp2asm_sth = $self->prepare($cmp2asm." AND asm.asm_seq_region_id = $to_seq_region_id"); | |
824 } | |
825 elsif($cmp_cs->equals($to_cs)){ | |
826 $asm2cmp_sth = $self->prepare($asm2cmp." AND asm.cmp_seq_region_id = $to_seq_region_id"); | |
827 $cmp2asm_sth = $self->prepare($cmp2asm); | |
828 } | |
829 else{ | |
830 $asm2cmp_sth = $self->prepare($asm2cmp); | |
831 $cmp2asm_sth = $self->prepare($cmp2asm); | |
832 } | |
833 } | |
834 else{ | |
835 $asm2cmp_sth = $self->prepare($asm2cmp); | |
836 $cmp2asm_sth = $self->prepare($cmp2asm); | |
837 } | |
838 | |
839 | |
840 | |
841 $sth = ($asm_cs->equals($start_cs)) ? $asm2cmp_sth : $cmp2asm_sth; | |
842 | |
843 my $mid_cs_id; | |
844 | |
845 # check for the simple case where the ChainedMapper is short | |
846 if( defined $mid_cs ) { | |
847 $mid_cs_id = $mid_cs->dbID(); | |
848 } else { | |
849 $mid_cs_id = $end_cs->dbID(); | |
850 } | |
851 | |
852 my @mid_ranges; | |
853 my @start_ranges; | |
854 | |
855 #need to perform the query for each unregistered range | |
856 foreach my $range (@$ranges) { | |
857 my ($start, $end) = @$range; | |
858 $sth->bind_param(1,$seq_region_id,SQL_INTEGER); | |
859 $sth->bind_param(2,$start,SQL_INTEGER); | |
860 $sth->bind_param(3,$end,SQL_INTEGER); | |
861 $sth->bind_param(4,$mid_cs_id,SQL_INTEGER); | |
862 $sth->execute(); | |
863 | |
864 #load the start <-> mid mapper with the results and record the mid cs | |
865 #ranges we just added to the mapper | |
866 | |
867 my ($mid_start, $mid_end, $mid_seq_region_id, $mid_seq_region, $mid_length, | |
868 $ori, $start_start, $start_end); | |
869 | |
870 $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id, | |
871 \$mid_seq_region, \$mid_length, \$ori, \$start_start, | |
872 \$start_end); | |
873 | |
874 while($sth->fetch()) { | |
875 | |
876 if( defined $mid_cs ) { | |
877 $start_mid_mapper->add_map_coordinates | |
878 ( | |
879 $seq_region_id,$start_start, $start_end, $ori, | |
880 $mid_seq_region_id, $mid_start, $mid_end | |
881 ); | |
882 } else { | |
883 if( $from eq "first" ) { | |
884 $combined_mapper->add_map_coordinates | |
885 ( | |
886 $seq_region_id,$start_start, $start_end, $ori, | |
887 $mid_seq_region_id, $mid_start, $mid_end | |
888 ); | |
889 } else { | |
890 $combined_mapper->add_map_coordinates | |
891 ( | |
892 $mid_seq_region_id, $mid_start, $mid_end, $ori, | |
893 $seq_region_id,$start_start, $start_end | |
894 ); | |
895 } | |
896 } | |
897 | |
898 #update sr_name cache | |
899 my $arr = [ $mid_seq_region_id, $mid_seq_region, | |
900 $mid_cs_id, $mid_length ]; | |
901 | |
902 $self->{'sr_name_cache'}->{"$mid_seq_region:$mid_cs_id"} = $arr; | |
903 $self->{'sr_id_cache'}->{"$mid_seq_region_id"} = $arr; | |
904 | |
905 push @mid_ranges,[$mid_seq_region_id,$mid_seq_region, | |
906 $mid_start,$mid_end]; | |
907 push @start_ranges, [ $seq_region_id, $start_start, $start_end ]; | |
908 | |
909 #the region that we actually register may actually be larger or smaller | |
910 #than the region that we wanted to register. | |
911 #register the intersection of the region so we do not end up doing | |
912 #extra work later | |
913 | |
914 if($start_start < $start || $start_end > $end) { | |
915 $start_registry->check_and_register($seq_region_id,$start_start, | |
916 $start_end); | |
917 } | |
918 } | |
919 $sth->finish(); | |
920 } | |
921 | |
922 # in the one step case, we load the mid ranges in the | |
923 # last_registry and we are done | |
924 if( ! defined $mid_cs ) { | |
925 for my $range ( @mid_ranges ) { | |
926 $end_registry->check_and_register( $range->[0], $range->[2], | |
927 $range->[3] ); | |
928 } | |
929 | |
930 # and thats it for the simple case ... | |
931 return; | |
932 } | |
933 | |
934 | |
935 ########### | |
936 # now the second half of the mapping | |
937 # perform another query and load the mid <-> end mapper using the mid cs | |
938 # ranges | |
939 # | |
940 | |
941 #ascertain which is component and which is actually assembled coord system | |
942 @path = @{$csa->get_mapping_path($mid_cs, $end_cs)}; | |
943 if(@path == 2 || ( @path == 3 && !defined $path[1])) { | |
944 | |
945 $asm_cs = $path[0]; | |
946 $cmp_cs = $path[-1]; | |
947 } else { | |
948 my $path = join(',', map({$_->name .' '. $_->version} @path)); | |
949 my $len = scalar(@path)-1; | |
950 throw("Unexpected mapping path between intermediate and last" . | |
951 "coord systems (". $mid_cs->name . " " . $mid_cs->version . | |
952 " and " . $end_cs->name . " " . $end_cs->version . ")." . | |
953 "\nExpected path length 1, got $len. " . | |
954 "(path=$path)"); | |
955 } | |
956 | |
957 if(defined($to_slice)){ | |
958 my $to_cs = $to_slice->coord_system; | |
959 if($asm_cs->equals($to_cs)){ | |
960 $asm2cmp_sth = $self->prepare($asm2cmp); | |
961 $cmp2asm_sth = $self->prepare($cmp2asm." AND asm.asm_seq_region_id = $to_seq_region_id"); | |
962 } | |
963 elsif($cmp_cs->equals($to_cs)){ | |
964 $asm2cmp_sth = $self->prepare($asm2cmp." AND asm.cmp_seq_region_id = $to_seq_region_id"); | |
965 $cmp2asm_sth = $self->prepare($cmp2asm); | |
966 } | |
967 else{ | |
968 $asm2cmp_sth = $self->prepare($asm2cmp); | |
969 $cmp2asm_sth = $self->prepare($cmp2asm); | |
970 } | |
971 } | |
972 | |
973 $sth = ($asm_cs->equals($mid_cs)) ? $asm2cmp_sth : $cmp2asm_sth; | |
974 | |
975 my $end_cs_id = $end_cs->dbID(); | |
976 foreach my $mid_range (@mid_ranges) { | |
977 my ($mid_seq_region_id, $mid_seq_region,$start, $end) = @$mid_range; | |
978 $sth->bind_param(1,$mid_seq_region_id,SQL_INTEGER); | |
979 $sth->bind_param(2,$start,SQL_INTEGER); | |
980 $sth->bind_param(3,$end,SQL_INTEGER); | |
981 $sth->bind_param(4,$end_cs_id,SQL_INTEGER); | |
982 $sth->execute(); | |
983 | |
984 #load the end <-> mid mapper with the results and record the mid cs | |
985 #ranges we just added to the mapper | |
986 | |
987 my ($end_start, $end_end, $end_seq_region_id, $end_seq_region, $end_length, | |
988 $ori, $mid_start, $mid_end); | |
989 | |
990 $sth->bind_columns(\$end_start, \$end_end, \$end_seq_region_id, | |
991 \$end_seq_region, \$end_length, \$ori, \$mid_start, | |
992 \$mid_end); | |
993 | |
994 while($sth->fetch()) { | |
995 $end_mid_mapper->add_map_coordinates | |
996 ( | |
997 $end_seq_region_id, $end_start, $end_end, $ori, | |
998 $mid_seq_region_id, $mid_start, $mid_end | |
999 ); | |
1000 | |
1001 #update sr_name cache | |
1002 my $arr = [ $end_seq_region_id,$end_seq_region,$end_cs_id,$end_length ]; | |
1003 | |
1004 $self->{'sr_name_cache'}->{"$end_seq_region:$end_cs_id"} = $arr; | |
1005 $self->{'sr_id_cache'}->{"$end_seq_region_id"} = $arr; | |
1006 | |
1007 #register this region on the end coord system | |
1008 $end_registry->check_and_register($end_seq_region_id, $end_start, $end_end); | |
1009 } | |
1010 $sth->finish(); | |
1011 } | |
1012 | |
1013 ######### | |
1014 # Now that both halves are loaded | |
1015 # Do stepwise mapping using both of the loaded mappers to load | |
1016 # the final start <-> end mapper | |
1017 # | |
1018 | |
1019 _build_combined_mapper(\@start_ranges, $start_mid_mapper, $end_mid_mapper, | |
1020 $combined_mapper, $start_name); | |
1021 #all done! | |
1022 return; | |
1023 } | |
1024 | |
1025 | |
1026 =head2 _register_chained_special | |
1027 | |
1028 Arg [1] : Bio::EnsEMBL::ChainedAssemblyMapper $casm_mapper | |
1029 The chained assembly mapper to register regions on | |
1030 Arg [2] : string $from ('first' or 'last') | |
1031 The direction we are registering from, and the name of the | |
1032 internal mapper. | |
1033 Arg [3] : string $seq_region_name | |
1034 The name of the seqregion we are registering on | |
1035 Arg [4] : listref $ranges | |
1036 A list of ranges to register (in [$start,$end] tuples). | |
1037 Arg [5] : (optional) $to_slice | |
1038 Only register those on this Slice. | |
1039 Description: Registers a set of ranges on a chained assembly mapper. | |
1040 This function is at the heart of the chained mapping process. | |
1041 It retrieves information from the assembly table and | |
1042 dynamically constructs the mappings between two coordinate | |
1043 systems which are 2 mapping steps apart. It does this by using | |
1044 two internal mappers to load up a third mapper which is | |
1045 actually used by the ChainedAssemblyMapper to perform the | |
1046 mapping. | |
1047 | |
1048 This method must be called before any mapping is | |
1049 attempted on regions of interest, otherwise only gaps will | |
1050 be returned. Note that the ChainedAssemblyMapper automatically | |
1051 calls this method when the need arises. | |
1052 Returntype : none | |
1053 Exceptions : throw if the seq_region to be registered does not exist | |
1054 or if it associated with multiple assembled pieces (bad data | |
1055 in assembly table) | |
1056 | |
1057 throw if the mapping between the coordinate systems cannot | |
1058 be performed in two steps, which means there is an internal | |
1059 error in the data in the meta table or in the code that creates | |
1060 the mapping paths. | |
1061 Caller : Bio::EnsEMBL::AssemblyMapper | |
1062 Status : Stable | |
1063 | |
1064 =cut | |
1065 | |
1066 sub _register_chained_special { | |
1067 my $self = shift; | |
1068 my $casm_mapper = shift; | |
1069 my $from = shift; | |
1070 my $seq_region_id = shift; | |
1071 my $ranges = shift; | |
1072 my $to_slice = shift; | |
1073 my $found = 0; | |
1074 | |
1075 my $sth = $self->prepare("SELECT | |
1076 asm.cmp_start, | |
1077 asm.cmp_end, | |
1078 asm.cmp_seq_region_id, | |
1079 sr.name, | |
1080 sr.length, | |
1081 asm.ori, | |
1082 asm.asm_start, | |
1083 asm.asm_end | |
1084 FROM | |
1085 assembly asm, seq_region sr | |
1086 WHERE | |
1087 asm.asm_seq_region_id = ? AND | |
1088 ? <= asm.asm_end AND | |
1089 ? >= asm.asm_start AND | |
1090 asm.cmp_seq_region_id = sr.seq_region_id AND | |
1091 sr.coord_system_id = ? AND | |
1092 asm.cmp_seq_region_id = ?"); | |
1093 | |
1094 | |
1095 my ($start_name, $start_mid_mapper, $start_cs, $start_registry, | |
1096 $end_name, $end_mid_mapper, $end_cs, $end_registry); | |
1097 | |
1098 if($from eq 'first') { | |
1099 $start_name = 'first'; | |
1100 $start_mid_mapper = $casm_mapper->first_middle_mapper(); | |
1101 $start_cs = $casm_mapper->first_CoordSystem(); | |
1102 $start_registry = $casm_mapper->first_registry(); | |
1103 $end_mid_mapper = $casm_mapper->last_middle_mapper(); | |
1104 $end_cs = $casm_mapper->last_CoordSystem(); | |
1105 $end_registry = $casm_mapper->last_registry(); | |
1106 $end_name = 'last'; | |
1107 } elsif($from eq 'last') { | |
1108 $start_name = 'last'; | |
1109 $start_mid_mapper = $casm_mapper->last_middle_mapper(); | |
1110 $start_cs = $casm_mapper->last_CoordSystem(); | |
1111 $start_registry = $casm_mapper->last_registry(); | |
1112 $end_mid_mapper = $casm_mapper->first_middle_mapper(); | |
1113 $end_cs = $casm_mapper->first_CoordSystem(); | |
1114 $end_registry = $casm_mapper->first_registry(); | |
1115 $end_name = 'first'; | |
1116 } else { | |
1117 throw("Invalid from argument: [$from], must be 'first' or 'last'"); | |
1118 } | |
1119 | |
1120 my $combined_mapper = $casm_mapper->first_last_mapper(); | |
1121 my $mid_cs = $casm_mapper->middle_CoordSystem(); | |
1122 my $mid_name = 'middle'; | |
1123 my $csa = $self->db->get_CoordSystemAdaptor(); | |
1124 | |
1125 # Check for the simple case where the ChainedMapper is short | |
1126 if( ! defined $mid_cs ) { | |
1127 $start_mid_mapper = $combined_mapper; | |
1128 } | |
1129 | |
1130 | |
1131 my @path; | |
1132 if( defined $mid_cs ) { | |
1133 @path = @{$csa->get_mapping_path($start_cs, $mid_cs)}; | |
1134 } else { | |
1135 @path = @{$csa->get_mapping_path( $start_cs, $end_cs )}; | |
1136 } | |
1137 if( ! defined $mid_cs ) { | |
1138 $start_mid_mapper = $combined_mapper; | |
1139 } | |
1140 | |
1141 if(@path != 2 && defined( $path[1] )) { | |
1142 my $path = join(',', map({$_->name .' '. $_->version} @path)); | |
1143 my $len = scalar(@path) - 1; | |
1144 throw("Unexpected mapping path between start and intermediate " . | |
1145 "coord systems (". $start_cs->name . " " . $start_cs->version . | |
1146 " and " . $mid_cs->name . " " . $mid_cs->version . ")." . | |
1147 "\nExpected path length 1, got $len. " . | |
1148 "(path=$path)"); | |
1149 } | |
1150 | |
1151 my ($asm_cs,$cmp_cs); | |
1152 $asm_cs = $path[0]; | |
1153 $cmp_cs = $path[-1]; | |
1154 | |
1155 $combined_mapper = $casm_mapper->first_last_mapper(); | |
1156 $mid_cs = $casm_mapper->middle_CoordSystem(); | |
1157 $mid_name = 'middle'; | |
1158 $csa = $self->db->get_CoordSystemAdaptor(); | |
1159 | |
1160 my $mid_cs_id; | |
1161 | |
1162 # Check for the simple case where the ChainedMapper is short | |
1163 if ( !defined $mid_cs ) { | |
1164 $start_mid_mapper = $combined_mapper; | |
1165 } else { | |
1166 $mid_cs_id = $mid_cs->dbID(); | |
1167 } | |
1168 | |
1169 my @mid_ranges; | |
1170 my @start_ranges; | |
1171 | |
1172 my $to_cs = $to_slice->coord_system; | |
1173 foreach my $direction (1, 0){ | |
1174 my $id1; | |
1175 my $id2; | |
1176 if($direction){ | |
1177 $id1 = $seq_region_id; | |
1178 $id2 = $to_slice->get_seq_region_id(); | |
1179 } | |
1180 else{ | |
1181 $id2 = $seq_region_id; | |
1182 $id1 = $to_slice->get_seq_region_id(); | |
1183 } | |
1184 | |
1185 foreach my $range (@$ranges) { | |
1186 my ($start, $end) = @$range; | |
1187 $sth->bind_param(1,$id1,SQL_INTEGER); | |
1188 $sth->bind_param(2,$start,SQL_INTEGER); | |
1189 $sth->bind_param(3,$end,SQL_INTEGER); | |
1190 $sth->bind_param(4,$to_cs->dbID,SQL_INTEGER); | |
1191 $sth->bind_param(5,$id2,SQL_INTEGER); | |
1192 $sth->execute(); | |
1193 | |
1194 my ($mid_start, $mid_end, $mid_seq_region_id, $mid_seq_region, $mid_length, | |
1195 $ori, $start_start, $start_end); | |
1196 | |
1197 $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id, | |
1198 \$mid_seq_region, \$mid_length, \$ori, \$start_start, | |
1199 \$start_end); | |
1200 | |
1201 while($sth->fetch()) { | |
1202 $found = 1; | |
1203 | |
1204 if( defined $mid_cs ) { | |
1205 $start_mid_mapper->add_map_coordinates | |
1206 ( | |
1207 $id1,$start_start, $start_end, $ori, | |
1208 $mid_seq_region_id, $mid_start, $mid_end | |
1209 ); | |
1210 } else { | |
1211 if( $from eq "first") { | |
1212 if($direction){ | |
1213 $combined_mapper->add_map_coordinates | |
1214 ( | |
1215 $id1,$start_start, $start_end, $ori, | |
1216 $mid_seq_region_id, $mid_start, $mid_end | |
1217 ); | |
1218 } | |
1219 else{ | |
1220 $combined_mapper->add_map_coordinates | |
1221 ( | |
1222 $mid_seq_region_id, $mid_start, $mid_end, $ori, | |
1223 $id1,$start_start, $start_end | |
1224 ); | |
1225 } | |
1226 } else { | |
1227 if($direction){ | |
1228 $combined_mapper->add_map_coordinates | |
1229 ( | |
1230 $mid_seq_region_id, $mid_start, $mid_end, $ori, | |
1231 $id1,$start_start, $start_end | |
1232 ); | |
1233 } | |
1234 else{ | |
1235 $combined_mapper->add_map_coordinates | |
1236 ( | |
1237 $id1,$start_start, $start_end, $ori, | |
1238 $mid_seq_region_id, $mid_start, $mid_end | |
1239 ); | |
1240 } | |
1241 } | |
1242 } | |
1243 | |
1244 #update sr_name cache | |
1245 my $arr = [ $mid_seq_region_id, $mid_seq_region, | |
1246 $mid_cs_id, $mid_length ]; | |
1247 | |
1248 $self->{'sr_name_cache'}->{"$mid_seq_region:$mid_cs_id"} = $arr; | |
1249 $self->{'sr_id_cache'}->{"$mid_seq_region_id"} = $arr; | |
1250 | |
1251 push @mid_ranges,[$mid_seq_region_id,$mid_seq_region, | |
1252 $mid_start,$mid_end]; | |
1253 push @start_ranges, [ $id1, $start_start, $start_end ]; | |
1254 | |
1255 #the region that we actually register may actually be larger or smaller | |
1256 #than the region that we wanted to register. | |
1257 #register the intersection of the region so we do not end up doing | |
1258 #extra work later | |
1259 | |
1260 if($start_start < $start || $start_end > $end) { | |
1261 $start_registry->check_and_register($id1,$start_start, | |
1262 $start_end); | |
1263 } | |
1264 } | |
1265 $sth->finish(); | |
1266 } | |
1267 if($found){ | |
1268 if( ! defined $mid_cs ) { | |
1269 for my $range ( @mid_ranges ) { | |
1270 $end_registry->check_and_register( $range->[0], $range->[2], | |
1271 $range->[3] ); | |
1272 } | |
1273 | |
1274 # and thats it for the simple case ... | |
1275 return; | |
1276 } | |
1277 } | |
1278 } | |
1279 } | |
1280 | |
1281 | |
1282 =head2 register_all | |
1283 | |
1284 Arg [1] : Bio::EnsEMBL::AssemblyMapper $mapper | |
1285 Example : $mapper = $asm_mapper_adaptor->fetch_by_CoordSystems($cs1,$cs2); | |
1286 | |
1287 # make cache large enough to hold all of the mappings | |
1288 $mapper->max_pair_count(10e6); | |
1289 $asm_mapper_adaptor->register_all($mapper); | |
1290 | |
1291 # perform mappings as normal | |
1292 $mapper->map($slice->seq_region_name(), $sr_start, $sr_end, | |
1293 $sr_strand, $cs1); | |
1294 ... | |
1295 Description: This function registers the entire set of mappings between | |
1296 two coordinate systems in an assembly mapper. | |
1297 This will use a lot of memory but will be much more efficient | |
1298 when doing a lot of mapping which is spread over the entire | |
1299 genome. | |
1300 Returntype : none | |
1301 Exceptions : none | |
1302 Caller : specialised prograhsm | |
1303 Status : Stable | |
1304 | |
1305 =cut | |
1306 | |
1307 sub register_all { | |
1308 my $self = shift; | |
1309 my $mapper = shift; | |
1310 | |
1311 my $asm_cs_id = $mapper->assembled_CoordSystem()->dbID(); | |
1312 my $cmp_cs_id = $mapper->component_CoordSystem()->dbID(); | |
1313 | |
1314 # retrieve every relevant assembled/component pair from the assembly table | |
1315 | |
1316 my $q = qq{ | |
1317 SELECT | |
1318 asm.cmp_start, | |
1319 asm.cmp_end, | |
1320 asm.cmp_seq_region_id, | |
1321 cmp_sr.name, | |
1322 cmp_sr.length, | |
1323 asm.ori, | |
1324 asm.asm_start, | |
1325 asm.asm_end, | |
1326 asm.asm_seq_region_id, | |
1327 asm_sr.name, | |
1328 asm_sr.length | |
1329 FROM | |
1330 assembly asm, seq_region asm_sr, seq_region cmp_sr | |
1331 WHERE | |
1332 asm.cmp_seq_region_id = cmp_sr.seq_region_id AND | |
1333 asm.asm_seq_region_id = asm_sr.seq_region_id AND | |
1334 cmp_sr.coord_system_id = ? AND | |
1335 asm_sr.coord_system_id = ? | |
1336 }; | |
1337 | |
1338 my $sth = $self->prepare($q); | |
1339 | |
1340 $sth->bind_param(1,$cmp_cs_id,SQL_INTEGER); | |
1341 $sth->bind_param(2,$asm_cs_id,SQL_INTEGER); | |
1342 $sth->execute(); | |
1343 | |
1344 # load the mapper with the assembly information | |
1345 | |
1346 my ($cmp_start, $cmp_end, $cmp_seq_region_id, $cmp_seq_region, $cmp_length, | |
1347 $ori, | |
1348 $asm_start, $asm_end, $asm_seq_region_id, $asm_seq_region, $asm_length); | |
1349 | |
1350 $sth->bind_columns(\$cmp_start, \$cmp_end, \$cmp_seq_region_id, | |
1351 \$cmp_seq_region, \$cmp_length, \$ori, | |
1352 \$asm_start, \$asm_end, \$asm_seq_region_id, | |
1353 \$asm_seq_region, \$asm_length); | |
1354 | |
1355 my %asm_registered; | |
1356 | |
1357 while($sth->fetch()) { | |
1358 $mapper->register_component($cmp_seq_region_id); | |
1359 $mapper->mapper->add_map_coordinates( | |
1360 $asm_seq_region_id, $asm_start, $asm_end, | |
1361 $ori, | |
1362 $cmp_seq_region_id, $cmp_start, $cmp_end); | |
1363 | |
1364 my $arr = [$cmp_seq_region_id, $cmp_seq_region, $cmp_cs_id, $cmp_length]; | |
1365 | |
1366 $self->{'sr_name_cache'}->{"$cmp_seq_region:$cmp_cs_id"} = $arr; | |
1367 $self->{'sr_id_cache'}->{"$cmp_seq_region_id"} = $arr; | |
1368 | |
1369 # only register each asm seq_region once since it requires some work | |
1370 if(!$asm_registered{$asm_seq_region_id}) { | |
1371 $asm_registered{$asm_seq_region_id} = 1; | |
1372 | |
1373 # register all chunks from start of seq region to end | |
1374 my $end_chunk = $asm_length >> $CHUNKFACTOR; | |
1375 for(my $i = 0; $i <= $end_chunk; $i++) { | |
1376 $mapper->register_assembled($asm_seq_region_id, $i); | |
1377 } | |
1378 | |
1379 $arr = [$asm_seq_region_id, $asm_seq_region, $asm_cs_id, $asm_length]; | |
1380 | |
1381 $self->{'sr_name_cache'}->{"$asm_seq_region:$asm_cs_id"} = $arr; | |
1382 $self->{'sr_id_cache'}->{"$asm_seq_region_id"} = $arr; | |
1383 } | |
1384 } | |
1385 | |
1386 $sth->finish(); | |
1387 | |
1388 return; | |
1389 } | |
1390 | |
1391 | |
1392 | |
1393 =head2 register_all_chained | |
1394 | |
1395 Arg [1] : Bio::EnsEMBL::ChainedAssemblyMapper $casm_mapper | |
1396 Example : $mapper = $asm_mapper_adaptor->fetch_by_CoordSystems($cs1,$cs2); | |
1397 | |
1398 # make the cache large enough to hold all of the mappings | |
1399 $mapper->max_pair_count(10e6); | |
1400 # load all of the mapping data | |
1401 $asm_mapper_adaptor->register_all_chained($mapper); | |
1402 | |
1403 # perform mappings as normal | |
1404 $mapper->map($slice->seq_region_name(), $sr_start, $sr_end, | |
1405 $sr_strand, $cs1); | |
1406 ... | |
1407 Description: This function registers the entire set of mappings between | |
1408 two coordinate systems in a chained mapper. This will use a lot | |
1409 of memory but will be much more efficient when doing a lot of | |
1410 mapping which is spread over the entire genome. | |
1411 Returntype : none | |
1412 Exceptions : throw if mapper is between coord systems with unexpected | |
1413 mapping paths | |
1414 Caller : specialised programs doing a lot of genome-wide mapping | |
1415 Status : Stable | |
1416 | |
1417 =cut | |
1418 | |
1419 sub register_all_chained { | |
1420 my $self = shift; | |
1421 my $casm_mapper = shift; | |
1422 | |
1423 my $first_cs = $casm_mapper->first_CoordSystem(); | |
1424 my $mid_cs = $casm_mapper->middle_CoordSystem(); | |
1425 my $last_cs = $casm_mapper->last_CoordSystem(); | |
1426 | |
1427 my $start_mid_mapper = $casm_mapper->first_middle_mapper(); | |
1428 my $end_mid_mapper = $casm_mapper->last_middle_mapper(); | |
1429 my $combined_mapper = $casm_mapper->first_last_mapper(); | |
1430 | |
1431 my @ranges; | |
1432 | |
1433 my $sth = $self->prepare( | |
1434 'SELECT | |
1435 asm.cmp_start, | |
1436 asm.cmp_end, | |
1437 asm.cmp_seq_region_id, | |
1438 sr_cmp.name, | |
1439 sr_cmp.length, | |
1440 asm.ori, | |
1441 asm.asm_start, | |
1442 asm.asm_end, | |
1443 asm.asm_seq_region_id, | |
1444 sr_asm.name, | |
1445 sr_asm.length | |
1446 FROM | |
1447 assembly asm, seq_region sr_asm, seq_region sr_cmp | |
1448 WHERE | |
1449 sr_asm.seq_region_id = asm.asm_seq_region_id AND | |
1450 sr_cmp.seq_region_id = asm.cmp_seq_region_id AND | |
1451 sr_asm.coord_system_id = ? AND | |
1452 sr_cmp.coord_system_id = ?'); | |
1453 | |
1454 my $csa = $self->db()->get_CoordSystemAdaptor(); | |
1455 | |
1456 my @path; | |
1457 | |
1458 if ( !defined $mid_cs ) { | |
1459 @path = @{ $csa->get_mapping_path( $first_cs, $last_cs ) }; | |
1460 if ( !defined( $path[1] ) ) { | |
1461 splice( @path, 1, 1 ); | |
1462 } | |
1463 } else { | |
1464 @path = @{ $csa->get_mapping_path( $first_cs, $mid_cs ) }; | |
1465 # fix for when we have something like supercontig#contig#chromosome | |
1466 if ( !defined( $path[1] ) ) { | |
1467 splice( @path, 1, 1 ); | |
1468 } | |
1469 } | |
1470 | |
1471 if ( @path != 2 ) { | |
1472 my $path = | |
1473 join( ',', map( { $_->name . ' ' . $_->version } @path ) ); | |
1474 my $len = scalar(@path) - 1; | |
1475 throw( "Unexpected mapping path between start and intermediate " | |
1476 . "coord systems (" | |
1477 . $first_cs->name . " " | |
1478 . $first_cs->version . " and " | |
1479 . $mid_cs->name . " " | |
1480 . $mid_cs->version . ")." | |
1481 . "\nExpected path length 1, got $len. " | |
1482 . "(path=$path)" ); | |
1483 } | |
1484 | |
1485 my ($asm_cs,$cmp_cs) = @path; | |
1486 | |
1487 $sth->{mysql_use_result} = 1; | |
1488 $sth->bind_param(1,$asm_cs->dbID,SQL_INTEGER); | |
1489 $sth->bind_param(2,$cmp_cs->dbID,SQL_INTEGER); | |
1490 $sth->execute(); | |
1491 | |
1492 | |
1493 my ($mid_start, $mid_end, $mid_seq_region_id, $mid_seq_region, $mid_length, | |
1494 $ori, $start_start, $start_end, $start_seq_region_id, $start_seq_region, | |
1495 $start_length); | |
1496 | |
1497 if($asm_cs->equals($first_cs)) { | |
1498 $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id, | |
1499 \$mid_seq_region, \$mid_length, \$ori, \$start_start, | |
1500 \$start_end, \$start_seq_region_id, \$start_seq_region, | |
1501 \$start_length); | |
1502 } else { | |
1503 $sth->bind_columns(\$start_start, \$start_end, \$start_seq_region_id, | |
1504 \$start_seq_region, \$start_length, \$ori, | |
1505 \$mid_start, \$mid_end, \$mid_seq_region_id, | |
1506 \$mid_seq_region, \$mid_length); | |
1507 | |
1508 } | |
1509 | |
1510 my ( $mid_cs_id, $start_cs_id, $reg, $mapper ); | |
1511 if ( !defined $mid_cs ) { | |
1512 $mid_cs_id = $last_cs->dbID(); | |
1513 $start_cs_id = $first_cs->dbID(); | |
1514 $mapper = $combined_mapper; | |
1515 } else { | |
1516 $mid_cs_id = $mid_cs->dbID(); | |
1517 $start_cs_id = $first_cs->dbID(); | |
1518 $mapper = $start_mid_mapper; | |
1519 } | |
1520 | |
1521 $reg = $casm_mapper->first_registry(); | |
1522 | |
1523 while($sth->fetch()) { | |
1524 $mapper->add_map_coordinates | |
1525 ( | |
1526 $start_seq_region_id, $start_start, $start_end, $ori, | |
1527 $mid_seq_region_id, $mid_start, $mid_end | |
1528 ); | |
1529 push( @ranges, [$start_seq_region_id, $start_start, $start_end ] ); | |
1530 | |
1531 $reg->check_and_register( $start_seq_region_id, 1, $start_length ); | |
1532 if( ! defined $mid_cs ) { | |
1533 $casm_mapper->last_registry()->check_and_register | |
1534 ( $mid_seq_region_id, $mid_start, $mid_end ); | |
1535 } | |
1536 | |
1537 my $arr = [ $mid_seq_region_id, $mid_seq_region, | |
1538 $mid_cs_id, $mid_length ]; | |
1539 | |
1540 $self->{'sr_name_cache'}->{"$mid_seq_region:$mid_cs_id"} = $arr; | |
1541 $self->{'sr_id_cache'}->{"$mid_seq_region_id"} = $arr; | |
1542 | |
1543 $arr = [ $start_seq_region_id, $start_seq_region, | |
1544 $start_cs_id, $start_length ]; | |
1545 | |
1546 $self->{'sr_name_cache'}->{"$start_seq_region:$start_cs_id"} = $arr; | |
1547 $self->{'sr_id_cache'}->{"$start_seq_region_id"} = $arr; | |
1548 } | |
1549 | |
1550 if( ! defined $mid_cs ) { | |
1551 # thats it for the simple case | |
1552 return; | |
1553 } | |
1554 | |
1555 | |
1556 @path = @{ $csa->get_mapping_path( $last_cs, $mid_cs ) }; | |
1557 if ( defined($mid_cs) ) { | |
1558 if ( !defined( $path[1] ) ) { | |
1559 splice( @path, 1, 1 ); | |
1560 } | |
1561 } | |
1562 | |
1563 if ( @path != 2 ) { | |
1564 my $path = | |
1565 join( ',', map( { $_->name . ' ' . $_->version } @path ) ); | |
1566 my $len = scalar(@path) - 1; | |
1567 throw( "Unexpected mapping path between intermediate and last " | |
1568 . "coord systems (" | |
1569 . $last_cs->name . " " | |
1570 . $last_cs->version . " and " | |
1571 . $mid_cs->name . " " | |
1572 . $mid_cs->version . ")." | |
1573 . "\nExpected path length 1, got $len. " | |
1574 . "(path=$path)" ); | |
1575 } | |
1576 | |
1577 ($asm_cs,$cmp_cs) = @path; | |
1578 | |
1579 $sth->bind_param(1,$asm_cs->dbID,SQL_INTEGER); | |
1580 $sth->bind_param(2,$cmp_cs->dbID,SQL_INTEGER); | |
1581 $sth->execute(); | |
1582 | |
1583 | |
1584 my ($end_start, $end_end, $end_seq_region_id, $end_seq_region, | |
1585 $end_length); | |
1586 | |
1587 if($asm_cs->equals($mid_cs)) { | |
1588 $sth->bind_columns(\$end_start, \$end_end, \$end_seq_region_id, | |
1589 \$end_seq_region, \$end_length, \$ori, | |
1590 \$mid_start, \$mid_end, \$mid_seq_region_id, | |
1591 \$mid_seq_region, \$mid_length); | |
1592 } else { | |
1593 $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id, | |
1594 \$mid_seq_region, \$mid_length, \$ori, \$end_start, | |
1595 \$end_end, \$end_seq_region_id, \$end_seq_region, | |
1596 \$end_length); | |
1597 } | |
1598 | |
1599 my $end_cs_id = $last_cs->dbID(); | |
1600 $reg = $casm_mapper->last_registry(); | |
1601 | |
1602 while($sth->fetch()) { | |
1603 $end_mid_mapper->add_map_coordinates | |
1604 ( | |
1605 $end_seq_region_id, $end_start, $end_end, $ori, | |
1606 $mid_seq_region_id, $mid_start, $mid_end | |
1607 ); | |
1608 | |
1609 $reg->check_and_register( $end_seq_region_id, 1, $end_length ); | |
1610 | |
1611 my $arr = [ $end_seq_region_id, $end_seq_region, | |
1612 $end_cs_id, $end_length ]; | |
1613 $self->{'sr_name_cache'}->{"$end_seq_region:$end_cs_id"} = $arr; | |
1614 $self->{'sr_id_cache'}->{"$end_seq_region_id"} = $arr; | |
1615 } | |
1616 | |
1617 _build_combined_mapper( \@ranges, $start_mid_mapper, $end_mid_mapper, | |
1618 $combined_mapper, "first" ); | |
1619 | |
1620 return; | |
1621 } | |
1622 | |
1623 | |
1624 | |
1625 # after both halves of a chained mapper are loaded | |
1626 # this function maps all ranges in $ranges and loads the | |
1627 # results into the combined mapper | |
1628 sub _build_combined_mapper { | |
1629 my $ranges = shift; | |
1630 my $start_mid_mapper = shift; | |
1631 my $end_mid_mapper = shift; | |
1632 my $combined_mapper = shift; | |
1633 my $start_name = shift; | |
1634 | |
1635 my $mid_name = "middle"; | |
1636 | |
1637 foreach my $range (@$ranges) { | |
1638 my ( $seq_region_id, $start, $end) = @$range; | |
1639 | |
1640 my $sum = 0; | |
1641 | |
1642 my @initial_coords = $start_mid_mapper->map_coordinates($seq_region_id, | |
1643 $start,$end,1, | |
1644 $start_name); | |
1645 | |
1646 foreach my $icoord (@initial_coords) { | |
1647 #skip gaps | |
1648 if($icoord->isa('Bio::EnsEMBL::Mapper::Gap')) { | |
1649 $sum += $icoord->length(); | |
1650 next; | |
1651 } | |
1652 | |
1653 | |
1654 #feed the results of the first mapping into the second mapper | |
1655 my @final_coords = | |
1656 $end_mid_mapper->map_coordinates($icoord->id, $icoord->start, | |
1657 $icoord->end, | |
1658 $icoord->strand, $mid_name); | |
1659 | |
1660 | |
1661 foreach my $fcoord (@final_coords) { | |
1662 #load up the final mapper | |
1663 | |
1664 if($fcoord->isa('Bio::EnsEMBL::Mapper::Coordinate')) { | |
1665 my $total_start = $start + $sum; | |
1666 my $total_end = $total_start + $fcoord->length - 1; | |
1667 my $ori = $fcoord->strand(); | |
1668 | |
1669 if($start_name eq 'first') { # add coords in consistant order | |
1670 $combined_mapper->add_map_coordinates( | |
1671 $seq_region_id, $total_start, $total_end, $ori, | |
1672 $fcoord->id(), $fcoord->start(), $fcoord->end()); | |
1673 } else { | |
1674 $combined_mapper->add_map_coordinates( | |
1675 $fcoord->id(), $fcoord->start(), $fcoord->end(),$ori, | |
1676 $seq_region_id, $total_start, $total_end); | |
1677 } | |
1678 | |
1679 } | |
1680 $sum += $fcoord->length(); | |
1681 } | |
1682 } | |
1683 } | |
1684 #all done! | |
1685 } | |
1686 | |
1687 | |
1688 =head2 seq_regions_to_ids | |
1689 | |
1690 Arg [1] : Bio::EnsEMBL::CoordSystem $coord_system | |
1691 Arg [2] : listref of strings $seq_regions | |
1692 Example : my @ids = @{$asma->seq_regions_to_ids($coord_sys, \@seq_regs)}; | |
1693 Description: Converts a list of seq_region names to internal identifiers | |
1694 using the internal cache that has accumulated while registering | |
1695 regions for AssemblyMappers. If any requested regions are | |
1696 not found in the cache an attempt is made to retrieve them | |
1697 from the database. | |
1698 Returntype : listref of ints | |
1699 Exceptions : throw if a non-existant seqregion is provided | |
1700 Caller : general | |
1701 Status : Stable | |
1702 | |
1703 =cut | |
1704 | |
1705 sub seq_regions_to_ids { | |
1706 my $self = shift; | |
1707 my $coord_system = shift; | |
1708 my $seq_regions = shift; | |
1709 | |
1710 my $cs_id = $coord_system->dbID(); | |
1711 | |
1712 my @out; | |
1713 | |
1714 foreach my $sr (@$seq_regions) { | |
1715 my $arr = $self->{'sr_name_cache'}->{"$sr:$cs_id"}; | |
1716 if( $arr ) { | |
1717 push( @out, $arr->[0] ); | |
1718 } else { | |
1719 push @out, $self->_seq_region_name_to_id($sr,$cs_id); | |
1720 } | |
1721 } | |
1722 | |
1723 return \@out; | |
1724 } | |
1725 | |
1726 | |
1727 =head2 seq_ids_to_regions | |
1728 | |
1729 Arg [1] : listref of seq_region ids | |
1730 Example : my @ids = @{$asma->ids_to_seq_regions(\@seq_ids)}; | |
1731 Description: Converts a list of seq_region ids to seq region names | |
1732 using the internal cache that has accumulated while registering | |
1733 regions for AssemblyMappers. If any requested regions are | |
1734 not found in the cache an attempt is made to retrieve them | |
1735 from the database. | |
1736 Returntype : listref of strings | |
1737 Exceptions : throw if a non-existant seq_region_id is provided | |
1738 Caller : general | |
1739 Status : Stable | |
1740 | |
1741 =cut | |
1742 | |
1743 sub seq_ids_to_regions { | |
1744 my $self = shift; | |
1745 my $seq_region_ids = shift; | |
1746 | |
1747 my @out; | |
1748 | |
1749 foreach my $sr (@$seq_region_ids) { | |
1750 my $arr = $self->{'sr_id_cache'}->{"$sr"}; | |
1751 if( $arr ) { | |
1752 push( @out, $arr->[1] ); | |
1753 } else { | |
1754 push @out, $self->_seq_region_id_to_name($sr); | |
1755 } | |
1756 } | |
1757 | |
1758 return \@out; | |
1759 } | |
1760 | |
1761 =head2 delete_cache | |
1762 | |
1763 Description: Delete all the caches for the mappings/seq_regions | |
1764 Returntype : none | |
1765 Exceptions : none | |
1766 Caller : General | |
1767 Status : At risk | |
1768 | |
1769 =cut | |
1770 | |
1771 sub delete_cache{ | |
1772 my ($self) = @_; | |
1773 | |
1774 %{$self->{'sr_name_cache'}} = (); | |
1775 %{$self->{'sr_id_cache'}} = (); | |
1776 | |
1777 foreach my $key (keys %{$self->{'_asm_mapper_cache'}}){ | |
1778 $self->{'_asm_mapper_cache'}->{$key}->flush(); | |
1779 } | |
1780 %{$self->{'_asm_mapper_cache'}} = (); | |
1781 return; | |
1782 } | |
1783 | |
1784 | |
1785 =head2 register_region | |
1786 | |
1787 Description: DEPRECATED use register_assembled instead | |
1788 | |
1789 =cut | |
1790 | |
1791 sub register_region{ | |
1792 my ($self, $assmapper, $type, $chr_name, $start, $end) = @_; | |
1793 | |
1794 deprecate('Use register_assembled instead'); | |
1795 | |
1796 $self->register_assembled($assmapper, $chr_name, $start, $end); | |
1797 } | |
1798 | |
1799 | |
1800 =head2 register_contig | |
1801 | |
1802 Description: DEPRECATED use register_component instead | |
1803 | |
1804 =cut | |
1805 | |
1806 sub register_contig { | |
1807 my ($self, $assmapper, $type, $contig_id ) = @_; | |
1808 | |
1809 deprecate('Use register_component instead'); | |
1810 | |
1811 #not sure if the use is passing in a seq_region_name or a | |
1812 #seq_region_id... | |
1813 register_component($assmapper, $contig_id); | |
1814 } | |
1815 | |
1816 | |
1817 =head2 fetch_by_type | |
1818 | |
1819 Description: DEPRECATED use fetch_by_CoordSystems instead | |
1820 | |
1821 =cut | |
1822 | |
1823 sub fetch_by_type{ | |
1824 my ($self,$type) = @_; | |
1825 | |
1826 deprecate('Use fetch_by_CoordSystems instead'); | |
1827 | |
1828 #assume that what the user wanted was a mapper between the sequence coord | |
1829 #level and the top coord level | |
1830 | |
1831 my $csa = $self->db()->get_CoordSystemAdaptor(); | |
1832 | |
1833 my $cs1 = $csa->fetch_top_level($type); | |
1834 my $cs2 = $csa->fetch_sequence_level(); | |
1835 | |
1836 return $self->fetch_by_CoordSystems($cs1,$cs2); | |
1837 } | |
1838 | |
1839 | |
1840 | |
1841 1; |