0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::ChainedAssemblyMapper -
|
|
24 Handles mapping between two coordinate systems using the information
|
|
25 stored in the assembly table
|
|
26
|
|
27 =head1 SYNOPSIS
|
|
28
|
|
29 $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(...);
|
|
30 $asma = $db->get_AssemblyMapperAdaptor();
|
|
31 $csa = $db->get_CoordSystemAdaptor();
|
|
32
|
|
33 my $chr_cs = $cs_adaptor->fetch_by_name( 'chromosome', 'NCBI33' );
|
|
34 my $cln_cs = $cs_adaptor->fetch_by_name('clone');
|
|
35
|
|
36 $asm_mapper = $map_adaptor->fetch_by_CoordSystems( $cs1, $cs2 );
|
|
37
|
|
38 # Map to contig coordinate system from chromosomal
|
|
39 @cln_coords =
|
|
40 $asm_mapper->map( 'X', 1_000_000, 2_000_000, 1, $chr_cs );
|
|
41
|
|
42 # Map to chromosome coordinate system from contig
|
|
43 @chr_coords =
|
|
44 $asm_mapper->map( 'AL30421.1', 100, 10000, -1, $cln_cs );
|
|
45
|
|
46 # List contig names for a region of chromosome
|
|
47 @cln_ids = $asm_mapper->list_ids( '13', 1_000_000, 1, $chr_cs );
|
|
48
|
|
49 # List chromosome names for a contig region
|
|
50 @chr_ids =
|
|
51 $asm_mapper->list_ids( 'AL30421.1', 1, 1000, $cln_cs );
|
|
52
|
|
53 =head1 DESCRIPTION
|
|
54
|
|
55 The ChainedAssemblyMapper is an extension of the regular AssemblyMapper
|
|
56 that allows for mappings between coordinate systems that require
|
|
57 multi-step mapping. For example if explicit mappings are defined
|
|
58 between the following coordinate systems,
|
|
59
|
|
60 chromosome <-> contig
|
|
61 contig <-> clone
|
|
62
|
|
63 the ChainedAssemblyMapper would be able to perform implicit mapping
|
|
64 between the chromosome and clone coordinate systems. This should be
|
|
65 transparent to the user of this module, and users should not even
|
|
66 realise that they are using a chained assembly mapper as opposed to a
|
|
67 normal assembly mapper.
|
|
68
|
|
69 =head1 METHODS
|
|
70
|
|
71 =cut
|
|
72
|
|
73
|
|
# Tags naming the three positions in the mapping chain. new() uses them as
# the endpoint names of the internal Bio::EnsEMBL::Mapper objects, and
# map()/list_ids() pass $FIRST or $LAST on to state which end of the chain
# a request is expressed in.
# NOTE(review): these are declared above the package statement; as
# file-scoped lexicals they are still visible to every sub below.
my $FIRST = 'first';
my $MIDDLE = 'middle';
my $LAST = 'last';
|
|
77
|
|
78 package Bio::EnsEMBL::ChainedAssemblyMapper;
|
|
79
|
|
80 use strict;
|
|
81 use warnings;
|
|
82 use integer; #use proper arithmetic bitshifts
|
|
83
|
|
84 use Bio::EnsEMBL::Mapper;
|
|
85 use Bio::EnsEMBL::Mapper::RangeRegistry;
|
|
86 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate);
|
|
87 use Scalar::Util qw(weaken);
|
|
88
|
|
# Shift amount used by map() and list_ids() to round a requested range out
# to chunk boundaries before registering it: 2^20 = approx 10^6
my $CHUNKFACTOR = 20;

# Default maximum size of the pair cache in the mappers; new() stores it as
# 'max_pair_count' and map() flushes the caches once size() exceeds it.
my $DEFAULT_MAX_PAIR_COUNT = 6000;
|
|
94
|
|
=head2 new

  Arg [1]    : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor $adaptor
  Arg [2]    : Bio::EnsEMBL::CoordSystem $first_cs
  Arg [3]    : Bio::EnsEMBL::CoordSystem $mid_cs
  Arg [4]    : Bio::EnsEMBL::CoordSystem $last_cs
  Example    : Should use AssemblyMapperAdaptor->fetch_by_CoordSystems
  Description: Creates a new ChainedAssemblyMapper. Three internal
               Bio::EnsEMBL::Mapper objects are created (first<->middle,
               last<->middle and first<->last) together with one
               RangeRegistry for each end of the chain, and the pair
               cache limit is set to its default.
  Returntype : Bio::EnsEMBL::ChainedAssemblyMapper
  Exceptions : thrown if wrong number of coord_systems are provided
  Caller     : AssemblyMapperAdaptor
  Status     : Stable

=cut

sub new {
  my ($caller, $adaptor, @coord_systems) = @_;

  # Accept either a class name or an existing instance as the invocant.
  my $class = ref($caller) || $caller;
  my $self  = bless {}, $class;

  $self->adaptor($adaptor);

  my $cs_count = scalar(@coord_systems);
  if ($cs_count != 3) {
    throw("ChainedMapper can only map between 3 coordinate systems. $cs_count were provided");
  }

  $adaptor->cache_seq_ids_with_mult_assemblys();

  # Remember the first, intermediate and last coordinate systems.
  @{$self}{qw(first_cs mid_cs last_cs)} = @coord_systems;

  # Mappers between each end of the chain and the intermediate system.
  $self->{'first_mid_mapper'} = Bio::EnsEMBL::Mapper->new($FIRST, $MIDDLE);
  $self->{'last_mid_mapper'}  = Bio::EnsEMBL::Mapper->new($LAST, $MIDDLE);

  # The mapper that answers requests; it is loaded with the mappings
  # generated from the two mappers above.
  $self->{'first_last_mapper'} =
    Bio::EnsEMBL::Mapper->new($FIRST, $LAST,
                              $coord_systems[0], $coord_systems[2]);

  # Registries tracking which regions have already been registered at
  # either end of the chain.
  for my $registry_key (qw(first_registry last_registry)) {
    $self->{$registry_key} = Bio::EnsEMBL::Mapper::RangeRegistry->new();
  }

  $self->{'max_pair_count'} = $DEFAULT_MAX_PAIR_COUNT;

  return $self;
}
|
|
153
|
|
154
|
|
=head2 max_pair_count

  Arg [1]    : (optional) int $max_pair_count
  Example    : $mapper->max_pair_count(100000)
  Description: Getter/Setter for the number of mapping pairs allowed in
               the internal cache before map() flushes it. Override the
               default (6000) to trade memory for fewer re-registrations:
               a higher value means a bigger cache and more memory used.
  Returntype : int
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub max_pair_count {
  my ($self, @new_value) = @_;

  # Any supplied argument (even undef) replaces the stored limit.
  if (@new_value) {
    $self->{'max_pair_count'} = $new_value[0];
  }

  return $self->{'max_pair_count'};
}
|
|
175
|
|
176
|
|
177
|
|
178
|
|
=head2 register_all

  Arg [1]    : none
  Example    : $mapper->max_pair_count(10e6);
               $mapper->register_all();
  Description: Pre-registers all assembly information in this mapper by
               delegating to the adaptor's register_all_chained(). The
               cache size should be set to a sufficiently large value
               beforehand so that all of the information can be stored.
               Useful when *a lot* of mapping will be done in regions
               distributed around the genome: afterwards the mapper uses
               a lot of memory but should not need further SQL.
  Returntype : none
  Exceptions : none
  Caller     : specialised programs doing a lot of mapping
  Status     : Stable

=cut

sub register_all {
  my ($self) = @_;

  my $adaptor = $self->adaptor();
  $adaptor->register_all_chained($self);

  return;
}
|
|
203
|
|
204
|
|
205
|
|
206
|
|
=head2 flush

  Args       : none
  Example    : $mapper->flush();
  Description: Empties both range registries and all three internal
               mappers by calling flush() on each, in that order.
  Returntype : whatever the final flush() call on the first<->last
               mapper returns
  Exceptions : none
  Caller     : map()
  Status     : Stable

=cut

sub flush {
  my ($self) = @_;

  for my $cache_key (qw(first_registry last_registry
                        first_mid_mapper last_mid_mapper)) {
    $self->{$cache_key}->flush();
  }

  # Kept as the final expression so the sub's return value is unchanged.
  return $self->{'first_last_mapper'}->flush();
}
|
|
216
|
|
=head2 size

  Args       : none
  Example    : $num_of_pairs = $mapper->size();
  Description: Returns the number of pairs currently stored, i.e. the sum
               of the 'pair_count' fields of the three internal mappers.
  Returntype : int
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub size {
  my ($self) = @_;

  my $first_last_pairs = $self->{'first_last_mapper'}->{'pair_count'};
  my $last_mid_pairs   = $self->{'last_mid_mapper'}->{'pair_count'};
  my $first_mid_pairs  = $self->{'first_mid_mapper'}->{'pair_count'};

  return ( $first_last_pairs + $last_mid_pairs + $first_mid_pairs );
}
|
|
235
|
|
236
|
|
237
|
|
=head2 map

  Arg [1]    : string $frm_seq_region
               The name of the sequence region to transform FROM
  Arg [2]    : int $frm_start
               The start of the region to transform FROM
  Arg [3]    : int $frm_end
               The end of the region to transform FROM
  Arg [4]    : int $strand
               The strand of the region to transform FROM
  Arg [5]    : Bio::EnsEMBL::CoordSystem
               The coordinate system to transform FROM
  Arg [6]    : (optional) fastmap
               If true the mapper's fastmap() is used instead of
               map_coordinates()
  Arg [7]    : (optional) Bio::EnsEMBL::Slice
               The slice to transform TO
  Example    : @coords = $asm_mapper->map('X', 1_000_000, 2_000_000,
                                            1, $chr_cs);
  Description: Transforms coordinates from one coordinate system
               to another. Regions that have not been seen before are
               registered first, rounded out to chunks of about 1MB; if
               the pair cache has grown past max_pair_count() it is
               flushed before registering.
  Returntype : List of Bio::EnsEMBL::Mapper::Coordinate and/or
               Bio::EnsEMBL::Mapper::Gap objects
  Exceptions : thrown if fewer than five arguments are given, or if the
               specified FROM coordinate system is neither the first nor
               the last coordinate system of this mapper
  Caller     : general
  Status     : Stable

=cut

sub map {
  throw('Incorrect number of arguments.') if(@_ < 6);

  my ($self, $frm_seq_region_name, $frm_start, $frm_end,
      $frm_strand, $frm_cs, $fastmap, $to_slice) = @_;

  my $first_last = $self->{'first_last_mapper'};
  my $first_cs   = $self->{'first_cs'};
  my $last_cs    = $self->{'last_cs'};

  # An insertion is written as a range whose end is one less than its start.
  my $is_insert = ($frm_end + 1 == $frm_start);

  # Translate the seq_region name into its internal id.
  my @names = ($frm_seq_region_name);
  my $seq_region_id =
    @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@names)}[0];

  # Speed critical section: try plain reference equality on the coord
  # system objects first, since it usually succeeds and is much cheaper
  # than calling equals().
  my ($frm, $registry);
  if($frm_cs == $first_cs ||
     ($frm_cs != $last_cs && $frm_cs->equals($first_cs))) {
    ($frm, $registry) = ($FIRST, $self->{'first_registry'});
  } elsif($frm_cs == $last_cs || $frm_cs->equals($last_cs)) {
    ($frm, $registry) = ($LAST, $self->{'last_registry'});
  } else {
    throw("Coordinate system " . $frm_cs->name . " " . $frm_cs->version .
          " is neither the first nor the last coordinate system " .
          " of this ChainedAssemblyMapper");
  }

  # For an insertion the two coordinates are swapped so the registry is
  # always given a low/high pair.
  my ($low, $high) =
    $is_insert ? ($frm_end, $frm_start) : ($frm_start, $frm_end);

  # The minimum area registered on a miss is about 1MB: round the request
  # out to chunk boundaries using bit shifts for fast integer arithmetic.
  my $min_start = (($low >> $CHUNKFACTOR) << $CHUNKFACTOR);
  my $min_end   = ((($high >> $CHUNKFACTOR) + 1) << $CHUNKFACTOR) - 1;

  # Fetch the ranges of the request that are not yet registered, and
  # register them at the same time.
  my $ranges = $registry->check_and_register($seq_region_id, $low, $high,
                                             $min_start, $min_end);

  if(defined($ranges)) {
    if($self->size() > $self->{'max_pair_count'}) {
      # The cache is full: empty it and ask again, since the flushed
      # registry no longer knows about the region.
      $self->flush();
      $ranges = $registry->check_and_register($seq_region_id, $low, $high,
                                              $min_start, $min_end);
    }
    $self->adaptor->register_chained($self, $frm, $seq_region_id,
                                     $ranges, $to_slice);
  }

  my $mapping_method = $fastmap ? 'fastmap' : 'map_coordinates';

  return $first_last->$mapping_method($seq_region_id, $frm_start, $frm_end,
                                      $frm_strand, $frm);
}
|
|
357
|
|
358
|
|
=head2 fastmap

  Args       : the same leading arguments as map()
  Example    : @coords = $asm_mapper->fastmap('X', 1_000_000, 2_000_000,
                                              1, $chr_cs);
  Description: Calls map() with a true value appended to the argument
               list. With the usual five arguments that value lands in
               map()'s fastmap slot.
  Returntype : whatever map() returns
  Exceptions : as for map()
  Caller     : general

=cut

sub fastmap {
  my ($self, @map_args) = @_;
  return $self->map(@map_args, 1);
}
|
|
363
|
|
364
|
|
=head2 list_ids

  Arg [1]    : string $frm_seq_region
               The name of the sequence region of interest
  Arg [2]    : int $frm_start
               The start of the region of interest
  Arg [3]    : int $frm_end
               The end of the region of interest
  Arg [4]    : Bio::EnsEMBL::CoordSystem $frm_cs
               The coordinate system the region is expressed in
  Example    : foreach $id ($asm_mapper->list_ids('X',1,1000,$chr_cs)) {...}
  Description: Retrieves a list of overlapping seq_region internal
               identifiers of the coordinate system at the other end of
               the chain. This is the same as the list_seq_regions method
               but uses internal identifiers rather than seq_region
               strings.
  Returntype : List of ints
  Exceptions : thrown if the number of arguments is not exactly four, or
               if $frm_cs is neither the first nor the last coordinate
               system of this mapper
  Caller     : general
  Status     : Stable

=cut

sub list_ids {
  throw('Incorrect number of arguments.') if(@_ != 5);
  my ($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_cs) = @_;

  # An insertion is written as a range whose start is one past its end.
  my $is_insert = ($frm_start == $frm_end + 1);

  # For an insertion the two coordinates are swapped so the registry is
  # always given a low/high pair.
  my ($low, $high) =
    $is_insert ? ($frm_end, $frm_start) : ($frm_start, $frm_end);

  # The minimum area registered on a miss is about 1MB: round the request
  # out to chunk boundaries using bit shifts for fast integer arithmetic.
  my $min_start = (($low >> $CHUNKFACTOR) << $CHUNKFACTOR);
  my $min_end   = ((($high >> $CHUNKFACTOR) + 1) << $CHUNKFACTOR) - 1;

  # Translate the seq_region name into its internal id.
  my @names = ($frm_seq_region_name);
  my $seq_region_id =
    @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@names)}[0];

  # Work out which end of the chain the request is in; the ids returned
  # come from the opposite end of each pair.
  my ($frm, $registry, $opposite_end);
  if($frm_cs->equals($self->{'first_cs'})) {
    ($frm, $registry, $opposite_end) =
      ($FIRST, $self->{'first_registry'}, 'to');
  } elsif($frm_cs->equals($self->{'last_cs'})) {
    ($frm, $registry, $opposite_end) =
      ($LAST, $self->{'last_registry'}, 'from');
  } else {
    throw("Coordinate system " . $frm_cs->name . " " . $frm_cs->version .
          " is neither the first nor the last coordinate system " .
          " of this ChainedAssemblyMapper");
  }

  my $ranges = $registry->check_and_register($seq_region_id, $low, $high,
                                             $min_start, $min_end);

  # Only unregistered ranges need to be loaded through the adaptor.
  if(defined($ranges)) {
    $self->adaptor->register_chained($self, $frm, $seq_region_id, $ranges);
  }

  return map { $_->$opposite_end()->id() }
    $self->first_last_mapper()->list_pairs($seq_region_id, $frm_start,
                                           $frm_end, $frm);
}
|
|
460
|
|
461
|
|
=head2 list_seq_regions

  Arg [1]    : string $frm_seq_region
               The name of the sequence region of interest
  Arg [2]    : int $frm_start
               The start of the region of interest
  Arg [3]    : int $frm_end
               The end of the region of interest
  Arg [4]    : Bio::EnsEMBL::CoordSystem $frm_cs
               The coordinate system the region is expressed in
  Example    : foreach $name ($asm_mapper->list_seq_regions('X',1,1000,$chr_cs)) {...}
  Description: Retrieves a list of overlapping seq_region names of the
               coordinate system at the other end of the chain. This is
               the same as the list_ids method but returns seq_region
               names rather than internal ids.
  Returntype : List of strings
  Exceptions : thrown if the number of arguments is not exactly four;
               list_ids() may also throw
  Caller     : general
  Status     : Stable

=cut

sub list_seq_regions {
  throw('Incorrect number of arguments.') if(@_ != 5);
  my($self, $frm_seq_region, $frm_start, $frm_end, $frm_cs) = @_;

  # Internal ids of the overlapping seq_regions at the other end of the
  # chain.
  my @seq_region_ids =
    $self->list_ids($frm_seq_region, $frm_start, $frm_end, $frm_cs);

  # The ids alone are enough for the adaptor to resolve the names, so the
  # target coordinate system does not need to be determined here.
  return @{$self->adaptor()->seq_ids_to_regions(\@seq_region_ids)};
}
|
|
503
|
|
504
|
|
505
|
|
506
|
|
507
|
|
508
|
|
=head2 first_last_mapper

  Args       : none
  Example    : $mapper = $cam->first_last_mapper();
  Description: Returns the mapper between the first and last coordinate
               systems, as created in new().
  Returntype : Bio::EnsEMBL::Mapper
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub first_last_mapper {
  my ($self) = @_;
  return $self->{'first_last_mapper'};
}
|
|
525
|
|
=head2 first_middle_mapper

  Args       : none
  Example    : $mapper = $cam->first_middle_mapper();
  Description: Returns the mapper between the first and middle coordinate
               systems, as created in new().
  Returntype : Bio::EnsEMBL::Mapper
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub first_middle_mapper {
  my ($self) = @_;
  return $self->{'first_mid_mapper'};
}
|
|
543
|
|
=head2 last_middle_mapper

  Args       : none
  Example    : $mapper = $cam->last_middle_mapper();
  Description: Returns the mapper between the last and middle coordinate
               systems, as created in new().
  Returntype : Bio::EnsEMBL::Mapper
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub last_middle_mapper {
  my ($self) = @_;
  return $self->{'last_mid_mapper'};
}
|
|
560
|
|
561
|
|
=head2 first_CoordSystem

  Args       : none
  Example    : $coordsys = $cam->first_CoordSystem();
  Description: Returns the first coordinate system of the chain, i.e. the
               first of the three passed to new().
  Returntype : Bio::EnsEMBL::CoordSystem
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub first_CoordSystem {
  my ($self) = @_;
  return $self->{'first_cs'};
}
|
|
578
|
|
579
|
|
=head2 middle_CoordSystem

  Args       : none
  Example    : $coordsys = $cam->middle_CoordSystem();
  Description: Returns the intermediate coordinate system of the chain,
               i.e. the second of the three passed to new().
  Returntype : Bio::EnsEMBL::CoordSystem
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub middle_CoordSystem {
  my ($self) = @_;
  return $self->{'mid_cs'};
}
|
|
596
|
|
=head2 last_CoordSystem

  Args       : none
  Example    : $coordsys = $cam->last_CoordSystem();
  Description: Returns the last coordinate system of the chain, i.e. the
               third of the three passed to new().
  Returntype : Bio::EnsEMBL::CoordSystem
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub last_CoordSystem {
  my ($self) = @_;
  return $self->{'last_cs'};
}
|
|
613
|
|
=head2 first_registry

  Args       : none
  Example    : $rr = $cam->first_registry();
  Description: Returns the registry that records which regions of the
               first coordinate system have been registered.
  Returntype : Bio::EnsEMBL::Mapper::RangeRegistry
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub first_registry {
  my ($self) = @_;
  return $self->{'first_registry'};
}
|
|
630
|
|
=head2 last_registry

  Args       : none
  Example    : $rr = $cam->last_registry();
  Description: Returns the registry that records which regions of the
               last coordinate system have been registered.
  Returntype : Bio::EnsEMBL::Mapper::RangeRegistry
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub last_registry {
  my ($self) = @_;
  return $self->{'last_registry'};
}
|
|
647
|
|
648
|
|
649 #
|
|
650 # Methods supplied to maintain polymorphism with AssemblyMapper there
|
|
651 # is no real assembled or component in the chained mapper, since the
|
|
652 # ordering is arbitrary and both ends might actually be assembled, but
|
|
653 # these methods provide convenient synonyms
|
|
654 #
|
|
655
|
|
=head2 mapper

  Args       : none
  Example    : $mapper = $cam->mapper();
  Description: Synonym for first_last_mapper(), supplied so this class can
               be used in place of an AssemblyMapper.
  Returntype : Bio::EnsEMBL::Mapper
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub mapper {
  my ($self) = @_;
  return $self->first_last_mapper();
}
|
|
672
|
|
=head2 assembled_CoordSystem

  Args       : none
  Example    : $coordsys = $cam->assembled_CoordSystem();
  Description: Returns the first CoordSystem. Supplied as a synonym so
               this class can be used in place of an AssemblyMapper; the
               chained mapper has no real assembled end.
  Returntype : Bio::EnsEMBL::CoordSystem
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub assembled_CoordSystem {
  my ($self) = @_;
  return $self->{'first_cs'};
}
|
|
690
|
|
=head2 component_CoordSystem

  Args       : none
  Example    : $coordsys = $cam->component_CoordSystem();
  Description: Returns the last CoordSystem. Supplied as a synonym so
               this class can be used in place of an AssemblyMapper; the
               chained mapper has no real component end.
  Returntype : Bio::EnsEMBL::CoordSystem
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub component_CoordSystem {
  my ($self) = @_;
  return $self->{'last_cs'};
}
|
|
707
|
|
708
|
|
=head2 adaptor

  Arg [1]    : (optional) Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor $adaptor
  Description: Getter/Setter for this object's database adaptor. The
               adaptor is held as a weak reference, so this object does
               not keep it alive.
  Returntype : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub adaptor {
  my ($self, @new_adaptor) = @_;

  if (@new_adaptor) {
    $self->{'adaptor'} = $new_adaptor[0];
    # Weakened so the stored reference does not count towards keeping
    # the adaptor alive.
    weaken($self->{'adaptor'});
  }

  return $self->{'adaptor'};
}
|
|
725
|
|
726
|
|
=head2 in_assembly

  Arg [1]    : object providing coord_system(), seq_region(), start() and
               end() methods
  Description: Deprecated. Use map() or list_ids() instead.
               Reports whether the region described by the object
               overlaps anything in the top level coordinate system.
  Returntype : boolean
  Exceptions : none
  Caller     : general

=cut

sub in_assembly {
  my ($self, $object) = @_;

  deprecate('Use map() or list_ids() instead.');

  # This class has no db() method of its own, so the database connection
  # is reached through the adaptor.
  # NOTE(review): assumes the adaptor exposes db(), as Ensembl adaptors
  # derived from BaseAdaptor do - confirm.
  my $csa = $self->adaptor()->db()->get_CoordSystemAdaptor();

  my $top_level = $csa->fetch_top_level();

  # Mapper between the object's coordinate system and the top level.
  my $asma = $self->adaptor->fetch_by_CoordSystems($object->coord_system(),
                                                   $top_level);

  my @list = $asma->list_ids($object->seq_region(), $object->start(),
                             $object->end(), $object->coord_system());

  return (@list > 0);
}
|
|
750
|
|
751
|
|
=head2 map_coordinates_to_assembly

  Arg [1]    : $contig_id - passed to map() as the seq_region name
  Arg [2]    : int $start
  Arg [3]    : int $end
  Arg [4]    : int $strand
  Description: DEPRECATED use map() instead.
               Maps the region from the component (last) coordinate
               system by calling map().
  Returntype : whatever map() returns
  Exceptions : as for map()
  Caller     : general

=cut

sub map_coordinates_to_assembly {
  my ($self, $contig_id, $start, $end, $strand) = @_;

  deprecate('Use map() instead.');

  # not sure if contig_id is seq_region_id or name...
  # The coordinate system comes from component_CoordSystem(); this class
  # has no contig_CoordSystem() method.
  return $self->map($contig_id, $start, $end, $strand,
                    $self->component_CoordSystem());
}
|
|
768
|
|
769
|
|
=head2 fast_to_assembly

  Arg [1]    : $contig_id - passed to map() as the seq_region name
  Arg [2]    : int $start
  Arg [3]    : int $end
  Arg [4]    : int $strand
  Description: DEPRECATED use map() instead.
               Maps the region from the component (last) coordinate
               system by calling map().
  Returntype : whatever map() returns
  Exceptions : as for map()
  Caller     : general

=cut

sub fast_to_assembly {
  my ($self, $contig_id, $start, $end, $strand) = @_;

  deprecate('Use map() instead.');

  # not sure if contig_id is seq_region_id or name...
  # The coordinate system comes from component_CoordSystem(); this class
  # has no contig_CoordSystem() method.
  return $self->map($contig_id, $start, $end, $strand,
                    $self->component_CoordSystem());
}
|
|
785
|
|
786
|
|
=head2 map_coordinates_to_rawcontig

  Arg [1]    : string $chr_name
  Arg [2]    : int $start
  Arg [3]    : int $end
  Arg [4]    : int $strand
  Description: DEPRECATED use map() instead.
               Maps the region from the assembled (first) coordinate
               system by calling map().
  Returntype : whatever map() returns
  Exceptions : as for map()
  Caller     : general

=cut

sub map_coordinates_to_rawcontig {
  my $self = shift;
  my ($region_name, $region_start, $region_end, $region_strand) = @_;

  deprecate('Use map() instead.');

  return $self->map($region_name, $region_start, $region_end,
                    $region_strand, $self->assembled_CoordSystem());
}
|
|
802
|
|
=head2 list_contig_ids

  Arg [1]    : string $chr_name
  Arg [2]    : int $start
  Arg [3]    : int $end
  Description: DEPRECATED Use list_ids instead.
               Calls list_ids() for the region, using the assembled
               (first) coordinate system as the one the region is in.
  Returntype : whatever list_ids() returns
  Exceptions : as for list_ids()
  Caller     : general

=cut

sub list_contig_ids {
  my $self = shift;
  my ($region_name, $region_start, $region_end) = @_;

  deprecate('Use list_ids() instead.');

  return $self->list_ids($region_name, $region_start, $region_end,
                         $self->assembled_CoordSystem());
}
|
|
816
|
|
817
|
|
818
|
|
819 1;
|