0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::AssemblyMapper -
|
|
24 Handles mapping between two coordinate systems using the information
|
|
25 stored in the assembly table.
|
|
26
|
|
27 =head1 SYNOPSIS
|
|
28
|
|
29 $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(...);
|
|
30 $asma = $db->get_AssemblyMapperAdaptor();
|
|
31 $csa = $db->get_CoordSystemAdaptor();
|
|
32
|
|
33 my $chr_cs = $cs_adaptor->fetch_by_name( 'chromosome', 'NCBI33' );
|
|
34 my $ctg_cs = $cs_adaptor->fetch_by_name('contig');
|
|
35
|
|
36 $asm_mapper = $map_adaptor->fetch_by_CoordSystems( $cs1, $cs2 );
|
|
37
|
|
38 # Map to contig coordinate system from chromosomal.
|
|
39 @ctg_coords =
|
|
40 $asm_mapper->map( 'X', 1_000_000, 2_000_000, 1, $chr_cs );
|
|
41
|
|
42 # Map to chromosome coordinate system from contig.
|
|
43 @chr_coords =
|
|
44 $asm_mapper->map( 'AL30421.1.200.92341', 100, 10000, -1,
|
|
45 $ctg_cs );
|
|
46
|
|
47 # List contig names for a region of chromosome.
|
|
48 @ctg_ids = $asm_mapper->list_ids( '13', 1_000_000, 1, $chr_cs );
|
|
49
|
|
50 # List chromosome names for a contig region.
|
|
51 @chr_ids =
|
|
52 $asm_mapper->list_ids( 'AL30421.1.200.92341', 1, 1000, -1,
|
|
53 $ctg_cs );
|
|
54
|
|
55 =head1 DESCRIPTION
|
|
56
|
|
57 The AssemblyMapper is a database aware mapper which facilitates
|
|
58 conversion of coordinates between any two coordinate systems with a
|
|
59 relationship explicitly defined in the assembly table. In the future
|
|
60 it may be possible to perform multiple step (implicit) mapping between
|
|
61 coordinate systems.
|
|
62
|
|
63 It is implemented using the Bio::EnsEMBL::Mapper object, which is a
|
|
64 generic mapper object between disjoint coordinate systems.
|
|
65
|
|
66 =head1 METHODS
|
|
67
|
|
68 =cut
|
|
69
|
|
70
|
|
71 package Bio::EnsEMBL::AssemblyMapper;
|
|
72
|
|
73 use strict;
|
|
74 use warnings;
|
|
75
|
|
76 use Bio::EnsEMBL::Mapper;
|
|
77 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate);
|
|
78 use Scalar::Util qw(weaken);
|
|
79
|
|
# Labels for the two sides of the internal Bio::EnsEMBL::Mapper.  new()
# passes them to the mapper constructor (assembled first, component second)
# and the mapping methods pass one of them back to say which side a query
# starts from.
my ( $ASSEMBLED, $COMPONENT ) = ( 'assembled', 'component' );

# Value new() stores under 'max_pair_count' until max_pair_count() is used
# to override it.
my $DEFAULT_MAX_PAIR_COUNT = 1000;
|
|
84
|
|
85
|
|
86 =head2 new
|
|
87
|
|
88 Arg [1] : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor
|
|
89 Arg [2] : Bio::EnsEMBL::CoordSystem $asm_cs
|
|
90 Arg [3] : Bio::EnsEMBL::CoordSystem $cmp_cs
|
|
91 Example : Should use AssemblyMapperAdaptor->fetch_by_CoordSystems()
|
|
92 Description: Creates a new AssemblyMapper
|
|
93 Returntype : Bio::EnsEMBL::AssemblyMapper
|
|
94 Exceptions : Throws unless exactly two coord_systems are provided
|
|
95 Caller : AssemblyMapperAdaptor
|
|
96 Status : Stable
|
|
97
|
|
98 =cut
|
|
99
|
|
# Builds an AssemblyMapper for one pair of coordinate systems.
#
# Arguments: the AssemblyMapperAdaptor, followed by exactly two coordinate
# systems (assembled first, component second).  Any other number of
# coordinate systems makes the constructor throw.  Returns the new object.
sub new {
  my $proto         = shift;
  my $adaptor       = shift;
  my @coord_systems = @_;

  # Usable both as a class method and on an existing instance.
  my $self = bless( {}, ref($proto) || $proto );

  $self->adaptor($adaptor);
  $adaptor->cache_seq_ids_with_mult_assemblys();

  my $cs_count = scalar(@coord_systems);
  if ( $cs_count != 2 ) {
    throw(   'Can only map between two coordinate systems. '
           . $cs_count
           . ' were provided' );
  }

  # First coordinate system is the assembled one, second the component one.
  my ( $asm_cs, $cmp_cs ) = @coord_systems;
  $self->{'asm_cs'} = $asm_cs;
  $self->{'cmp_cs'} = $cmp_cs;

  # The internal mapper is loaded with the assembled coordinate system as
  # its 'from' side and the component coordinate system as its 'to' side.
  $self->{'mapper'} =
    Bio::EnsEMBL::Mapper->new( $ASSEMBLED, $COMPONENT, $asm_cs, $cmp_cs );

  $self->{'max_pair_count'} = $DEFAULT_MAX_PAIR_COUNT;

  return $self;
}
|
|
131
|
|
132 =head2 max_pair_count
|
|
133
|
|
134 Arg [1] : (optional) int $max_pair_count
|
|
135 Example : $mapper->max_pair_count(100000)
|
|
136 Description: Getter/Setter for the number of mapping pairs allowed
|
|
137 in the internal cache. This can be used to override
|
|
138 the default value (1000) to tune the performance and
|
|
139 memory usage for certain scenarios. Higher value
|
|
140 means bigger cache, more memory used.
|
|
141 Return type: int
|
|
142 Exceptions : None
|
|
143 Caller : General
|
|
144 Status : Stable
|
|
145
|
|
146 =cut
|
|
147
|
|
# Getter/setter for the number of mapping pairs allowed in the internal
# cache.  Only a defined argument acts as a setter, so 0 is accepted as a
# new value while undef leaves the stored value alone.  Always returns the
# value currently stored.
sub max_pair_count {
  my $self = shift;
  my ($new_count) = @_;

  $self->{'max_pair_count'} = $new_count if defined $new_count;

  return $self->{'max_pair_count'};
}
|
|
157
|
|
158 =head2 register_all
|
|
159
|
|
160 Arg [1] : None
|
|
161 Example : $mapper->max_pair_count(10e6);
|
|
162 $mapper->register_all();
|
|
163 Description: Pre-registers all assembly information in this
|
|
164 mapper. The cache size should be set to a
|
|
165 sufficiently large value so that all of the
|
|
166 information can be stored. This method is useful
|
|
167 when *a lot* of mapping will be done in regions
|
|
168 which are distributed around the genome. After
|
|
169 registration the mapper will consume a lot of memory
|
|
170 but will not have to perform any SQL and will be
|
|
171 faster.
|
|
172 Return type: None
|
|
173 Exceptions : None
|
|
174 Caller : Specialised programs doing a lot of mapping.
|
|
175 Status : Stable
|
|
176
|
|
177 =cut
|
|
178
|
|
# Asks the adaptor to pre-register all assembly information in this mapper.
# Whatever the adaptor's register_all() returns is passed back unchanged.
sub register_all {
  my $self = shift;

  my $adaptor = $self->adaptor();

  return $adaptor->register_all($self);
}
|
|
184
|
|
185 =head2 map
|
|
186
|
|
187 Arg [1] : string $frm_seq_region
|
|
188 The name of the sequence region to transform FROM.
|
|
189 Arg [2] : int $frm_start
|
|
190 The start of the region to transform FROM.
|
|
191 Arg [3] : int $frm_end
|
|
192 The end of the region to transform FROM.
|
|
193 Arg [4] : int $strand
|
|
194 The strand of the region to transform FROM.
|
|
195 Arg [5] : Bio::EnsEMBL::CoordSystem
|
|
196 The coordinate system to transform FROM
|
|
197 Example : @coords =
|
|
198 $asm_mapper->map( 'X', 1_000_000, 2_000_000, 1,
|
|
199 $chr_cs );
|
|
200 Description: Transforms coordinates from one coordinate system to
|
|
201 another.
|
|
202 Return type: List of Bio::EnsEMBL::Mapper::Coordinate and/or
|
|
203 Bio::EnsEMBL::Mapper::Gap objects.
|
|
204 Exceptions : Throws if the specified FROM coordinate system is not
|
|
205 one of the coordinate systems associated with this
|
|
206 assembly mapper.
|
|
207 Caller : General
|
|
208 Status : Stable
|
|
209
|
|
210 =cut
|
|
211
|
|
# Transforms a region given in one of this mapper's two coordinate systems
# into the other one.
#
# Arguments: seq_region name, start, end, strand, and the coordinate system
# those values are expressed in.  At least these five arguments are
# required; anything passed after the coordinate system is accepted but
# not used.
# Returns whatever the internal mapper's map_coordinates() returns.
# Throws on too few arguments, or when the coordinate system is neither the
# assembled nor the component coordinate system of this mapper.
sub map {
  if ( @_ < 6 ) {
    throw('Incorrect number of arguments.');
  }

  my ( $self, $frm_seq_region_name, $frm_start, $frm_end, $frm_strand,
       $frm_cs )
    = @_;

  my $adaptor = $self->adaptor();
  my $asm_cs  = $self->{'asm_cs'};
  my $cmp_cs  = $self->{'cmp_cs'};

  # The internal mapper works on seq_region IDs, not names.
  my $id_list =
    $adaptor->seq_regions_to_ids( $frm_cs, [$frm_seq_region_name] );
  my $seq_region_id = $id_list->[0];

  # Speed critical section:
  # Reference equality is tried first because it usually succeeds and is
  # much cheaper than calling equals().  The assembled test is only
  # evaluated when the component test has failed.
  my $from_component = ( $frm_cs == $cmp_cs )
    || ( $frm_cs != $asm_cs && $frm_cs->equals($cmp_cs) );
  my $from_assembled = !$from_component
    && ( $frm_cs == $asm_cs || $frm_cs->equals($asm_cs) );

  my $frm;
  if ($from_component) {

    # Components are registered once per seq_region.
    $adaptor->register_component( $self, $seq_region_id )
      unless $self->{'cmp_register'}->{$seq_region_id};
    $frm = $COMPONENT;

  } elsif ($from_assembled) {

    # This could probably be sped up by only calling register_assembled
    # when needed.
    $adaptor->register_assembled( $self, $seq_region_id, $frm_start,
                                  $frm_end );
    $frm = $ASSEMBLED;

  } else {

    throw(
      sprintf( "Coordinate system %s %s is neither the assembled "
                 . "nor the component coordinate system "
                 . "of this AssemblyMapper",
               $frm_cs->name(), $frm_cs->version() ) );

  }

  return $self->{'mapper'}->map_coordinates( $seq_region_id, $frm_start,
                                             $frm_end, $frm_strand, $frm );
}
|
|
265
|
|
266
|
|
267 =head2 flush
|
|
268
|
|
269 Args : None
|
|
270 Example : None
|
|
271 Description: Remove all cached items from this AssemblyMapper.
|
|
272 Return type: None
|
|
273 Exceptions : None
|
|
274 Caller : AssemblyMapperAdaptor
|
|
275 Status : Stable
|
|
276
|
|
277 =cut
|
|
278
|
|
# Removes all cached items from this AssemblyMapper: the internal mapper is
# flushed first, then both registration tables are replaced by empty hashes.
sub flush {
  my $self = shift;

  my $inner_mapper = $self->{'mapper'};
  $inner_mapper->flush();

  $self->{'cmp_register'} = {};

  # Kept as the final statement so the sub's implicit return value stays
  # what it has always been.
  $self->{'asm_register'} = {};
}
|
|
286
|
|
287 =head2 size
|
|
288
|
|
289 Args : None
|
|
290 Example : $num_of_pairs = $mapper->size();
|
|
291 Description: Returns the number of pairs currently stored.
|
|
292 Return type: int
|
|
293 Exceptions : None
|
|
294 Caller : General
|
|
295 Status : Stable
|
|
296
|
|
297 =cut
|
|
298
|
|
# Returns the number of pairs currently stored, read directly from the
# internal mapper's 'pair_count' entry.
sub size {
  my $self = shift;

  my $inner_mapper = $self->{'mapper'};

  return $inner_mapper->{'pair_count'};
}
|
|
304
|
|
305 =head2 fastmap
|
|
306
|
|
307 Arg [1] : string $frm_seq_region
|
|
308 The name of the sequence region to transform FROM.
|
|
309 Arg [2] : int $frm_start
|
|
310 The start of the region to transform FROM.
|
|
311 Arg [3] : int $frm_end
|
|
312 The end of the region to transform FROM.
|
|
313 Arg [4] : int $strand
|
|
314 The strand of the region to transform FROM.
|
|
315 Arg [5] : Bio::EnsEMBL::CoordSystem
|
|
316 The coordinate system to transform FROM.
|
|
317 Example : @coords =
|
|
318 $asm_mapper->fastmap( 'X', 1_000_000, 2_000_000, 1,
|
|
319 $chr_cs );
|
|
320 Description: Transforms coordinates from one coordinate system to
|
|
321 another.
|
|
322 Return type: List of Bio::EnsEMBL::Mapper::Coordinate and/or
|
|
323 Bio::EnsEMBL::Mapper::Gap objects.
|
|
324 Exceptions : Throws if the specified FROM coordinate system is not
|
|
325 one of the coordinate systems associated with this
|
|
326 assembly mapper.
|
|
327 Caller : General
|
|
328 Status : Stable
|
|
329
|
|
330 =cut
|
|
331
|
|
# Same contract as map(), but delegates to the internal mapper's fastmap()
# and requires exactly five arguments after the invocant: seq_region name,
# start, end, strand, and the coordinate system those values are in.
# Returns whatever the internal mapper's fastmap() returns.
# Throws on a wrong argument count, or when the coordinate system is
# neither the assembled nor the component coordinate system of this mapper.
sub fastmap {
  unless ( @_ == 6 ) {
    throw('Incorrect number of arguments.');
  }

  my ( $self, $frm_seq_region_name, $frm_start, $frm_end, $frm_strand,
       $frm_cs )
    = @_;

  my $adaptor = $self->adaptor();
  my $asm_cs  = $self->{'asm_cs'};
  my $cmp_cs  = $self->{'cmp_cs'};

  # The internal mapper works on seq_region IDs, not names.
  my $seq_region_id =
    $adaptor->seq_regions_to_ids( $frm_cs, [$frm_seq_region_name] )->[0];

  # Speed critical section:
  # Reference equality is tried first because it usually succeeds and is
  # much cheaper than calling equals().  The assembled test is only
  # evaluated when the component test has failed.
  my $is_component = ( $frm_cs == $cmp_cs )
    || ( $frm_cs != $asm_cs && $frm_cs->equals($cmp_cs) );
  my $is_assembled = !$is_component
    && ( $frm_cs == $asm_cs || $frm_cs->equals($asm_cs) );

  my $frm;
  if ($is_component) {

    # Components are registered once per seq_region.
    if ( !$self->{'cmp_register'}->{$seq_region_id} ) {
      $adaptor->register_component( $self, $seq_region_id );
    }
    $frm = $COMPONENT;

  } elsif ($is_assembled) {

    # This could probably be sped up by only calling register_assembled
    # when needed.
    $adaptor->register_assembled( $self, $seq_region_id, $frm_start,
                                  $frm_end );
    $frm = $ASSEMBLED;

  } else {

    throw(
      sprintf( "Coordinate system %s %s is neither the assembled "
                 . "nor the component coordinate system "
                 . "of this AssemblyMapper",
               $frm_cs->name(), $frm_cs->version() ) );

  }

  return $self->{'mapper'}->fastmap( $seq_region_id, $frm_start, $frm_end,
                                     $frm_strand, $frm );
}
|
|
389
|
|
390 =head2 list_ids
|
|
391
|
|
392 Arg [1] : string $frm_seq_region
|
|
393 The name of the sequence region of interest.
|
|
394 Arg [2] : int $frm_start
|
|
395 The start of the region of interest.
|
|
396 Arg [3] : int $frm_end
|
|
397 The end of the region of interest.
|
|
398 Arg [4] : Bio::EnsEMBL::CoordSystem $frm_cs
|
|
399 The coordinate system to obtain overlapping IDs of.
|
|
400 Example : foreach my $id (
|
|
401 $asm_mapper->list_ids( 'X', 1, 1000, $ctg_cs ) )
|
|
402 { ... }
|
|
403 Description: Retrieves a list of overlapping seq_region internal
|
|
404 identifiers of another coordinate system. This is the
|
|
405 same as the list_seq_regions method but returns internal
|
|
406 identifiers rather than seq_region names.
|
|
407 Return type: List of ints.
|
|
408 Exceptions : None
|
|
409 Caller : General
|
|
410 Status : Stable
|
|
411
|
|
412 =cut
|
|
413
|
|
# Lists the IDs, as held by the internal mapper's pairs, of the regions in
# the other coordinate system that overlap the given region.
#
# Arguments: seq_region name, start, end, and the coordinate system the
# region is expressed in (exactly four arguments after the invocant).
# Throws on a wrong argument count, or when the coordinate system is
# neither the assembled nor the component coordinate system of this mapper.
sub list_ids {
  throw('Incorrect number of arguments.') unless @_ == 5;

  my ( $self, $frm_seq_region_name, $frm_start, $frm_end, $frm_cs ) = @_;

  my $adaptor = $self->adaptor();

  # The internal mapper works on seq_region IDs, not names.
  my $seq_region_id =
    $adaptor->seq_regions_to_ids( $frm_cs, [$frm_seq_region_name] )->[0];

  if ( $frm_cs->equals( $self->component_CoordSystem() ) ) {

    $adaptor->register_component( $self, $seq_region_id )
      unless $self->have_registered_component($seq_region_id);

    # The assembled side was loaded as the 'from' side in the constructor,
    # so the overlapping assembled regions are the pairs' 'from' ends.
    my @pairs =
      $self->mapper()
      ->list_pairs( $seq_region_id, $frm_start, $frm_end, $COMPONENT );

    return map { $_->from()->id() } @pairs;
  }

  if ( $frm_cs->equals( $self->assembled_CoordSystem() ) ) {

    $adaptor->register_assembled( $self, $seq_region_id, $frm_start,
                                  $frm_end );

    # The component side was loaded as the 'to' side in the constructor,
    # so the overlapping component regions are the pairs' 'to' ends.
    my @pairs =
      $self->mapper()
      ->list_pairs( $seq_region_id, $frm_start, $frm_end, $ASSEMBLED );

    return map { $_->to->id() } @pairs;
  }

  throw(
    sprintf( "Coordinate system %s %s is neither the assembled "
               . "nor the component coordinate system "
               . "of this AssemblyMapper",
             $frm_cs->name(), $frm_cs->version() ) );
}
|
|
466
|
|
467 #sub list_seq_regions {
|
|
468 # throw('Incorrect number of arguments.') if(@_ != 5);
|
|
469 # my($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_cs) = @_;
|
|
470
|
|
471 # if($frm_cs->equals($self->component_CoordSystem())) {
|
|
472
|
|
473 # if(!$self->have_registered_component($seq_region_id)) {
|
|
474 # $self->adaptor->register_component($self,$seq_region_id);
|
|
475 # }
|
|
476
|
|
477 # #pull out the 'from' identifiers of the mapper pairs. The
|
|
478 # #we loaded the assembled side as the 'from' side in the constructor
|
|
479 # return
|
|
480 # map {$_->from()->id()}
|
|
481 # $self->mapper()->list_pairs($seq_region_id, $frm_start,
|
|
482 # $frm_end, $COMPONENT);
|
|
483
|
|
484 # } elsif($frm_cs->equals($self->assembled_CoordSystem())) {
|
|
485
|
|
486 # $self->adaptor->register_assembled($self,
|
|
487 # $frm_seq_region,$frm_start,$frm_end);
|
|
488
|
|
489 # #pull out the 'to' identifiers of the mapper pairs
|
|
490 # #we loaded the component side as the 'to' coord system in the constructor
|
|
491 # return
|
|
492 # map {$_->to->id()}
|
|
493 # $self->mapper()->list_pairs($frm_seq_region, $frm_start,
|
|
494 # $frm_end, $ASSEMBLED);
|
|
495 # } else {
|
|
496 # throw("Coordinate system " . $frm_cs->name . " " . $frm_cs->version .
|
|
497 # " is neither the assembled nor the component coordinate system " .
|
|
498 # " of this AssemblyMapper");
|
|
499 # }
|
|
500 #}
|
|
501
|
|
502
|
|
503 =head2 list_seq_regions
|
|
504
|
|
505 Arg [1] : string $frm_seq_region
|
|
506 The name of the sequence region of interest.
|
|
507 Arg [2] : int $frm_start
|
|
508 The start of the region of interest.
|
|
509 Arg [3] : int $frm_end
|
|
510 The end of the region of interest.
|
|
511 Arg [4] : Bio::EnsEMBL::CoordSystem $frm_cs
|
|
512 The coordinate system to obtain overlapping IDs of.
|
|
513 Example : foreach my $id (
|
|
514 $asm_mapper->list_seq_regions(
|
|
515 'X', 1, 1000, $chr_cs
|
|
516 ) ) { ... }
|
|
517 Description: Retrieves a list of overlapping seq_region names of
|
|
518 another coordinate system. This is the same as the
|
|
519 list_ids method but returns seq_region names rather
|
|
520 than internal identifiers.
|
|
521 Return type: List of strings.
|
|
522 Exceptions : None
|
|
523 Caller : General
|
|
524 Status : Stable
|
|
525
|
|
526 =cut
|
|
527
|
|
# Lists the regions of the other coordinate system that overlap the given
# region, as returned by the adaptor's seq_ids_to_regions().
#
# Arguments: seq_region name, start, end, and the coordinate system the
# region is expressed in (exactly four arguments after the invocant).
# Throws on a wrong argument count; list_ids() throws when the coordinate
# system does not belong to this mapper.
sub list_seq_regions {
  if ( @_ != 5 ) {
    throw('Incorrect number of arguments.');
  }

  my ( $self, $frm_seq_region, $frm_start, $frm_end, $frm_cs ) = @_;

  # list_ids() gives the IDs of the overlapping regions in the other
  # coordinate system.
  my @seq_ids =
    $self->list_ids( $frm_seq_region, $frm_start, $frm_end, $frm_cs );

  # seq_ids_to_regions() needs only the IDs, so the target coordinate
  # system does not have to be worked out here.
  return @{ $self->adaptor()->seq_ids_to_regions( \@seq_ids ) };
}
|
|
553
|
|
554 #sub list_ids {
|
|
555 # throw('Incorrect number of arguments.') if(@_ != 5);
|
|
556 # my($self, $frm_seq_region, $frm_start, $frm_end, $frm_cs) = @_;
|
|
557
|
|
558 # #retrieve the seq_region names
|
|
559 # my @seq_regs =
|
|
560 # $self->list_seq_regions($frm_seq_region,$frm_start,$frm_end,$frm_cs);
|
|
561
|
|
562 # #The seq_regions are from the 'to' coordinate system not the
|
|
563 # #from coordinate system we used to obtain them
|
|
564 # my $to_cs;
|
|
565 # if($frm_cs->equals($self->assembled_CoordSystem())) {
|
|
566 # $to_cs = $self->component_CoordSystem();
|
|
567 # } else {
|
|
568 # $to_cs = $self->assembled_CoordSystem();
|
|
569 # }
|
|
570
|
|
571 # #convert them to ids
|
|
572 # return @{$self->adaptor()->seq_regions_to_ids($to_cs, \@seq_regs)};
|
|
573 #}
|
|
574
|
|
575 =head2 have_registered_component
|
|
576
|
|
577 Arg [1] : string $cmp_seq_region
|
|
578 The name of the sequence region to check for
|
|
579 registration.
|
|
580 Example : if ( $asm_mapper->have_registered_component('AL240214.1') ) {}
|
|
581 Description: Returns true if a given component region has
|
|
582 been registered with this assembly mapper. This
|
|
583 should only be called by this class or the
|
|
584 AssemblyMapperAdaptor. In other words, do not use
|
|
585 this method unless you really know what you are
|
|
586 doing.
|
|
587 Return type: Boolean (0 or 1)
|
|
588 Exceptions : Throws on incorrect arguments.
|
|
589 Caller : Internal, AssemblyMapperAdaptor
|
|
590 Status : Stable
|
|
591
|
|
592 =cut
|
|
593
|
|
# Reports whether the given component seq_region has an entry in this
# mapper's component registration table.  Returns 1 or 0; throws when the
# argument is undefined.
sub have_registered_component {
  my ( $self, $cmp_seq_region ) = @_;

  throw('cmp_seq_region argument is required')
    unless defined($cmp_seq_region);

  # Presence of the key is what counts, not the truth of its value.
  return exists( $self->{'cmp_register'}->{$cmp_seq_region} ) ? 1 : 0;
}
|
|
607
|
|
608 =head2 have_registered_assembled
|
|
609
|
|
610 Arg [1] : string $asm_seq_region
|
|
611 The name of the sequence region to check for
|
|
612 registration.
|
|
613 Arg [2] : int $chunk_id
|
|
614 The chunk number of the provided seq_region to check
|
|
615 for registration.
|
|
616 Example : if ( $asm_mapper->have_registered_assembled( 'X', 9 ) ) { }
|
|
617 Description: Returns true if a given assembled region chunk
|
|
618 has been registered with this assembly mapper.
|
|
619 This should only be called by this class or the
|
|
620 AssemblyMapperAdaptor. In other words, do not use
|
|
621 this method unless you really know what you are
|
|
622 doing.
|
|
623 Return type: Boolean (0 or 1)
|
|
624 Exceptions : Throws on incorrect arguments
|
|
625 Caller : Internal, AssemblyMapperAdaptor
|
|
626 Status : Stable
|
|
627
|
|
628 =cut
|
|
629
|
|
# Reports whether the given chunk of an assembled seq_region has an entry
# in this mapper's assembled registration table.  Returns 1 or 0; throws
# when either argument is undefined.
sub have_registered_assembled {
  my ( $self, $asm_seq_region, $chunk_id ) = @_;

  throw('asm_seq_region argument is required')
    unless defined($asm_seq_region);
  throw('chunk_id is required')
    unless defined($chunk_id);

  # Presence of the chunk key is what counts, not the truth of its value.
  my $is_registered =
    exists( $self->{'asm_register'}->{$asm_seq_region}->{$chunk_id} );

  return $is_registered ? 1 : 0;
}
|
|
648
|
|
649
|
|
650 =head2 register_component
|
|
651
|
|
652 Arg [1] : integer $cmp_seq_region
|
|
653 The dbID of the component sequence region to
|
|
654 register.
|
|
655 Example : $asm_mapper->register_component('AL312341.1');
|
|
656 Description: Flags a given component sequence region as registered
|
|
657 in this assembly mapper. This should only be called
|
|
658 by this class or the AssemblyMapperAdaptor.
|
|
659 Return type: None
|
|
660 Exceptions : Throws on incorrect arguments
|
|
661 Caller : Internal, AssemblyMapperAdaptor
|
|
662 Status : Stable
|
|
663
|
|
664 =cut
|
|
665
|
|
# Flags the given component seq_region as registered in this mapper.
# Throws when the argument is undefined.
sub register_component {
  my $self = shift;
  my ($cmp_seq_region) = @_;

  defined($cmp_seq_region)
    or throw('cmp_seq_region argument is required');

  $self->{'cmp_register'}->{$cmp_seq_region} = 1;
}
|
|
675
|
|
676 =head2 register_assembled
|
|
677
|
|
678 Arg [1] : integer $asm_seq_region
|
|
679 The dbID of the sequence region to register.
|
|
680 Arg [2] : int $chunk_id
|
|
681 The chunk number of the provided seq_region to register.
|
|
682 Example : $asm_mapper->register_assembled( 'X', 4 );
|
|
683 Description: Flags a given assembled region as registered in this
|
|
684 assembly mapper. This should only be called by this
|
|
685 class or the AssemblyMapperAdaptor. Do not call this
|
|
686 method unless you really know what you are doing.
|
|
687 Return type: None
|
|
688 Exceptions : Throws on incorrect arguments
|
|
689 Caller : Internal, AssemblyMapperAdaptor
|
|
690 Status : Stable
|
|
691
|
|
692 =cut
|
|
693
|
|
# Flags one chunk of an assembled seq_region as registered in this mapper.
#
# Arguments: the assembled seq_region and the chunk number within it.
# Throws when either argument is undefined.
sub register_assembled {
  my ( $self, $asm_seq_region, $chunk_id ) = @_;

  if ( !defined($asm_seq_region) ) {
    throw('asm_seq_region argument is required');
  }
  if ( !defined($chunk_id) ) {
    throw('chunk_id argument is required');
  }

  $self->{'asm_register'}->{$asm_seq_region}->{$chunk_id} = 1;
}
|
|
706
|
|
707 =head2 mapper
|
|
708
|
|
709 Arg [1] : None
|
|
710 Example : $mapper = $asm_mapper->mapper();
|
|
711 Description: Retrieves the internal mapper used by this Assembly
|
|
712 Mapper. This is unlikely to be useful unless you
|
|
713 _really_ know what you are doing.
|
|
714 Return type: Bio::EnsEMBL::Mapper
|
|
715 Exceptions : None
|
|
716 Caller : Internal, AssemblyMapperAdaptor
|
|
717 Status : Stable
|
|
718
|
|
719 =cut
|
|
720
|
|
# Returns the internal mapper object stored under 'mapper'.
sub mapper {
  my $self = shift;

  return $self->{'mapper'};
}
|
|
726
|
|
727 =head2 assembled_CoordSystem
|
|
728
|
|
729 Arg [1] : None
|
|
730 Example : $cs = $asm_mapper->assembled_CoordSystem();
|
|
731 Description: Retrieves the assembled CoordSystem from this
|
|
732 assembly mapper.
|
|
733 Return type: Bio::EnsEMBL::CoordSystem
|
|
734 Exceptions : None
|
|
735 Caller : Internal, AssemblyMapperAdaptor
|
|
736 Status : Stable
|
|
737
|
|
738 =cut
|
|
739
|
|
# Returns the assembled coordinate system stored under 'asm_cs'.
sub assembled_CoordSystem {
  my $self = shift;

  return $self->{'asm_cs'};
}
|
|
745
|
|
746 =head2 component_CoordSystem
|
|
747
|
|
748 Arg [1] : None
|
|
749 Example : $cs = $asm_mapper->component_CoordSystem();
|
|
750 Description: Retrieves the component CoordSystem from this
|
|
751 assembly mapper.
|
|
752 Return type: Bio::EnsEMBL::CoordSystem
|
|
753 Exceptions : None
|
|
754 Caller : Internal, AssemblyMapperAdaptor
|
|
755 Status : Stable
|
|
756
|
|
757 =cut
|
|
758
|
|
# Returns the component coordinate system stored under 'cmp_cs'.
sub component_CoordSystem {
  my $self = shift;

  return $self->{'cmp_cs'};
}
|
|
764
|
|
765 =head2 adaptor
|
|
766
|
|
767 Arg [1] : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor $adaptor
|
|
768 Description: Getter/setter for this object's database adaptor.
|
|
769 Returntype : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor
|
|
770 Exceptions : None
|
|
771 Caller : General
|
|
772 Status : Stable
|
|
773
|
|
774 =cut
|
|
775
|
|
# Getter/setter for this object's adaptor.  A defined argument replaces the
# stored adaptor; the stored reference is weakened, so this object does not
# keep the adaptor alive.  Always returns the stored adaptor.
sub adaptor {
  my $self = shift;
  my ($value) = @_;

  if ( defined($value) ) {
    $self->{'adaptor'} = $value;
    weaken( $self->{'adaptor'} );
  }

  return $self->{'adaptor'};
}
|
|
785
|
|
786 =head2 in_assembly
|
|
787
|
|
788 Description: DEPRECATED, use map() or list_ids() instead.
|
|
789
|
|
790 =cut
|
|
791
|
|
# DEPRECATED, use map() or list_ids() instead.
#
# Returns true when list_ids() on the mapper between the object's
# coordinate system and the top-level coordinate system yields at least one
# entry for the object's region.
#
# This class defines no db() method, so the database handle is obtained
# through the adaptor.
# NOTE(review): assumes the adaptor provides db(), as adaptors derived from
# Bio::EnsEMBL::DBSQL::BaseAdaptor do - confirm.
sub in_assembly {
  my ( $self, $object ) = @_;

  deprecate('Use map() or list_ids() instead.');

  my $adaptor = $self->adaptor();

  my $csa = $adaptor->db()->get_CoordSystemAdaptor();

  my $top_level = $csa->fetch_top_level();

  my $asma =
    $adaptor->fetch_by_CoordSystems( $object->coord_system(),
                                     $top_level );

  my @list = $asma->list_ids( $object->seq_region(),
                              $object->start(),
                              $object->end(),
                              $object->coord_system() );

  return ( @list > 0 );
}
|
|
812
|
|
813 =head2 map_coordinates_to_assembly
|
|
814
|
|
815 Description: DEPRECATED, use map() instead.
|
|
816
|
|
817 =cut
|
|
818
|
|
# DEPRECATED, use map() instead.
#
# Maps a component region through map(), passing this mapper's component
# coordinate system as the FROM coordinate system.  The accessor is
# component_CoordSystem(); this class has no contig_CoordSystem() method.
sub map_coordinates_to_assembly {
  my ( $self, $contig_id, $start, $end, $strand ) = @_;

  deprecate('Use map() instead.');

  # Not sure if contig_id is seq_region_id or name...
  return
    $self->map( $contig_id, $start, $end, $strand,
                $self->component_CoordSystem() );
}
|
|
830
|
|
831 =head2 fast_to_assembly
|
|
832
|
|
833 Description: DEPRECATED, use map() instead.
|
|
834
|
|
835 =cut
|
|
836
|
|
# DEPRECATED, use map() instead.
#
# Maps a component region through map(), passing this mapper's component
# coordinate system as the FROM coordinate system.  The accessor is
# component_CoordSystem(); this class has no contig_CoordSystem() method.
sub fast_to_assembly {
  my ( $self, $contig_id, $start, $end, $strand ) = @_;

  deprecate('Use map() instead.');

  # Not sure if contig_id is seq_region_id or name...
  return
    $self->map( $contig_id, $start, $end, $strand,
                $self->component_CoordSystem() );
}
|
|
847
|
|
848 =head2 map_coordinates_to_rawcontig
|
|
849
|
|
850 Description: DEPRECATED, use map() instead.
|
|
851
|
|
852 =cut
|
|
853
|
|
# DEPRECATED, use map() instead.
#
# Maps an assembled region through map(), passing this mapper's assembled
# coordinate system as the FROM coordinate system.
sub map_coordinates_to_rawcontig {
  my $self = shift;
  my ( $chr_name, $start, $end, $strand ) = @_;

  deprecate('Use map() instead.');

  my $asm_cs = $self->assembled_CoordSystem();

  return $self->map( $chr_name, $start, $end, $strand, $asm_cs );
}
|
|
863
|
|
864 =head2 list_contig_ids
|
|
865
|
|
866 Description: DEPRECATED, use list_ids() instead.
|
|
867
|
|
868 =cut
|
|
869
|
|
# DEPRECATED, use list_ids() instead.
#
# Calls list_ids() for an assembled region, passing this mapper's assembled
# coordinate system as the FROM coordinate system.
sub list_contig_ids {
  my $self = shift;
  my ( $chr_name, $start, $end ) = @_;

  deprecate('Use list_ids() instead.');

  my $asm_cs = $self->assembled_CoordSystem();

  return $self->list_ids( $chr_name, $start, $end, $asm_cs );
}
|
|
879
|
|
880 1;
|