0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::TopLevelAssemblyMapper -
|
|
24 Handles mapping between a given coordinate system and the toplevel
|
|
25 pseudo coordinate system.
|
|
26
|
|
27 =head1 SYNOPSIS
|
|
28
|
|
29 $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(...);
|
|
30 $asma = $db->get_AssemblyMapperAdaptor();
|
|
31 $csa = $db->get_CoordSystemAdaptor();
|
|
32
|
|
33 my $toplevel = $cs_adaptor->fetch_by_name('toplevel');
|
|
34 my $ctg_cs = $cs_adaptor->fetch_by_name('contig');
|
|
35
|
|
36 $asm_mapper = $map_adaptor->fetch_by_CoordSystems( $cs1, $cs2 );
|
|
37
|
|
38 # map to toplevel coord system for this region
|
|
39 @chr_coords =
|
|
40 $asm_mapper->map( 'AL30421.1.200.92341', 100, 10000, -1, $ctg_cs );
|
|
41
|
|
42 # list toplevel seq_region_ids for this region
|
|
43 @chr_ids =
|
|
44 $asm_mapper->list_ids( 'AL30421.1.200.92341', 1, 1000, -1,
|
|
45 $ctg_cs );
|
|
46
|
|
47 =head1 DESCRIPTION
|
|
48
|
|
49 The TopLevelAssemblyMapper performs mapping between a provided
|
|
50 coordinate system and the toplevel pseudo cooordinate system. The
|
|
51 toplevel coordinate system is not a real coordinate system, but
|
|
52 represents the highest coordinate system that can be mapped to in a
|
|
53 given region. It is only possible to perform unidirectional mapping
|
|
54 using this mapper, because it does not make sense to map from the
|
|
55 toplevel coordinate system to another coordinate system.
|
|
56
|
|
57 =head1 METHODS
|
|
58
|
|
59 =cut
|
|
60
|
|
61
|
|
62 use strict;
|
|
63 use warnings;
|
|
64
|
|
65 package Bio::EnsEMBL::TopLevelAssemblyMapper;
|
|
66
|
|
67 use Bio::EnsEMBL::Utils::Exception qw(throw);
|
|
68 use Bio::EnsEMBL::Mapper;
|
|
69 use Bio::EnsEMBL::CoordSystem;
|
|
70 use Scalar::Util qw(weaken);
|
|
71
|
|
72 =head2 new
|
|
73
|
|
74 Arg [1] : Bio::EnsEMBL::DBAdaptor $dbadaptor the adaptor for
|
|
75 the database this mapper is using.
|
|
76 Arg [2] : Toplevel CoordSystem
|
|
77 Arg [3] : Other CoordSystem
|
|
78 Description: Creates a new TopLevelAssemblyMapper object
|
|
79 Returntype : Bio::EnsEMBL::DBSQL::TopLevelAssemblyMapper
|
|
80 Exceptions : throws if any of the 3 arguments are missing/ not
|
|
81 : of the correct type.
|
|
82 Caller : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor
|
|
83 Status : Stable
|
|
84
|
|
85 =cut
|
|
86
|
|
87
|
|
88 sub new {
|
|
89 my ($caller, $adaptor, $toplevel_cs, $other_cs) = @_;
|
|
90
|
|
91 my $class = ref($caller) || $caller;
|
|
92
|
|
93 if(!ref($toplevel_cs)) {
|
|
94 throw('Toplevel CoordSystem argument expected.');
|
|
95 }
|
|
96 if(!ref($other_cs)) {
|
|
97 throw('Other CoordSystem argument expected.');
|
|
98 }
|
|
99
|
|
100 if(!$toplevel_cs->is_top_level()) {
|
|
101 throw($toplevel_cs->name() . " is not the toplevel CoordSystem.");
|
|
102 }
|
|
103 if($other_cs->is_top_level()) {
|
|
104 throw("Other CoordSystem argument should not be toplevel CoordSystem.");
|
|
105 }
|
|
106
|
|
107 my $cs_adaptor = $adaptor->db()->get_CoordSystemAdaptor();
|
|
108 my $coord_systems = $cs_adaptor->fetch_all();
|
|
109
|
|
110 my $self = bless {'coord_systems' => $coord_systems,
|
|
111 'toplevel_cs' => $toplevel_cs,
|
|
112 'other_cs' => $other_cs}, $class;
|
|
113
|
|
114 $self->adaptor($adaptor);
|
|
115 return $self;
|
|
116 }
|
|
117
|
|
118 sub adaptor {
|
|
119 my $self = shift;
|
|
120
|
|
121 weaken($self->{'adaptor'} = shift) if(@_);
|
|
122
|
|
123 return $self->{'adaptor'};
|
|
124 }
|
|
125
|
|
126 =head2 map
|
|
127
|
|
128 Arg [1] : string $frm_seq_region
|
|
129 The name of the sequence region to transform FROM
|
|
130 Arg [2] : int $frm_start
|
|
131 The start of the region to transform FROM
|
|
132 Arg [3] : int $frm_end
|
|
133 The end of the region to transform FROM
|
|
134 Arg [4] : int $strand
|
|
135 The strand of the region to transform FROM
|
|
136 Arg [5] : Bio::EnsEMBL::CoordSystem
|
|
137 The coordinate system to transform FROM
|
|
138 Arg [6] : if set will do a fastmap
|
|
139 Example : @coords = $mapper->map('X', 1_000_000, 2_000_000,
|
|
140 1, $chr_cs);
|
|
141 Description: Transforms coordinates from one coordinate system
|
|
142 to another.
|
|
143 Returntype : List of Bio::EnsEMBL::Mapper::Coordinate and/or
|
|
144 Bio::EnsEMBL::Mapper:Gap objects
|
|
145 Exceptions : thrown if if the specified TO coordinate system is not one
|
|
146 of the coordinate systems associated with this mapper
|
|
147 Caller : general
|
|
148 Status : Stable
|
|
149
|
|
150 =cut
|
|
151
|
|
152
|
|
153 sub map {
|
|
154 throw('Incorrect number of arguments.') if(@_ != 6 && @_ != 7);
|
|
155
|
|
156 my($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_strand, $frm_cs,
|
|
157 $fastmap) = @_;
|
|
158
|
|
159 if($frm_cs->is_top_level()) {
|
|
160 throw("The toplevel CoordSystem can only be mapped TO, not FROM.");
|
|
161 }
|
|
162
|
|
163 my @tmp;
|
|
164 push @tmp, $frm_seq_region_name;
|
|
165 my $seq_region_id = @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@tmp)}[0];
|
|
166
|
|
167 my $mapper = $self->{'mapper'};
|
|
168 my $toplevel_cs = $self->{'toplevel_cs'};
|
|
169 my $other_cs = $self->{'other_cs'};
|
|
170 my $adaptor = $self->adaptor;
|
|
171
|
|
172 if($frm_cs != $other_cs && !$frm_cs->equals($other_cs)) {
|
|
173 throw("Coordinate system " . $frm_cs->name . " " . $frm_cs->version .
|
|
174 " is neither the assembled nor the component coordinate system " .
|
|
175 " of this AssemblyMapper");
|
|
176 }
|
|
177
|
|
178 my $coord_systems = $self->{'coord_systems'};
|
|
179
|
|
180 my $csa = $self->adaptor()->db()->get_CoordSystemAdaptor();
|
|
181
|
|
182 #
|
|
183 # TBD try to make this more efficient
|
|
184 #
|
|
185 my $from_rank = $other_cs->rank();
|
|
186 foreach my $cs (@$coord_systems) {
|
|
187 last if($cs->rank >= $from_rank);
|
|
188
|
|
189 #check if a mapping path even exists to this coordinate system
|
|
190 my @mapping_path = @{ $csa->get_mapping_path( $cs, $other_cs ) };
|
|
191
|
|
192 if(@mapping_path) {
|
|
193
|
|
194 # Try to map to this coord system. If we get back any coordinates then
|
|
195 # it is our 'toplevel' that we were looking for
|
|
196 my $mapper = $adaptor->fetch_by_CoordSystems($other_cs, $cs);
|
|
197
|
|
198 if($fastmap) {
|
|
199 my @result = $mapper->fastmap($frm_seq_region_name, $frm_start, $frm_end,
|
|
200 $frm_strand, $frm_cs);
|
|
201 return @result if(@result);
|
|
202 } else {
|
|
203 my @coords = $mapper->map($frm_seq_region_name, $frm_start, $frm_end,
|
|
204 $frm_strand, $frm_cs);
|
|
205
|
|
206 if(@coords > 1 || !$coords[0]->isa('Bio::EnsEMBL::Mapper::Gap')) {
|
|
207 return @coords;
|
|
208 }
|
|
209 }
|
|
210 }
|
|
211 }
|
|
212
|
|
213 # the toplevel coordinate system for the region requested *is* the
|
|
214 # requested region.
|
|
215 if($fastmap) {
|
|
216 return ($seq_region_id,$frm_start, $frm_end, $frm_strand, $other_cs);
|
|
217 }
|
|
218 return Bio::EnsEMBL::Mapper::Coordinate->new
|
|
219 ($seq_region_id,$frm_start,$frm_end, $frm_strand, $other_cs);
|
|
220 }
|
|
221
|
|
222 #
|
|
223 # for polymorphism with AssemblyMapper
|
|
224 #
|
|
225 =head2 flush
|
|
226
|
|
227 Args : none
|
|
228 Example : none
|
|
229 Description: polymorphism with AssemblyMapper, does nothing
|
|
230 Returntype : none
|
|
231 Exceptions : none
|
|
232 Status : Stable
|
|
233
|
|
234 =cut
|
|
235
|
|
236 sub flush {}
|
|
237
|
|
238 =head2 fastmap
|
|
239
|
|
240 Arg [1] : string $frm_seq_region
|
|
241 The name of the sequence region to transform FROM
|
|
242 Arg [2] : int $frm_start
|
|
243 The start of the region to transform FROM
|
|
244 Arg [3] : int $frm_end
|
|
245 The end of the region to transform FROM
|
|
246 Arg [4] : int $strand
|
|
247 The strand of the region to transform FROM
|
|
248 Arg [5] : Bio::EnsEMBL::CoordSystem
|
|
249 The coordinate system to transform FROM
|
|
250 Example : @coords = $mapper->fastmap('X', 1_000_000, 2_000_000,
|
|
251 1, $chr_cs);
|
|
252 Description: Transforms coordinates from one coordinate system
|
|
253 to another.
|
|
254 Returntype : List of Bio::EnsEMBL::Mapper::Coordinate and/or
|
|
255 Bio::EnsEMBL::Mapper:Gap objects
|
|
256 Exceptions : thrown if if the specified TO coordinate system is not one
|
|
257 of the coordinate systems associated with this mapper
|
|
258 Caller : general
|
|
259 Status : Stable
|
|
260
|
|
261 =cut
|
|
262
|
|
263 sub fastmap {
|
|
264 my $self = shift;
|
|
265 return $self->map(@_,1);
|
|
266 }
|
|
267
|
|
268 =head2 assembled_CoordSystem
|
|
269
|
|
270 Arg [1] : none
|
|
271 Example : $cs = $mapper->assembled_CoordSystem
|
|
272 Description: Retrieves the assembled CoordSystem from this mapper
|
|
273 Returntype : Bio::EnsEMBL::CoordSystem
|
|
274 Exceptions : none
|
|
275 Caller : internal, AssemblyMapperAdaptor
|
|
276 Status : Stable
|
|
277
|
|
278 =cut
|
|
279
|
|
280 sub assembled_CoordSystem {
|
|
281 my $self = shift;
|
|
282 return $self->{'toplevel_cs'};
|
|
283 }
|
|
284
|
|
285 =head2 component_CoordSystem
|
|
286
|
|
287 Arg [1] : none
|
|
288 Example : $cs = $mapper->component_CoordSystem
|
|
289 Description: Retrieves the component CoordSystem from this mapper
|
|
290 Returntype : Bio::EnsEMBL::CoordSystem
|
|
291 Exceptions : none
|
|
292 Caller : internal, AssemblyMapperAdaptor
|
|
293 Status : Stable
|
|
294
|
|
295 =cut
|
|
296
|
|
297 sub component_CoordSystem {
|
|
298 my $self = shift;
|
|
299 return $self->{'other_cs'};
|
|
300 }
|
|
301
|
|
302
|
|
303 sub _list {
|
|
304 my($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_cs, $seq_regions) = @_;
|
|
305
|
|
306 my $mapper = $self->{'mapper'};
|
|
307 my $toplevel_cs = $self->{'toplevel_cs'};
|
|
308 my $other_cs = $self->{'other_cs'};
|
|
309 my $adaptor = $self->adaptor;
|
|
310
|
|
311 if($frm_cs->is_top_level()) {
|
|
312 throw("The toplevel CoordSystem can only be mapped TO, not FROM.");
|
|
313 }
|
|
314 if($frm_cs != $other_cs && !$frm_cs->equals($other_cs)) {
|
|
315 throw("Coordinate system " . $frm_cs->name . " " . $frm_cs->version .
|
|
316 " is neither the assembled nor the component coordinate system " .
|
|
317 " of this AssemblyMapper");
|
|
318 }
|
|
319
|
|
320 my $coord_systems = $self->{'coord_systems'};
|
|
321 my $csa = $self->adaptor()->db()->get_CoordSystemAdaptor();
|
|
322
|
|
323 #
|
|
324 # TBD try to make this more efficient
|
|
325 #
|
|
326 my $from_rank = $other_cs->rank();
|
|
327 foreach my $cs (@$coord_systems) {
|
|
328 last if($cs->rank >= $from_rank);
|
|
329
|
|
330 #check if a mapping path even exists to this coordinate system
|
|
331 my @mapping_path = @{ $csa->get_mapping_path( $cs, $other_cs ) };
|
|
332
|
|
333 if(@mapping_path) {
|
|
334
|
|
335 # Try to map to this coord system. If we get back any coordinates then
|
|
336 # it is our 'toplevel' that we were looking for
|
|
337 my $mapper = $adaptor->fetch_by_CoordSystems($other_cs, $cs);
|
|
338
|
|
339 my @result;
|
|
340
|
|
341 my @tmp;
|
|
342 push @tmp, $frm_seq_region_name;
|
|
343 my $seq_region_id = @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@tmp)}[0];
|
|
344
|
|
345 if($seq_regions) {
|
|
346 @result = $mapper->list_seq_regions($frm_seq_region_name, $frm_start,
|
|
347 $frm_end, $frm_cs);
|
|
348 } else {
|
|
349 @result = $mapper->list_ids($frm_seq_region_name, $frm_start,
|
|
350 $frm_end, $frm_cs);
|
|
351 }
|
|
352
|
|
353 return @result if(@result);
|
|
354 }
|
|
355 }
|
|
356
|
|
357 # the toplevel coordinate system for the region requested *is* the
|
|
358 return ($frm_seq_region_name);
|
|
359
|
|
360
|
|
361 # requested region.
|
|
362 if($seq_regions) {
|
|
363 return ($frm_seq_region_name);
|
|
364 }
|
|
365
|
|
366 #this seems a bit silly and inefficient, but it is probably never
|
|
367 #called anyway.
|
|
368 my $slice_adaptor = $adaptor->db()->get_SliceAdaptor();
|
|
369 my $slice = $slice_adaptor->fetch_by_region($other_cs->name(),
|
|
370 $frm_seq_region_name,
|
|
371 undef,undef,undef,$other_cs);
|
|
372 return ($slice_adaptor->get_seq_region_id($slice));
|
|
373 }
|
|
374
|
|
375
|
|
376
|
|
377 =head2 list_seq_regions
|
|
378
|
|
379 Arg [1] : string $frm_seq_region
|
|
380 The name of the sequence region of interest
|
|
381 Arg [2] : int $frm_start
|
|
382 The start of the region of interest
|
|
383 Arg [3] : int $frm_end
|
|
384 The end of the region to transform of interest
|
|
385 Arg [5] : Bio::EnsEMBL::CoordSystem $frm_cs
|
|
386 The coordinate system to obtain overlapping ids of
|
|
387 Example : foreach $id ($asm_mapper->list_ids('X',1,1000,$ctg_cs)) {...}
|
|
388 Description: Retrieves a list of overlapping seq_region names
|
|
389 of another coordinate system. This is the same as the
|
|
390 list_ids method but uses seq_region names rather internal ids
|
|
391 Returntype : List of strings
|
|
392 Exceptions : none
|
|
393 Caller : general
|
|
394 Status : Stable
|
|
395
|
|
396 =cut
|
|
397
|
|
398 sub list_seq_regions {
|
|
399 throw('Incorrect number of arguments.') if(@_ != 5);
|
|
400 return _list(@_,1);
|
|
401 }
|
|
402
|
|
403
|
|
404 =head2 list_ids
|
|
405
|
|
406 Arg [1] : string $frm_seq_region
|
|
407 The name of the sequence region of interest.
|
|
408 Arg [2] : int $frm_start
|
|
409 The start of the region of interest
|
|
410 Arg [3] : int $frm_end
|
|
411 The end of the region to transform of interest
|
|
412 Arg [5] : Bio::EnsEMBL::CoordSystem $frm_cs
|
|
413 The coordinate system to obtain overlapping ids of
|
|
414 Example : foreach $id ($asm_mapper->list_ids('X',1,1000,$chr_cs)) {...}
|
|
415 Description: Retrieves a list of overlapping seq_region internal identifiers
|
|
416 of another coordinate system. This is the same as the
|
|
417 list_seq_regions method but uses internal identfiers rather
|
|
418 than seq_region strings
|
|
419 Returntype : List of ints
|
|
420 Exceptions : thrown if the from CoordSystem is the toplevel coord system
|
|
421 thrown if the from CoordSystem is not the one used in the mapper
|
|
422 Caller : general
|
|
423 Status : Stable
|
|
424
|
|
425 =cut
|
|
426
|
|
427 sub list_ids {
|
|
428 throw('Incorrect number of arguments.') if(@_ != 5);
|
|
429 return _list(@_,0);
|
|
430 }
|
|
431
|
|
432
|
|
433
|
|
434
|
|
435
|
|
436 1;
|