comparison variant_effect_predictor/Bio/EnsEMBL/TopLevelAssemblyMapper.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::TopLevelAssemblyMapper -
24 Handles mapping between a given coordinate system and the toplevel
25 pseudo coordinate system.
26
27 =head1 SYNOPSIS
28
29 $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(...);
30 $asma = $db->get_AssemblyMapperAdaptor();
31 $csa = $db->get_CoordSystemAdaptor();
32
33 my $toplevel = $cs_adaptor->fetch_by_name('toplevel');
34 my $ctg_cs = $cs_adaptor->fetch_by_name('contig');
35
36 $asm_mapper = $map_adaptor->fetch_by_CoordSystems( $cs1, $cs2 );
37
38 # map to toplevel coord system for this region
39 @chr_coords =
40 $asm_mapper->map( 'AL30421.1.200.92341', 100, 10000, -1, $ctg_cs );
41
42 # list toplevel seq_region_ids for this region
43 @chr_ids =
44 $asm_mapper->list_ids( 'AL30421.1.200.92341', 1, 1000, -1,
45 $ctg_cs );
46
47 =head1 DESCRIPTION
48
49 The TopLevelAssemblyMapper performs mapping between a provided
50 coordinate system and the toplevel pseudo cooordinate system. The
51 toplevel coordinate system is not a real coordinate system, but
52 represents the highest coordinate system that can be mapped to in a
53 given region. It is only possible to perform unidirectional mapping
54 using this mapper, because it does not make sense to map from the
55 toplevel coordinate system to another coordinate system.
56
57 =head1 METHODS
58
59 =cut
60
61
62 use strict;
63 use warnings;
64
65 package Bio::EnsEMBL::TopLevelAssemblyMapper;
66
67 use Bio::EnsEMBL::Utils::Exception qw(throw);
68 use Bio::EnsEMBL::Mapper;
69 use Bio::EnsEMBL::CoordSystem;
70 use Scalar::Util qw(weaken);
71
72 =head2 new
73
74 Arg [1] : Bio::EnsEMBL::DBAdaptor $dbadaptor the adaptor for
75 the database this mapper is using.
76 Arg [2] : Toplevel CoordSystem
77 Arg [3] : Other CoordSystem
78 Description: Creates a new TopLevelAssemblyMapper object
79 Returntype : Bio::EnsEMBL::DBSQL::TopLevelAssemblyMapper
80 Exceptions : throws if any of the 3 arguments are missing/ not
81 : of the correct type.
82 Caller : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor
83 Status : Stable
84
85 =cut
86
87
88 sub new {
89 my ($caller, $adaptor, $toplevel_cs, $other_cs) = @_;
90
91 my $class = ref($caller) || $caller;
92
93 if(!ref($toplevel_cs)) {
94 throw('Toplevel CoordSystem argument expected.');
95 }
96 if(!ref($other_cs)) {
97 throw('Other CoordSystem argument expected.');
98 }
99
100 if(!$toplevel_cs->is_top_level()) {
101 throw($toplevel_cs->name() . " is not the toplevel CoordSystem.");
102 }
103 if($other_cs->is_top_level()) {
104 throw("Other CoordSystem argument should not be toplevel CoordSystem.");
105 }
106
107 my $cs_adaptor = $adaptor->db()->get_CoordSystemAdaptor();
108 my $coord_systems = $cs_adaptor->fetch_all();
109
110 my $self = bless {'coord_systems' => $coord_systems,
111 'toplevel_cs' => $toplevel_cs,
112 'other_cs' => $other_cs}, $class;
113
114 $self->adaptor($adaptor);
115 return $self;
116 }
117
118 sub adaptor {
119 my $self = shift;
120
121 weaken($self->{'adaptor'} = shift) if(@_);
122
123 return $self->{'adaptor'};
124 }
125
126 =head2 map
127
128 Arg [1] : string $frm_seq_region
129 The name of the sequence region to transform FROM
130 Arg [2] : int $frm_start
131 The start of the region to transform FROM
132 Arg [3] : int $frm_end
133 The end of the region to transform FROM
134 Arg [4] : int $strand
135 The strand of the region to transform FROM
136 Arg [5] : Bio::EnsEMBL::CoordSystem
137 The coordinate system to transform FROM
138 Arg [6] : if set will do a fastmap
139 Example : @coords = $mapper->map('X', 1_000_000, 2_000_000,
140 1, $chr_cs);
141 Description: Transforms coordinates from one coordinate system
142 to another.
143 Returntype : List of Bio::EnsEMBL::Mapper::Coordinate and/or
144 Bio::EnsEMBL::Mapper:Gap objects
145 Exceptions : thrown if if the specified TO coordinate system is not one
146 of the coordinate systems associated with this mapper
147 Caller : general
148 Status : Stable
149
150 =cut
151
152
153 sub map {
154 throw('Incorrect number of arguments.') if(@_ != 6 && @_ != 7);
155
156 my($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_strand, $frm_cs,
157 $fastmap) = @_;
158
159 if($frm_cs->is_top_level()) {
160 throw("The toplevel CoordSystem can only be mapped TO, not FROM.");
161 }
162
163 my @tmp;
164 push @tmp, $frm_seq_region_name;
165 my $seq_region_id = @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@tmp)}[0];
166
167 my $mapper = $self->{'mapper'};
168 my $toplevel_cs = $self->{'toplevel_cs'};
169 my $other_cs = $self->{'other_cs'};
170 my $adaptor = $self->adaptor;
171
172 if($frm_cs != $other_cs && !$frm_cs->equals($other_cs)) {
173 throw("Coordinate system " . $frm_cs->name . " " . $frm_cs->version .
174 " is neither the assembled nor the component coordinate system " .
175 " of this AssemblyMapper");
176 }
177
178 my $coord_systems = $self->{'coord_systems'};
179
180 my $csa = $self->adaptor()->db()->get_CoordSystemAdaptor();
181
182 #
183 # TBD try to make this more efficient
184 #
185 my $from_rank = $other_cs->rank();
186 foreach my $cs (@$coord_systems) {
187 last if($cs->rank >= $from_rank);
188
189 #check if a mapping path even exists to this coordinate system
190 my @mapping_path = @{ $csa->get_mapping_path( $cs, $other_cs ) };
191
192 if(@mapping_path) {
193
194 # Try to map to this coord system. If we get back any coordinates then
195 # it is our 'toplevel' that we were looking for
196 my $mapper = $adaptor->fetch_by_CoordSystems($other_cs, $cs);
197
198 if($fastmap) {
199 my @result = $mapper->fastmap($frm_seq_region_name, $frm_start, $frm_end,
200 $frm_strand, $frm_cs);
201 return @result if(@result);
202 } else {
203 my @coords = $mapper->map($frm_seq_region_name, $frm_start, $frm_end,
204 $frm_strand, $frm_cs);
205
206 if(@coords > 1 || !$coords[0]->isa('Bio::EnsEMBL::Mapper::Gap')) {
207 return @coords;
208 }
209 }
210 }
211 }
212
213 # the toplevel coordinate system for the region requested *is* the
214 # requested region.
215 if($fastmap) {
216 return ($seq_region_id,$frm_start, $frm_end, $frm_strand, $other_cs);
217 }
218 return Bio::EnsEMBL::Mapper::Coordinate->new
219 ($seq_region_id,$frm_start,$frm_end, $frm_strand, $other_cs);
220 }
221
222 #
223 # for polymorphism with AssemblyMapper
224 #
225 =head2 flush
226
227 Args : none
228 Example : none
229 Description: polymorphism with AssemblyMapper, does nothing
230 Returntype : none
231 Exceptions : none
232 Status : Stable
233
234 =cut
235
236 sub flush {}
237
238 =head2 fastmap
239
240 Arg [1] : string $frm_seq_region
241 The name of the sequence region to transform FROM
242 Arg [2] : int $frm_start
243 The start of the region to transform FROM
244 Arg [3] : int $frm_end
245 The end of the region to transform FROM
246 Arg [4] : int $strand
247 The strand of the region to transform FROM
248 Arg [5] : Bio::EnsEMBL::CoordSystem
249 The coordinate system to transform FROM
250 Example : @coords = $mapper->fastmap('X', 1_000_000, 2_000_000,
251 1, $chr_cs);
252 Description: Transforms coordinates from one coordinate system
253 to another.
254 Returntype : List of Bio::EnsEMBL::Mapper::Coordinate and/or
255 Bio::EnsEMBL::Mapper:Gap objects
256 Exceptions : thrown if if the specified TO coordinate system is not one
257 of the coordinate systems associated with this mapper
258 Caller : general
259 Status : Stable
260
261 =cut
262
263 sub fastmap {
264 my $self = shift;
265 return $self->map(@_,1);
266 }
267
268 =head2 assembled_CoordSystem
269
270 Arg [1] : none
271 Example : $cs = $mapper->assembled_CoordSystem
272 Description: Retrieves the assembled CoordSystem from this mapper
273 Returntype : Bio::EnsEMBL::CoordSystem
274 Exceptions : none
275 Caller : internal, AssemblyMapperAdaptor
276 Status : Stable
277
278 =cut
279
280 sub assembled_CoordSystem {
281 my $self = shift;
282 return $self->{'toplevel_cs'};
283 }
284
285 =head2 component_CoordSystem
286
287 Arg [1] : none
288 Example : $cs = $mapper->component_CoordSystem
289 Description: Retrieves the component CoordSystem from this mapper
290 Returntype : Bio::EnsEMBL::CoordSystem
291 Exceptions : none
292 Caller : internal, AssemblyMapperAdaptor
293 Status : Stable
294
295 =cut
296
297 sub component_CoordSystem {
298 my $self = shift;
299 return $self->{'other_cs'};
300 }
301
302
303 sub _list {
304 my($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_cs, $seq_regions) = @_;
305
306 my $mapper = $self->{'mapper'};
307 my $toplevel_cs = $self->{'toplevel_cs'};
308 my $other_cs = $self->{'other_cs'};
309 my $adaptor = $self->adaptor;
310
311 if($frm_cs->is_top_level()) {
312 throw("The toplevel CoordSystem can only be mapped TO, not FROM.");
313 }
314 if($frm_cs != $other_cs && !$frm_cs->equals($other_cs)) {
315 throw("Coordinate system " . $frm_cs->name . " " . $frm_cs->version .
316 " is neither the assembled nor the component coordinate system " .
317 " of this AssemblyMapper");
318 }
319
320 my $coord_systems = $self->{'coord_systems'};
321 my $csa = $self->adaptor()->db()->get_CoordSystemAdaptor();
322
323 #
324 # TBD try to make this more efficient
325 #
326 my $from_rank = $other_cs->rank();
327 foreach my $cs (@$coord_systems) {
328 last if($cs->rank >= $from_rank);
329
330 #check if a mapping path even exists to this coordinate system
331 my @mapping_path = @{ $csa->get_mapping_path( $cs, $other_cs ) };
332
333 if(@mapping_path) {
334
335 # Try to map to this coord system. If we get back any coordinates then
336 # it is our 'toplevel' that we were looking for
337 my $mapper = $adaptor->fetch_by_CoordSystems($other_cs, $cs);
338
339 my @result;
340
341 my @tmp;
342 push @tmp, $frm_seq_region_name;
343 my $seq_region_id = @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@tmp)}[0];
344
345 if($seq_regions) {
346 @result = $mapper->list_seq_regions($frm_seq_region_name, $frm_start,
347 $frm_end, $frm_cs);
348 } else {
349 @result = $mapper->list_ids($frm_seq_region_name, $frm_start,
350 $frm_end, $frm_cs);
351 }
352
353 return @result if(@result);
354 }
355 }
356
357 # the toplevel coordinate system for the region requested *is* the
358 return ($frm_seq_region_name);
359
360
361 # requested region.
362 if($seq_regions) {
363 return ($frm_seq_region_name);
364 }
365
366 #this seems a bit silly and inefficient, but it is probably never
367 #called anyway.
368 my $slice_adaptor = $adaptor->db()->get_SliceAdaptor();
369 my $slice = $slice_adaptor->fetch_by_region($other_cs->name(),
370 $frm_seq_region_name,
371 undef,undef,undef,$other_cs);
372 return ($slice_adaptor->get_seq_region_id($slice));
373 }
374
375
376
377 =head2 list_seq_regions
378
379 Arg [1] : string $frm_seq_region
380 The name of the sequence region of interest
381 Arg [2] : int $frm_start
382 The start of the region of interest
383 Arg [3] : int $frm_end
384 The end of the region to transform of interest
385 Arg [5] : Bio::EnsEMBL::CoordSystem $frm_cs
386 The coordinate system to obtain overlapping ids of
387 Example : foreach $id ($asm_mapper->list_ids('X',1,1000,$ctg_cs)) {...}
388 Description: Retrieves a list of overlapping seq_region names
389 of another coordinate system. This is the same as the
390 list_ids method but uses seq_region names rather internal ids
391 Returntype : List of strings
392 Exceptions : none
393 Caller : general
394 Status : Stable
395
396 =cut
397
398 sub list_seq_regions {
399 throw('Incorrect number of arguments.') if(@_ != 5);
400 return _list(@_,1);
401 }
402
403
404 =head2 list_ids
405
406 Arg [1] : string $frm_seq_region
407 The name of the sequence region of interest.
408 Arg [2] : int $frm_start
409 The start of the region of interest
410 Arg [3] : int $frm_end
411 The end of the region to transform of interest
412 Arg [5] : Bio::EnsEMBL::CoordSystem $frm_cs
413 The coordinate system to obtain overlapping ids of
414 Example : foreach $id ($asm_mapper->list_ids('X',1,1000,$chr_cs)) {...}
415 Description: Retrieves a list of overlapping seq_region internal identifiers
416 of another coordinate system. This is the same as the
417 list_seq_regions method but uses internal identfiers rather
418 than seq_region strings
419 Returntype : List of ints
420 Exceptions : thrown if the from CoordSystem is the toplevel coord system
421 thrown if the from CoordSystem is not the one used in the mapper
422 Caller : general
423 Status : Stable
424
425 =cut
426
427 sub list_ids {
428 throw('Incorrect number of arguments.') if(@_ != 5);
429 return _list(@_,0);
430 }
431
432
433
434
435
436 1;