Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/MappedSlice.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::MappedSlice - an object representing a mapped slice | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 # get a reference slice | |
28 my $slice = | |
29 $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 ); | |
30 | |
31 # create MappedSliceContainer based on the reference slice | |
32 my $msc = Bio::EnsEMBL::MappedSliceContainer->new( -SLICE => $slice ); | |
33 | |
34 # set the adaptor for fetching AssemblySlices | |
35 my $asa = $slice->adaptor->db->get_AssemblySliceAdaptor; | |
36 $msc->set_AssemblySliceAdaptor($asa); | |
37 | |
38 # add an AssemblySlice to your MappedSliceContainer | |
39 $msc->attach_AssemblySlice('NCBIM36'); | |
40 | |
41 foreach my $mapped_slice ( @{ $msc->get_all_MappedSlices } ) { | |
42 print $mapped_slice->name, "\n"; | |
43 | |
44 foreach my $sf ( @{ $mapped_slice->get_all_SimpleFeatures } ) { | |
45 print " ", &to_string($sf), "\n"; | |
46 } | |
47 } | |
48 | |
49 =head1 DESCRIPTION | |
50 | |
51 NOTE: this code is under development and not fully functional nor tested | |
52 yet. Use only for development. | |
53 | |
54 This object represents a mapped slice, i.e. a slice that's attached | |
55 to a reference slice and a mapper to convert coordinates to/from the | |
56 reference. The attachment is done via a MappedSliceContainer which | |
57 has the reference slice and the "container slice" defining the common | |
58 coordinate system for all MappedSlices. | |
59 | |
60 A MappedSlice is supposed to behave as close to a Bio::EnsEMBL::Slice | |
61 as possible. Most Slice methods are implemented in MappedSlice and will | |
62 return an equivalent value to what Slice does. There are some exceptions | |
63 of unimplemented methods, either because there is no useful equivalent | |
64 for a MappedSlice to do, or they are too complicated. | |
65 | |
66 Not supported Bio::EnsEMBL::Slice methods: | |
67 | |
68 All deprecated methods | |
69 All Bio::PrimarySeqI compliance methods | |
70 expand | |
71 get_generic_features | |
72 get_seq_region_id | |
73 seq_region_Slice | |
74 | |
75 Not currently supported but maybe should/could: | |
76 | |
77 calculate_pi | |
78 calculate_theta | |
79 get_base_count | |
80 get_by_Individual | |
81 get_by_strain | |
82 invert | |
83 | |
84 Internally, a MappedSlice is a collection of Bio::EnsEMBL::Slices and | |
85 associated Bio::EnsEMBL::Mappers which map the slices to the common | |
86 container coordinate system. | |
87 | |
88 MappedSlices are usually created and attached to a MappedSliceContainer | |
89 by an adaptor/factory. | |
90 | |
91 =head1 METHODS | |
92 | |
93 new | |
94 add_Slice_Mapper_pair | |
95 get_all_Slice_Mapper_pairs | |
96 adaptor | |
97 container | |
98 name | |
99 seq_region_name | |
100 start | |
101 end | |
102 strand | |
103 length | |
104 seq_region_length | |
105 centrepoint | |
106 coord_system | |
107 coord_system_name | |
108 is_toplevel | |
109 seq (not implemented yet) | |
110 subseq (not implemented yet) | |
111 get_repeatmasked_seq (not implemented yet) | |
112 sub_MappedSlice (not implemented yet) | |
113 project (not implemented yet) | |
114 | |
115 =head1 RELATED MODULES | |
116 | |
117 Bio::EnsEMBL::MappedSlice | |
118 Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor | |
119 Bio::EnsEMBL::Compara::AlignSlice | |
120 Bio::EnsEMBL::Compara::AlignSlice::Slice | |
121 Bio::EnsEMBL::AlignStrainSlice | |
122 Bio::EnsEMBL::StrainSlice | |
123 | |
124 =cut | |
125 | |
126 package Bio::EnsEMBL::MappedSlice; | |
127 | |
128 use strict; | |
129 use warnings; | |
130 no warnings 'uninitialized'; | |
131 | |
132 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
133 use Bio::EnsEMBL::Utils::Exception qw(throw warning); | |
134 use Bio::EnsEMBL::Mapper; | |
135 use Scalar::Util qw(weaken); | |
136 | |
137 use vars qw($AUTOLOAD); | |
138 | |
139 | |
140 =head2 new | |
141 | |
142 Arg [ADAPTOR] : Adaptor $adaptor - an adaptor of the appropriate type | |
143 Arg [CONTAINER] : Bio::EnsEMBL::MappedSliceContainer $container - the | |
144 container this MappedSlice is attached to | |
145 Arg [NAME] : String $name - name | |
146 Example : my $mapped_slice = Bio::EnsEMBL::MappedSlice->new( | |
147 -ADAPTOR => $adaptor, | |
148 -CONTAINER => $container, | |
149 -NAME => $name, | |
150 ); | |
151 Description : Constructor. Usually you won't call this method manually, but | |
152 the MappedSlice will be constructed by an adaptor/factory. | |
153 Return type : Bio::EnsEMBL::MappedSlice | |
154 Exceptions : thrown on wrong or missing arguments | |
155 Caller : general, MappedSlice adaptors | |
156 Status : At Risk | |
157 : under development | |
158 | |
159 =cut | |
160 | |
161 sub new { | |
162 my $caller = shift; | |
163 my $class = ref($caller) || $caller; | |
164 | |
165 my ($adaptor, $container, $name) = | |
166 rearrange([qw(ADAPTOR CONTAINER NAME)], @_); | |
167 | |
168 # arguement check | |
169 unless ($container and ref($container) and | |
170 $container->isa('Bio::EnsEMBL::MappedSliceContainer')) { | |
171 throw("Need a MappedSliceContainer."); | |
172 } | |
173 | |
174 my $self = {}; | |
175 bless ($self, $class); | |
176 | |
177 # | |
178 # initialise object | |
179 # | |
180 | |
181 # need to weaken reference to prevent circular reference | |
182 weaken($self->{'container'} = $container); | |
183 | |
184 $self->adaptor($adaptor) if (defined($adaptor)); | |
185 $self->{'name'} = $name if (defined($name)); | |
186 | |
187 $self->{'slice_mapper_pairs'} = []; | |
188 | |
189 return $self; | |
190 } | |
191 | |
192 | |
193 =head2 add_Slice_Mapper_pair | |
194 | |
195 Arg[1] : Bio::EnsEMBL::Slice $slice - slice to add | |
196 Arg[2] : Bio::EnsEMBL::Mapper $mapper - the mapper for this slice | |
197 Example : $mapped_slice->add_Slice_Mapper_pair($slice, $mapper); | |
198 Description : Adds a native slice and a corresponding mapper to map to/from | |
199 the artificial container coord system. | |
200 Return type : listref of Bio::EnsEMBL::MappedSlice | |
201 Exceptions : thrown on wrong or missing arguments | |
202 Caller : general, MappedSlice adaptors | |
203 Status : At Risk | |
204 : under development | |
205 | |
206 =cut | |
207 | |
208 sub add_Slice_Mapper_pair { | |
209 my $self = shift; | |
210 my $slice = shift; | |
211 my $mapper = shift; | |
212 | |
213 # argument check | |
214 unless ($slice and ref($slice) and ($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) { | |
215 throw("You must provide a slice."); | |
216 } | |
217 | |
218 unless ($mapper and ref($mapper) and $mapper->isa('Bio::EnsEMBL::Mapper')) { | |
219 throw("You must provide a mapper."); | |
220 } | |
221 | |
222 push @{ $self->{'slice_mapper_pairs'} }, [ $slice, $mapper ]; | |
223 | |
224 return $self->{'slice_mapper_pairs'}; | |
225 } | |
226 | |
227 | |
228 =head2 get_all_Slice_Mapper_pairs | |
229 | |
230 Example : foreach my $pair (@{ $self->get_all_Slice_Mapper_pairs }) { | |
231 my ($slice, $mapper) = @$pair; | |
232 | |
233 # get container coordinates | |
234 my @coords = $mapper->map_coordinates( | |
235 $slice->seq_region_name, | |
236 $slice->start, | |
237 $slice->end, | |
238 $slice->strand, | |
239 'mapped_slice' | |
240 ); | |
241 | |
242 # .... | |
243 } | |
244 Description : Gets all Slice/Mapper pairs this MappedSlice is composed of. | |
245 Each slice (and features on it) can be mapped onto the | |
246 artificial container coord system using the mapper. | |
247 Return type : listref of listref of a Bio::EnsEMBL::Slice and | |
248 Bio::EnsEMBL::Mapper pair | |
249 Exceptions : none | |
250 Caller : general | |
251 Status : At Risk | |
252 : under development | |
253 | |
254 =cut | |
255 | |
256 sub get_all_Slice_Mapper_pairs { | |
257 my $self = shift; | |
258 return $self->{'slice_mapper_pairs'}; | |
259 } | |
260 | |
261 | |
262 =head2 adaptor | |
263 | |
264 Arg[1] : (optional) Adaptor $adaptor - the adaptor/factory for this | |
265 object | |
266 Example : $mapped_slice->adaptor($assembly_slice_adaptor); | |
267 Description : Getter/setter for the adaptor/factory for this object. | |
268 Return type : Adaptor of appropriate type | |
269 Exceptions : none | |
270 Caller : general | |
271 Status : At Risk | |
272 : under development | |
273 | |
274 =cut | |
275 | |
276 sub adaptor { | |
277 my $self = shift; | |
278 weaken($self->{'adaptor'} = shift) if (@_); | |
279 return $self->{'adaptor'}; | |
280 } | |
281 | |
282 | |
283 =head2 container | |
284 | |
285 Arg[1] : (optional) Bio::EnsEMBL::MappedSliceContainer - the container | |
286 this object is attached to | |
287 Example : my $container = $mapped_slice->container; | |
288 print $container->ref_slice->name, "\n"; | |
289 Description : Getter/setter for the container this object is attached to. The | |
290 container will give you access to the reference slice, a common | |
291 artificial container slice, and a mapper to map to it from the | |
292 container coord system. | |
293 | |
294 The implementation uses a weak reference to attach the container | |
295 since the container holds a list of MappedSlices itself. | |
296 Return type : Bio::EnsEMBL::MappedSliceContainer | |
297 Exceptions : none | |
298 Caller : general | |
299 Status : At Risk | |
300 : under development | |
301 | |
302 =cut | |
303 | |
304 sub container { | |
305 my $self = shift; | |
306 weaken($self->{'container'} = shift) if (@_); | |
307 return $self->{'container'}; | |
308 } | |
309 | |
310 | |
311 =head2 name | |
312 | |
313 Arg[1] : String - the name of this object | |
314 Example : my $name = $mapped_slice->container->ref_slice->name . | |
315 ":mapped_" . $ident_string; | |
316 $mapped_slice->name($name); | |
317 Description : Getter/setter for this object's name | |
318 Return type : String | |
319 Exceptions : none | |
320 Caller : general | |
321 Status : At Risk | |
322 : under development | |
323 | |
324 =cut | |
325 | |
326 sub name { | |
327 my $self = shift; | |
328 $self->{'name'} = shift if (@_); | |
329 return $self->{'name'}; | |
330 } | |
331 | |
332 | |
333 =head2 seq_region_name | |
334 | |
335 Example : my $sr_name = $mapped_slice->seq_region_name; | |
336 Description : Returns the seq_region name of the reference slice. | |
337 Return type : String | |
338 Exceptions : none | |
339 Caller : general | |
340 Status : At Risk | |
341 : under development | |
342 | |
343 =cut | |
344 | |
345 sub seq_region_name { | |
346 my $self = shift; | |
347 return $self->container->ref_slice->seq_region_name; | |
348 } | |
349 | |
350 | |
351 =head2 start | |
352 | |
353 Example : my $start = $mapped_slice->start; | |
354 Description : Returns the start of the container slice. | |
355 Return type : Int | |
356 Exceptions : none | |
357 Caller : general | |
358 Status : At Risk | |
359 : under development | |
360 | |
361 =cut | |
362 | |
363 sub start { | |
364 my $self = shift; | |
365 return $self->container->container_slice->start; | |
366 } | |
367 | |
368 | |
369 =head2 end | |
370 | |
371 Example : my $end = $mapped_slice->end; | |
372 Description : Returns the end of the container slice. | |
373 Return type : Int | |
374 Exceptions : none | |
375 Caller : general | |
376 Status : At Risk | |
377 : under development | |
378 | |
379 =cut | |
380 | |
381 sub end { | |
382 my $self = shift; | |
383 return $self->container->container_slice->end; | |
384 } | |
385 | |
386 | |
387 =head2 strand | |
388 | |
389 Example : my $strand = $mapped_slice->strand; | |
390 Description : Returns the strand of the container slice. | |
391 Return type : Int | |
392 Exceptions : none | |
393 Caller : general | |
394 Status : At Risk | |
395 : under development | |
396 | |
397 =cut | |
398 | |
399 sub strand { | |
400 my $self = shift; | |
401 return $self->container->container_slice->strand; | |
402 } | |
403 | |
404 | |
405 =head2 length | |
406 | |
407 Example : my $length = $mapped_slice->length; | |
408 Description : Returns the length of the container slice | |
409 Return type : Int | |
410 Exceptions : none | |
411 Caller : general | |
412 Status : At Risk | |
413 : under development | |
414 | |
415 =cut | |
416 | |
417 sub length { | |
418 my $self = shift; | |
419 return $self->container->container_slice->length; | |
420 } | |
421 | |
422 | |
423 =head2 seq_region_length | |
424 | |
425 Example : my $sr_length = $mapped_slice->seq_region_length; | |
426 Description : Returns the seq_region length of the reference slice. | |
427 Return type : Int | |
428 Exceptions : none | |
429 Caller : general | |
430 Status : At Risk | |
431 : under development | |
432 | |
433 =cut | |
434 | |
435 sub seq_region_length { | |
436 my $self = shift; | |
437 return $self->container->ref_slice->seq_region_length; | |
438 } | |
439 | |
440 | |
441 =head2 centrepoint | |
442 | |
443 Example : my $centrepoint = $mapped_slice->centrepoint; | |
444 Description : Returns the centrepoint of the container slice. | |
445 Return type : Int | |
446 Exceptions : none | |
447 Caller : general | |
448 Status : At Risk | |
449 : under development | |
450 | |
451 =cut | |
452 | |
453 sub centrepoint { | |
454 my $self = shift; | |
455 return $self->container->container_slice->centrepoint; | |
456 } | |
457 | |
458 | |
459 =head2 coord_system | |
460 | |
461 Example : my $cs = $mapped_slice->coord_system; | |
462 Description : Returns the coord system of the container slice. | |
463 Return type : Bio::EnsEMBL::CoordSystem | |
464 Exceptions : none | |
465 Caller : general | |
466 Status : At Risk | |
467 : under development | |
468 | |
469 =cut | |
470 | |
471 sub coord_system { | |
472 my $self = shift; | |
473 return $self->container->container_slice->coord_system; | |
474 } | |
475 | |
476 =head2 coord_system_name | |
477 | |
478 Example : my $cs_name = $mapped_slice->coord_system_name; | |
479 Description : Returns the coord system name of the container slice. | |
480 Return type : Int | |
481 Exceptions : none | |
482 Caller : general | |
483 Status : At Risk | |
484 : under development | |
485 | |
486 =cut | |
487 | |
488 sub coord_system_name { | |
489 my $self = shift; | |
490 return $self->container->container_slice->coord_system_name; | |
491 } | |
492 | |
493 =head2 is_toplevel | |
494 | |
495 Example : my $toplevel_flag = $mapped_slice->is_toplevel; | |
496 Description : Returns weather the container slice is toplevel. | |
497 Return type : Int | |
498 Exceptions : none | |
499 Caller : general | |
500 Status : At Risk | |
501 : under development | |
502 | |
503 =cut | |
504 | |
505 sub is_toplevel { | |
506 my $self = shift; | |
507 return $self->container->container_slice->is_toplevel; | |
508 } | |
509 | |
510 | |
511 =head2 seq | |
512 | |
513 Example : my $seq = $mapped_slice->seq() | |
514 Description : Retrieves the expanded sequence of this mapped slice, | |
515 including "-" characters where there are inserts in any other | |
516 mapped slices. This will align with the sequence returned by | |
517 the container's seq() method. | |
518 Return type : String | |
519 Exceptions : none | |
520 Caller : general | |
521 Status : At Risk | |
522 : under development | |
523 | |
524 =cut | |
525 | |
526 sub seq { | |
527 my $self = shift; | |
528 | |
529 # create an empty string | |
530 my $ms_seq = ''; | |
531 | |
532 # this coord represents the current position in the MS sequence | |
533 my $start = 0; | |
534 | |
535 # get slice/mapper pairs from mapped slice (usually only one anyway) | |
536 foreach my $pair(@{$self->get_all_Slice_Mapper_pairs()}) { | |
537 my ($s, $m) = @$pair; | |
538 | |
539 # make sure to send extra args | |
540 # eg strain slices might need read coverage filtering | |
541 my $seq = $s->seq(@_); | |
542 | |
543 # project from mapped slice to reference slice using the mapper | |
544 foreach my $ref_coord($m->map_coordinates('mapped_slice', 1, CORE::length($seq), $s->strand, 'mapped_slice')) { | |
545 | |
546 # normal coord | |
547 if(!$ref_coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate') && !$ref_coord->isa('Bio::EnsEMBL::Mapper::Gap')) { | |
548 | |
549 # project from reference slice to container slice using the container's mapper | |
550 foreach my $ms_coord($self->container->mapper->map_coordinates($self->container->ref_slice->seq_region_name, $ref_coord->start, $ref_coord->end, $ref_coord->strand, 'ref_slice')) { | |
551 | |
552 # normal coord | |
553 if(!$ms_coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate') && !$ms_coord->isa('Bio::EnsEMBL::Mapper::Gap')) { | |
554 $ms_seq .= substr($seq, $start, $ms_coord->length); | |
555 | |
556 $start += $ms_coord->length(); | |
557 } | |
558 | |
559 # indel coord | |
560 else { | |
561 $ms_seq .= '-' x $ms_coord->length(); | |
562 } | |
563 } | |
564 } | |
565 | |
566 # indel / gap | |
567 else { | |
568 | |
569 # if there's a gap here aswell, add corresponding sequence | |
570 if($ref_coord->gap_length > 0) { | |
571 $ms_seq .= substr($seq, $start, $ref_coord->gap_length); | |
572 $start += $ref_coord->gap_length; | |
573 } | |
574 | |
575 # add "-" to the sequence | |
576 $ms_seq .= '-' x ($ref_coord->length() - $ref_coord->gap_length()); | |
577 } | |
578 } | |
579 } | |
580 | |
581 return $ms_seq; | |
582 } | |
583 | |
584 sub subseq { | |
585 } | |
586 | |
587 sub get_repeatmasked_seq { | |
588 } | |
589 | |
590 sub sub_MappedSlice { | |
591 } | |
592 | |
593 sub project { | |
594 } | |
595 | |
596 | |
597 =head2 AUTOLOAD | |
598 | |
599 Arg[1..N] : Arguments passed on to the calls on the underlying slices. | |
600 Example : my @simple_features = @{ $mapped_slice->get_all_SimpleFeatures }; | |
601 Description : Aggregate data gathered from composing Slices. | |
602 This will call Slice->get_all_* and combine the results. | |
603 Coordinates will be transformed to be on the container slice | |
604 coordinate system. | |
605 | |
606 Calls involving DAS features are skipped since the DAS adaptor | |
607 handles coordinate conversions natively. | |
608 Return type : listref of features (same type as corresponding Slice method) | |
609 Exceptions : none | |
610 Caller : general | |
611 Status : At Risk | |
612 : under development | |
613 | |
614 =cut | |
615 | |
616 sub AUTOLOAD { | |
617 my $self = shift; | |
618 | |
619 my $method = $AUTOLOAD; | |
620 $method =~ s/.*:://; | |
621 | |
622 # AUTOLOAD should only deal with get_all_* methods | |
623 return unless ($method =~ /^get_all_/); | |
624 | |
625 # skip DAS methods | |
626 return if ($method =~ /DAS/); | |
627 | |
628 my @mapped_features = (); | |
629 | |
630 foreach my $pair (@{ $self->get_all_Slice_Mapper_pairs }) { | |
631 my ($slice, $mapper) = @$pair; | |
632 #warn $slice->name; | |
633 | |
634 # call $method on each native slice composing the MappedSlice | |
635 my @features = @{ $slice->$method(@_) }; | |
636 | |
637 # map features onto the artificial container coordinate system | |
638 foreach my $f (@features) { | |
639 | |
640 my @coords = $mapper->map_coordinates( | |
641 $f->slice->seq_region_name, | |
642 $f->start, | |
643 $f->end, | |
644 $f->strand, | |
645 'mapped_slice' | |
646 ); | |
647 | |
648 # sanity check | |
649 if (scalar(@coords) > 1) { | |
650 warning("Got more than one Coordinate returned, expected only one!\n"); | |
651 } | |
652 | |
653 $f->start($coords[0]->start); | |
654 $f->end($coords[0]->end); | |
655 $f->strand($coords[0]->strand); | |
656 $f->slice($self->container->container_slice); | |
657 | |
658 push @mapped_features, $f; | |
659 } | |
660 | |
661 } | |
662 | |
663 return \@mapped_features; | |
664 } | |
665 | |
666 | |
667 1; | |
668 |