comparison variant_effect_predictor/Bio/EnsEMBL/MappedSlice.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::MappedSlice - an object representing a mapped slice
24
25 =head1 SYNOPSIS
26
27 # get a reference slice
28 my $slice =
29 $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 );
30
31 # create MappedSliceContainer based on the reference slice
32 my $msc = Bio::EnsEMBL::MappedSliceContainer->new( -SLICE => $slice );
33
34 # set the adaptor for fetching AssemblySlices
35 my $asa = $slice->adaptor->db->get_AssemblySliceAdaptor;
36 $msc->set_AssemblySliceAdaptor($asa);
37
38 # add an AssemblySlice to your MappedSliceContainer
39 $msc->attach_AssemblySlice('NCBIM36');
40
41 foreach my $mapped_slice ( @{ $msc->get_all_MappedSlices } ) {
42 print $mapped_slice->name, "\n";
43
44 foreach my $sf ( @{ $mapped_slice->get_all_SimpleFeatures } ) {
45 print " ", &to_string($sf), "\n";
46 }
47 }
48
49 =head1 DESCRIPTION
50
51 NOTE: this code is under development and not fully functional nor tested
52 yet. Use only for development.
53
54 This object represents a mapped slice, i.e. a slice that's attached
55 to a reference slice and a mapper to convert coordinates to/from the
56 reference. The attachment is done via a MappedSliceContainer which
57 has the reference slice and the "container slice" defining the common
58 coordinate system for all MappedSlices.
59
60 A MappedSlice is supposed to behave as close to a Bio::EnsEMBL::Slice
61 as possible. Most Slice methods are implemented in MappedSlice and will
62 return an equivalent value to what Slice does. There are some exceptions
63 of unimplemented methods, either because there is no useful equivalent
64 for a MappedSlice to do, or they are too complicated.
65
66 Not supported Bio::EnsEMBL::Slice methods:
67
68 All deprecated methods
69 All Bio::PrimarySeqI compliance methods
70 expand
71 get_generic_features
72 get_seq_region_id
73 seq_region_Slice
74
75 Not currently supported but maybe should/could:
76
77 calculate_pi
78 calculate_theta
79 get_base_count
80 get_by_Individual
81 get_by_strain
82 invert
83
84 Internally, a MappedSlice is a collection of Bio::EnsEMBL::Slices and
85 associated Bio::EnsEMBL::Mappers which map the slices to the common
86 container coordinate system.
87
88 MappedSlices are usually created and attached to a MappedSliceContainer
89 by an adaptor/factory.
90
91 =head1 METHODS
92
93 new
94 add_Slice_Mapper_pair
95 get_all_Slice_Mapper_pairs
96 adaptor
97 container
98 name
99 seq_region_name
100 start
101 end
102 strand
103 length
104 seq_region_length
105 centrepoint
106 coord_system
107 coord_system_name
108 is_toplevel
109 seq (not implemented yet)
110 subseq (not implemented yet)
111 get_repeatmasked_seq (not implemented yet)
112 sub_MappedSlice (not implemented yet)
113 project (not implemented yet)
114
115 =head1 RELATED MODULES
116
117 Bio::EnsEMBL::MappedSlice
118 Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
119 Bio::EnsEMBL::Compara::AlignSlice
120 Bio::EnsEMBL::Compara::AlignSlice::Slice
121 Bio::EnsEMBL::AlignStrainSlice
122 Bio::EnsEMBL::StrainSlice
123
124 =cut
125
126 package Bio::EnsEMBL::MappedSlice;
127
128 use strict;
129 use warnings;
130 no warnings 'uninitialized';
131
132 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
133 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
134 use Bio::EnsEMBL::Mapper;
135 use Scalar::Util qw(weaken);
136
137 use vars qw($AUTOLOAD);
138
139
140 =head2 new
141
142 Arg [ADAPTOR] : Adaptor $adaptor - an adaptor of the appropriate type
143 Arg [CONTAINER] : Bio::EnsEMBL::MappedSliceContainer $container - the
144 container this MappedSlice is attached to
145 Arg [NAME] : String $name - name
146 Example : my $mapped_slice = Bio::EnsEMBL::MappedSlice->new(
147 -ADAPTOR => $adaptor,
148 -CONTAINER => $container,
149 -NAME => $name,
150 );
151 Description : Constructor. Usually you won't call this method manually, but
152 the MappedSlice will be constructed by an adaptor/factory.
153 Return type : Bio::EnsEMBL::MappedSlice
154 Exceptions : thrown on wrong or missing arguments
155 Caller : general, MappedSlice adaptors
156 Status : At Risk
157 : under development
158
159 =cut
160
161 sub new {
162 my $caller = shift;
163 my $class = ref($caller) || $caller;
164
165 my ($adaptor, $container, $name) =
166 rearrange([qw(ADAPTOR CONTAINER NAME)], @_);
167
168 # arguement check
169 unless ($container and ref($container) and
170 $container->isa('Bio::EnsEMBL::MappedSliceContainer')) {
171 throw("Need a MappedSliceContainer.");
172 }
173
174 my $self = {};
175 bless ($self, $class);
176
177 #
178 # initialise object
179 #
180
181 # need to weaken reference to prevent circular reference
182 weaken($self->{'container'} = $container);
183
184 $self->adaptor($adaptor) if (defined($adaptor));
185 $self->{'name'} = $name if (defined($name));
186
187 $self->{'slice_mapper_pairs'} = [];
188
189 return $self;
190 }
191
192
193 =head2 add_Slice_Mapper_pair
194
195 Arg[1] : Bio::EnsEMBL::Slice $slice - slice to add
196 Arg[2] : Bio::EnsEMBL::Mapper $mapper - the mapper for this slice
197 Example : $mapped_slice->add_Slice_Mapper_pair($slice, $mapper);
198 Description : Adds a native slice and a corresponding mapper to map to/from
199 the artificial container coord system.
200 Return type : listref of Bio::EnsEMBL::MappedSlice
201 Exceptions : thrown on wrong or missing arguments
202 Caller : general, MappedSlice adaptors
203 Status : At Risk
204 : under development
205
206 =cut
207
208 sub add_Slice_Mapper_pair {
209 my $self = shift;
210 my $slice = shift;
211 my $mapper = shift;
212
213 # argument check
214 unless ($slice and ref($slice) and ($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
215 throw("You must provide a slice.");
216 }
217
218 unless ($mapper and ref($mapper) and $mapper->isa('Bio::EnsEMBL::Mapper')) {
219 throw("You must provide a mapper.");
220 }
221
222 push @{ $self->{'slice_mapper_pairs'} }, [ $slice, $mapper ];
223
224 return $self->{'slice_mapper_pairs'};
225 }
226
227
228 =head2 get_all_Slice_Mapper_pairs
229
230 Example : foreach my $pair (@{ $self->get_all_Slice_Mapper_pairs }) {
231 my ($slice, $mapper) = @$pair;
232
233 # get container coordinates
234 my @coords = $mapper->map_coordinates(
235 $slice->seq_region_name,
236 $slice->start,
237 $slice->end,
238 $slice->strand,
239 'mapped_slice'
240 );
241
242 # ....
243 }
244 Description : Gets all Slice/Mapper pairs this MappedSlice is composed of.
245 Each slice (and features on it) can be mapped onto the
246 artificial container coord system using the mapper.
247 Return type : listref of listref of a Bio::EnsEMBL::Slice and
248 Bio::EnsEMBL::Mapper pair
249 Exceptions : none
250 Caller : general
251 Status : At Risk
252 : under development
253
254 =cut
255
256 sub get_all_Slice_Mapper_pairs {
257 my $self = shift;
258 return $self->{'slice_mapper_pairs'};
259 }
260
261
262 =head2 adaptor
263
264 Arg[1] : (optional) Adaptor $adaptor - the adaptor/factory for this
265 object
266 Example : $mapped_slice->adaptor($assembly_slice_adaptor);
267 Description : Getter/setter for the adaptor/factory for this object.
268 Return type : Adaptor of appropriate type
269 Exceptions : none
270 Caller : general
271 Status : At Risk
272 : under development
273
274 =cut
275
276 sub adaptor {
277 my $self = shift;
278 weaken($self->{'adaptor'} = shift) if (@_);
279 return $self->{'adaptor'};
280 }
281
282
283 =head2 container
284
285 Arg[1] : (optional) Bio::EnsEMBL::MappedSliceContainer - the container
286 this object is attached to
287 Example : my $container = $mapped_slice->container;
288 print $container->ref_slice->name, "\n";
289 Description : Getter/setter for the container this object is attached to. The
290 container will give you access to the reference slice, a common
291 artificial container slice, and a mapper to map to it from the
292 container coord system.
293
294 The implementation uses a weak reference to attach the container
295 since the container holds a list of MappedSlices itself.
296 Return type : Bio::EnsEMBL::MappedSliceContainer
297 Exceptions : none
298 Caller : general
299 Status : At Risk
300 : under development
301
302 =cut
303
304 sub container {
305 my $self = shift;
306 weaken($self->{'container'} = shift) if (@_);
307 return $self->{'container'};
308 }
309
310
311 =head2 name
312
313 Arg[1] : String - the name of this object
314 Example : my $name = $mapped_slice->container->ref_slice->name .
315 ":mapped_" . $ident_string;
316 $mapped_slice->name($name);
317 Description : Getter/setter for this object's name
318 Return type : String
319 Exceptions : none
320 Caller : general
321 Status : At Risk
322 : under development
323
324 =cut
325
326 sub name {
327 my $self = shift;
328 $self->{'name'} = shift if (@_);
329 return $self->{'name'};
330 }
331
332
333 =head2 seq_region_name
334
335 Example : my $sr_name = $mapped_slice->seq_region_name;
336 Description : Returns the seq_region name of the reference slice.
337 Return type : String
338 Exceptions : none
339 Caller : general
340 Status : At Risk
341 : under development
342
343 =cut
344
345 sub seq_region_name {
346 my $self = shift;
347 return $self->container->ref_slice->seq_region_name;
348 }
349
350
351 =head2 start
352
353 Example : my $start = $mapped_slice->start;
354 Description : Returns the start of the container slice.
355 Return type : Int
356 Exceptions : none
357 Caller : general
358 Status : At Risk
359 : under development
360
361 =cut
362
363 sub start {
364 my $self = shift;
365 return $self->container->container_slice->start;
366 }
367
368
369 =head2 end
370
371 Example : my $end = $mapped_slice->end;
372 Description : Returns the end of the container slice.
373 Return type : Int
374 Exceptions : none
375 Caller : general
376 Status : At Risk
377 : under development
378
379 =cut
380
381 sub end {
382 my $self = shift;
383 return $self->container->container_slice->end;
384 }
385
386
387 =head2 strand
388
389 Example : my $strand = $mapped_slice->strand;
390 Description : Returns the strand of the container slice.
391 Return type : Int
392 Exceptions : none
393 Caller : general
394 Status : At Risk
395 : under development
396
397 =cut
398
399 sub strand {
400 my $self = shift;
401 return $self->container->container_slice->strand;
402 }
403
404
405 =head2 length
406
407 Example : my $length = $mapped_slice->length;
408 Description : Returns the length of the container slice
409 Return type : Int
410 Exceptions : none
411 Caller : general
412 Status : At Risk
413 : under development
414
415 =cut
416
417 sub length {
418 my $self = shift;
419 return $self->container->container_slice->length;
420 }
421
422
423 =head2 seq_region_length
424
425 Example : my $sr_length = $mapped_slice->seq_region_length;
426 Description : Returns the seq_region length of the reference slice.
427 Return type : Int
428 Exceptions : none
429 Caller : general
430 Status : At Risk
431 : under development
432
433 =cut
434
435 sub seq_region_length {
436 my $self = shift;
437 return $self->container->ref_slice->seq_region_length;
438 }
439
440
441 =head2 centrepoint
442
443 Example : my $centrepoint = $mapped_slice->centrepoint;
444 Description : Returns the centrepoint of the container slice.
445 Return type : Int
446 Exceptions : none
447 Caller : general
448 Status : At Risk
449 : under development
450
451 =cut
452
453 sub centrepoint {
454 my $self = shift;
455 return $self->container->container_slice->centrepoint;
456 }
457
458
459 =head2 coord_system
460
461 Example : my $cs = $mapped_slice->coord_system;
462 Description : Returns the coord system of the container slice.
463 Return type : Bio::EnsEMBL::CoordSystem
464 Exceptions : none
465 Caller : general
466 Status : At Risk
467 : under development
468
469 =cut
470
471 sub coord_system {
472 my $self = shift;
473 return $self->container->container_slice->coord_system;
474 }
475
476 =head2 coord_system_name
477
478 Example : my $cs_name = $mapped_slice->coord_system_name;
479 Description : Returns the coord system name of the container slice.
480 Return type : Int
481 Exceptions : none
482 Caller : general
483 Status : At Risk
484 : under development
485
486 =cut
487
488 sub coord_system_name {
489 my $self = shift;
490 return $self->container->container_slice->coord_system_name;
491 }
492
493 =head2 is_toplevel
494
495 Example : my $toplevel_flag = $mapped_slice->is_toplevel;
496 Description : Returns weather the container slice is toplevel.
497 Return type : Int
498 Exceptions : none
499 Caller : general
500 Status : At Risk
501 : under development
502
503 =cut
504
505 sub is_toplevel {
506 my $self = shift;
507 return $self->container->container_slice->is_toplevel;
508 }
509
510
511 =head2 seq
512
513 Example : my $seq = $mapped_slice->seq()
514 Description : Retrieves the expanded sequence of this mapped slice,
515 including "-" characters where there are inserts in any other
516 mapped slices. This will align with the sequence returned by
517 the container's seq() method.
518 Return type : String
519 Exceptions : none
520 Caller : general
521 Status : At Risk
522 : under development
523
524 =cut
525
526 sub seq {
527 my $self = shift;
528
529 # create an empty string
530 my $ms_seq = '';
531
532 # this coord represents the current position in the MS sequence
533 my $start = 0;
534
535 # get slice/mapper pairs from mapped slice (usually only one anyway)
536 foreach my $pair(@{$self->get_all_Slice_Mapper_pairs()}) {
537 my ($s, $m) = @$pair;
538
539 # make sure to send extra args
540 # eg strain slices might need read coverage filtering
541 my $seq = $s->seq(@_);
542
543 # project from mapped slice to reference slice using the mapper
544 foreach my $ref_coord($m->map_coordinates('mapped_slice', 1, CORE::length($seq), $s->strand, 'mapped_slice')) {
545
546 # normal coord
547 if(!$ref_coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate') && !$ref_coord->isa('Bio::EnsEMBL::Mapper::Gap')) {
548
549 # project from reference slice to container slice using the container's mapper
550 foreach my $ms_coord($self->container->mapper->map_coordinates($self->container->ref_slice->seq_region_name, $ref_coord->start, $ref_coord->end, $ref_coord->strand, 'ref_slice')) {
551
552 # normal coord
553 if(!$ms_coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate') && !$ms_coord->isa('Bio::EnsEMBL::Mapper::Gap')) {
554 $ms_seq .= substr($seq, $start, $ms_coord->length);
555
556 $start += $ms_coord->length();
557 }
558
559 # indel coord
560 else {
561 $ms_seq .= '-' x $ms_coord->length();
562 }
563 }
564 }
565
566 # indel / gap
567 else {
568
569 # if there's a gap here aswell, add corresponding sequence
570 if($ref_coord->gap_length > 0) {
571 $ms_seq .= substr($seq, $start, $ref_coord->gap_length);
572 $start += $ref_coord->gap_length;
573 }
574
575 # add "-" to the sequence
576 $ms_seq .= '-' x ($ref_coord->length() - $ref_coord->gap_length());
577 }
578 }
579 }
580
581 return $ms_seq;
582 }
583
584 sub subseq {
585 }
586
587 sub get_repeatmasked_seq {
588 }
589
590 sub sub_MappedSlice {
591 }
592
593 sub project {
594 }
595
596
597 =head2 AUTOLOAD
598
599 Arg[1..N] : Arguments passed on to the calls on the underlying slices.
600 Example : my @simple_features = @{ $mapped_slice->get_all_SimpleFeatures };
601 Description : Aggregate data gathered from composing Slices.
602 This will call Slice->get_all_* and combine the results.
603 Coordinates will be transformed to be on the container slice
604 coordinate system.
605
606 Calls involving DAS features are skipped since the DAS adaptor
607 handles coordinate conversions natively.
608 Return type : listref of features (same type as corresponding Slice method)
609 Exceptions : none
610 Caller : general
611 Status : At Risk
612 : under development
613
614 =cut
615
616 sub AUTOLOAD {
617 my $self = shift;
618
619 my $method = $AUTOLOAD;
620 $method =~ s/.*:://;
621
622 # AUTOLOAD should only deal with get_all_* methods
623 return unless ($method =~ /^get_all_/);
624
625 # skip DAS methods
626 return if ($method =~ /DAS/);
627
628 my @mapped_features = ();
629
630 foreach my $pair (@{ $self->get_all_Slice_Mapper_pairs }) {
631 my ($slice, $mapper) = @$pair;
632 #warn $slice->name;
633
634 # call $method on each native slice composing the MappedSlice
635 my @features = @{ $slice->$method(@_) };
636
637 # map features onto the artificial container coordinate system
638 foreach my $f (@features) {
639
640 my @coords = $mapper->map_coordinates(
641 $f->slice->seq_region_name,
642 $f->start,
643 $f->end,
644 $f->strand,
645 'mapped_slice'
646 );
647
648 # sanity check
649 if (scalar(@coords) > 1) {
650 warning("Got more than one Coordinate returned, expected only one!\n");
651 }
652
653 $f->start($coords[0]->start);
654 $f->end($coords[0]->end);
655 $f->strand($coords[0]->strand);
656 $f->slice($self->container->container_slice);
657
658 push @mapped_features, $f;
659 }
660
661 }
662
663 return \@mapped_features;
664 }
665
666
667 1;
668