comparison variant_effect_predictor/Bio/EnsEMBL/MappedSliceContainer.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::MappedSliceContainer - container for mapped slices
24
25 =head1 SYNOPSIS
26
27 # get a reference slice
28 my $slice =
29 $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 );
30
31 # create MappedSliceContainer based on the reference slice
32 my $msc = Bio::EnsEMBL::MappedSliceContainer->new( -SLICE => $slice );
33
34 # set the adaptor for fetching AssemblySlices
35 my $asa = $slice->adaptor->db->get_AssemblySliceAdaptor;
36 $msc->set_AssemblySliceAdaptor($asa);
37
38 # add an AssemblySlice to your MappedSliceContainer
39 $msc->attach_AssemblySlice('NCBIM36');
40
41 foreach my $mapped_slice ( @{ $msc->get_all_MappedSlices } ) {
42 print $mapped_slice->name, "\n";
43
44 foreach my $sf ( @{ $mapped_slice->get_all_SimpleFeatures } ) {
45 print " ", &to_string($sf), "\n";
46 }
47 }
48
49 =head1 DESCRIPTION
50
51 NOTE: this code is under development and not fully functional nor tested
52 yet. Use only for development.
53
54 A MappedSliceContainer holds a collection of one or more
55 Bio::EnsEMBL::MappedSlices. It is based on a real reference slice and
56 contains an artificial "container slice" which defines the common
57 coordinate system used by all attached MappedSlices. There is also a
58 mapper to convert coordinates between the reference and the container
59 slice.
60
61 Attaching MappedSlices to the container is delegated to adaptors
62 (which act more as object factories than as traditional Ensembl db
63 adaptors). The adaptors will also modify the container slice and
64 associated mapper if required. This design allows us to keep the
65 MappedSliceContainer generic and encapsulate the data source specific
66 code in the adaptor/factory module.
67
68 In the simplest use case, all required MappedSlices are attached to the
69 MappedSliceContainer at once (by a single call to the adaptor). This
70 object should also allow "hot-plugging" of MappedSlices (e.g. attach a
71 MappedSlice representing a strain to a container that already contains a
72 multi-species alignment). The methods for attaching new MappedSlice will
73 be responsable to perform the necessary adjustments to coordinates and
74 mapper on the existing MappedSlices.
75
76 =head1 METHODS
77
78 new
79 set_adaptor
80 get_adaptor
81 set_AssemblySliceAdaptor
82 get_AssemblySliceAdaptor
83 set_AlignSliceAdaptor (not implemented yet)
84 get_AlignSliceAdaptor (not implemented yet)
85 set_StrainSliceAdaptor (not implemented yet)
86 get_StrainSliceAdaptor (not implemented yet)
87 attach_AssemblySlice
88 attach_AlignSlice (not implemented yet)
89 attach_StrainSlice (not implemented yet)
90 get_all_MappedSlices
91 sub_MappedSliceContainer (not implemented yet)
92 ref_slice
93 container_slice
94 mapper
95 expanded
96
97 =head1 RELATED MODULES
98
99 Bio::EnsEMBL::MappedSlice
100 Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
101 Bio::EnsEMBL::Compara::AlignSlice
102 Bio::EnsEMBL::Compara::AlignSlice::Slice
103 Bio::EnsEMBL::AlignStrainSlice
104 Bio::EnsEMBL::StrainSlice
105
106 =cut
107
108 package Bio::EnsEMBL::MappedSliceContainer;
109
110 use strict;
111 use warnings;
112 no warnings 'uninitialized';
113
114 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
115 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
116 use Bio::EnsEMBL::CoordSystem;
117 use Bio::EnsEMBL::Slice;
118 use Bio::EnsEMBL::Mapper;
119
120
121 # define avalable adaptormajs to use with this container
122 my %adaptors = map { $_ => 1 } qw(assembly align strain);
123
124
125 =head2 new
126
127 Arg [SLICE] : Bio::EnsEMBL::Slice $slice - the reference slice for this
128 container
129 Arg [EXPANDED] : (optional) Boolean $expanded - set expanded mode (default:
130 collapsed)
131 Example : my $slice = $slice_adaptor->fetch_by_region('chromosome', 1,
132 9000000, 9500000);
133 my $msc = Bio::EnsEMBL::MappedSliceContainer->new(
134 -SLICE => $slice,
135 -EXPANDED => 1,
136 );
137 Description : Constructor. See the general documentation of this module for
138 details about this object. Note that the constructor creates an
139 empty container, so you'll have to attach MappedSlices to it to
140 be useful (this is usually done by an adaptor/factory).
141 Return type : Bio::EnsEMBL::MappedSliceContainer
142 Exceptions : thrown on wrong or missing argument
143 Caller : general
144 Status : At Risk
145 : under development
146
147 =cut
148
149 sub new {
150 my $caller = shift;
151 my $class = ref($caller) || $caller;
152
153 my ($ref_slice, $expanded) = rearrange([qw(SLICE EXPANDED)], @_);
154
155 # argument check
156 unless ($ref_slice and ref($ref_slice) and
157 ($ref_slice->isa('Bio::EnsEMBL::Slice') or $ref_slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
158 throw("You must provide a reference slice.");
159 }
160
161 my $self = {};
162 bless ($self, $class);
163
164 # initialise object
165 $self->{'ref_slice'} = $ref_slice;
166 $self->{'expanded'} = $expanded || 0;
167
168 $self->{'mapped_slices'} = [];
169
170 # create the container slice
171 $self->_create_container_slice($ref_slice);
172
173 return $self;
174 }
175
176
177 #
178 # Create an artificial slice which represents the common coordinate system used
179 # for this MappedSliceContainer
180 #
181 sub _create_container_slice {
182 my $self = shift;
183 my $ref_slice = shift;
184
185 # argument check
186 unless ($ref_slice and ref($ref_slice) and
187 ($ref_slice->isa('Bio::EnsEMBL::Slice') or $ref_slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
188 throw("You must provide a reference slice.");
189 }
190
191 # create an artificial coordinate system for the container slice
192 my $cs = Bio::EnsEMBL::CoordSystem->new(
193 -NAME => 'container',
194 -RANK => 1,
195 );
196
197 # Create a new artificial slice spanning your container. Initially this will
198 # simply span your reference slice
199 my $container_slice = Bio::EnsEMBL::Slice->new(
200 -COORD_SYSTEM => $cs,
201 -START => 1,
202 -END => $ref_slice->length,
203 -STRAND => 1,
204 -SEQ_REGION_NAME => 'container',
205 );
206
207 $self->{'container_slice'} = $container_slice;
208
209 # Create an Mapper to map to/from the reference slice to the container coord
210 # system.
211 my $mapper = Bio::EnsEMBL::Mapper->new('ref_slice', 'container');
212
213 $mapper->add_map_coordinates(
214 $ref_slice->seq_region_name,
215 $ref_slice->start,
216 $ref_slice->end,
217 1,
218 $container_slice->seq_region_name,
219 $container_slice->start,
220 $container_slice->end,
221 );
222
223 $self->{'mapper'} = $mapper;
224 }
225
226
227 =head2 set_adaptor
228
229 Arg[1] : String $type - the type of adaptor to set
230 Arg[2] : Adaptor $adaptor - the adaptor to set
231 Example : my $adaptor = Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor->new;
232 $msc->set_adaptor('assembly', $adaptor);
233 Description : Parameterisable wrapper for all methods that set adaptors (see
234 below).
235 Return type : same as Arg 2
236 Exceptions : thrown on missing type
237 Caller : general
238 Status : At Risk
239 : under development
240
241 =cut
242
243 sub set_adaptor {
244 my $self = shift;
245 my $type = shift;
246 my $adaptor = shift;
247
248 # argument check
249 unless ($type and $adaptors{$type}) {
250 throw("Missing or unknown adaptor type.");
251 }
252
253 $type = ucfirst($type);
254 my $method = "set_${type}SliceAdaptor";
255
256 return $self->$method($adaptor);
257 }
258
259
260 =head2 get_adaptor
261
262 Arg[1] : String $type - the type of adaptor to get
263 Example : my $assembly_slice_adaptor = $msc->get_adaptor('assembly');
264 Description : Parameterisable wrapper for all methods that get adaptors (see
265 below).
266 Return type : An adaptor for the requested type of MappedSlice.
267 Exceptions : thrown on missing type
268 Caller : general
269 Status : At Risk
270 : under development
271
272 =cut
273
274 sub get_adaptor {
275 my $self = shift;
276 my $type = shift;
277
278 # argument check
279 unless ($type and $adaptors{$type}) {
280 throw("Missing or unknown adaptor type.");
281 }
282
283 $type = ucfirst($type);
284 my $method = "get_${type}SliceAdaptor";
285
286 return $self->$method;
287 }
288
289
290 =head2 set_AssemblySliceAdaptor
291
292 Arg[1] : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor - the adaptor to set
293 Example : my $adaptor = Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor->new;
294 $msc->set_AssemblySliceAdaptor($adaptor);
295 Description : Sets an AssemblySliceAdaptor for this container. The adaptor can
296 be used to attach MappedSlice for alternative assemblies.
297 Return type : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
298 Exceptions : thrown on wrong or missing argument
299 Caller : general, $self->get_adaptor
300 Status : At Risk
301 : under development
302
303 =cut
304
305 sub set_AssemblySliceAdaptor {
306 my $self = shift;
307 my $assembly_slice_adaptor = shift;
308
309 unless ($assembly_slice_adaptor and ref($assembly_slice_adaptor) and
310 $assembly_slice_adaptor->isa('Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor')) {
311 throw("Need a Bio::EnsEMBL::AssemblySliceAdaptor.");
312 }
313
314 $self->{'adaptors'}->{'AssemblySlice'} = $assembly_slice_adaptor;
315 }
316
317
318 =head2 get_AssemblySliceAdaptor
319
320 Example : my $assembly_slice_adaptor = $msc->get_AssemblySliceAdaptor;
321 Description : Gets a AssemblySliceAdaptor from this container. The adaptor can
322 be used to attach MappedSlice for alternative assemblies.
323 Return type : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
324 Exceptions : thrown on wrong or missing argument
325 Caller : general, $self->get_adaptor
326 Status : At Risk
327 : under development
328
329 =cut
330
331 sub get_AssemblySliceAdaptor {
332 my $self = shift;
333
334 unless ($self->{'adaptors'}->{'AssemblySlice'}) {
335 warning("No AssemblySliceAdaptor attached to MappedSliceContainer.");
336 }
337
338 return $self->{'adaptors'}->{'AssemblySlice'};
339 }
340
341
342 # [todo]
343 sub set_AlignSliceAdaptor {
344 throw("Not implemented yet!");
345 }
346
347
348 # [todo]
349 sub get_AlignSliceAdaptor {
350 throw("Not implemented yet!");
351 }
352
353
354 # [todo]
355 sub set_StrainSliceAdaptor {
356 my $self = shift;
357 my $strain_slice_adaptor = shift;
358
359 unless ($strain_slice_adaptor and ref($strain_slice_adaptor) and
360 $strain_slice_adaptor->isa('Bio::EnsEMBL::DBSQL::StrainSliceAdaptor')) {
361 throw("Need a Bio::EnsEMBL::StrainSliceAdaptor.");
362 }
363
364 $self->{'adaptors'}->{'StrainSlice'} = $strain_slice_adaptor;
365 }
366
367
368 # [todo]
369 sub get_StrainSliceAdaptor {
370 my $self = shift;
371
372 unless ($self->{'adaptors'}->{'StrainSlice'}) {
373 warning("No StrainSliceAdaptor attached to MappedSliceContainer.");
374 }
375
376 return $self->{'adaptors'}->{'StrainSlice'};
377 }
378
379
380 =head2 attach_AssemblySlice
381
382 Arg[1] : String $version - assembly version to attach
383 Example : $msc->attach_AssemblySlice('NCBIM36');
384 Description : Attaches a MappedSlice for an alternative assembly to this
385 container.
386 Return type : none
387 Exceptions : thrown on missing argument
388 Caller : general, Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
389 Status : At Risk
390 : under development
391
392 =cut
393
394 sub attach_AssemblySlice {
395 my $self = shift;
396 my $version = shift;
397
398 throw("Need a version.") unless ($version);
399
400 my $asa = $self->get_AssemblySliceAdaptor;
401 return unless ($asa);
402
403 my @mapped_slices = @{ $asa->fetch_by_version($self, $version) };
404
405 push @{ $self->{'mapped_slices'} }, @mapped_slices;
406 }
407
408
409 =head2 attach_StrainSlice
410
411 Arg[1] : String $strain - name of strain to attach
412 Example : $msc->attach_StrainSlice('Watson');
413 Description : Attaches a MappedSlice for an alternative strain to this
414 container.
415 Return type : none
416 Exceptions : thrown on missing argument
417 Caller : general, Bio::EnsEMBL::DBSQL::StrainSliceAdaptor
418 Status : At Risk
419 : under development
420
421 =cut
422
423 sub attach_StrainSlice {
424 my $self = shift;
425 my $strain = shift;
426
427 throw("Need a strain.") unless ($strain);
428
429 my $ssa = $self->get_StrainSliceAdaptor;
430 return unless ($ssa);
431
432 my @mapped_slices = @{ $ssa->fetch_by_name($self, $strain) };
433
434 push @{ $self->{'mapped_slices'} }, @mapped_slices;
435 }
436
437
438
439 =head2 get_all_MappedSlices
440
441 Example : foreach my $mapped_slice (@{ $msc->get_all_MappedSlices }) {
442 print $mapped_slice->name, "\n";
443 }
444 Description : Returns all MappedSlices attached to this container.
445 Return type : listref of Bio::EnsEMBL::MappedSlice
446 Exceptions : none
447 Caller : general
448 Status : At Risk
449 : under development
450
451 =cut
452
453 sub get_all_MappedSlices {
454 my $self = shift;
455 return $self->{'mapped_slices'};
456 }
457
458
459 # [todo]
460 sub sub_MappedSliceContainer {
461 throw("Not implemented yet!");
462 }
463
464
465 =head2 ref_slice
466
467 Arg[1] : (optional) Bio::EnsEMBL::Slice - the reference slice to set
468 Example : my $ref_slice = $mapped_slice_container->ref_slice;
469 print "This MappedSliceContainer is based on the reference
470 slice ", $ref_slice->name, "\n";
471 Description : Getter/setter for the reference slice.
472 Return type : Bio::EnsEMBL::Slice
473 Exceptions : thrown on wrong argument type
474 Caller : general
475 Status : At Risk
476 : under development
477
478 =cut
479
480 sub ref_slice {
481 my $self = shift;
482
483 if (@_) {
484 my $slice = shift;
485
486 unless (ref($slice) and ($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice'))) {
487 throw("Need a Bio::EnsEMBL::Slice.");
488 }
489
490 $self->{'ref_slice'} = $slice;
491 }
492
493 return $self->{'ref_slice'};
494 }
495
496
497 =head2 container_slice
498
499 Arg[1] : (optional) Bio::EnsEMBL::Slice - the container slice to set
500 Example : my $container_slice = $mapped_slice_container->container_slice;
501 print "The common slice used by this MappedSliceContainer is ",
502 $container_slice->name, "\n";
503 Description : Getter/setter for the container slice. This is an artificial
504 slice which defines the common coordinate system used by the
505 MappedSlices attached to this container.
506 Return type : Bio::EnsEMBL::Slice
507 Exceptions : thrown on wrong argument type
508 Caller : general
509 Status : At Risk
510 : under development
511
512 =cut
513
514 sub container_slice {
515 my $self = shift;
516
517 if (@_) {
518 my $slice = shift;
519
520 unless (ref($slice) and ($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
521 throw("Need a Bio::EnsEMBL::Slice.");
522 }
523
524 $self->{'container_slice'} = $slice;
525 }
526
527 return $self->{'container_slice'};
528 }
529
530
531 =head2 mapper
532
533 Arg[1] : (optional) Bio::EnsEMBL::Mapper - the mapper to set
534 Example : my $mapper = Bio::EnsEMBL::Mapper->new('ref', 'mapped');
535 $mapped_slice_container->mapper($mapper);
536 Description : Getter/setter for the mapper to map between reference slice and
537 the artificial container coord system.
538 Return type : Bio::EnsEMBL::Mapper
539 Exceptions : thrown on wrong argument type
540 Caller : internal, Bio::EnsEMBL::MappedSlice->AUTOLOAD
541 Status : At Risk
542 : under development
543
544 =cut
545
546 sub mapper {
547 my $self = shift;
548
549 if (@_) {
550 my $mapper = shift;
551
552 unless (ref($mapper) and $mapper->isa('Bio::EnsEMBL::Mapper')) {
553 throw("Need a Bio::EnsEMBL::Mapper.");
554 }
555
556 $self->{'mapper'} = $mapper;
557 }
558
559 return $self->{'mapper'};
560 }
561
562
563 =head2 expanded
564
565 Arg[1] : (optional) Boolean - expanded mode to set
566 Example : if ($mapped_slice_container->expanded) {
567 # do more elaborate mapping than in collapsed mode
568 [...]
569 }
570 Description : Getter/setter for expanded mode.
571
572 By default, MappedSliceContainer use collapsed mode, which
573 means that no inserts in the reference sequence are allowed
574 when constructing the MappedSlices. in this mode, the
575 mapped_slice artificial coord system will be identical with the
576 ref_slice coord system.
577
578 By setting expanded mode, you allow inserts in the reference
579 sequence.
580 Return type : Boolean
581 Exceptions : none
582 Caller : general
583 Status : At Risk
584 : under development
585
586 =cut
587
588 sub expanded {
589 my $self = shift;
590 $self->{'expanded'} = shift if (@_);
591 return $self->{'expanded'};
592 }
593
594 =head2 seq
595
596 Example : my $seq = $container->seq()
597 Description : Retrieves the expanded sequence of the artificial container
598 slice, including "-" characters where there are inserts in any
599 of the attached mapped slices.
600 Return type : String
601 Exceptions : none
602 Caller : general
603 Status : At Risk
604 : under development
605
606 =cut
607
608 sub seq {
609 my $self = shift;
610
611 my $container_seq = '';
612
613 # check there's a mapper
614 if(defined($self->mapper)) {
615 my $start = 0;
616 my $slice = $self->ref_slice();
617 my $seq = $slice->seq();
618
619 foreach my $coord($self->mapper->map_coordinates($slice->seq_region_name, $slice->start, $slice->end, $slice->strand, 'ref_slice')) {
620 # if it is a normal coordinate insert sequence
621 if(!$coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate')) {
622 $container_seq .= substr($seq, $start, $coord->length());
623 $start += $coord->length;
624 }
625
626 # if it is a gap or indel insert "-"
627 else {
628 $container_seq .= '-' x $coord->length();
629 }
630 }
631 }
632
633 return $container_seq;
634 }
635
636
637 1;
638