Mercurial > repos > mahtabm > ensemb_rep_gvl
comparison variant_effect_predictor/Bio/EnsEMBL/MappedSliceContainer.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bc9b66ada89 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::MappedSliceContainer - container for mapped slices | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 # get a reference slice | |
28 my $slice = | |
29 $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 ); | |
30 | |
31 # create MappedSliceContainer based on the reference slice | |
32 my $msc = Bio::EnsEMBL::MappedSliceContainer->new( -SLICE => $slice ); | |
33 | |
34 # set the adaptor for fetching AssemblySlices | |
35 my $asa = $slice->adaptor->db->get_AssemblySliceAdaptor; | |
36 $msc->set_AssemblySliceAdaptor($asa); | |
37 | |
38 # add an AssemblySlice to your MappedSliceContainer | |
39 $msc->attach_AssemblySlice('NCBIM36'); | |
40 | |
41 foreach my $mapped_slice ( @{ $msc->get_all_MappedSlices } ) { | |
42 print $mapped_slice->name, "\n"; | |
43 | |
44 foreach my $sf ( @{ $mapped_slice->get_all_SimpleFeatures } ) { | |
45 print " ", &to_string($sf), "\n"; | |
46 } | |
47 } | |
48 | |
49 =head1 DESCRIPTION | |
50 | |
51 NOTE: this code is under development and not fully functional nor tested | |
52 yet. Use only for development. | |
53 | |
54 A MappedSliceContainer holds a collection of one or more | |
55 Bio::EnsEMBL::MappedSlices. It is based on a real reference slice and | |
56 contains an artificial "container slice" which defines the common | |
57 coordinate system used by all attached MappedSlices. There is also a | |
58 mapper to convert coordinates between the reference and the container | |
59 slice. | |
60 | |
61 Attaching MappedSlices to the container is delegated to adaptors | |
62 (which act more as object factories than as traditional Ensembl db | |
63 adaptors). The adaptors will also modify the container slice and | |
64 associated mapper if required. This design allows us to keep the | |
65 MappedSliceContainer generic and encapsulate the data source specific | |
66 code in the adaptor/factory module. | |
67 | |
68 In the simplest use case, all required MappedSlices are attached to the | |
69 MappedSliceContainer at once (by a single call to the adaptor). This | |
70 object should also allow "hot-plugging" of MappedSlices (e.g. attach a | |
71 MappedSlice representing a strain to a container that already contains a | |
72 multi-species alignment). The methods for attaching new MappedSlice will | |
73 be responsable to perform the necessary adjustments to coordinates and | |
74 mapper on the existing MappedSlices. | |
75 | |
76 =head1 METHODS | |
77 | |
78 new | |
79 set_adaptor | |
80 get_adaptor | |
81 set_AssemblySliceAdaptor | |
82 get_AssemblySliceAdaptor | |
83 set_AlignSliceAdaptor (not implemented yet) | |
84 get_AlignSliceAdaptor (not implemented yet) | |
85 set_StrainSliceAdaptor (not implemented yet) | |
86 get_StrainSliceAdaptor (not implemented yet) | |
87 attach_AssemblySlice | |
88 attach_AlignSlice (not implemented yet) | |
89 attach_StrainSlice (not implemented yet) | |
90 get_all_MappedSlices | |
91 sub_MappedSliceContainer (not implemented yet) | |
92 ref_slice | |
93 container_slice | |
94 mapper | |
95 expanded | |
96 | |
97 =head1 RELATED MODULES | |
98 | |
99 Bio::EnsEMBL::MappedSlice | |
100 Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor | |
101 Bio::EnsEMBL::Compara::AlignSlice | |
102 Bio::EnsEMBL::Compara::AlignSlice::Slice | |
103 Bio::EnsEMBL::AlignStrainSlice | |
104 Bio::EnsEMBL::StrainSlice | |
105 | |
106 =cut | |
107 | |
108 package Bio::EnsEMBL::MappedSliceContainer; | |
109 | |
110 use strict; | |
111 use warnings; | |
112 no warnings 'uninitialized'; | |
113 | |
114 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
115 use Bio::EnsEMBL::Utils::Exception qw(throw warning); | |
116 use Bio::EnsEMBL::CoordSystem; | |
117 use Bio::EnsEMBL::Slice; | |
118 use Bio::EnsEMBL::Mapper; | |
119 | |
120 | |
121 # define avalable adaptormajs to use with this container | |
122 my %adaptors = map { $_ => 1 } qw(assembly align strain); | |
123 | |
124 | |
125 =head2 new | |
126 | |
127 Arg [SLICE] : Bio::EnsEMBL::Slice $slice - the reference slice for this | |
128 container | |
129 Arg [EXPANDED] : (optional) Boolean $expanded - set expanded mode (default: | |
130 collapsed) | |
131 Example : my $slice = $slice_adaptor->fetch_by_region('chromosome', 1, | |
132 9000000, 9500000); | |
133 my $msc = Bio::EnsEMBL::MappedSliceContainer->new( | |
134 -SLICE => $slice, | |
135 -EXPANDED => 1, | |
136 ); | |
137 Description : Constructor. See the general documentation of this module for | |
138 details about this object. Note that the constructor creates an | |
139 empty container, so you'll have to attach MappedSlices to it to | |
140 be useful (this is usually done by an adaptor/factory). | |
141 Return type : Bio::EnsEMBL::MappedSliceContainer | |
142 Exceptions : thrown on wrong or missing argument | |
143 Caller : general | |
144 Status : At Risk | |
145 : under development | |
146 | |
147 =cut | |
148 | |
149 sub new { | |
150 my $caller = shift; | |
151 my $class = ref($caller) || $caller; | |
152 | |
153 my ($ref_slice, $expanded) = rearrange([qw(SLICE EXPANDED)], @_); | |
154 | |
155 # argument check | |
156 unless ($ref_slice and ref($ref_slice) and | |
157 ($ref_slice->isa('Bio::EnsEMBL::Slice') or $ref_slice->isa('Bio::EnsEMBL::LRGSlice')) ) { | |
158 throw("You must provide a reference slice."); | |
159 } | |
160 | |
161 my $self = {}; | |
162 bless ($self, $class); | |
163 | |
164 # initialise object | |
165 $self->{'ref_slice'} = $ref_slice; | |
166 $self->{'expanded'} = $expanded || 0; | |
167 | |
168 $self->{'mapped_slices'} = []; | |
169 | |
170 # create the container slice | |
171 $self->_create_container_slice($ref_slice); | |
172 | |
173 return $self; | |
174 } | |
175 | |
176 | |
177 # | |
178 # Create an artificial slice which represents the common coordinate system used | |
179 # for this MappedSliceContainer | |
180 # | |
181 sub _create_container_slice { | |
182 my $self = shift; | |
183 my $ref_slice = shift; | |
184 | |
185 # argument check | |
186 unless ($ref_slice and ref($ref_slice) and | |
187 ($ref_slice->isa('Bio::EnsEMBL::Slice') or $ref_slice->isa('Bio::EnsEMBL::LRGSlice')) ) { | |
188 throw("You must provide a reference slice."); | |
189 } | |
190 | |
191 # create an artificial coordinate system for the container slice | |
192 my $cs = Bio::EnsEMBL::CoordSystem->new( | |
193 -NAME => 'container', | |
194 -RANK => 1, | |
195 ); | |
196 | |
197 # Create a new artificial slice spanning your container. Initially this will | |
198 # simply span your reference slice | |
199 my $container_slice = Bio::EnsEMBL::Slice->new( | |
200 -COORD_SYSTEM => $cs, | |
201 -START => 1, | |
202 -END => $ref_slice->length, | |
203 -STRAND => 1, | |
204 -SEQ_REGION_NAME => 'container', | |
205 ); | |
206 | |
207 $self->{'container_slice'} = $container_slice; | |
208 | |
209 # Create an Mapper to map to/from the reference slice to the container coord | |
210 # system. | |
211 my $mapper = Bio::EnsEMBL::Mapper->new('ref_slice', 'container'); | |
212 | |
213 $mapper->add_map_coordinates( | |
214 $ref_slice->seq_region_name, | |
215 $ref_slice->start, | |
216 $ref_slice->end, | |
217 1, | |
218 $container_slice->seq_region_name, | |
219 $container_slice->start, | |
220 $container_slice->end, | |
221 ); | |
222 | |
223 $self->{'mapper'} = $mapper; | |
224 } | |
225 | |
226 | |
227 =head2 set_adaptor | |
228 | |
229 Arg[1] : String $type - the type of adaptor to set | |
230 Arg[2] : Adaptor $adaptor - the adaptor to set | |
231 Example : my $adaptor = Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor->new; | |
232 $msc->set_adaptor('assembly', $adaptor); | |
233 Description : Parameterisable wrapper for all methods that set adaptors (see | |
234 below). | |
235 Return type : same as Arg 2 | |
236 Exceptions : thrown on missing type | |
237 Caller : general | |
238 Status : At Risk | |
239 : under development | |
240 | |
241 =cut | |
242 | |
243 sub set_adaptor { | |
244 my $self = shift; | |
245 my $type = shift; | |
246 my $adaptor = shift; | |
247 | |
248 # argument check | |
249 unless ($type and $adaptors{$type}) { | |
250 throw("Missing or unknown adaptor type."); | |
251 } | |
252 | |
253 $type = ucfirst($type); | |
254 my $method = "set_${type}SliceAdaptor"; | |
255 | |
256 return $self->$method($adaptor); | |
257 } | |
258 | |
259 | |
260 =head2 get_adaptor | |
261 | |
262 Arg[1] : String $type - the type of adaptor to get | |
263 Example : my $assembly_slice_adaptor = $msc->get_adaptor('assembly'); | |
264 Description : Parameterisable wrapper for all methods that get adaptors (see | |
265 below). | |
266 Return type : An adaptor for the requested type of MappedSlice. | |
267 Exceptions : thrown on missing type | |
268 Caller : general | |
269 Status : At Risk | |
270 : under development | |
271 | |
272 =cut | |
273 | |
274 sub get_adaptor { | |
275 my $self = shift; | |
276 my $type = shift; | |
277 | |
278 # argument check | |
279 unless ($type and $adaptors{$type}) { | |
280 throw("Missing or unknown adaptor type."); | |
281 } | |
282 | |
283 $type = ucfirst($type); | |
284 my $method = "get_${type}SliceAdaptor"; | |
285 | |
286 return $self->$method; | |
287 } | |
288 | |
289 | |
290 =head2 set_AssemblySliceAdaptor | |
291 | |
292 Arg[1] : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor - the adaptor to set | |
293 Example : my $adaptor = Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor->new; | |
294 $msc->set_AssemblySliceAdaptor($adaptor); | |
295 Description : Sets an AssemblySliceAdaptor for this container. The adaptor can | |
296 be used to attach MappedSlice for alternative assemblies. | |
297 Return type : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor | |
298 Exceptions : thrown on wrong or missing argument | |
299 Caller : general, $self->get_adaptor | |
300 Status : At Risk | |
301 : under development | |
302 | |
303 =cut | |
304 | |
305 sub set_AssemblySliceAdaptor { | |
306 my $self = shift; | |
307 my $assembly_slice_adaptor = shift; | |
308 | |
309 unless ($assembly_slice_adaptor and ref($assembly_slice_adaptor) and | |
310 $assembly_slice_adaptor->isa('Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor')) { | |
311 throw("Need a Bio::EnsEMBL::AssemblySliceAdaptor."); | |
312 } | |
313 | |
314 $self->{'adaptors'}->{'AssemblySlice'} = $assembly_slice_adaptor; | |
315 } | |
316 | |
317 | |
318 =head2 get_AssemblySliceAdaptor | |
319 | |
320 Example : my $assembly_slice_adaptor = $msc->get_AssemblySliceAdaptor; | |
321 Description : Gets a AssemblySliceAdaptor from this container. The adaptor can | |
322 be used to attach MappedSlice for alternative assemblies. | |
323 Return type : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor | |
324 Exceptions : thrown on wrong or missing argument | |
325 Caller : general, $self->get_adaptor | |
326 Status : At Risk | |
327 : under development | |
328 | |
329 =cut | |
330 | |
331 sub get_AssemblySliceAdaptor { | |
332 my $self = shift; | |
333 | |
334 unless ($self->{'adaptors'}->{'AssemblySlice'}) { | |
335 warning("No AssemblySliceAdaptor attached to MappedSliceContainer."); | |
336 } | |
337 | |
338 return $self->{'adaptors'}->{'AssemblySlice'}; | |
339 } | |
340 | |
341 | |
342 # [todo] | |
343 sub set_AlignSliceAdaptor { | |
344 throw("Not implemented yet!"); | |
345 } | |
346 | |
347 | |
348 # [todo] | |
349 sub get_AlignSliceAdaptor { | |
350 throw("Not implemented yet!"); | |
351 } | |
352 | |
353 | |
354 # [todo] | |
355 sub set_StrainSliceAdaptor { | |
356 my $self = shift; | |
357 my $strain_slice_adaptor = shift; | |
358 | |
359 unless ($strain_slice_adaptor and ref($strain_slice_adaptor) and | |
360 $strain_slice_adaptor->isa('Bio::EnsEMBL::DBSQL::StrainSliceAdaptor')) { | |
361 throw("Need a Bio::EnsEMBL::StrainSliceAdaptor."); | |
362 } | |
363 | |
364 $self->{'adaptors'}->{'StrainSlice'} = $strain_slice_adaptor; | |
365 } | |
366 | |
367 | |
368 # [todo] | |
369 sub get_StrainSliceAdaptor { | |
370 my $self = shift; | |
371 | |
372 unless ($self->{'adaptors'}->{'StrainSlice'}) { | |
373 warning("No StrainSliceAdaptor attached to MappedSliceContainer."); | |
374 } | |
375 | |
376 return $self->{'adaptors'}->{'StrainSlice'}; | |
377 } | |
378 | |
379 | |
380 =head2 attach_AssemblySlice | |
381 | |
382 Arg[1] : String $version - assembly version to attach | |
383 Example : $msc->attach_AssemblySlice('NCBIM36'); | |
384 Description : Attaches a MappedSlice for an alternative assembly to this | |
385 container. | |
386 Return type : none | |
387 Exceptions : thrown on missing argument | |
388 Caller : general, Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor | |
389 Status : At Risk | |
390 : under development | |
391 | |
392 =cut | |
393 | |
394 sub attach_AssemblySlice { | |
395 my $self = shift; | |
396 my $version = shift; | |
397 | |
398 throw("Need a version.") unless ($version); | |
399 | |
400 my $asa = $self->get_AssemblySliceAdaptor; | |
401 return unless ($asa); | |
402 | |
403 my @mapped_slices = @{ $asa->fetch_by_version($self, $version) }; | |
404 | |
405 push @{ $self->{'mapped_slices'} }, @mapped_slices; | |
406 } | |
407 | |
408 | |
409 =head2 attach_StrainSlice | |
410 | |
411 Arg[1] : String $strain - name of strain to attach | |
412 Example : $msc->attach_StrainSlice('Watson'); | |
413 Description : Attaches a MappedSlice for an alternative strain to this | |
414 container. | |
415 Return type : none | |
416 Exceptions : thrown on missing argument | |
417 Caller : general, Bio::EnsEMBL::DBSQL::StrainSliceAdaptor | |
418 Status : At Risk | |
419 : under development | |
420 | |
421 =cut | |
422 | |
423 sub attach_StrainSlice { | |
424 my $self = shift; | |
425 my $strain = shift; | |
426 | |
427 throw("Need a strain.") unless ($strain); | |
428 | |
429 my $ssa = $self->get_StrainSliceAdaptor; | |
430 return unless ($ssa); | |
431 | |
432 my @mapped_slices = @{ $ssa->fetch_by_name($self, $strain) }; | |
433 | |
434 push @{ $self->{'mapped_slices'} }, @mapped_slices; | |
435 } | |
436 | |
437 | |
438 | |
439 =head2 get_all_MappedSlices | |
440 | |
441 Example : foreach my $mapped_slice (@{ $msc->get_all_MappedSlices }) { | |
442 print $mapped_slice->name, "\n"; | |
443 } | |
444 Description : Returns all MappedSlices attached to this container. | |
445 Return type : listref of Bio::EnsEMBL::MappedSlice | |
446 Exceptions : none | |
447 Caller : general | |
448 Status : At Risk | |
449 : under development | |
450 | |
451 =cut | |
452 | |
453 sub get_all_MappedSlices { | |
454 my $self = shift; | |
455 return $self->{'mapped_slices'}; | |
456 } | |
457 | |
458 | |
459 # [todo] | |
460 sub sub_MappedSliceContainer { | |
461 throw("Not implemented yet!"); | |
462 } | |
463 | |
464 | |
465 =head2 ref_slice | |
466 | |
467 Arg[1] : (optional) Bio::EnsEMBL::Slice - the reference slice to set | |
468 Example : my $ref_slice = $mapped_slice_container->ref_slice; | |
469 print "This MappedSliceContainer is based on the reference | |
470 slice ", $ref_slice->name, "\n"; | |
471 Description : Getter/setter for the reference slice. | |
472 Return type : Bio::EnsEMBL::Slice | |
473 Exceptions : thrown on wrong argument type | |
474 Caller : general | |
475 Status : At Risk | |
476 : under development | |
477 | |
478 =cut | |
479 | |
480 sub ref_slice { | |
481 my $self = shift; | |
482 | |
483 if (@_) { | |
484 my $slice = shift; | |
485 | |
486 unless (ref($slice) and ($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice'))) { | |
487 throw("Need a Bio::EnsEMBL::Slice."); | |
488 } | |
489 | |
490 $self->{'ref_slice'} = $slice; | |
491 } | |
492 | |
493 return $self->{'ref_slice'}; | |
494 } | |
495 | |
496 | |
497 =head2 container_slice | |
498 | |
499 Arg[1] : (optional) Bio::EnsEMBL::Slice - the container slice to set | |
500 Example : my $container_slice = $mapped_slice_container->container_slice; | |
501 print "The common slice used by this MappedSliceContainer is ", | |
502 $container_slice->name, "\n"; | |
503 Description : Getter/setter for the container slice. This is an artificial | |
504 slice which defines the common coordinate system used by the | |
505 MappedSlices attached to this container. | |
506 Return type : Bio::EnsEMBL::Slice | |
507 Exceptions : thrown on wrong argument type | |
508 Caller : general | |
509 Status : At Risk | |
510 : under development | |
511 | |
512 =cut | |
513 | |
514 sub container_slice { | |
515 my $self = shift; | |
516 | |
517 if (@_) { | |
518 my $slice = shift; | |
519 | |
520 unless (ref($slice) and ($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) { | |
521 throw("Need a Bio::EnsEMBL::Slice."); | |
522 } | |
523 | |
524 $self->{'container_slice'} = $slice; | |
525 } | |
526 | |
527 return $self->{'container_slice'}; | |
528 } | |
529 | |
530 | |
531 =head2 mapper | |
532 | |
533 Arg[1] : (optional) Bio::EnsEMBL::Mapper - the mapper to set | |
534 Example : my $mapper = Bio::EnsEMBL::Mapper->new('ref', 'mapped'); | |
535 $mapped_slice_container->mapper($mapper); | |
536 Description : Getter/setter for the mapper to map between reference slice and | |
537 the artificial container coord system. | |
538 Return type : Bio::EnsEMBL::Mapper | |
539 Exceptions : thrown on wrong argument type | |
540 Caller : internal, Bio::EnsEMBL::MappedSlice->AUTOLOAD | |
541 Status : At Risk | |
542 : under development | |
543 | |
544 =cut | |
545 | |
546 sub mapper { | |
547 my $self = shift; | |
548 | |
549 if (@_) { | |
550 my $mapper = shift; | |
551 | |
552 unless (ref($mapper) and $mapper->isa('Bio::EnsEMBL::Mapper')) { | |
553 throw("Need a Bio::EnsEMBL::Mapper."); | |
554 } | |
555 | |
556 $self->{'mapper'} = $mapper; | |
557 } | |
558 | |
559 return $self->{'mapper'}; | |
560 } | |
561 | |
562 | |
563 =head2 expanded | |
564 | |
565 Arg[1] : (optional) Boolean - expanded mode to set | |
566 Example : if ($mapped_slice_container->expanded) { | |
567 # do more elaborate mapping than in collapsed mode | |
568 [...] | |
569 } | |
570 Description : Getter/setter for expanded mode. | |
571 | |
572 By default, MappedSliceContainer use collapsed mode, which | |
573 means that no inserts in the reference sequence are allowed | |
574 when constructing the MappedSlices. in this mode, the | |
575 mapped_slice artificial coord system will be identical with the | |
576 ref_slice coord system. | |
577 | |
578 By setting expanded mode, you allow inserts in the reference | |
579 sequence. | |
580 Return type : Boolean | |
581 Exceptions : none | |
582 Caller : general | |
583 Status : At Risk | |
584 : under development | |
585 | |
586 =cut | |
587 | |
588 sub expanded { | |
589 my $self = shift; | |
590 $self->{'expanded'} = shift if (@_); | |
591 return $self->{'expanded'}; | |
592 } | |
593 | |
594 =head2 seq | |
595 | |
596 Example : my $seq = $container->seq() | |
597 Description : Retrieves the expanded sequence of the artificial container | |
598 slice, including "-" characters where there are inserts in any | |
599 of the attached mapped slices. | |
600 Return type : String | |
601 Exceptions : none | |
602 Caller : general | |
603 Status : At Risk | |
604 : under development | |
605 | |
606 =cut | |
607 | |
608 sub seq { | |
609 my $self = shift; | |
610 | |
611 my $container_seq = ''; | |
612 | |
613 # check there's a mapper | |
614 if(defined($self->mapper)) { | |
615 my $start = 0; | |
616 my $slice = $self->ref_slice(); | |
617 my $seq = $slice->seq(); | |
618 | |
619 foreach my $coord($self->mapper->map_coordinates($slice->seq_region_name, $slice->start, $slice->end, $slice->strand, 'ref_slice')) { | |
620 # if it is a normal coordinate insert sequence | |
621 if(!$coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate')) { | |
622 $container_seq .= substr($seq, $start, $coord->length()); | |
623 $start += $coord->length; | |
624 } | |
625 | |
626 # if it is a gap or indel insert "-" | |
627 else { | |
628 $container_seq .= '-' x $coord->length(); | |
629 } | |
630 } | |
631 } | |
632 | |
633 return $container_seq; | |
634 } | |
635 | |
636 | |
637 1; | |
638 |