Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/MappedSliceContainer.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/MappedSliceContainer.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,638 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=cut + +=head1 NAME + +Bio::EnsEMBL::MappedSliceContainer - container for mapped slices + +=head1 SYNOPSIS + + # get a reference slice + my $slice = + $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 ); + + # create MappedSliceContainer based on the reference slice + my $msc = Bio::EnsEMBL::MappedSliceContainer->new( -SLICE => $slice ); + + # set the adaptor for fetching AssemblySlices + my $asa = $slice->adaptor->db->get_AssemblySliceAdaptor; + $msc->set_AssemblySliceAdaptor($asa); + + # add an AssemblySlice to your MappedSliceContainer + $msc->attach_AssemblySlice('NCBIM36'); + + foreach my $mapped_slice ( @{ $msc->get_all_MappedSlices } ) { + print $mapped_slice->name, "\n"; + + foreach my $sf ( @{ $mapped_slice->get_all_SimpleFeatures } ) { + print " ", &to_string($sf), "\n"; + } + } + +=head1 DESCRIPTION + +NOTE: this code is under development and not fully functional nor tested +yet. Use only for development. + +A MappedSliceContainer holds a collection of one or more +Bio::EnsEMBL::MappedSlices. It is based on a real reference slice and +contains an artificial "container slice" which defines the common +coordinate system used by all attached MappedSlices. There is also a +mapper to convert coordinates between the reference and the container +slice. + +Attaching MappedSlices to the container is delegated to adaptors +(which act more as object factories than as traditional Ensembl db +adaptors). The adaptors will also modify the container slice and +associated mapper if required. This design allows us to keep the +MappedSliceContainer generic and encapsulate the data source specific +code in the adaptor/factory module. + +In the simplest use case, all required MappedSlices are attached to the +MappedSliceContainer at once (by a single call to the adaptor). This +object should also allow "hot-plugging" of MappedSlices (e.g. attach a +MappedSlice representing a strain to a container that already contains a +multi-species alignment). The methods for attaching new MappedSlice will +be responsable to perform the necessary adjustments to coordinates and +mapper on the existing MappedSlices. + +=head1 METHODS + + new + set_adaptor + get_adaptor + set_AssemblySliceAdaptor + get_AssemblySliceAdaptor + set_AlignSliceAdaptor (not implemented yet) + get_AlignSliceAdaptor (not implemented yet) + set_StrainSliceAdaptor (not implemented yet) + get_StrainSliceAdaptor (not implemented yet) + attach_AssemblySlice + attach_AlignSlice (not implemented yet) + attach_StrainSlice (not implemented yet) + get_all_MappedSlices + sub_MappedSliceContainer (not implemented yet) + ref_slice + container_slice + mapper + expanded + +=head1 RELATED MODULES + + Bio::EnsEMBL::MappedSlice + Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor + Bio::EnsEMBL::Compara::AlignSlice + Bio::EnsEMBL::Compara::AlignSlice::Slice + Bio::EnsEMBL::AlignStrainSlice + Bio::EnsEMBL::StrainSlice + +=cut + +package Bio::EnsEMBL::MappedSliceContainer; + +use strict; +use warnings; +no warnings 'uninitialized'; + +use Bio::EnsEMBL::Utils::Argument qw(rearrange); +use Bio::EnsEMBL::Utils::Exception qw(throw warning); +use Bio::EnsEMBL::CoordSystem; +use Bio::EnsEMBL::Slice; +use Bio::EnsEMBL::Mapper; + + +# define avalable adaptormajs to use with this container +my %adaptors = map { $_ => 1 } qw(assembly align strain); + + +=head2 new + + Arg [SLICE] : Bio::EnsEMBL::Slice $slice - the reference slice for this + container + Arg [EXPANDED] : (optional) Boolean $expanded - set expanded mode (default: + collapsed) + Example : my $slice = $slice_adaptor->fetch_by_region('chromosome', 1, + 9000000, 9500000); + my $msc = Bio::EnsEMBL::MappedSliceContainer->new( + -SLICE => $slice, + -EXPANDED => 1, + ); + Description : Constructor. See the general documentation of this module for + details about this object. Note that the constructor creates an + empty container, so you'll have to attach MappedSlices to it to + be useful (this is usually done by an adaptor/factory). + Return type : Bio::EnsEMBL::MappedSliceContainer + Exceptions : thrown on wrong or missing argument + Caller : general + Status : At Risk + : under development + +=cut + +sub new { + my $caller = shift; + my $class = ref($caller) || $caller; + + my ($ref_slice, $expanded) = rearrange([qw(SLICE EXPANDED)], @_); + + # argument check + unless ($ref_slice and ref($ref_slice) and + ($ref_slice->isa('Bio::EnsEMBL::Slice') or $ref_slice->isa('Bio::EnsEMBL::LRGSlice')) ) { + throw("You must provide a reference slice."); + } + + my $self = {}; + bless ($self, $class); + + # initialise object + $self->{'ref_slice'} = $ref_slice; + $self->{'expanded'} = $expanded || 0; + + $self->{'mapped_slices'} = []; + + # create the container slice + $self->_create_container_slice($ref_slice); + + return $self; +} + + +# +# Create an artificial slice which represents the common coordinate system used +# for this MappedSliceContainer +# +sub _create_container_slice { + my $self = shift; + my $ref_slice = shift; + + # argument check + unless ($ref_slice and ref($ref_slice) and + ($ref_slice->isa('Bio::EnsEMBL::Slice') or $ref_slice->isa('Bio::EnsEMBL::LRGSlice')) ) { + throw("You must provide a reference slice."); + } + + # create an artificial coordinate system for the container slice + my $cs = Bio::EnsEMBL::CoordSystem->new( + -NAME => 'container', + -RANK => 1, + ); + + # Create a new artificial slice spanning your container. Initially this will + # simply span your reference slice + my $container_slice = Bio::EnsEMBL::Slice->new( + -COORD_SYSTEM => $cs, + -START => 1, + -END => $ref_slice->length, + -STRAND => 1, + -SEQ_REGION_NAME => 'container', + ); + + $self->{'container_slice'} = $container_slice; + + # Create an Mapper to map to/from the reference slice to the container coord + # system. + my $mapper = Bio::EnsEMBL::Mapper->new('ref_slice', 'container'); + + $mapper->add_map_coordinates( + $ref_slice->seq_region_name, + $ref_slice->start, + $ref_slice->end, + 1, + $container_slice->seq_region_name, + $container_slice->start, + $container_slice->end, + ); + + $self->{'mapper'} = $mapper; +} + + +=head2 set_adaptor + + Arg[1] : String $type - the type of adaptor to set + Arg[2] : Adaptor $adaptor - the adaptor to set + Example : my $adaptor = Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor->new; + $msc->set_adaptor('assembly', $adaptor); + Description : Parameterisable wrapper for all methods that set adaptors (see + below). + Return type : same as Arg 2 + Exceptions : thrown on missing type + Caller : general + Status : At Risk + : under development + +=cut + +sub set_adaptor { + my $self = shift; + my $type = shift; + my $adaptor = shift; + + # argument check + unless ($type and $adaptors{$type}) { + throw("Missing or unknown adaptor type."); + } + + $type = ucfirst($type); + my $method = "set_${type}SliceAdaptor"; + + return $self->$method($adaptor); +} + + +=head2 get_adaptor + + Arg[1] : String $type - the type of adaptor to get + Example : my $assembly_slice_adaptor = $msc->get_adaptor('assembly'); + Description : Parameterisable wrapper for all methods that get adaptors (see + below). + Return type : An adaptor for the requested type of MappedSlice. + Exceptions : thrown on missing type + Caller : general + Status : At Risk + : under development + +=cut + +sub get_adaptor { + my $self = shift; + my $type = shift; + + # argument check + unless ($type and $adaptors{$type}) { + throw("Missing or unknown adaptor type."); + } + + $type = ucfirst($type); + my $method = "get_${type}SliceAdaptor"; + + return $self->$method; +} + + +=head2 set_AssemblySliceAdaptor + + Arg[1] : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor - the adaptor to set + Example : my $adaptor = Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor->new; + $msc->set_AssemblySliceAdaptor($adaptor); + Description : Sets an AssemblySliceAdaptor for this container. The adaptor can + be used to attach MappedSlice for alternative assemblies. + Return type : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor + Exceptions : thrown on wrong or missing argument + Caller : general, $self->get_adaptor + Status : At Risk + : under development + +=cut + +sub set_AssemblySliceAdaptor { + my $self = shift; + my $assembly_slice_adaptor = shift; + + unless ($assembly_slice_adaptor and ref($assembly_slice_adaptor) and + $assembly_slice_adaptor->isa('Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor')) { + throw("Need a Bio::EnsEMBL::AssemblySliceAdaptor."); + } + + $self->{'adaptors'}->{'AssemblySlice'} = $assembly_slice_adaptor; +} + + +=head2 get_AssemblySliceAdaptor + + Example : my $assembly_slice_adaptor = $msc->get_AssemblySliceAdaptor; + Description : Gets a AssemblySliceAdaptor from this container. The adaptor can + be used to attach MappedSlice for alternative assemblies. + Return type : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor + Exceptions : thrown on wrong or missing argument + Caller : general, $self->get_adaptor + Status : At Risk + : under development + +=cut + +sub get_AssemblySliceAdaptor { + my $self = shift; + + unless ($self->{'adaptors'}->{'AssemblySlice'}) { + warning("No AssemblySliceAdaptor attached to MappedSliceContainer."); + } + + return $self->{'adaptors'}->{'AssemblySlice'}; +} + + +# [todo] +sub set_AlignSliceAdaptor { + throw("Not implemented yet!"); +} + + +# [todo] +sub get_AlignSliceAdaptor { + throw("Not implemented yet!"); +} + + +# [todo] +sub set_StrainSliceAdaptor { + my $self = shift; + my $strain_slice_adaptor = shift; + + unless ($strain_slice_adaptor and ref($strain_slice_adaptor) and + $strain_slice_adaptor->isa('Bio::EnsEMBL::DBSQL::StrainSliceAdaptor')) { + throw("Need a Bio::EnsEMBL::StrainSliceAdaptor."); + } + + $self->{'adaptors'}->{'StrainSlice'} = $strain_slice_adaptor; +} + + +# [todo] +sub get_StrainSliceAdaptor { + my $self = shift; + + unless ($self->{'adaptors'}->{'StrainSlice'}) { + warning("No StrainSliceAdaptor attached to MappedSliceContainer."); + } + + return $self->{'adaptors'}->{'StrainSlice'}; +} + + +=head2 attach_AssemblySlice + + Arg[1] : String $version - assembly version to attach + Example : $msc->attach_AssemblySlice('NCBIM36'); + Description : Attaches a MappedSlice for an alternative assembly to this + container. + Return type : none + Exceptions : thrown on missing argument + Caller : general, Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor + Status : At Risk + : under development + +=cut + +sub attach_AssemblySlice { + my $self = shift; + my $version = shift; + + throw("Need a version.") unless ($version); + + my $asa = $self->get_AssemblySliceAdaptor; + return unless ($asa); + + my @mapped_slices = @{ $asa->fetch_by_version($self, $version) }; + + push @{ $self->{'mapped_slices'} }, @mapped_slices; +} + + +=head2 attach_StrainSlice + + Arg[1] : String $strain - name of strain to attach + Example : $msc->attach_StrainSlice('Watson'); + Description : Attaches a MappedSlice for an alternative strain to this + container. + Return type : none + Exceptions : thrown on missing argument + Caller : general, Bio::EnsEMBL::DBSQL::StrainSliceAdaptor + Status : At Risk + : under development + +=cut + +sub attach_StrainSlice { + my $self = shift; + my $strain = shift; + + throw("Need a strain.") unless ($strain); + + my $ssa = $self->get_StrainSliceAdaptor; + return unless ($ssa); + + my @mapped_slices = @{ $ssa->fetch_by_name($self, $strain) }; + + push @{ $self->{'mapped_slices'} }, @mapped_slices; +} + + + +=head2 get_all_MappedSlices + + Example : foreach my $mapped_slice (@{ $msc->get_all_MappedSlices }) { + print $mapped_slice->name, "\n"; + } + Description : Returns all MappedSlices attached to this container. + Return type : listref of Bio::EnsEMBL::MappedSlice + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub get_all_MappedSlices { + my $self = shift; + return $self->{'mapped_slices'}; +} + + +# [todo] +sub sub_MappedSliceContainer { + throw("Not implemented yet!"); +} + + +=head2 ref_slice + + Arg[1] : (optional) Bio::EnsEMBL::Slice - the reference slice to set + Example : my $ref_slice = $mapped_slice_container->ref_slice; + print "This MappedSliceContainer is based on the reference + slice ", $ref_slice->name, "\n"; + Description : Getter/setter for the reference slice. + Return type : Bio::EnsEMBL::Slice + Exceptions : thrown on wrong argument type + Caller : general + Status : At Risk + : under development + +=cut + +sub ref_slice { + my $self = shift; + + if (@_) { + my $slice = shift; + + unless (ref($slice) and ($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice'))) { + throw("Need a Bio::EnsEMBL::Slice."); + } + + $self->{'ref_slice'} = $slice; + } + + return $self->{'ref_slice'}; +} + + +=head2 container_slice + + Arg[1] : (optional) Bio::EnsEMBL::Slice - the container slice to set + Example : my $container_slice = $mapped_slice_container->container_slice; + print "The common slice used by this MappedSliceContainer is ", + $container_slice->name, "\n"; + Description : Getter/setter for the container slice. This is an artificial + slice which defines the common coordinate system used by the + MappedSlices attached to this container. + Return type : Bio::EnsEMBL::Slice + Exceptions : thrown on wrong argument type + Caller : general + Status : At Risk + : under development + +=cut + +sub container_slice { + my $self = shift; + + if (@_) { + my $slice = shift; + + unless (ref($slice) and ($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) { + throw("Need a Bio::EnsEMBL::Slice."); + } + + $self->{'container_slice'} = $slice; + } + + return $self->{'container_slice'}; +} + + +=head2 mapper + + Arg[1] : (optional) Bio::EnsEMBL::Mapper - the mapper to set + Example : my $mapper = Bio::EnsEMBL::Mapper->new('ref', 'mapped'); + $mapped_slice_container->mapper($mapper); + Description : Getter/setter for the mapper to map between reference slice and + the artificial container coord system. + Return type : Bio::EnsEMBL::Mapper + Exceptions : thrown on wrong argument type + Caller : internal, Bio::EnsEMBL::MappedSlice->AUTOLOAD + Status : At Risk + : under development + +=cut + +sub mapper { + my $self = shift; + + if (@_) { + my $mapper = shift; + + unless (ref($mapper) and $mapper->isa('Bio::EnsEMBL::Mapper')) { + throw("Need a Bio::EnsEMBL::Mapper."); + } + + $self->{'mapper'} = $mapper; + } + + return $self->{'mapper'}; +} + + +=head2 expanded + + Arg[1] : (optional) Boolean - expanded mode to set + Example : if ($mapped_slice_container->expanded) { + # do more elaborate mapping than in collapsed mode + [...] + } + Description : Getter/setter for expanded mode. + + By default, MappedSliceContainer use collapsed mode, which + means that no inserts in the reference sequence are allowed + when constructing the MappedSlices. in this mode, the + mapped_slice artificial coord system will be identical with the + ref_slice coord system. + + By setting expanded mode, you allow inserts in the reference + sequence. + Return type : Boolean + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub expanded { + my $self = shift; + $self->{'expanded'} = shift if (@_); + return $self->{'expanded'}; +} + +=head2 seq + + Example : my $seq = $container->seq() + Description : Retrieves the expanded sequence of the artificial container + slice, including "-" characters where there are inserts in any + of the attached mapped slices. + Return type : String + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub seq { + my $self = shift; + + my $container_seq = ''; + + # check there's a mapper + if(defined($self->mapper)) { + my $start = 0; + my $slice = $self->ref_slice(); + my $seq = $slice->seq(); + + foreach my $coord($self->mapper->map_coordinates($slice->seq_region_name, $slice->start, $slice->end, $slice->strand, 'ref_slice')) { + # if it is a normal coordinate insert sequence + if(!$coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate')) { + $container_seq .= substr($seq, $start, $coord->length()); + $start += $coord->length; + } + + # if it is a gap or indel insert "-" + else { + $container_seq .= '-' x $coord->length(); + } + } + } + + return $container_seq; +} + + +1; +