Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/MappedSlice.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/MappedSlice.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,668 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=cut + +=head1 NAME + +Bio::EnsEMBL::MappedSlice - an object representing a mapped slice + +=head1 SYNOPSIS + + # get a reference slice + my $slice = + $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 ); + + # create MappedSliceContainer based on the reference slice + my $msc = Bio::EnsEMBL::MappedSliceContainer->new( -SLICE => $slice ); + + # set the adaptor for fetching AssemblySlices + my $asa = $slice->adaptor->db->get_AssemblySliceAdaptor; + $msc->set_AssemblySliceAdaptor($asa); + + # add an AssemblySlice to your MappedSliceContainer + $msc->attach_AssemblySlice('NCBIM36'); + + foreach my $mapped_slice ( @{ $msc->get_all_MappedSlices } ) { + print $mapped_slice->name, "\n"; + + foreach my $sf ( @{ $mapped_slice->get_all_SimpleFeatures } ) { + print " ", &to_string($sf), "\n"; + } + } + +=head1 DESCRIPTION + +NOTE: this code is under development and not fully functional nor tested +yet. Use only for development. + +This object represents a mapped slice, i.e. a slice that's attached +to a reference slice and a mapper to convert coordinates to/from the +reference. The attachment is done via a MappedSliceContainer which +has the reference slice and the "container slice" defining the common +coordinate system for all MappedSlices. + +A MappedSlice is supposed to behave as close to a Bio::EnsEMBL::Slice +as possible. Most Slice methods are implemented in MappedSlice and will +return an equivalent value to what Slice does. There are some exceptions +of unimplemented methods, either because there is no useful equivalent +for a MappedSlice to do, or they are too complicated. + +Not supported Bio::EnsEMBL::Slice methods: + + All deprecated methods + All Bio::PrimarySeqI compliance methods + expand + get_generic_features + get_seq_region_id + seq_region_Slice + +Not currently supported but maybe should/could: + + calculate_pi + calculate_theta + get_base_count + get_by_Individual + get_by_strain + invert + +Internally, a MappedSlice is a collection of Bio::EnsEMBL::Slices and +associated Bio::EnsEMBL::Mappers which map the slices to the common +container coordinate system. + +MappedSlices are usually created and attached to a MappedSliceContainer +by an adaptor/factory. + +=head1 METHODS + + new + add_Slice_Mapper_pair + get_all_Slice_Mapper_pairs + adaptor + container + name + seq_region_name + start + end + strand + length + seq_region_length + centrepoint + coord_system + coord_system_name + is_toplevel + seq (not implemented yet) + subseq (not implemented yet) + get_repeatmasked_seq (not implemented yet) + sub_MappedSlice (not implemented yet) + project (not implemented yet) + +=head1 RELATED MODULES + + Bio::EnsEMBL::MappedSlice + Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor + Bio::EnsEMBL::Compara::AlignSlice + Bio::EnsEMBL::Compara::AlignSlice::Slice + Bio::EnsEMBL::AlignStrainSlice + Bio::EnsEMBL::StrainSlice + +=cut + +package Bio::EnsEMBL::MappedSlice; + +use strict; +use warnings; +no warnings 'uninitialized'; + +use Bio::EnsEMBL::Utils::Argument qw(rearrange); +use Bio::EnsEMBL::Utils::Exception qw(throw warning); +use Bio::EnsEMBL::Mapper; +use Scalar::Util qw(weaken); + +use vars qw($AUTOLOAD); + + +=head2 new + + Arg [ADAPTOR] : Adaptor $adaptor - an adaptor of the appropriate type + Arg [CONTAINER] : Bio::EnsEMBL::MappedSliceContainer $container - the + container this MappedSlice is attached to + Arg [NAME] : String $name - name + Example : my $mapped_slice = Bio::EnsEMBL::MappedSlice->new( + -ADAPTOR => $adaptor, + -CONTAINER => $container, + -NAME => $name, + ); + Description : Constructor. Usually you won't call this method manually, but + the MappedSlice will be constructed by an adaptor/factory. + Return type : Bio::EnsEMBL::MappedSlice + Exceptions : thrown on wrong or missing arguments + Caller : general, MappedSlice adaptors + Status : At Risk + : under development + +=cut + +sub new { + my $caller = shift; + my $class = ref($caller) || $caller; + + my ($adaptor, $container, $name) = + rearrange([qw(ADAPTOR CONTAINER NAME)], @_); + + # arguement check + unless ($container and ref($container) and + $container->isa('Bio::EnsEMBL::MappedSliceContainer')) { + throw("Need a MappedSliceContainer."); + } + + my $self = {}; + bless ($self, $class); + + # + # initialise object + # + + # need to weaken reference to prevent circular reference + weaken($self->{'container'} = $container); + + $self->adaptor($adaptor) if (defined($adaptor)); + $self->{'name'} = $name if (defined($name)); + + $self->{'slice_mapper_pairs'} = []; + + return $self; +} + + +=head2 add_Slice_Mapper_pair + + Arg[1] : Bio::EnsEMBL::Slice $slice - slice to add + Arg[2] : Bio::EnsEMBL::Mapper $mapper - the mapper for this slice + Example : $mapped_slice->add_Slice_Mapper_pair($slice, $mapper); + Description : Adds a native slice and a corresponding mapper to map to/from + the artificial container coord system. + Return type : listref of Bio::EnsEMBL::MappedSlice + Exceptions : thrown on wrong or missing arguments + Caller : general, MappedSlice adaptors + Status : At Risk + : under development + +=cut + +sub add_Slice_Mapper_pair { + my $self = shift; + my $slice = shift; + my $mapper = shift; + + # argument check + unless ($slice and ref($slice) and ($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) { + throw("You must provide a slice."); + } + + unless ($mapper and ref($mapper) and $mapper->isa('Bio::EnsEMBL::Mapper')) { + throw("You must provide a mapper."); + } + + push @{ $self->{'slice_mapper_pairs'} }, [ $slice, $mapper ]; + + return $self->{'slice_mapper_pairs'}; +} + + +=head2 get_all_Slice_Mapper_pairs + + Example : foreach my $pair (@{ $self->get_all_Slice_Mapper_pairs }) { + my ($slice, $mapper) = @$pair; + + # get container coordinates + my @coords = $mapper->map_coordinates( + $slice->seq_region_name, + $slice->start, + $slice->end, + $slice->strand, + 'mapped_slice' + ); + + # .... + } + Description : Gets all Slice/Mapper pairs this MappedSlice is composed of. + Each slice (and features on it) can be mapped onto the + artificial container coord system using the mapper. + Return type : listref of listref of a Bio::EnsEMBL::Slice and + Bio::EnsEMBL::Mapper pair + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub get_all_Slice_Mapper_pairs { + my $self = shift; + return $self->{'slice_mapper_pairs'}; +} + + +=head2 adaptor + + Arg[1] : (optional) Adaptor $adaptor - the adaptor/factory for this + object + Example : $mapped_slice->adaptor($assembly_slice_adaptor); + Description : Getter/setter for the adaptor/factory for this object. + Return type : Adaptor of appropriate type + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub adaptor { + my $self = shift; + weaken($self->{'adaptor'} = shift) if (@_); + return $self->{'adaptor'}; +} + + +=head2 container + + Arg[1] : (optional) Bio::EnsEMBL::MappedSliceContainer - the container + this object is attached to + Example : my $container = $mapped_slice->container; + print $container->ref_slice->name, "\n"; + Description : Getter/setter for the container this object is attached to. The + container will give you access to the reference slice, a common + artificial container slice, and a mapper to map to it from the + container coord system. + + The implementation uses a weak reference to attach the container + since the container holds a list of MappedSlices itself. + Return type : Bio::EnsEMBL::MappedSliceContainer + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub container { + my $self = shift; + weaken($self->{'container'} = shift) if (@_); + return $self->{'container'}; +} + + +=head2 name + + Arg[1] : String - the name of this object + Example : my $name = $mapped_slice->container->ref_slice->name . + ":mapped_" . $ident_string; + $mapped_slice->name($name); + Description : Getter/setter for this object's name + Return type : String + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub name { + my $self = shift; + $self->{'name'} = shift if (@_); + return $self->{'name'}; +} + + +=head2 seq_region_name + + Example : my $sr_name = $mapped_slice->seq_region_name; + Description : Returns the seq_region name of the reference slice. + Return type : String + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub seq_region_name { + my $self = shift; + return $self->container->ref_slice->seq_region_name; +} + + +=head2 start + + Example : my $start = $mapped_slice->start; + Description : Returns the start of the container slice. + Return type : Int + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub start { + my $self = shift; + return $self->container->container_slice->start; +} + + +=head2 end + + Example : my $end = $mapped_slice->end; + Description : Returns the end of the container slice. + Return type : Int + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub end { + my $self = shift; + return $self->container->container_slice->end; +} + + +=head2 strand + + Example : my $strand = $mapped_slice->strand; + Description : Returns the strand of the container slice. + Return type : Int + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub strand { + my $self = shift; + return $self->container->container_slice->strand; +} + + +=head2 length + + Example : my $length = $mapped_slice->length; + Description : Returns the length of the container slice + Return type : Int + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub length { + my $self = shift; + return $self->container->container_slice->length; +} + + +=head2 seq_region_length + + Example : my $sr_length = $mapped_slice->seq_region_length; + Description : Returns the seq_region length of the reference slice. + Return type : Int + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub seq_region_length { + my $self = shift; + return $self->container->ref_slice->seq_region_length; +} + + +=head2 centrepoint + + Example : my $centrepoint = $mapped_slice->centrepoint; + Description : Returns the centrepoint of the container slice. + Return type : Int + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub centrepoint { + my $self = shift; + return $self->container->container_slice->centrepoint; +} + + +=head2 coord_system + + Example : my $cs = $mapped_slice->coord_system; + Description : Returns the coord system of the container slice. + Return type : Bio::EnsEMBL::CoordSystem + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub coord_system { + my $self = shift; + return $self->container->container_slice->coord_system; +} + +=head2 coord_system_name + + Example : my $cs_name = $mapped_slice->coord_system_name; + Description : Returns the coord system name of the container slice. + Return type : Int + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub coord_system_name { + my $self = shift; + return $self->container->container_slice->coord_system_name; +} + +=head2 is_toplevel + + Example : my $toplevel_flag = $mapped_slice->is_toplevel; + Description : Returns weather the container slice is toplevel. + Return type : Int + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub is_toplevel { + my $self = shift; + return $self->container->container_slice->is_toplevel; +} + + +=head2 seq + + Example : my $seq = $mapped_slice->seq() + Description : Retrieves the expanded sequence of this mapped slice, + including "-" characters where there are inserts in any other + mapped slices. This will align with the sequence returned by + the container's seq() method. + Return type : String + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub seq { + my $self = shift; + + # create an empty string + my $ms_seq = ''; + + # this coord represents the current position in the MS sequence + my $start = 0; + + # get slice/mapper pairs from mapped slice (usually only one anyway) + foreach my $pair(@{$self->get_all_Slice_Mapper_pairs()}) { + my ($s, $m) = @$pair; + + # make sure to send extra args + # eg strain slices might need read coverage filtering + my $seq = $s->seq(@_); + + # project from mapped slice to reference slice using the mapper + foreach my $ref_coord($m->map_coordinates('mapped_slice', 1, CORE::length($seq), $s->strand, 'mapped_slice')) { + + # normal coord + if(!$ref_coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate') && !$ref_coord->isa('Bio::EnsEMBL::Mapper::Gap')) { + + # project from reference slice to container slice using the container's mapper + foreach my $ms_coord($self->container->mapper->map_coordinates($self->container->ref_slice->seq_region_name, $ref_coord->start, $ref_coord->end, $ref_coord->strand, 'ref_slice')) { + + # normal coord + if(!$ms_coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate') && !$ms_coord->isa('Bio::EnsEMBL::Mapper::Gap')) { + $ms_seq .= substr($seq, $start, $ms_coord->length); + + $start += $ms_coord->length(); + } + + # indel coord + else { + $ms_seq .= '-' x $ms_coord->length(); + } + } + } + + # indel / gap + else { + + # if there's a gap here aswell, add corresponding sequence + if($ref_coord->gap_length > 0) { + $ms_seq .= substr($seq, $start, $ref_coord->gap_length); + $start += $ref_coord->gap_length; + } + + # add "-" to the sequence + $ms_seq .= '-' x ($ref_coord->length() - $ref_coord->gap_length()); + } + } + } + + return $ms_seq; +} + +sub subseq { +} + +sub get_repeatmasked_seq { +} + +sub sub_MappedSlice { +} + +sub project { +} + + +=head2 AUTOLOAD + + Arg[1..N] : Arguments passed on to the calls on the underlying slices. + Example : my @simple_features = @{ $mapped_slice->get_all_SimpleFeatures }; + Description : Aggregate data gathered from composing Slices. + This will call Slice->get_all_* and combine the results. + Coordinates will be transformed to be on the container slice + coordinate system. + + Calls involving DAS features are skipped since the DAS adaptor + handles coordinate conversions natively. + Return type : listref of features (same type as corresponding Slice method) + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub AUTOLOAD { + my $self = shift; + + my $method = $AUTOLOAD; + $method =~ s/.*:://; + + # AUTOLOAD should only deal with get_all_* methods + return unless ($method =~ /^get_all_/); + + # skip DAS methods + return if ($method =~ /DAS/); + + my @mapped_features = (); + + foreach my $pair (@{ $self->get_all_Slice_Mapper_pairs }) { + my ($slice, $mapper) = @$pair; + #warn $slice->name; + + # call $method on each native slice composing the MappedSlice + my @features = @{ $slice->$method(@_) }; + + # map features onto the artificial container coordinate system + foreach my $f (@features) { + + my @coords = $mapper->map_coordinates( + $f->slice->seq_region_name, + $f->start, + $f->end, + $f->strand, + 'mapped_slice' + ); + + # sanity check + if (scalar(@coords) > 1) { + warning("Got more than one Coordinate returned, expected only one!\n"); + } + + $f->start($coords[0]->start); + $f->end($coords[0]->end); + $f->strand($coords[0]->strand); + $f->slice($self->container->container_slice); + + push @mapped_features, $f; + } + + } + + return \@mapped_features; +} + + +1; +