Mercurial > repos > willmclaren > ensembl_vep
diff variant_effect_predictor/Bio/EnsEMBL/Funcgen/AnnotatedFeature.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/Funcgen/AnnotatedFeature.pm Fri Aug 03 10:04:48 2012 -0400 @@ -0,0 +1,273 @@ +# +# Ensembl module for Bio::EnsEMBL::Funcgen::AnnotatedFeature +# +# You may distribute this module under the same terms as Perl itself + +=head1 LICENSE + + Copyright (c) 1999-2011 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <ensembl-dev@ebi.ac.uk>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + + +=head1 NAME + +Bio::EnsEMBL::AnnotatedFeature - A module to represent a feature mapping as +predicted by the eFG pipeline. + +=head1 SYNOPSIS + +use Bio::EnsEMBL::Funcgen::AnnotatedFeature; + +my $feature = Bio::EnsEMBL::Funcgen::AnnotatedFeature->new + ( + -SLICE => $chr_1_slice, + -START => 1_000_000, + -SUMMIT => 1_000_019, + -END => 1_000_024, + -STRAND => -1, + -DISPLAY_LABEL => $text, + -SCORE => $score, + -FEATURE_SET => $fset, + ); + + + +=head1 DESCRIPTION + +An AnnotatedFeature object represents the genomic placement of a prediction +generated by the eFG analysis pipeline. This normally represents the +output of a peak calling analysis. It can have a score and/or a summit, the +meaning of which depend on the specific Analysis used to infer the feature. +For example, in the case of a feature derived from a peak call over a ChIP-seq +experiment, the score is the peak caller score, and summit is the point in the +feature where more reads align with the genome. + +=head1 SEE ALSO + +Bio::EnsEMBL::Funcgen::DBSQL::AnnotatedFeatureAdaptor + +=cut + +use strict; +use warnings; + +package Bio::EnsEMBL::Funcgen::AnnotatedFeature; + +use Bio::EnsEMBL::Utils::Argument qw( rearrange ); +use Bio::EnsEMBL::Utils::Exception qw( throw ); +use Bio::EnsEMBL::Funcgen::SetFeature; + +use vars qw(@ISA); +@ISA = qw(Bio::EnsEMBL::Funcgen::SetFeature); + + +=head2 new + + Arg [-SLICE] : Bio::EnsEMBL::Slice - The slice on which this feature is. + Arg [-START] : int - The start coordinate of this feature relative to the start of the slice + it is sitting on. Coordinates start at 1 and are inclusive. + Arg [-END] : int -The end coordinate of this feature relative to the start of the slice + Arg [-STRAND] : int - The orientation of this feature. Valid values are 1, -1 and 0. + it is sitting on. Coordinates start at 1 and are inclusive. + Arg [-DISPLAY_LABEL]: string - Display label for this feature + Arg [-SUMMIT] : optional int - seq_region peak summit position + Arg [-SCORE] : optional int - Score assigned by analysis pipeline + Arg [-dbID] : optional int - Internal database ID. + Arg [-ADAPTOR] : optional Bio::EnsEMBL::DBSQL::BaseAdaptor - Database adaptor. + Example : my $feature = Bio::EnsEMBL::Funcgen::AnnotatedFeature->new + ( + -SLICE => $chr_1_slice, + -START => 1_000_000, + -END => 1_000_024, + -STRAND => -1, + -FEATURE_SET => $fset, + -DISPLAY_LABEL => $text, + -SCORE => $score, + -SUMMIT => 1_000_019, + ); + + + Description: Constructor for AnnotatedFeature objects. + Returntype : Bio::EnsEMBL::Funcgen::AnnotatedFeature + Exceptions : None + Caller : General + Status : Medium Risk + +=cut + +sub new { + my $caller = shift; + + my $class = ref($caller) || $caller; + my $self = $class->SUPER::new(@_); + #Hard code strand => 0 here? And remove from input params? + my ($score, $summit) = rearrange(['SCORE', 'SUMMIT'], @_); + + #Direct assingment here removes need for set arg test in method + + $self->{'score'} = $score if defined $score; + $self->{'summit'} = $summit if defined $summit; + + return $self; +} + + +=head2 score + + Arg [1] : (optional) int - score + Example : my $score = $feature->score(); + Description: Getter for the score attribute for this feature. + Returntype : int + Exceptions : None + Caller : General + Status : Low Risk + +=cut + +sub score { + my $self = shift; + return $self->{'score'}; +} + +=head2 summit + + Arg [1] : (optional) int - summit postition + Example : my $peak_summit = $feature->summit; + Description: Getter for the summit attribute for this feature. + Returntype : int + Exceptions : None + Caller : General + Status : At Risk + +=cut + +sub summit { + my $self = shift; + return $self->{'summit'}; +} + + +=head2 display_label + + Example : my $label = $feature->display_label(); + Description: Getter for the display label of this feature. + Returntype : String + Exceptions : None + Caller : General + Status : Medium Risk + +=cut + +sub display_label { + my $self = shift; + + #auto generate here if not set in table + #need to go with one or other, or can we have both, split into diplay_name and display_label? + + if(! $self->{'display_label'} && $self->adaptor){ + $self->{'display_label'} = $self->feature_type->name()." -"; + $self->{'display_label'} .= " ".$self->cell_type->name(); + $self->{'display_label'} .= " Enriched Site"; + } + + return $self->{'display_label'}; +} + + +=head2 is_focus_feature + + Args : None + Example : if($feat->is_focus_feature){ ... } + Description: Returns true if AnnotatedFeature is part of a focus + set used in the RegulatoryBuild + Returntype : Boolean + Exceptions : None + Caller : General + Status : At Risk + +=cut + +sub is_focus_feature{ + my $self = shift; + + #Do we need to test for FeatureSet here? + + return $self->feature_set->is_focus_set; +} + + +=head2 get_underlying_structure + + Example : my @loci = @{ $af->get_underlying_structure() }; + Description: Returns and array of loci consisting of: + (start, (motif_feature_start, motif_feature_end)*, end) + Returntype : ARRAYREF + Exceptions : None + Caller : General + Status : At Risk - This is TFBS specific and could move to TranscriptionFactorFeature + +=cut + +#This should really be precomputed and stored in the DB to avoid the MF attr fetch +#Need to be aware of projecting here, as these will expire if we project after this method is called + +sub get_underlying_structure{ + my $self = shift; + + if(! defined $self->{underlying_structure}){ + my @loci = ($self->start); + + foreach my $mf(@{$self->get_associated_MotifFeatures}){ + push @loci, ($mf->start, $mf->end); + } + + push @loci, $self->end; + + $self->{underlying_structure} = \@loci; + } + + return $self->{underlying_structure}; +} + +=head2 get_associated_MotifFeatures + + Example : my @assoc_mfs = @{ $af->get_associated_MotifFeatures }; + Description: Returns and array associated MotifFeature i.e. MotifFeatures + representing a relevanting PWM/BindingMatrix + Returntype : ARRAYREF + Exceptions : None + Caller : General + Status : At Risk - This is TFBS specific and could move to TranscriptionFactorFeature + +=cut + +sub get_associated_MotifFeatures{ + my ($self) = @_; + + if(! defined $self->{'assoc_motif_features'}){ + my $mf_adaptor = $self->adaptor->db->get_MotifFeatureAdaptor; + + #These need reslicing! + + $self->{'assoc_motif_features'} = $mf_adaptor->fetch_all_by_AnnotatedFeature($self, $self->slice); + } + + return $self->{'assoc_motif_features'}; +} + + +1; +