Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/Funcgen/RegulatoryFeature.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/Funcgen/RegulatoryFeature.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,814 @@ + +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <ensembl-dev@ebi.ac.uk>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=head1 NAME + +Bio::EnsEMBL::Funcgen::RegulatoryFeature + +=head1 SYNOPSIS + + use Bio::EnsEMBL::Registry; + use Bio::EnsEMBL::Funcgen::RegulatoryFeature; + my $reg = Bio::EnsEMBL::Registry->load_adaptors_from_db + ( + -host => 'ensembldb.ensembl.org', + -user => 'anonymous' + ); + + my $regfeat_adaptor = $reg->get_adaptor($species, 'funcgen', 'RegulatoryFeature'); + + + ### Creating/storing a RegulatoryFeature Set ### + my $feature = Bio::EnsEMBL::Funcgen::RegulatoryFeature->new + ( + -SLICE => $chr_1_slice, + -START => 1_000_000, + -END => 1_000_024, + -STRAND => 0, + -DISPLAY_LABEL => $text, + -FEATURE_SET => $fset, + -FEATURE_TYPE => $reg_ftype, + -ATTRIBUTE_CACHE => \%attr_cache, + ); + + my ($stored_feat) = @{$regfeat_adaptor->store([$feature])}; + + + ### Fetching some RegualtoryFeatures + my @regfeats = @{$regfeat_adaptor->fetch_all_by_Slice_FeatureSets($slice, \@fsets)}; + + + ### Print the bound and core loci + print join(' - ', ($reg_feat->bound_start, + $reg_feat->start, + $reg_feat->end, + $reg_feat->bound_end)."\n"; + + + ### Getting some supporting evidence for a RegualtoryFeatures + my @reg_attrs = @{$reg_feat->regulatory_attributes('annotated')}; + + +=head1 DESCRIPTION + +A RegulatoryFeature object represents the output of the Ensembl RegulatoryBuild: + http://www.ensembl.org/info/docs/funcgen/regulatory_build.html + +It is comprises many possible histone, transcription factor, polymerase and open +chromatin features, which have been combined to provide a summary view and +classification of the regulatory status at a given loci. + + +=head1 SEE ALSO + +Bio::EnsEMBL:Funcgen::DBSQL::RegulatoryFeatureAdaptor +Bio::EnsEMBL::Funcgen::SetFeature + +=cut + + +package Bio::EnsEMBL::Funcgen::RegulatoryFeature; + +use Bio::EnsEMBL::Utils::Argument qw( rearrange ); +use Bio::EnsEMBL::Utils::Exception qw( throw ); +use strict; +use warnings; + +use base qw(Bio::EnsEMBL::Funcgen::SetFeature); #@ISA + + +=head2 new + + Arg [-SLICE] : Bio::EnsEMBL::Slice - The slice on which this feature is located. + Arg [-START] : int - The start coordinate of this feature relative to the start of the slice + it is sitting on. Coordinates start at 1 and are inclusive. + Arg [-END] : int -The end coordinate of this feature relative to the start of the slice + it is sitting on. Coordinates start at 1 and are inclusive. + Arg [-FEATURE_SET] : Bio::EnsEMBL::Funcgen::FeatureSet - Regulatory Feature set + Arg [-FEATURE_TYPE] : Bio::EnsEMBL::Funcgen::FeatureType - Regulatory Feature sub type + Arg [-BINARY_STRING] : (optional) string - Regulatory Build binary string + Arg [-STABLE_ID] : (optional) string - Stable ID for this RegualtoryFeature e.g. ENSR00000000001 + Arg [-DISPLAY_LABEL] : (optional) string - Display label for this feature + Arg [-ATTRIBUTE_CACHE] : (optional) HASHREF of feature class dbID|Object lists + Arg [-PROJECTED] : (optional) boolean - Flag to specify whether this feature has been projected or not + Arg [-dbID] : (optional) int - Internal database ID. + Arg [-ADAPTOR] : (optional) Bio::EnsEMBL::DBSQL::BaseAdaptor - Database adaptor. + + Example : my $feature = Bio::EnsEMBL::Funcgen::RegulatoryFeature->new( + -SLICE => $chr_1_slice, + -START => 1000000, + -END => 1000024, + -DISPLAY_LABEL => $text, + -FEATURE_SET => $fset, + -FEATURE_TYPE => $reg_ftype, + -ATTRIBUTE_CACHE => \%attr_cache, + ); + + + Description: Constructor for RegulatoryFeature objects. + Returntype : Bio::EnsEMBL::Funcgen::RegulatoryFeature + Exceptions : None + Caller : General + Status : Stable + +=cut + +sub new { + my $caller = shift; + my $class = ref($caller) || $caller; + my $self = $class->SUPER::new(@_); + + my ($stable_id, $attr_cache, $bin_string, $projected) + = rearrange(['STABLE_ID', 'ATTRIBUTE_CACHE', 'BINARY_STRING', 'PROJECTED'], @_); + + #None of these are mandatory at creation + #under different use cases + $self->{binary_string} = $bin_string if defined $bin_string; + $self->{stable_id} = $stable_id if defined $stable_id; + $self->{projected} = $projected if defined $projected; + $self->attribute_cache($attr_cache) if $attr_cache; + + return $self; +} + + +=head2 display_label + + Example : my $label = $feature->display_label(); + Description: Getter for the display label of this feature. + Returntype : String + Exceptions : None + Caller : General + Status : Stable + +=cut + +sub display_label { + my $self = shift; + + if(! defined $self->{'display_label'}){ + $self->{'display_label'} = $self->feature_type->name.' Regulatory Feature'; + + if( defined $self->cell_type ){ + $self->{'display_label'} .= ' - '.$self->cell_type->name; + } + } + + return $self->{display_label}; +} + +=head2 display_id + + Example : print $feature->display_id(); + Description: This method returns a string that is considered to be + the 'display' identifier. In this case the stable Id is + preferred + Returntype : String + Exceptions : none + Caller : web drawing code, Region Report tool + Status : Stable + +=cut + +sub display_id { return $_[0]->{stable_id}; } + + +=head2 binary_string + + Arg [1] : optional string - binary string from regulatory build + Example : my $bin_string = $feature->binary_string(); + Description: Getter and setter for the binary_string for this feature. + Returntype : String + Exceptions : None + Caller : General + Status : At Risk - May change to BLOB, remove setter functionality + +=cut + +sub binary_string{ + my ($self, $bin_string) = @_; + + if (defined $bin_string){ + #added v67 + warn "RegualtoryFeature::binary_string setter functionality is being removed\n"; + $self->{binary_string} = $bin_string; + } + + return $self->{binary_string}; +} + + +=head2 stable_id + + Arg [1] : (optional) string - stable_id e.g ENSR00000000001 + Example : my $stable_id = $feature->stable_id(); + Description: Getter and setter for the stable_id attribute for this feature. + Returntype : string + Exceptions : None + Caller : General + Status : At risk - setter functionality to be removed + +=cut + +sub stable_id { + my $self = shift; + + if (@_){ + #added v67 + warn "RegualtoryFeature::stable_id setter functionality is being removed\n"; + $self->{stable_id} = shift; + } + + return $self->{stable_id}; +} + + +=head2 regulatory_attributes + + Arg [1] : String (optional) - Class of feature e.g. annotated or motif + Example : print "Regulatory Attributes:\n\t".join("\n\t", (map $_->feature_type->name, @{$feature->regulatory_attributes()}))."\n"; + Description: Getter for the regulatory_attributes for this feature. + Returntype : ARRAYREF + Exceptions : Throws if feature class not valid + Caller : General + Status : At Risk + +=cut + + +sub regulatory_attributes{ + my ($self, $feature_class) = @_; + + #Incorporating the MFs like this does cause some redundancy in the DB + #But will speed up display of the RegFeat image including the MFs + #Redefine the cache to have class keys e.g. TFBS, OpenChromatin, Histone Mods + #Can't do this as we need the table key to be able to fetch the features + #Really need something to be able to draw the image first, then create the zmenu details later. + + my %adaptors = ( + 'annotated' => $self->adaptor->db->get_AnnotatedFeatureAdaptor, + 'motif' => $self->adaptor->db->get_MotifFeatureAdaptor, + #external + ); + + my @fclasses; + + if(defined $feature_class){ + + if(exists $adaptors{lc($feature_class)}){ + @fclasses = (lc($feature_class)); + } + else{ + throw("The feature class you specified is not valid:\t$feature_class\n". + "Please use one of:\t".join(', ', keys %adaptors)); + } + } + else{ + @fclasses = keys %adaptors; + } + + foreach my $fclass(@fclasses){ + #Now structured as hash to facilitate faster has_attribute method + #Very little difference to array based cache + + my @attr_dbIDs = keys %{$self->{'attribute_cache'}{$fclass}}; + + + if(scalar(@attr_dbIDs) > 0){ + + if( ! ( ref($self->{'regulatory_attributes'}{$fclass}->[0]) && + ref($self->{'regulatory_attributes'}{$fclass}->[0])->isa('Bio::EnsEMBL::Feature') )){ + + $adaptors{$fclass}->force_reslice(1);#So we don't lose attrs which aren't on the slice + $self->{'regulatory_attributes'}{$fclass} = $adaptors{$fclass}->fetch_all_by_dbID_list(\@attr_dbIDs, $self->slice); + + #Having problems here if we are trying to project between Y PAR and X + #Current dest_slice mapping code simply changes the start end values assuming the slice is correct + #currently no test for seq_region name match + + + #foreach my $attr(@{ $self->{'regulatory_attributes'}{$fclass}}){ + # warn "$attr ".$attr->dbID." ".$attr->feature_Slice->name."\n"; + #} + + + $adaptors{$fclass}->force_reslice(0); + + #Problems here with attrs not being returning when they do not lie on dest slice + #i.e. core projected to cell line, but dest slice only over laps a region of the core which + #actually has no attrs. + #either use the feature_Slice and reslice everthing to the dest slice + #or skip test in attr obj_frm_sth? + # + + #This method transfers to the query slice, do not use fetch_by_dbID + #It also should use _final_clause + #This is currently only specified in the MotifFeatureAdaptor + #as these are required to be sorted to relate to the structure string + + #but we are stll storing in has where order is not preserved!! + #so this will not match order of underlying strcture! + + #separate so we can have ordered array returned + #do we need redundant caches? + #defo need db id cache for 'has' methods + + #foreach my $attr(@{$fclass_attrs}){ + # $self->{'regulatory_attributes'}{$fclass}{$attr->dbID} = $attr; + #} + } + } + else{ + $self->{'regulatory_attributes'}{$fclass} = []; + } + } + + return [ map { @{$self->{'regulatory_attributes'}{$_}} } @fclasses ]; +} + +=head2 has_attribute + + Arg [1] : Attribute Feature dbID + Arg [2] : Attribute Feature class e.g. motif or annotated + Example : if($regf->has_attribute($af->dbID, 'annotated'){ #do something here } + Description: Identifies whether this RegualtoryFeature has a given attribute + Returntype : Boolean + Exceptions : Throws if args are not defined + Caller : General + Status : Stable + +=cut + + +sub has_attribute{ + my ($self, $dbID, $fclass) = @_; + + throw('Must provide a dbID and a Feature class argument') if ! $dbID && $fclass; + + return exists ${$self->attribute_cache}{$fclass}{$dbID}; +} + +=head2 get_focus_attributes + + Arg [1] : None + Example : my @focus_attrs = @{$regf->get_focus_attributes}; + Description: Getter for the focus features of this RegualtoryFeature, used to defined the core region + Returntype : ARRAYREF + Exceptions : None + Caller : General + Status : Stable + +=cut + +sub get_focus_attributes{ + my $self = shift; + + if(! exists $self->{'focus_attributes'} || + ! @{$self->{'focus_attributes'}}){ + $self->_sort_attributes; + } + + + return $self->{'focus_attributes'}; +} + + +=head2 get_nonfocus_attributes + + Arg [1] : None + Example : my @non_focus_attrs = @{$regf->get_nonfocus_attributes}; + Description: Getter for the non-focus features of this RegulatoryFeature, used to defined + the non core region i.e. the whiskers. + Returntype : ARRAYREF + Exceptions : None + Caller : General + Status : Stable + +=cut + +sub get_nonfocus_attributes{ + my $self = shift; + + #Test focus here as we may not have any nonfocus + #But focus will show that we have sorted already + if(! exists $self->{'focus_attributes'} || + ! @{$self->{'focus_attributes'}}){ + $self->_sort_attributes; + } + + return $self->{'nonfocus_attributes'}; +} + +#Add pod here + +sub _sort_attributes{ + my $self = shift; + + $self->{'focus_attributes'} = []; + $self->{'nonfocus_attributes'} = []; + + foreach my $attrf(@{$self->regulatory_attributes}){ + + if($attrf->isa('Bio::EnsEMBL::Funcgen::MotifFeature') || + $attrf->feature_set->is_focus_set){ + push @{$self->{'focus_attributes'}}, $attrf; + } + else{ + push @{$self->{'nonfocus_attributes'}}, $attrf; + } + } + + return; +} + + +=head2 attribute_cache + + Arg [1] : optional - HASHREF of attribute table keys with values as either a list of attribute + feature dbIDs or objects. If passing object, any MotifFeature objects should be in position + order with respect to the slice. + Example : $feature->attribute_cache(\%attribute_feature_info); + Description: Setter for the regulatory_attribute cache for this feature. This is a short cut method used by the + regulatory build and the webcode to avoid unnecessary fetching and enable enable lazy loading + Returntype : HASHREF + Exceptions : Throws if trying to overwrite existing cache + Caller : RegulatoryFeatureAdaptor.pm and build_regulatory_features.pl + Status : At Risk + +=cut + + +sub attribute_cache{ + my ($self, $attr_hash) = @_; + +# if(! defined $attr_hash){ +# $self->regulatory_attributes; #Fetch the attrs? +# +# +# #Do we need to do this now we have separated the caches? +# +# } + + if(defined $attr_hash){ + + foreach my $fclass(keys %{$attr_hash}){ + + if(exists $self->{'attribute_cache'}{$fclass}){ + throw("You are trying to overwrite a pre-existing regulatory attribute cache entry for feature class:\t$fclass"); + } + else{ + $self->{'attribute_cache'}{$fclass} = $attr_hash->{$fclass}; + } + } + } + + return $self->{'attribute_cache'} || {}; +} + + +=head2 bound_start + + Example : my $bound_start = $feature->bound_start(); + Description: Getter for the bound_start attribute for this feature. + Gives the 5' most start value of the underlying attribute + features. + Returntype : string + Exceptions : None + Caller : General + Status : Stable + +=cut + +sub bound_start { + my $self = shift; + $self->get_underlying_structure if ! defined $self->{'bound_start'}; + + return $self->{'bound_start'}; +} + + +=head2 bound_end + + Example : my $bound_end = $feature->bound_start(); + Description: Getter for the bound_end attribute for this feature. + Gives the 3' most end value of the underlying attribute + features. + Returntype : string + Exceptions : None + Caller : General + Status : Stable + +=cut + +sub bound_end { + my $self = shift; + $self->get_underlying_structure if ! defined $self->{'bound_end'}; + + return $self->{'bound_end'}; +} + + +=head2 bound_seq_region_start + + Example : my $bound_sr_start = $feature->bound_seq_region_start; + Description: Getter for the seq_region bound_start attribute for this feature. + Gives the 5' most start value of the underlying attribute + features. + Returntype : string + Exceptions : None + Caller : General + Status : Stable + +=cut + +sub bound_seq_region_start { + my $self = shift; + + if(! defined $self->{bound_seq_region_start}){ + + if($self->slice->strand == 1){ + $self->{bound_seq_region_start} = $self->slice->start + $self->bound_start - 1; + } + else{ #strand = -1 + $self->{bound_seq_region_start} = $self->slice->end - $self->bound_end + 1; + } + } + + return $self->{bound_seq_region_start}; +} + + +=head2 bound_seq_region_end + + Example : my $bound_sr_end = $feature->bound_seq_region_end; + Description: Getter for the seq_region bound_end attribute for this feature. + Gives the 3' most end value of the underlying attribute + features. + Returntype : string + Exceptions : None + Caller : General + Status : Stable + +=cut + + +sub bound_seq_region_end { + my $self = shift; + + if(! defined $self->{bound_seq_region_end}){ + + if($self->slice->strand == 1){ + $self->{bound_seq_region_end} = $self->slice->start + $self->bound_end - 1; + } + else{ #strand = -1 + $self->{bound_seq_region_end} = $self->slice->end - $self->bound_start + 1; + } + } + + return $self->{bound_seq_region_end}; +} + + + + + + +=head2 is_projected + + Arg [1] : optional - boolean + Example : if($regf->is_projected){ #do something different here } + Description: Getter/Setter for the projected attribute. + Returntype : boolean + Exceptions : None + Caller : General + Status : At risk - remove setter functionality + +=cut + +sub is_projected { + my $self = shift; + + if(@_){ + #added v67 + warn "RegulatoryFeature::is_projected setter functionality is being removed\n"; + $self->{'projected'} = shift; + } + + return $self->{'projected'}; +} + + +=head2 get_underlying_structure + + Example : $self->get_underlying_structure() if(! exists $self->{'bound_end'}); + Description: Getter for the bound_end attribute for this feature. + Gives the 3' most end value of the underlying attribute + features. + Returntype : string + Exceptions : None + Caller : General + Status : At Risk + +=cut + +#This should really be precomputed and stored in the DB to avoid the MF attr fetch +#Need to be aware of projecting here, as these will expire if we project after this method is called + +sub get_underlying_structure{ + my $self = shift; + + if(! defined $self->{underlying_structure}){ + + my @attrs = @{$self->regulatory_attributes()}; + + if(! @attrs){ + throw('No underlying regulatory_attribute features to get_underlying_structure for dbID '.$self->dbID); + #This should never happen even with a projection build + } + else{ + + + #We only need to set the bounds when storing on full slice/seq_region values + #else they should be fetched from the DB + + if(! defined $self->{'bound_start'}){ + + my (@start_ends); + + foreach my $attr(@attrs){ + push @start_ends, ($attr->start, $attr->end); + } + + #Accounts for core region, where data may be absent on this cell type + push @start_ends, ($self->start, $self->end); + + @start_ends = sort { $a <=> $b } @start_ends; + + $self->{'bound_end'} = pop @start_ends; + $self->{'bound_start'} = shift @start_ends; + + #Need to account for projection build here + #i.e. attr extremeties may not extend past core start/end + + if($self->is_projected){ + $self->{'bound_end'} = $self->end if $self->end > $self->{'bound_end'}; + $self->{'bound_start'} = $self->start if $self->start < $self->{'bound_start'}; + } + } + + #Now deal with MotifFeature loci + my @mf_loci; + + foreach my $mf(@{$self->regulatory_attributes('motif')}){ + push @mf_loci, ($mf->start, $mf->end); + } + + $self->{underlying_structure} = [$self->{'bound_start'}, $self->start, @mf_loci, $self->end, $self->{'bound_end'}]; + } + } + + return $self->{underlying_structure}; +} + + +=head2 is_unique_to_FeatureSets + + Arg[1] : optional - ARRAYREF of regualtory Bio::EnsEMBL::Funcgen::FeatureSet objects + Default is FeatureSet of given RegulatoryFeature, else need to be + defined explicitly. + Arg[2] : optional - HASHREF Params hash: + { + include_projected => 0|1, # Boolean, include 'projected' features + } + Example : if($reg_feat->is_unique_to_FeatureSets($fsets)}{ + #then do some analysis here + } + Description: Identifies whether this RegualtoryFeature is unique to a set of FeatureSets. + Returntype : boolean + Exceptions : Throw is arguments not stored or valid. + Caller : General + Status : At risk + +=cut + +#Probably want to add in an FeatureType constraint here +#e.g. so we can compare active vs inactive or poised promoters + +#omit include_multi doesn't make sense here + +sub is_unique_to_FeatureSets{ + my ($self, $fsets, $params_hash) = @_; + + $fsets ||= [$self->feature_set]; + my @fset_ids; + + + #define to avoid deref fails below. + $params_hash ||= {}; + if(ref($params_hash) ne 'HASH'){ + throw("The params hash argument must be a valid HASHREF:\t".ref($params_hash)); + } + + + foreach my $fset(@$fsets){ + #assume we have an adaptor set + $self->adaptor->db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::FeatureSet', $fset); + + if($fset->feature_class ne 'regulatory'){ + throw('Found non-regulatory FeatureSet'); + } + + push @fset_ids, $fset->dbID; + } + + my $stable_id; + ($stable_id = $self->stable_id) =~ s/^[A-Z0]+//; + + + my @other_rf_ids = @{$self->adaptor->_fetch_other_dbIDs_by_stable_feature_set_ids + ($stable_id, + \@fset_ids, + { include_projected => $params_hash->{include_projected}} )}; + + return (@other_rf_ids) ? 0 : 1; +} + + + +=head2 get_other_RegulatoryFeatures + + Arg[1] : optional - ARRAYREF of regualtory Bio::EnsEMBL::Funcgen::FeatureSet objects + Default is FeatureSet of given RegulatoryFeature, else need to be + defined explicitly. + Arg[2] : optional - HASHREF Params hash: + { + include_projected => 0|1, # Boolean, include 'projected' features + include_multicell => 0|1, # Boolean, include MultiCell features + } + Example : my @other_fsets = @{$reg_feat->get_other_FeatureSets($fsets)}; + Description: Gets other RegualtoryFeatures (linked via the stable ID) which are present in the + specified list of FeatureSets. + Returntype : ARRAYREF of Bio::EnsEMBL::Funcgen::RegulatoryFeature objects + Exceptions : Throw is arguments not stored or valid. + Caller : General + Status : At risk + +=cut + +sub get_other_RegulatoryFeatures{ + my ($self, $fsets, $params_hash) = @_; + + #define to avoid deref fails below. + $params_hash ||= {}; + if(ref($params_hash) ne 'HASH'){ + throw("The params hash argument must be a valid HASHREF:\t".ref($params_hash)); + } + + $fsets ||= [$self->feature_set]; + my @fset_ids; + + foreach my $fset(@$fsets){ + #assume we have an adaptor set + $self->adaptor->db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::FeatureSet', $fset); + + if($fset->feature_class ne 'regulatory'){ + throw('Found non-regulatory FeatureSet'); + } + + push @fset_ids, $fset->dbID; + } + + my $stable_id; + ($stable_id = $self->stable_id) =~ s/^[A-Z0]+//; + + my @other_fsets_ids = @{$self->adaptor->_fetch_other_dbIDs_by_stable_feature_set_ids + ($stable_id, \@fset_ids, + { + include_projected => $params_hash->{include_projected}, + include_multicell => $params_hash->{include_multicell}, + })}; + + return $self->adaptor->fetch_all_by_dbID_list(\@other_fsets_ids); +} + + + +1; + +__END__