Mercurial > repos > mahtabm > ensembl
view variant_effect_predictor/Bio/EnsEMBL/Funcgen/RegulatoryFeature.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line source
=head1 LICENSE Copyright (c) 1999-2012 The European Bioinformatics Institute and Genome Research Limited. All rights reserved. This software is distributed under a modified Apache license. For license details, please see http://www.ensembl.org/info/about/code_licence.html =head1 CONTACT Please email comments or questions to the public Ensembl developers list at <ensembl-dev@ebi.ac.uk>. Questions may also be sent to the Ensembl help desk at <helpdesk@ensembl.org>. =head1 NAME Bio::EnsEMBL::Funcgen::RegulatoryFeature =head1 SYNOPSIS use Bio::EnsEMBL::Registry; use Bio::EnsEMBL::Funcgen::RegulatoryFeature; my $reg = Bio::EnsEMBL::Registry->load_adaptors_from_db ( -host => 'ensembldb.ensembl.org', -user => 'anonymous' ); my $regfeat_adaptor = $reg->get_adaptor($species, 'funcgen', 'RegulatoryFeature'); ### Creating/storing a RegulatoryFeature Set ### my $feature = Bio::EnsEMBL::Funcgen::RegulatoryFeature->new ( -SLICE => $chr_1_slice, -START => 1_000_000, -END => 1_000_024, -STRAND => 0, -DISPLAY_LABEL => $text, -FEATURE_SET => $fset, -FEATURE_TYPE => $reg_ftype, -ATTRIBUTE_CACHE => \%attr_cache, ); my ($stored_feat) = @{$regfeat_adaptor->store([$feature])}; ### Fetching some RegualtoryFeatures my @regfeats = @{$regfeat_adaptor->fetch_all_by_Slice_FeatureSets($slice, \@fsets)}; ### Print the bound and core loci print join(' - ', ($reg_feat->bound_start, $reg_feat->start, $reg_feat->end, $reg_feat->bound_end)."\n"; ### Getting some supporting evidence for a RegualtoryFeatures my @reg_attrs = @{$reg_feat->regulatory_attributes('annotated')}; =head1 DESCRIPTION A RegulatoryFeature object represents the output of the Ensembl RegulatoryBuild: http://www.ensembl.org/info/docs/funcgen/regulatory_build.html It is comprises many possible histone, transcription factor, polymerase and open chromatin features, which have been combined to provide a summary view and classification of the regulatory status at a given loci. =head1 SEE ALSO Bio::EnsEMBL:Funcgen::DBSQL::RegulatoryFeatureAdaptor Bio::EnsEMBL::Funcgen::SetFeature =cut package Bio::EnsEMBL::Funcgen::RegulatoryFeature; use Bio::EnsEMBL::Utils::Argument qw( rearrange ); use Bio::EnsEMBL::Utils::Exception qw( throw ); use strict; use warnings; use base qw(Bio::EnsEMBL::Funcgen::SetFeature); #@ISA =head2 new Arg [-SLICE] : Bio::EnsEMBL::Slice - The slice on which this feature is located. Arg [-START] : int - The start coordinate of this feature relative to the start of the slice it is sitting on. Coordinates start at 1 and are inclusive. Arg [-END] : int -The end coordinate of this feature relative to the start of the slice it is sitting on. Coordinates start at 1 and are inclusive. Arg [-FEATURE_SET] : Bio::EnsEMBL::Funcgen::FeatureSet - Regulatory Feature set Arg [-FEATURE_TYPE] : Bio::EnsEMBL::Funcgen::FeatureType - Regulatory Feature sub type Arg [-BINARY_STRING] : (optional) string - Regulatory Build binary string Arg [-STABLE_ID] : (optional) string - Stable ID for this RegualtoryFeature e.g. ENSR00000000001 Arg [-DISPLAY_LABEL] : (optional) string - Display label for this feature Arg [-ATTRIBUTE_CACHE] : (optional) HASHREF of feature class dbID|Object lists Arg [-PROJECTED] : (optional) boolean - Flag to specify whether this feature has been projected or not Arg [-dbID] : (optional) int - Internal database ID. Arg [-ADAPTOR] : (optional) Bio::EnsEMBL::DBSQL::BaseAdaptor - Database adaptor. Example : my $feature = Bio::EnsEMBL::Funcgen::RegulatoryFeature->new( -SLICE => $chr_1_slice, -START => 1000000, -END => 1000024, -DISPLAY_LABEL => $text, -FEATURE_SET => $fset, -FEATURE_TYPE => $reg_ftype, -ATTRIBUTE_CACHE => \%attr_cache, ); Description: Constructor for RegulatoryFeature objects. Returntype : Bio::EnsEMBL::Funcgen::RegulatoryFeature Exceptions : None Caller : General Status : Stable =cut sub new { my $caller = shift; my $class = ref($caller) || $caller; my $self = $class->SUPER::new(@_); my ($stable_id, $attr_cache, $bin_string, $projected) = rearrange(['STABLE_ID', 'ATTRIBUTE_CACHE', 'BINARY_STRING', 'PROJECTED'], @_); #None of these are mandatory at creation #under different use cases $self->{binary_string} = $bin_string if defined $bin_string; $self->{stable_id} = $stable_id if defined $stable_id; $self->{projected} = $projected if defined $projected; $self->attribute_cache($attr_cache) if $attr_cache; return $self; } =head2 display_label Example : my $label = $feature->display_label(); Description: Getter for the display label of this feature. Returntype : String Exceptions : None Caller : General Status : Stable =cut sub display_label { my $self = shift; if(! defined $self->{'display_label'}){ $self->{'display_label'} = $self->feature_type->name.' Regulatory Feature'; if( defined $self->cell_type ){ $self->{'display_label'} .= ' - '.$self->cell_type->name; } } return $self->{display_label}; } =head2 display_id Example : print $feature->display_id(); Description: This method returns a string that is considered to be the 'display' identifier. In this case the stable Id is preferred Returntype : String Exceptions : none Caller : web drawing code, Region Report tool Status : Stable =cut sub display_id { return $_[0]->{stable_id}; } =head2 binary_string Arg [1] : optional string - binary string from regulatory build Example : my $bin_string = $feature->binary_string(); Description: Getter and setter for the binary_string for this feature. Returntype : String Exceptions : None Caller : General Status : At Risk - May change to BLOB, remove setter functionality =cut sub binary_string{ my ($self, $bin_string) = @_; if (defined $bin_string){ #added v67 warn "RegualtoryFeature::binary_string setter functionality is being removed\n"; $self->{binary_string} = $bin_string; } return $self->{binary_string}; } =head2 stable_id Arg [1] : (optional) string - stable_id e.g ENSR00000000001 Example : my $stable_id = $feature->stable_id(); Description: Getter and setter for the stable_id attribute for this feature. Returntype : string Exceptions : None Caller : General Status : At risk - setter functionality to be removed =cut sub stable_id { my $self = shift; if (@_){ #added v67 warn "RegualtoryFeature::stable_id setter functionality is being removed\n"; $self->{stable_id} = shift; } return $self->{stable_id}; } =head2 regulatory_attributes Arg [1] : String (optional) - Class of feature e.g. annotated or motif Example : print "Regulatory Attributes:\n\t".join("\n\t", (map $_->feature_type->name, @{$feature->regulatory_attributes()}))."\n"; Description: Getter for the regulatory_attributes for this feature. Returntype : ARRAYREF Exceptions : Throws if feature class not valid Caller : General Status : At Risk =cut sub regulatory_attributes{ my ($self, $feature_class) = @_; #Incorporating the MFs like this does cause some redundancy in the DB #But will speed up display of the RegFeat image including the MFs #Redefine the cache to have class keys e.g. TFBS, OpenChromatin, Histone Mods #Can't do this as we need the table key to be able to fetch the features #Really need something to be able to draw the image first, then create the zmenu details later. my %adaptors = ( 'annotated' => $self->adaptor->db->get_AnnotatedFeatureAdaptor, 'motif' => $self->adaptor->db->get_MotifFeatureAdaptor, #external ); my @fclasses; if(defined $feature_class){ if(exists $adaptors{lc($feature_class)}){ @fclasses = (lc($feature_class)); } else{ throw("The feature class you specified is not valid:\t$feature_class\n". "Please use one of:\t".join(', ', keys %adaptors)); } } else{ @fclasses = keys %adaptors; } foreach my $fclass(@fclasses){ #Now structured as hash to facilitate faster has_attribute method #Very little difference to array based cache my @attr_dbIDs = keys %{$self->{'attribute_cache'}{$fclass}}; if(scalar(@attr_dbIDs) > 0){ if( ! ( ref($self->{'regulatory_attributes'}{$fclass}->[0]) && ref($self->{'regulatory_attributes'}{$fclass}->[0])->isa('Bio::EnsEMBL::Feature') )){ $adaptors{$fclass}->force_reslice(1);#So we don't lose attrs which aren't on the slice $self->{'regulatory_attributes'}{$fclass} = $adaptors{$fclass}->fetch_all_by_dbID_list(\@attr_dbIDs, $self->slice); #Having problems here if we are trying to project between Y PAR and X #Current dest_slice mapping code simply changes the start end values assuming the slice is correct #currently no test for seq_region name match #foreach my $attr(@{ $self->{'regulatory_attributes'}{$fclass}}){ # warn "$attr ".$attr->dbID." ".$attr->feature_Slice->name."\n"; #} $adaptors{$fclass}->force_reslice(0); #Problems here with attrs not being returning when they do not lie on dest slice #i.e. core projected to cell line, but dest slice only over laps a region of the core which #actually has no attrs. #either use the feature_Slice and reslice everthing to the dest slice #or skip test in attr obj_frm_sth? # #This method transfers to the query slice, do not use fetch_by_dbID #It also should use _final_clause #This is currently only specified in the MotifFeatureAdaptor #as these are required to be sorted to relate to the structure string #but we are stll storing in has where order is not preserved!! #so this will not match order of underlying strcture! #separate so we can have ordered array returned #do we need redundant caches? #defo need db id cache for 'has' methods #foreach my $attr(@{$fclass_attrs}){ # $self->{'regulatory_attributes'}{$fclass}{$attr->dbID} = $attr; #} } } else{ $self->{'regulatory_attributes'}{$fclass} = []; } } return [ map { @{$self->{'regulatory_attributes'}{$_}} } @fclasses ]; } =head2 has_attribute Arg [1] : Attribute Feature dbID Arg [2] : Attribute Feature class e.g. motif or annotated Example : if($regf->has_attribute($af->dbID, 'annotated'){ #do something here } Description: Identifies whether this RegualtoryFeature has a given attribute Returntype : Boolean Exceptions : Throws if args are not defined Caller : General Status : Stable =cut sub has_attribute{ my ($self, $dbID, $fclass) = @_; throw('Must provide a dbID and a Feature class argument') if ! $dbID && $fclass; return exists ${$self->attribute_cache}{$fclass}{$dbID}; } =head2 get_focus_attributes Arg [1] : None Example : my @focus_attrs = @{$regf->get_focus_attributes}; Description: Getter for the focus features of this RegualtoryFeature, used to defined the core region Returntype : ARRAYREF Exceptions : None Caller : General Status : Stable =cut sub get_focus_attributes{ my $self = shift; if(! exists $self->{'focus_attributes'} || ! @{$self->{'focus_attributes'}}){ $self->_sort_attributes; } return $self->{'focus_attributes'}; } =head2 get_nonfocus_attributes Arg [1] : None Example : my @non_focus_attrs = @{$regf->get_nonfocus_attributes}; Description: Getter for the non-focus features of this RegulatoryFeature, used to defined the non core region i.e. the whiskers. Returntype : ARRAYREF Exceptions : None Caller : General Status : Stable =cut sub get_nonfocus_attributes{ my $self = shift; #Test focus here as we may not have any nonfocus #But focus will show that we have sorted already if(! exists $self->{'focus_attributes'} || ! @{$self->{'focus_attributes'}}){ $self->_sort_attributes; } return $self->{'nonfocus_attributes'}; } #Add pod here sub _sort_attributes{ my $self = shift; $self->{'focus_attributes'} = []; $self->{'nonfocus_attributes'} = []; foreach my $attrf(@{$self->regulatory_attributes}){ if($attrf->isa('Bio::EnsEMBL::Funcgen::MotifFeature') || $attrf->feature_set->is_focus_set){ push @{$self->{'focus_attributes'}}, $attrf; } else{ push @{$self->{'nonfocus_attributes'}}, $attrf; } } return; } =head2 attribute_cache Arg [1] : optional - HASHREF of attribute table keys with values as either a list of attribute feature dbIDs or objects. If passing object, any MotifFeature objects should be in position order with respect to the slice. Example : $feature->attribute_cache(\%attribute_feature_info); Description: Setter for the regulatory_attribute cache for this feature. This is a short cut method used by the regulatory build and the webcode to avoid unnecessary fetching and enable enable lazy loading Returntype : HASHREF Exceptions : Throws if trying to overwrite existing cache Caller : RegulatoryFeatureAdaptor.pm and build_regulatory_features.pl Status : At Risk =cut sub attribute_cache{ my ($self, $attr_hash) = @_; # if(! defined $attr_hash){ # $self->regulatory_attributes; #Fetch the attrs? # # # #Do we need to do this now we have separated the caches? # # } if(defined $attr_hash){ foreach my $fclass(keys %{$attr_hash}){ if(exists $self->{'attribute_cache'}{$fclass}){ throw("You are trying to overwrite a pre-existing regulatory attribute cache entry for feature class:\t$fclass"); } else{ $self->{'attribute_cache'}{$fclass} = $attr_hash->{$fclass}; } } } return $self->{'attribute_cache'} || {}; } =head2 bound_start Example : my $bound_start = $feature->bound_start(); Description: Getter for the bound_start attribute for this feature. Gives the 5' most start value of the underlying attribute features. Returntype : string Exceptions : None Caller : General Status : Stable =cut sub bound_start { my $self = shift; $self->get_underlying_structure if ! defined $self->{'bound_start'}; return $self->{'bound_start'}; } =head2 bound_end Example : my $bound_end = $feature->bound_start(); Description: Getter for the bound_end attribute for this feature. Gives the 3' most end value of the underlying attribute features. Returntype : string Exceptions : None Caller : General Status : Stable =cut sub bound_end { my $self = shift; $self->get_underlying_structure if ! defined $self->{'bound_end'}; return $self->{'bound_end'}; } =head2 bound_seq_region_start Example : my $bound_sr_start = $feature->bound_seq_region_start; Description: Getter for the seq_region bound_start attribute for this feature. Gives the 5' most start value of the underlying attribute features. Returntype : string Exceptions : None Caller : General Status : Stable =cut sub bound_seq_region_start { my $self = shift; if(! defined $self->{bound_seq_region_start}){ if($self->slice->strand == 1){ $self->{bound_seq_region_start} = $self->slice->start + $self->bound_start - 1; } else{ #strand = -1 $self->{bound_seq_region_start} = $self->slice->end - $self->bound_end + 1; } } return $self->{bound_seq_region_start}; } =head2 bound_seq_region_end Example : my $bound_sr_end = $feature->bound_seq_region_end; Description: Getter for the seq_region bound_end attribute for this feature. Gives the 3' most end value of the underlying attribute features. Returntype : string Exceptions : None Caller : General Status : Stable =cut sub bound_seq_region_end { my $self = shift; if(! defined $self->{bound_seq_region_end}){ if($self->slice->strand == 1){ $self->{bound_seq_region_end} = $self->slice->start + $self->bound_end - 1; } else{ #strand = -1 $self->{bound_seq_region_end} = $self->slice->end - $self->bound_start + 1; } } return $self->{bound_seq_region_end}; } =head2 is_projected Arg [1] : optional - boolean Example : if($regf->is_projected){ #do something different here } Description: Getter/Setter for the projected attribute. Returntype : boolean Exceptions : None Caller : General Status : At risk - remove setter functionality =cut sub is_projected { my $self = shift; if(@_){ #added v67 warn "RegulatoryFeature::is_projected setter functionality is being removed\n"; $self->{'projected'} = shift; } return $self->{'projected'}; } =head2 get_underlying_structure Example : $self->get_underlying_structure() if(! exists $self->{'bound_end'}); Description: Getter for the bound_end attribute for this feature. Gives the 3' most end value of the underlying attribute features. Returntype : string Exceptions : None Caller : General Status : At Risk =cut #This should really be precomputed and stored in the DB to avoid the MF attr fetch #Need to be aware of projecting here, as these will expire if we project after this method is called sub get_underlying_structure{ my $self = shift; if(! defined $self->{underlying_structure}){ my @attrs = @{$self->regulatory_attributes()}; if(! @attrs){ throw('No underlying regulatory_attribute features to get_underlying_structure for dbID '.$self->dbID); #This should never happen even with a projection build } else{ #We only need to set the bounds when storing on full slice/seq_region values #else they should be fetched from the DB if(! defined $self->{'bound_start'}){ my (@start_ends); foreach my $attr(@attrs){ push @start_ends, ($attr->start, $attr->end); } #Accounts for core region, where data may be absent on this cell type push @start_ends, ($self->start, $self->end); @start_ends = sort { $a <=> $b } @start_ends; $self->{'bound_end'} = pop @start_ends; $self->{'bound_start'} = shift @start_ends; #Need to account for projection build here #i.e. attr extremeties may not extend past core start/end if($self->is_projected){ $self->{'bound_end'} = $self->end if $self->end > $self->{'bound_end'}; $self->{'bound_start'} = $self->start if $self->start < $self->{'bound_start'}; } } #Now deal with MotifFeature loci my @mf_loci; foreach my $mf(@{$self->regulatory_attributes('motif')}){ push @mf_loci, ($mf->start, $mf->end); } $self->{underlying_structure} = [$self->{'bound_start'}, $self->start, @mf_loci, $self->end, $self->{'bound_end'}]; } } return $self->{underlying_structure}; } =head2 is_unique_to_FeatureSets Arg[1] : optional - ARRAYREF of regualtory Bio::EnsEMBL::Funcgen::FeatureSet objects Default is FeatureSet of given RegulatoryFeature, else need to be defined explicitly. Arg[2] : optional - HASHREF Params hash: { include_projected => 0|1, # Boolean, include 'projected' features } Example : if($reg_feat->is_unique_to_FeatureSets($fsets)}{ #then do some analysis here } Description: Identifies whether this RegualtoryFeature is unique to a set of FeatureSets. Returntype : boolean Exceptions : Throw is arguments not stored or valid. Caller : General Status : At risk =cut #Probably want to add in an FeatureType constraint here #e.g. so we can compare active vs inactive or poised promoters #omit include_multi doesn't make sense here sub is_unique_to_FeatureSets{ my ($self, $fsets, $params_hash) = @_; $fsets ||= [$self->feature_set]; my @fset_ids; #define to avoid deref fails below. $params_hash ||= {}; if(ref($params_hash) ne 'HASH'){ throw("The params hash argument must be a valid HASHREF:\t".ref($params_hash)); } foreach my $fset(@$fsets){ #assume we have an adaptor set $self->adaptor->db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::FeatureSet', $fset); if($fset->feature_class ne 'regulatory'){ throw('Found non-regulatory FeatureSet'); } push @fset_ids, $fset->dbID; } my $stable_id; ($stable_id = $self->stable_id) =~ s/^[A-Z0]+//; my @other_rf_ids = @{$self->adaptor->_fetch_other_dbIDs_by_stable_feature_set_ids ($stable_id, \@fset_ids, { include_projected => $params_hash->{include_projected}} )}; return (@other_rf_ids) ? 0 : 1; } =head2 get_other_RegulatoryFeatures Arg[1] : optional - ARRAYREF of regualtory Bio::EnsEMBL::Funcgen::FeatureSet objects Default is FeatureSet of given RegulatoryFeature, else need to be defined explicitly. Arg[2] : optional - HASHREF Params hash: { include_projected => 0|1, # Boolean, include 'projected' features include_multicell => 0|1, # Boolean, include MultiCell features } Example : my @other_fsets = @{$reg_feat->get_other_FeatureSets($fsets)}; Description: Gets other RegualtoryFeatures (linked via the stable ID) which are present in the specified list of FeatureSets. Returntype : ARRAYREF of Bio::EnsEMBL::Funcgen::RegulatoryFeature objects Exceptions : Throw is arguments not stored or valid. Caller : General Status : At risk =cut sub get_other_RegulatoryFeatures{ my ($self, $fsets, $params_hash) = @_; #define to avoid deref fails below. $params_hash ||= {}; if(ref($params_hash) ne 'HASH'){ throw("The params hash argument must be a valid HASHREF:\t".ref($params_hash)); } $fsets ||= [$self->feature_set]; my @fset_ids; foreach my $fset(@$fsets){ #assume we have an adaptor set $self->adaptor->db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::FeatureSet', $fset); if($fset->feature_class ne 'regulatory'){ throw('Found non-regulatory FeatureSet'); } push @fset_ids, $fset->dbID; } my $stable_id; ($stable_id = $self->stable_id) =~ s/^[A-Z0]+//; my @other_fsets_ids = @{$self->adaptor->_fetch_other_dbIDs_by_stable_feature_set_ids ($stable_id, \@fset_ids, { include_projected => $params_hash->{include_projected}, include_multicell => $params_hash->{include_multicell}, })}; return $self->adaptor->fetch_all_by_dbID_list(\@other_fsets_ids); } 1; __END__