comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/MotifFeature.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 #
2 # Ensembl module for Bio::EnsEMBL::Funcgen::MotifFeature
3 #
4 # You may distribute this module under the same terms as Perl itself
5
6 =head1 NAME
7
8 Bio::EnsEMBL::MotifFeature - A module to represent a feature mapping as based
9 on a binding matrix e.g position weight matrix
10
11 =head1 SYNOPSIS
12
13 use Bio::EnsEMBL::Funcgen::MotifFeature;
14
15 my $feature = Bio::EnsEMBL::Funcgen::MotifFeature->new(
16 -SLICE => $chr_1_slice,
17 -START => 1_000_000,
18 -END => 1_000_024,
19 -STRAND => -1,
20 -DISPLAY_LABEL => $text,
21 -SCORE => $score,
22 -FEATURE_TYPE => $ftype,
23 -INTERDB_STABLE_ID => 1,
24 );
25
26
27
28 =head1 DESCRIPTION
29
30 A MotifFeature object represents the genomic placement of a sequence motif.
31 For example a transcription factor binding site motif associated with a
32 position weight matrix. These are generally associated with AnnotatedFeatures
33 of the corresponding FeatureType.
34
35
36 =head1 SEE ALSO
37
38 Bio::EnsEMBL::Funcgen::DBSQL::MotifFeatureAdaptor
39
40
41 =head1 LICENSE
42
43 Copyright (c) 1999-2009 The European Bioinformatics Institute and
44 Genome Research Limited. All rights reserved.
45
46 This software is distributed under a modified Apache license.
47 For license details, please see
48
49 http://www.ensembl.org/info/about/code_licence.html
50
51 =head1 CONTACT
52
53 Please email comments or questions to the public Ensembl
54 developers list at <ensembl-dev@ebi.ac.uk>.
55
56 Questions may also be sent to the Ensembl help desk at
57 <helpdesk@ensembl.org>.
58
59
60 =cut
61
62 use strict;
63 use warnings;
64
65 package Bio::EnsEMBL::Funcgen::MotifFeature;
66
67 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
68 use Bio::EnsEMBL::Utils::Exception qw( throw );
69 use Bio::EnsEMBL::Feature;
70 use Bio::EnsEMBL::Funcgen::Storable;
71
72
73 use vars qw(@ISA);
74 @ISA = qw(Bio::EnsEMBL::Feature Bio::EnsEMBL::Funcgen::Storable);
75
76
77 =head2 new
78
79
80 Arg [-SCORE] : (optional) int - Score assigned by analysis pipeline
81 Arg [-SLICE] : Bio::EnsEMBL::Slice - The slice on which this feature is.
82 Arg [-BINDING_MATRIX] : Bio::EnsEMBL::Funcgen::BindingMatrix - Binding Matrix associated to this feature.
83 Arg [-START] : int - The start coordinate of this feature relative to the start of the slice
84 it is sitting on. Coordinates start at 1 and are inclusive.
85 Arg [-END] : int -The end coordinate of this feature relative to the start of the slice
86 it is sitting on. Coordinates start at 1 and are inclusive.
87 Arg [-DISPLAY_LABEL] : string - Display label for this feature
88 Arg [-STRAND] : int - The orientation of this feature. Valid values are 1, -1 and 0.
89 Arg [-dbID] : (optional) int - Internal database ID.
90 Arg [-ADAPTOR] : (optional) Bio::EnsEMBL::DBSQL::BaseAdaptor - Database adaptor.
91
92 Example : my $feature = Bio::EnsEMBL::Funcgen::MotifFeature->new(
93 -SLICE => $chr_1_slice,
94 -START => 1_000_000,
95 -END => 1_000_024,
96 -STRAND => -1,
97 -BINDING_MATRIX => $bm,
98 -DISPLAY_LABEL => $text,
99 -SCORE => $score,
100 -INTERDB_STABLE_ID => 1,
101 );
102
103
104 Description: Constructor for MotifFeature objects.
105 Returntype : Bio::EnsEMBL::Funcgen::MotifFeature
106 Exceptions : Throws if BindingMatrix not valid
107 Caller : General
108 Status : Medium Risk
109
110 =cut
111
112 sub new {
113 my $caller = shift;
114
115 my $class = ref($caller) || $caller;
116 my $self = $class->SUPER::new(@_);
117 my $bmatrix;
118 ($self->{'score'}, $bmatrix, $self->{'display_label'}, $self->{'interdb_stable_id'})
119 = rearrange(['SCORE', 'BINDING_MATRIX', 'DISPLAY_LABEL', 'INTERDB_STABLE_ID'], @_);
120
121
122 if(! (ref($bmatrix) && $bmatrix->isa('Bio::EnsEMBL::Funcgen::BindingMatrix'))){
123 throw('You must pass be a valid Bio::EnsEMBL::Funcgen::BindingMatrix');
124 }
125
126 $self->{'binding_matrix'} = $bmatrix;
127
128 return $self;
129 }
130
131 =head2 new_fast
132
133 Args : Hashref with all internal attributes set
134 Example : none
135 Description: Quick and dirty version of new. Only works if the calling code
136 is very disciplined.
137 Returntype : Bio::EnsEMBL::Funcgen::MotifFeature
138 Exceptions : None
139 Caller : General
140 Status : At Risk
141
142 =cut
143
144 sub new_fast {
145 return bless ($_[1], $_[0]);
146 }
147
148
149
150 =head2 binding_matrix
151
152 Example : my $bmatrix_name = $mfeat->binding_matrix->name;
153 Description: Getter for the BindingMatrix attribute for this feature.
154 Returntype : Bio::EnsEMBL::Funcgen::BindingMatrix
155 Exceptions : None
156 Caller : General
157 Status : At risk
158
159 =cut
160
161 sub binding_matrix{
162 return $_[0]->{'binding_matrix'};
163 }
164
165 =head2 score
166
167 Example : my $score = $feature->score();
168 Description: Getter for the score attribute for this feature.
169 Returntype : double
170 Exceptions : None
171 Caller : General
172 Status : Low Risk
173
174 =cut
175
176 sub score {
177 return $_[0]->{'score'};
178 }
179
180
181 =head2 display_label
182
183 Example : my $label = $feature->display_label();
184 Description: Getter for the display label of this feature.
185 Returntype : str
186 Exceptions : None
187 Caller : General
188 Status : Medium Risk
189
190 =cut
191
192 sub display_label {
193 #If not set in new before store, a default is stored as:
194 #$mf->binding_matrix->feature_type->name.':'.$mf->binding_matrix->name();
195
196 return $_[0]->{'display_label'};
197 }
198
199
200
201 =head2 associated_annotated_features
202
203 Example : my @associated_afs = @{$feature->associated_annotated_features()};
204 Description: Getter/Setter for associated AnntoatedFeatures.
205 Returntype : ARRAYREF of Bio::EnsEMBL::Funcgen:AnnotatedFeature objects
206 Exceptions : None
207 Caller : General
208 Status : At risk - may change to associated_transcript_factor_features
209
210 =cut
211
212 sub associated_annotated_features{
213 my ($self, $afs) = @_;
214
215 #Lazy load as we don't want to have to do a join on all features when most will not have any
216
217
218 if(defined $afs){
219
220 if(ref($afs) eq 'ARRAY'){
221
222 foreach my $af(@$afs){
223
224 if( ! $af->isa('Bio::EnsEMBL::Funcgen::AnnotatedFeature') ){
225 throw('You must pass and ARRAYREF of stored Bio::EnsEMBL::Funcgen::AnnotatedFeature objects');
226 }
227 #test is stored in adaptor
228 }
229
230 if(defined $self->{'associated_annotated_features'}){
231 warn('You are overwriting associated_annotated_features for the MotifFeature');
232 #we could simply add the new ones and make them NR.
233 }
234
235 $self->{'associated_annotated_features'} = $afs;
236 }
237 else{
238 throw('You must pass and ARRAYREF of stored Bio::EnsEMBL::Funcgen::AnnotatedFeature objects');
239 }
240 }
241
242
243 if(! defined $self->{'associated_annotated_features'}){
244
245 if(defined $self->adaptor){
246 $self->{'associated_annotated_features'} =
247 $self->adaptor->db->get_AnnotatedFeatureAdaptor->fetch_all_by_associated_MotifFeature($self);
248 }
249 }
250
251 #This has the potential to return undef, or an arrayref which may be empty.
252 return $self->{'associated_annotated_features'};
253 }
254
255
256 =head2 is_position_informative
257
258 Arg [1] : int - 1-based position within the Motif
259 Example : $mf->is_position_informative($pos);
260 Description: Indicates if a given position within the motif is highly informative
261 Returntype : boolean
262 Exceptions : throws if position out of bounds ( < 1 or > length of motif)
263 Caller : General
264 Status : At High risk
265
266 =cut
267
268 sub is_position_informative {
269 my ($self,$position) = (shift,shift);
270 throw "Need a position" if(!defined($position));
271 throw "Position out of bounds" if(($position<1) || ($position>$self->binding_matrix->length));
272 #if on the opposite strand, then need to reverse complement the position
273 if($self->strand < 0){ $position = $self->binding_matrix->length - $position + 1; }
274 return $self->binding_matrix->is_position_informative($position);
275 }
276
277
278 =head2 infer_variation_consequence
279
280 Arg [1] : Bio::EnsEMBL::Variation::VariationFeature
281 Arg [2] : boolean - 1 if results in linear scale (default is log scale)
282 Example : my $vfs = $vf_adaptor->fetch_all_by_Slice($slice_adaptor->fetch_by_region('toplevel',$mf->seq_region_name,$mf->start,$mf->end,$mf->strand));
283 foreach my $vf (@{$vfs}){
284 print $mf->infer_variation_consequence($vf)."\n";
285 }
286
287 Description: Calculates the potential influence of a given variation in a motif feature.
288 Returns a value between -100% (lost) and +100% (gain) indicating the difference
289 in strength between the motif in the reference and after the variation.
290
291 The variation feature slice needs to be the motif feature, including the strand
292 Returntype : float
293 Exceptions : throws if argument is not a Bio::EnsEMBL::Variation::VariationFeature
294 throws if the variation feature is not contained in the motif feature
295 Caller : General
296 Status : At High risk
297
298 =cut
299
300 sub infer_variation_consequence{
301 my ($self, $vf, $linear) = (shift, shift, shift);
302
303 if(! $vf->isa('Bio::EnsEMBL::Variation::VariationFeature')){
304 throw "We expect a Bio::EnsEMBL::Variation::VariationFeature object, not a ".$vf->class;
305 }
306
307 #See if these checks are required or if there are more efficient ways to do the checks...
308 #if(($self->slice->seq_region_name ne $vf->slice->seq_region_name) ||
309 # ($self->slice->start != $vf->slice->start) ||
310 # ($self->slice->end != $vf->slice->end) ){
311 # throw "Variation and Motif are on distinct slices";
312 #}
313 #if(!(($vf->start >= $self->start) && ($vf->end <= $self->end ))){
314 # throw "Variation should be entirely contained in the Motif";
315 #}
316
317 if( ($vf->start < 1) || ($vf->end > $self->binding_matrix->length)){ throw "Variation not entirely contained in the motif feature"; }
318
319 if(!($vf->allele_string =~ /^[ACTG]\/[ACTG]$/)){ throw "Currently only SNPs are supported"; }
320
321 my $ref_seq = $self->seq;
322
323 my $variant = $vf->allele_string;
324 $variant =~ s/^.*\///;
325 $variant =~ s/\s*$//;
326
327 my ($vf_start,$vf_end) = ($vf->start, $vf->end);
328 if($vf->strand == -1){
329 #Needed for insertions
330 $variant = reverse($variant);
331 $variant =~ tr/ACGT/TGCA/;
332 }
333 my $var_seq = substr($ref_seq,0, $vf_start - 1).$variant.substr($ref_seq, $vf_start+length($variant)-1);
334
335 my $bm = $self->{'binding_matrix'};
336 return 100 * ($bm->relative_affinity($var_seq,$linear) - $bm->relative_affinity($ref_seq,$linear));
337
338 }
339
340 =head2 interdb_stable_id
341
342 Arg [1] : (optional) int - stable_id e.g 1
343 Example : my $idb_sid = $feature->interdb_stable_id();
344 Description: Getter for the interdb_stable_id attribute for this feature.
345 This is simply to avoid using internal db IDs for inter DB linking
346 Returntype : int
347 Exceptions : None
348 Caller : General
349 Status : At Risk
350
351 =cut
352
353 sub interdb_stable_id {
354 return $_[0]->{'interdb_stable_id'};
355 }
356
357
358
359 1;
360