0
|
1 #
|
|
2 # Ensembl module for Bio::EnsEMBL::Funcgen::MotifFeature
|
|
3 #
|
|
4 # You may distribute this module under the same terms as Perl itself
|
|
5
|
|
6 =head1 NAME
|
|
7
|
|
8 Bio::EnsEMBL::MotifFeature - A module to represent a feature mapping as based
|
|
9 on a binding matrix e.g position weight matrix
|
|
10
|
|
11 =head1 SYNOPSIS
|
|
12
|
|
13 use Bio::EnsEMBL::Funcgen::MotifFeature;
|
|
14
|
|
15 my $feature = Bio::EnsEMBL::Funcgen::MotifFeature->new(
|
|
16 -SLICE => $chr_1_slice,
|
|
17 -START => 1_000_000,
|
|
18 -END => 1_000_024,
|
|
19 -STRAND => -1,
|
|
20 -DISPLAY_LABEL => $text,
|
|
21 -SCORE => $score,
|
|
22 -FEATURE_TYPE => $ftype,
|
|
23 -INTERDB_STABLE_ID => 1,
|
|
24 );
|
|
25
|
|
26
|
|
27
|
|
28 =head1 DESCRIPTION
|
|
29
|
|
30 A MotifFeature object represents the genomic placement of a sequence motif.
|
|
31 For example a transcription factor binding site motif associated with a
|
|
32 position weight matrix. These are generally associated with AnnotatedFeatures
|
|
33 of the corresponding FeatureType.
|
|
34
|
|
35
|
|
36 =head1 SEE ALSO
|
|
37
|
|
38 Bio::EnsEMBL::Funcgen::DBSQL::MotifFeatureAdaptor
|
|
39
|
|
40
|
|
41 =head1 LICENSE
|
|
42
|
|
43 Copyright (c) 1999-2009 The European Bioinformatics Institute and
|
|
44 Genome Research Limited. All rights reserved.
|
|
45
|
|
46 This software is distributed under a modified Apache license.
|
|
47 For license details, please see
|
|
48
|
|
49 http://www.ensembl.org/info/about/code_licence.html
|
|
50
|
|
51 =head1 CONTACT
|
|
52
|
|
53 Please email comments or questions to the public Ensembl
|
|
54 developers list at <ensembl-dev@ebi.ac.uk>.
|
|
55
|
|
56 Questions may also be sent to the Ensembl help desk at
|
|
57 <helpdesk@ensembl.org>.
|
|
58
|
|
59
|
|
60 =cut
|
|
61
|
|
62 use strict;
|
|
63 use warnings;
|
|
64
|
|
65 package Bio::EnsEMBL::Funcgen::MotifFeature;
|
|
66
|
|
67 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
|
|
68 use Bio::EnsEMBL::Utils::Exception qw( throw );
|
|
69 use Bio::EnsEMBL::Feature;
|
|
70 use Bio::EnsEMBL::Funcgen::Storable;
|
|
71
|
|
72
|
|
73 use vars qw(@ISA);
|
|
74 @ISA = qw(Bio::EnsEMBL::Feature Bio::EnsEMBL::Funcgen::Storable);
|
|
75
|
|
76
|
|
77 =head2 new
|
|
78
|
|
79
|
|
80 Arg [-SCORE] : (optional) int - Score assigned by analysis pipeline
|
|
81 Arg [-SLICE] : Bio::EnsEMBL::Slice - The slice on which this feature is.
|
|
82 Arg [-BINDING_MATRIX] : Bio::EnsEMBL::Funcgen::BindingMatrix - Binding Matrix associated to this feature.
|
|
83 Arg [-START] : int - The start coordinate of this feature relative to the start of the slice
|
|
84 it is sitting on. Coordinates start at 1 and are inclusive.
|
|
85 Arg [-END] : int -The end coordinate of this feature relative to the start of the slice
|
|
86 it is sitting on. Coordinates start at 1 and are inclusive.
|
|
87 Arg [-DISPLAY_LABEL] : string - Display label for this feature
|
|
88 Arg [-STRAND] : int - The orientation of this feature. Valid values are 1, -1 and 0.
|
|
89 Arg [-dbID] : (optional) int - Internal database ID.
|
|
90 Arg [-ADAPTOR] : (optional) Bio::EnsEMBL::DBSQL::BaseAdaptor - Database adaptor.
|
|
91
|
|
92 Example : my $feature = Bio::EnsEMBL::Funcgen::MotifFeature->new(
|
|
93 -SLICE => $chr_1_slice,
|
|
94 -START => 1_000_000,
|
|
95 -END => 1_000_024,
|
|
96 -STRAND => -1,
|
|
97 -BINDING_MATRIX => $bm,
|
|
98 -DISPLAY_LABEL => $text,
|
|
99 -SCORE => $score,
|
|
100 -INTERDB_STABLE_ID => 1,
|
|
101 );
|
|
102
|
|
103
|
|
104 Description: Constructor for MotifFeature objects.
|
|
105 Returntype : Bio::EnsEMBL::Funcgen::MotifFeature
|
|
106 Exceptions : Throws if BindingMatrix not valid
|
|
107 Caller : General
|
|
108 Status : Medium Risk
|
|
109
|
|
110 =cut
|
|
111
|
|
112 sub new {
|
|
113 my $caller = shift;
|
|
114
|
|
115 my $class = ref($caller) || $caller;
|
|
116 my $self = $class->SUPER::new(@_);
|
|
117 my $bmatrix;
|
|
118 ($self->{'score'}, $bmatrix, $self->{'display_label'}, $self->{'interdb_stable_id'})
|
|
119 = rearrange(['SCORE', 'BINDING_MATRIX', 'DISPLAY_LABEL', 'INTERDB_STABLE_ID'], @_);
|
|
120
|
|
121
|
|
122 if(! (ref($bmatrix) && $bmatrix->isa('Bio::EnsEMBL::Funcgen::BindingMatrix'))){
|
|
123 throw('You must pass be a valid Bio::EnsEMBL::Funcgen::BindingMatrix');
|
|
124 }
|
|
125
|
|
126 $self->{'binding_matrix'} = $bmatrix;
|
|
127
|
|
128 return $self;
|
|
129 }
|
|
130
|
|
131 =head2 new_fast
|
|
132
|
|
133 Args : Hashref with all internal attributes set
|
|
134 Example : none
|
|
135 Description: Quick and dirty version of new. Only works if the calling code
|
|
136 is very disciplined.
|
|
137 Returntype : Bio::EnsEMBL::Funcgen::MotifFeature
|
|
138 Exceptions : None
|
|
139 Caller : General
|
|
140 Status : At Risk
|
|
141
|
|
142 =cut
|
|
143
|
|
144 sub new_fast {
|
|
145 return bless ($_[1], $_[0]);
|
|
146 }
|
|
147
|
|
148
|
|
149
|
|
150 =head2 binding_matrix
|
|
151
|
|
152 Example : my $bmatrix_name = $mfeat->binding_matrix->name;
|
|
153 Description: Getter for the BindingMatrix attribute for this feature.
|
|
154 Returntype : Bio::EnsEMBL::Funcgen::BindingMatrix
|
|
155 Exceptions : None
|
|
156 Caller : General
|
|
157 Status : At risk
|
|
158
|
|
159 =cut
|
|
160
|
|
161 sub binding_matrix{
|
|
162 return $_[0]->{'binding_matrix'};
|
|
163 }
|
|
164
|
|
165 =head2 score
|
|
166
|
|
167 Example : my $score = $feature->score();
|
|
168 Description: Getter for the score attribute for this feature.
|
|
169 Returntype : double
|
|
170 Exceptions : None
|
|
171 Caller : General
|
|
172 Status : Low Risk
|
|
173
|
|
174 =cut
|
|
175
|
|
176 sub score {
|
|
177 return $_[0]->{'score'};
|
|
178 }
|
|
179
|
|
180
|
|
181 =head2 display_label
|
|
182
|
|
183 Example : my $label = $feature->display_label();
|
|
184 Description: Getter for the display label of this feature.
|
|
185 Returntype : str
|
|
186 Exceptions : None
|
|
187 Caller : General
|
|
188 Status : Medium Risk
|
|
189
|
|
190 =cut
|
|
191
|
|
192 sub display_label {
|
|
193 #If not set in new before store, a default is stored as:
|
|
194 #$mf->binding_matrix->feature_type->name.':'.$mf->binding_matrix->name();
|
|
195
|
|
196 return $_[0]->{'display_label'};
|
|
197 }
|
|
198
|
|
199
|
|
200
|
|
201 =head2 associated_annotated_features
|
|
202
|
|
203 Example : my @associated_afs = @{$feature->associated_annotated_features()};
|
|
204 Description: Getter/Setter for associated AnntoatedFeatures.
|
|
205 Returntype : ARRAYREF of Bio::EnsEMBL::Funcgen:AnnotatedFeature objects
|
|
206 Exceptions : None
|
|
207 Caller : General
|
|
208 Status : At risk - may change to associated_transcript_factor_features
|
|
209
|
|
210 =cut
|
|
211
|
|
212 sub associated_annotated_features{
|
|
213 my ($self, $afs) = @_;
|
|
214
|
|
215 #Lazy load as we don't want to have to do a join on all features when most will not have any
|
|
216
|
|
217
|
|
218 if(defined $afs){
|
|
219
|
|
220 if(ref($afs) eq 'ARRAY'){
|
|
221
|
|
222 foreach my $af(@$afs){
|
|
223
|
|
224 if( ! $af->isa('Bio::EnsEMBL::Funcgen::AnnotatedFeature') ){
|
|
225 throw('You must pass and ARRAYREF of stored Bio::EnsEMBL::Funcgen::AnnotatedFeature objects');
|
|
226 }
|
|
227 #test is stored in adaptor
|
|
228 }
|
|
229
|
|
230 if(defined $self->{'associated_annotated_features'}){
|
|
231 warn('You are overwriting associated_annotated_features for the MotifFeature');
|
|
232 #we could simply add the new ones and make them NR.
|
|
233 }
|
|
234
|
|
235 $self->{'associated_annotated_features'} = $afs;
|
|
236 }
|
|
237 else{
|
|
238 throw('You must pass and ARRAYREF of stored Bio::EnsEMBL::Funcgen::AnnotatedFeature objects');
|
|
239 }
|
|
240 }
|
|
241
|
|
242
|
|
243 if(! defined $self->{'associated_annotated_features'}){
|
|
244
|
|
245 if(defined $self->adaptor){
|
|
246 $self->{'associated_annotated_features'} =
|
|
247 $self->adaptor->db->get_AnnotatedFeatureAdaptor->fetch_all_by_associated_MotifFeature($self);
|
|
248 }
|
|
249 }
|
|
250
|
|
251 #This has the potential to return undef, or an arrayref which may be empty.
|
|
252 return $self->{'associated_annotated_features'};
|
|
253 }
|
|
254
|
|
255
|
|
256 =head2 is_position_informative
|
|
257
|
|
258 Arg [1] : int - 1-based position within the Motif
|
|
259 Example : $mf->is_position_informative($pos);
|
|
260 Description: Indicates if a given position within the motif is highly informative
|
|
261 Returntype : boolean
|
|
262 Exceptions : throws if position out of bounds ( < 1 or > length of motif)
|
|
263 Caller : General
|
|
264 Status : At High risk
|
|
265
|
|
266 =cut
|
|
267
|
|
268 sub is_position_informative {
|
|
269 my ($self,$position) = (shift,shift);
|
|
270 throw "Need a position" if(!defined($position));
|
|
271 throw "Position out of bounds" if(($position<1) || ($position>$self->binding_matrix->length));
|
|
272 #if on the opposite strand, then need to reverse complement the position
|
|
273 if($self->strand < 0){ $position = $self->binding_matrix->length - $position + 1; }
|
|
274 return $self->binding_matrix->is_position_informative($position);
|
|
275 }
|
|
276
|
|
277
|
|
278 =head2 infer_variation_consequence
|
|
279
|
|
280 Arg [1] : Bio::EnsEMBL::Variation::VariationFeature
|
|
281 Arg [2] : boolean - 1 if results in linear scale (default is log scale)
|
|
282 Example : my $vfs = $vf_adaptor->fetch_all_by_Slice($slice_adaptor->fetch_by_region('toplevel',$mf->seq_region_name,$mf->start,$mf->end,$mf->strand));
|
|
283 foreach my $vf (@{$vfs}){
|
|
284 print $mf->infer_variation_consequence($vf)."\n";
|
|
285 }
|
|
286
|
|
287 Description: Calculates the potential influence of a given variation in a motif feature.
|
|
288 Returns a value between -100% (lost) and +100% (gain) indicating the difference
|
|
289 in strength between the motif in the reference and after the variation.
|
|
290
|
|
291 The variation feature slice needs to be the motif feature, including the strand
|
|
292 Returntype : float
|
|
293 Exceptions : throws if argument is not a Bio::EnsEMBL::Variation::VariationFeature
|
|
294 throws if the variation feature is not contained in the motif feature
|
|
295 Caller : General
|
|
296 Status : At High risk
|
|
297
|
|
298 =cut
|
|
299
|
|
300 sub infer_variation_consequence{
|
|
301 my ($self, $vf, $linear) = (shift, shift, shift);
|
|
302
|
|
303 if(! $vf->isa('Bio::EnsEMBL::Variation::VariationFeature')){
|
|
304 throw "We expect a Bio::EnsEMBL::Variation::VariationFeature object, not a ".$vf->class;
|
|
305 }
|
|
306
|
|
307 #See if these checks are required or if there are more efficient ways to do the checks...
|
|
308 #if(($self->slice->seq_region_name ne $vf->slice->seq_region_name) ||
|
|
309 # ($self->slice->start != $vf->slice->start) ||
|
|
310 # ($self->slice->end != $vf->slice->end) ){
|
|
311 # throw "Variation and Motif are on distinct slices";
|
|
312 #}
|
|
313 #if(!(($vf->start >= $self->start) && ($vf->end <= $self->end ))){
|
|
314 # throw "Variation should be entirely contained in the Motif";
|
|
315 #}
|
|
316
|
|
317 if( ($vf->start < 1) || ($vf->end > $self->binding_matrix->length)){ throw "Variation not entirely contained in the motif feature"; }
|
|
318
|
|
319 if(!($vf->allele_string =~ /^[ACTG]\/[ACTG]$/)){ throw "Currently only SNPs are supported"; }
|
|
320
|
|
321 my $ref_seq = $self->seq;
|
|
322
|
|
323 my $variant = $vf->allele_string;
|
|
324 $variant =~ s/^.*\///;
|
|
325 $variant =~ s/\s*$//;
|
|
326
|
|
327 my ($vf_start,$vf_end) = ($vf->start, $vf->end);
|
|
328 if($vf->strand == -1){
|
|
329 #Needed for insertions
|
|
330 $variant = reverse($variant);
|
|
331 $variant =~ tr/ACGT/TGCA/;
|
|
332 }
|
|
333 my $var_seq = substr($ref_seq,0, $vf_start - 1).$variant.substr($ref_seq, $vf_start+length($variant)-1);
|
|
334
|
|
335 my $bm = $self->{'binding_matrix'};
|
|
336 return 100 * ($bm->relative_affinity($var_seq,$linear) - $bm->relative_affinity($ref_seq,$linear));
|
|
337
|
|
338 }
|
|
339
|
|
340 =head2 interdb_stable_id
|
|
341
|
|
342 Arg [1] : (optional) int - stable_id e.g 1
|
|
343 Example : my $idb_sid = $feature->interdb_stable_id();
|
|
344 Description: Getter for the interdb_stable_id attribute for this feature.
|
|
345 This is simply to avoid using internal db IDs for inter DB linking
|
|
346 Returntype : int
|
|
347 Exceptions : None
|
|
348 Caller : General
|
|
349 Status : At Risk
|
|
350
|
|
351 =cut
|
|
352
|
|
353 sub interdb_stable_id {
|
|
354 return $_[0]->{'interdb_stable_id'};
|
|
355 }
|
|
356
|
|
357
|
|
358
|
|
359 1;
|
|
360
|