Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/MotifFeature.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # | |
2 # Ensembl module for Bio::EnsEMBL::Funcgen::MotifFeature | |
3 # | |
4 # You may distribute this module under the same terms as Perl itself | |
5 | |
6 =head1 NAME | |
7 | |
8 Bio::EnsEMBL::MotifFeature - A module to represent a feature mapping as based | |
9 on a binding matrix e.g position weight matrix | |
10 | |
11 =head1 SYNOPSIS | |
12 | |
13 use Bio::EnsEMBL::Funcgen::MotifFeature; | |
14 | |
15 my $feature = Bio::EnsEMBL::Funcgen::MotifFeature->new( | |
16 -SLICE => $chr_1_slice, | |
17 -START => 1_000_000, | |
18 -END => 1_000_024, | |
19 -STRAND => -1, | |
20 -DISPLAY_LABEL => $text, | |
21 -SCORE => $score, | |
22 -FEATURE_TYPE => $ftype, | |
23 -INTERDB_STABLE_ID => 1, | |
24 ); | |
25 | |
26 | |
27 | |
28 =head1 DESCRIPTION | |
29 | |
30 A MotifFeature object represents the genomic placement of a sequence motif. | |
31 For example a transcription factor binding site motif associated with a | |
32 position weight matrix. These are generally associated with AnnotatedFeatures | |
33 of the corresponding FeatureType. | |
34 | |
35 | |
36 =head1 SEE ALSO | |
37 | |
38 Bio::EnsEMBL::Funcgen::DBSQL::MotifFeatureAdaptor | |
39 | |
40 | |
41 =head1 LICENSE | |
42 | |
43 Copyright (c) 1999-2009 The European Bioinformatics Institute and | |
44 Genome Research Limited. All rights reserved. | |
45 | |
46 This software is distributed under a modified Apache license. | |
47 For license details, please see | |
48 | |
49 http://www.ensembl.org/info/about/code_licence.html | |
50 | |
51 =head1 CONTACT | |
52 | |
53 Please email comments or questions to the public Ensembl | |
54 developers list at <ensembl-dev@ebi.ac.uk>. | |
55 | |
56 Questions may also be sent to the Ensembl help desk at | |
57 <helpdesk@ensembl.org>. | |
58 | |
59 | |
60 =cut | |
61 | |
62 use strict; | |
63 use warnings; | |
64 | |
65 package Bio::EnsEMBL::Funcgen::MotifFeature; | |
66 | |
67 use Bio::EnsEMBL::Utils::Argument qw( rearrange ); | |
68 use Bio::EnsEMBL::Utils::Exception qw( throw ); | |
69 use Bio::EnsEMBL::Feature; | |
70 use Bio::EnsEMBL::Funcgen::Storable; | |
71 | |
72 | |
73 use vars qw(@ISA); | |
74 @ISA = qw(Bio::EnsEMBL::Feature Bio::EnsEMBL::Funcgen::Storable); | |
75 | |
76 | |
77 =head2 new | |
78 | |
79 | |
80 Arg [-SCORE] : (optional) int - Score assigned by analysis pipeline | |
81 Arg [-SLICE] : Bio::EnsEMBL::Slice - The slice on which this feature is. | |
82 Arg [-BINDING_MATRIX] : Bio::EnsEMBL::Funcgen::BindingMatrix - Binding Matrix associated to this feature. | |
83 Arg [-START] : int - The start coordinate of this feature relative to the start of the slice | |
84 it is sitting on. Coordinates start at 1 and are inclusive. | |
85 Arg [-END] : int -The end coordinate of this feature relative to the start of the slice | |
86 it is sitting on. Coordinates start at 1 and are inclusive. | |
87 Arg [-DISPLAY_LABEL] : string - Display label for this feature | |
88 Arg [-STRAND] : int - The orientation of this feature. Valid values are 1, -1 and 0. | |
89 Arg [-dbID] : (optional) int - Internal database ID. | |
90 Arg [-ADAPTOR] : (optional) Bio::EnsEMBL::DBSQL::BaseAdaptor - Database adaptor. | |
91 | |
92 Example : my $feature = Bio::EnsEMBL::Funcgen::MotifFeature->new( | |
93 -SLICE => $chr_1_slice, | |
94 -START => 1_000_000, | |
95 -END => 1_000_024, | |
96 -STRAND => -1, | |
97 -BINDING_MATRIX => $bm, | |
98 -DISPLAY_LABEL => $text, | |
99 -SCORE => $score, | |
100 -INTERDB_STABLE_ID => 1, | |
101 ); | |
102 | |
103 | |
104 Description: Constructor for MotifFeature objects. | |
105 Returntype : Bio::EnsEMBL::Funcgen::MotifFeature | |
106 Exceptions : Throws if BindingMatrix not valid | |
107 Caller : General | |
108 Status : Medium Risk | |
109 | |
110 =cut | |
111 | |
112 sub new { | |
113 my $caller = shift; | |
114 | |
115 my $class = ref($caller) || $caller; | |
116 my $self = $class->SUPER::new(@_); | |
117 my $bmatrix; | |
118 ($self->{'score'}, $bmatrix, $self->{'display_label'}, $self->{'interdb_stable_id'}) | |
119 = rearrange(['SCORE', 'BINDING_MATRIX', 'DISPLAY_LABEL', 'INTERDB_STABLE_ID'], @_); | |
120 | |
121 | |
122 if(! (ref($bmatrix) && $bmatrix->isa('Bio::EnsEMBL::Funcgen::BindingMatrix'))){ | |
123 throw('You must pass be a valid Bio::EnsEMBL::Funcgen::BindingMatrix'); | |
124 } | |
125 | |
126 $self->{'binding_matrix'} = $bmatrix; | |
127 | |
128 return $self; | |
129 } | |
130 | |
131 =head2 new_fast | |
132 | |
133 Args : Hashref with all internal attributes set | |
134 Example : none | |
135 Description: Quick and dirty version of new. Only works if the calling code | |
136 is very disciplined. | |
137 Returntype : Bio::EnsEMBL::Funcgen::MotifFeature | |
138 Exceptions : None | |
139 Caller : General | |
140 Status : At Risk | |
141 | |
142 =cut | |
143 | |
144 sub new_fast { | |
145 return bless ($_[1], $_[0]); | |
146 } | |
147 | |
148 | |
149 | |
150 =head2 binding_matrix | |
151 | |
152 Example : my $bmatrix_name = $mfeat->binding_matrix->name; | |
153 Description: Getter for the BindingMatrix attribute for this feature. | |
154 Returntype : Bio::EnsEMBL::Funcgen::BindingMatrix | |
155 Exceptions : None | |
156 Caller : General | |
157 Status : At risk | |
158 | |
159 =cut | |
160 | |
161 sub binding_matrix{ | |
162 return $_[0]->{'binding_matrix'}; | |
163 } | |
164 | |
165 =head2 score | |
166 | |
167 Example : my $score = $feature->score(); | |
168 Description: Getter for the score attribute for this feature. | |
169 Returntype : double | |
170 Exceptions : None | |
171 Caller : General | |
172 Status : Low Risk | |
173 | |
174 =cut | |
175 | |
176 sub score { | |
177 return $_[0]->{'score'}; | |
178 } | |
179 | |
180 | |
181 =head2 display_label | |
182 | |
183 Example : my $label = $feature->display_label(); | |
184 Description: Getter for the display label of this feature. | |
185 Returntype : str | |
186 Exceptions : None | |
187 Caller : General | |
188 Status : Medium Risk | |
189 | |
190 =cut | |
191 | |
192 sub display_label { | |
193 #If not set in new before store, a default is stored as: | |
194 #$mf->binding_matrix->feature_type->name.':'.$mf->binding_matrix->name(); | |
195 | |
196 return $_[0]->{'display_label'}; | |
197 } | |
198 | |
199 | |
200 | |
201 =head2 associated_annotated_features | |
202 | |
203 Example : my @associated_afs = @{$feature->associated_annotated_features()}; | |
204 Description: Getter/Setter for associated AnntoatedFeatures. | |
205 Returntype : ARRAYREF of Bio::EnsEMBL::Funcgen:AnnotatedFeature objects | |
206 Exceptions : None | |
207 Caller : General | |
208 Status : At risk - may change to associated_transcript_factor_features | |
209 | |
210 =cut | |
211 | |
212 sub associated_annotated_features{ | |
213 my ($self, $afs) = @_; | |
214 | |
215 #Lazy load as we don't want to have to do a join on all features when most will not have any | |
216 | |
217 | |
218 if(defined $afs){ | |
219 | |
220 if(ref($afs) eq 'ARRAY'){ | |
221 | |
222 foreach my $af(@$afs){ | |
223 | |
224 if( ! $af->isa('Bio::EnsEMBL::Funcgen::AnnotatedFeature') ){ | |
225 throw('You must pass and ARRAYREF of stored Bio::EnsEMBL::Funcgen::AnnotatedFeature objects'); | |
226 } | |
227 #test is stored in adaptor | |
228 } | |
229 | |
230 if(defined $self->{'associated_annotated_features'}){ | |
231 warn('You are overwriting associated_annotated_features for the MotifFeature'); | |
232 #we could simply add the new ones and make them NR. | |
233 } | |
234 | |
235 $self->{'associated_annotated_features'} = $afs; | |
236 } | |
237 else{ | |
238 throw('You must pass and ARRAYREF of stored Bio::EnsEMBL::Funcgen::AnnotatedFeature objects'); | |
239 } | |
240 } | |
241 | |
242 | |
243 if(! defined $self->{'associated_annotated_features'}){ | |
244 | |
245 if(defined $self->adaptor){ | |
246 $self->{'associated_annotated_features'} = | |
247 $self->adaptor->db->get_AnnotatedFeatureAdaptor->fetch_all_by_associated_MotifFeature($self); | |
248 } | |
249 } | |
250 | |
251 #This has the potential to return undef, or an arrayref which may be empty. | |
252 return $self->{'associated_annotated_features'}; | |
253 } | |
254 | |
255 | |
256 =head2 is_position_informative | |
257 | |
258 Arg [1] : int - 1-based position within the Motif | |
259 Example : $mf->is_position_informative($pos); | |
260 Description: Indicates if a given position within the motif is highly informative | |
261 Returntype : boolean | |
262 Exceptions : throws if position out of bounds ( < 1 or > length of motif) | |
263 Caller : General | |
264 Status : At High risk | |
265 | |
266 =cut | |
267 | |
268 sub is_position_informative { | |
269 my ($self,$position) = (shift,shift); | |
270 throw "Need a position" if(!defined($position)); | |
271 throw "Position out of bounds" if(($position<1) || ($position>$self->binding_matrix->length)); | |
272 #if on the opposite strand, then need to reverse complement the position | |
273 if($self->strand < 0){ $position = $self->binding_matrix->length - $position + 1; } | |
274 return $self->binding_matrix->is_position_informative($position); | |
275 } | |
276 | |
277 | |
278 =head2 infer_variation_consequence | |
279 | |
280 Arg [1] : Bio::EnsEMBL::Variation::VariationFeature | |
281 Arg [2] : boolean - 1 if results in linear scale (default is log scale) | |
282 Example : my $vfs = $vf_adaptor->fetch_all_by_Slice($slice_adaptor->fetch_by_region('toplevel',$mf->seq_region_name,$mf->start,$mf->end,$mf->strand)); | |
283 foreach my $vf (@{$vfs}){ | |
284 print $mf->infer_variation_consequence($vf)."\n"; | |
285 } | |
286 | |
287 Description: Calculates the potential influence of a given variation in a motif feature. | |
288 Returns a value between -100% (lost) and +100% (gain) indicating the difference | |
289 in strength between the motif in the reference and after the variation. | |
290 | |
291 The variation feature slice needs to be the motif feature, including the strand | |
292 Returntype : float | |
293 Exceptions : throws if argument is not a Bio::EnsEMBL::Variation::VariationFeature | |
294 throws if the variation feature is not contained in the motif feature | |
295 Caller : General | |
296 Status : At High risk | |
297 | |
298 =cut | |
299 | |
300 sub infer_variation_consequence{ | |
301 my ($self, $vf, $linear) = (shift, shift, shift); | |
302 | |
303 if(! $vf->isa('Bio::EnsEMBL::Variation::VariationFeature')){ | |
304 throw "We expect a Bio::EnsEMBL::Variation::VariationFeature object, not a ".$vf->class; | |
305 } | |
306 | |
307 #See if these checks are required or if there are more efficient ways to do the checks... | |
308 #if(($self->slice->seq_region_name ne $vf->slice->seq_region_name) || | |
309 # ($self->slice->start != $vf->slice->start) || | |
310 # ($self->slice->end != $vf->slice->end) ){ | |
311 # throw "Variation and Motif are on distinct slices"; | |
312 #} | |
313 #if(!(($vf->start >= $self->start) && ($vf->end <= $self->end ))){ | |
314 # throw "Variation should be entirely contained in the Motif"; | |
315 #} | |
316 | |
317 if( ($vf->start < 1) || ($vf->end > $self->binding_matrix->length)){ throw "Variation not entirely contained in the motif feature"; } | |
318 | |
319 if(!($vf->allele_string =~ /^[ACTG]\/[ACTG]$/)){ throw "Currently only SNPs are supported"; } | |
320 | |
321 my $ref_seq = $self->seq; | |
322 | |
323 my $variant = $vf->allele_string; | |
324 $variant =~ s/^.*\///; | |
325 $variant =~ s/\s*$//; | |
326 | |
327 my ($vf_start,$vf_end) = ($vf->start, $vf->end); | |
328 if($vf->strand == -1){ | |
329 #Needed for insertions | |
330 $variant = reverse($variant); | |
331 $variant =~ tr/ACGT/TGCA/; | |
332 } | |
333 my $var_seq = substr($ref_seq,0, $vf_start - 1).$variant.substr($ref_seq, $vf_start+length($variant)-1); | |
334 | |
335 my $bm = $self->{'binding_matrix'}; | |
336 return 100 * ($bm->relative_affinity($var_seq,$linear) - $bm->relative_affinity($ref_seq,$linear)); | |
337 | |
338 } | |
339 | |
340 =head2 interdb_stable_id | |
341 | |
342 Arg [1] : (optional) int - stable_id e.g 1 | |
343 Example : my $idb_sid = $feature->interdb_stable_id(); | |
344 Description: Getter for the interdb_stable_id attribute for this feature. | |
345 This is simply to avoid using internal db IDs for inter DB linking | |
346 Returntype : int | |
347 Exceptions : None | |
348 Caller : General | |
349 Status : At Risk | |
350 | |
351 =cut | |
352 | |
353 sub interdb_stable_id { | |
354 return $_[0]->{'interdb_stable_id'}; | |
355 } | |
356 | |
357 | |
358 | |
359 1; | |
360 |