0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 package Bio::EnsEMBL::Variation::MotifFeatureVariationAllele;
|
|
22
|
|
23 use strict;
|
|
24 use warnings;
|
|
25
|
|
26 use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(overlap);
|
|
27 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
|
|
28
|
|
29 use base qw(Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele);
|
|
30
|
|
31 sub new_fast {
|
|
32 my ($self, $hashref) = @_;
|
|
33
|
|
34 # swap a motif_feature_variation argument for a variation_feature_overlap one
|
|
35
|
|
36 if ($hashref->{motif_feature_variation}) {
|
|
37 $hashref->{variation_feature_overlap} = delete $hashref->{motif_feature_variation};
|
|
38 }
|
|
39
|
|
40 # and call the superclass
|
|
41
|
|
42 return $self->SUPER::new_fast($hashref);
|
|
43 }
|
|
44
|
|
45 =head2 motif_feature_variation
|
|
46
|
|
47 Description: Get/set the associated MotifFeatureVariation
|
|
48 Returntype : Bio::EnsEMBL::Variation::MotifFeatureVariation
|
|
49 Status : At Risk
|
|
50
|
|
51 =cut
|
|
52
|
|
53 sub motif_feature_variation {
|
|
54 my ($self, $mfv) = shift;
|
|
55 assert_ref($mfv, 'Bio::EnsEMBL::Variation::MotifFeatureVariation') if $mfv;
|
|
56 return $self->variation_feature_overlap($mfv);
|
|
57 }
|
|
58
|
|
59 =head2 motif_feature
|
|
60
|
|
61 Description: Get/set the associated MotifFeature
|
|
62 Returntype : Bio::EnsEMBL::Funcgen::MotifFeature
|
|
63 Status : At Risk
|
|
64
|
|
65 =cut
|
|
66
|
|
67 sub motif_feature {
|
|
68 my $self = shift;
|
|
69 return $self->motif_feature_variation->motif_feature;
|
|
70 }
|
|
71
|
|
72 =head2 motif_start
|
|
73
|
|
74 Description: Get the (1-based) relative position of the variation feature start in the motif
|
|
75 Returntype : integer
|
|
76 Status : At Risk
|
|
77
|
|
78 =cut
|
|
79
|
|
80 sub motif_start {
|
|
81
|
|
82 my $self = shift;
|
|
83
|
|
84 my $mf = $self->motif_feature;
|
|
85 my $vf = $self->variation_feature;
|
|
86
|
|
87 return undef unless defined $vf->seq_region_start && defined $mf->seq_region_start;
|
|
88
|
|
89 my $mf_start = $vf->seq_region_start - $mf->seq_region_start + 1;
|
|
90
|
|
91 # adjust if the motif is on the reverse strand
|
|
92
|
|
93 $mf_start = $mf->binding_matrix->length - $mf_start + 1 if $mf->strand < 0;
|
|
94
|
|
95 # check that we're in bounds
|
|
96
|
|
97 return undef if $mf_start > $mf->length;
|
|
98
|
|
99 return $mf_start;
|
|
100 }
|
|
101
|
|
102 =head2 motif_end
|
|
103
|
|
104 Description: Get the (1-based) relative position of the variation feature end in the motif
|
|
105 Returntype : integer
|
|
106 Status : At Risk
|
|
107
|
|
108 =cut
|
|
109
|
|
110 sub motif_end {
|
|
111
|
|
112 my $self = shift;
|
|
113
|
|
114 my $mf = $self->motif_feature;
|
|
115 my $vf = $self->variation_feature;
|
|
116
|
|
117 return undef unless defined $vf->seq_region_end && defined $mf->seq_region_start;
|
|
118
|
|
119 my $mf_end = $vf->seq_region_end - $mf->seq_region_start + 1;
|
|
120
|
|
121 # adjust if the motif is on the reverse strand
|
|
122
|
|
123 $mf_end = $mf->binding_matrix->length - $mf_end + 1 if $mf->strand < 0;
|
|
124
|
|
125 # check that we're in bounds
|
|
126
|
|
127 return undef if $mf_end < 1;
|
|
128
|
|
129 return $mf_end;
|
|
130 }
|
|
131
|
|
132 =head2 in_informative_position
|
|
133
|
|
134 Description: Identify if the variation feature falls in a high information position of the motif
|
|
135 Returntype : boolean
|
|
136 Status : At Risk
|
|
137
|
|
138 =cut
|
|
139
|
|
140 sub in_informative_position {
|
|
141 my $self = shift;
|
|
142
|
|
143 # we can only call this for true SNPs
|
|
144
|
|
145 my $vf = $self->variation_feature;
|
|
146
|
|
147 unless (($vf->start == $vf->end) && ($self->variation_feature_seq ne '-')) {
|
|
148 return undef;
|
|
149 }
|
|
150
|
|
151 # get the 1-based position
|
|
152
|
|
153 my $start = $self->motif_start;
|
|
154
|
|
155 return undef unless defined $start && $start >= 1 && $start <= $self->motif_feature->length;
|
|
156
|
|
157 return $self->motif_feature->binding_matrix->is_position_informative($start);
|
|
158 }
|
|
159
|
|
160 =head2 motif_score_delta
|
|
161
|
|
162 Description: Calculate the difference in motif score between the reference and variant sequences
|
|
163 Returntype : float
|
|
164 Status : At Risk
|
|
165
|
|
166 =cut
|
|
167
|
|
168 sub motif_score_delta {
|
|
169
|
|
170 my $self = shift;
|
|
171 my $linear = shift;
|
|
172
|
|
173 unless ($self->{motif_score_delta}) {
|
|
174
|
|
175 my $vf = $self->motif_feature_variation->variation_feature;
|
|
176 my $mf = $self->motif_feature;
|
|
177
|
|
178 my $allele_seq = $self->feature_seq;
|
|
179 my $ref_allele_seq = $self->motif_feature_variation->get_reference_MotifFeatureVariationAllele->feature_seq;
|
|
180
|
|
181 if ($allele_seq eq '-' ||
|
|
182 $ref_allele_seq eq '-' ||
|
|
183 length($allele_seq) != length($ref_allele_seq)) {
|
|
184 # we can't call a score because the sequence will change length
|
|
185 return undef;
|
|
186 }
|
|
187
|
|
188 my ($mf_start, $mf_end) = ($self->motif_start, $self->motif_end);
|
|
189
|
|
190 return undef unless defined $mf_start && defined $mf_end;
|
|
191
|
|
192 my $mf_seq = $self->motif_feature_variation->_motif_feature_seq;
|
|
193 my $mf_seq_length = length($mf_seq);
|
|
194
|
|
195 # trim allele_seq
|
|
196 if($mf_start < 1) {
|
|
197 $allele_seq = substr($allele_seq, 1 - $mf_start);
|
|
198 $mf_start = 1;
|
|
199 }
|
|
200
|
|
201 if($mf_end > $mf_seq_length) {
|
|
202 $allele_seq = substr($allele_seq, 0, $mf_seq_length - $mf_start + 1);
|
|
203 $mf_end = $mf_seq_length;
|
|
204 }
|
|
205
|
|
206 my $var_len = length($allele_seq);
|
|
207
|
|
208 return undef if $var_len > $mf->length;
|
|
209
|
|
210 my $matrix = $mf->binding_matrix;
|
|
211
|
|
212 # get the binding affinity of the reference sequence
|
|
213 my $ref_affinity = $matrix->relative_affinity($mf_seq, $linear);
|
|
214
|
|
215 # splice in the variant sequence (0-based)
|
|
216 substr($mf_seq, $mf_start - 1, $var_len) = $allele_seq;
|
|
217
|
|
218 # check length hasn't changed
|
|
219 return undef if length($mf_seq) != $mf_seq_length;
|
|
220
|
|
221 # and get the affinity of the variant sequence
|
|
222 my $var_affinity = $matrix->relative_affinity($mf_seq, $linear);
|
|
223
|
|
224 $self->{motif_score_delta} = ($var_affinity - $ref_affinity);
|
|
225 }
|
|
226
|
|
227 return $self->{motif_score_delta};
|
|
228 }
|
|
229
|
|
230 1;
|