annotate variant_effect_predictor/Bio/EnsEMBL/Variation/VariationFeatureOverlapAllele.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 =head1 LICENSE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 This software is distributed under a modified Apache license.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 For license details, please see
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 =head1 CONTACT
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17 <helpdesk@ensembl.org>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23 Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25 =head1 SYNOPSIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27 use Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29 my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30 -variation_feature_overlap => $vfo,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31 -variation_feature_seq => 'A',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32 -is_reference => 0,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35 print "sequence with respect to the feature: ", $vfoa->feature_seq, "\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36 print "sequence with respect to the variation feature: ", $vfoa->variation_feature_seq, "\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37 print "consequence SO terms: ", (join ",", map { $_->SO_term } @{ $vfoa->get_all_OverlapConsequences }), "\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39 =head1 DESCRIPTION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41 A VariationFeatureOverlapAllele object represents a single allele of a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42 VariationFeatureOverlap. It is the super-class of various feature-specific allele
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43 classes such as TranscriptVariationAllele and RegulatoryFeatureVariationAllele and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44 contains methods not specific to any particular feature type. Ordinarily you will
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45 not create these objects yourself, but instead you would create e.g. a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46 TranscriptVariation object which will then create VariationFeatureOverlapAlleles
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47 based on the allele string of the associated VariationFeature.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51 package Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54 use warnings;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
57 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
58 use Bio::EnsEMBL::Utils::Exception qw(throw);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
59 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
60
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
61 use base qw(Bio::EnsEMBL::Variation::BaseVariationFeatureOverlapAllele);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
62
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Matches an allele string consisting solely of A, C, G, T or '-'
# characters (case-insensitive).
our $UNAMBIGUOUS_NUCLEOTIDES = qr/^[ACGT-]+$/i;

# Matches an allele string consisting solely of the nucleotide and
# ambiguity-code letters listed in the character class, or '-'
# (case-insensitive).
our $ALL_NUCLEOTIDES = qr/^[ACGTUMRWSYKVHDBXN-]+$/i;

# Matches allele strings such as "(3 BP INSERTION)"; the first capture is
# the stated length and the second is the event type.
our $SPECIFIED_LENGTH = qr/(\d+) BP (INSERTION|DELETION)/i;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
68
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
69 =head2 new
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
70
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
71 Arg [-VARIATION_FEATURE_OVERLAP] :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
72 The Bio::EnsEMBL::Variation::VariationFeatureOverlap with which this allele is
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
73 associated
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
74
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
75 Arg [-VARIATION_FEATURE_SEQ] :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
76 The allele sequence with respect to the associated VariationFeature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
77
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
78 Arg [-IS_REFERENCE] :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
79 A flag indicating if this allele is the reference allele or not
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
80
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
81 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
82 my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
83 -variation_feature_overlap => $vfo,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
84 -variation_feature_seq => 'A',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
85 -is_reference => 0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
86 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
87
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
88 Description: Constructs a new VariationFeatureOverlapAllele instance given a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
89 VariationFeatureOverlap and the sequence of the allele
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
90 Returntype : A new Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
91 Exceptions : throws unless both VARIATION_FEATURE_OVERLAP and VARIATION_FEATURE_SEQ
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
92 are supplied
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
93 Status : At Risk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
94
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
95 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
96
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Construct a new allele from named arguments (documented in the POD for
# this method). Throws, via throw(), when no true VARIATION_FEATURE_SEQ
# value is supplied; assert_ref() checks that the associated overlap is a
# Bio::EnsEMBL::Variation::VariationFeatureOverlap.
sub new {
    my $class = shift;

    my %args = @_;

    # Rename a '-variation_feature_overlap' argument (matched
    # case-insensitively) to '-base_variation_feature_overlap' before
    # handing the arguments to the superclass constructor. The list of keys
    # is built before the loop body runs, so changing %args here is safe.
    for my $arg (keys %args) {
        if (lc($arg) eq '-variation_feature_overlap') {
            $args{'-base_variation_feature_overlap'} = delete $args{$arg};
        }
    }

    my $self = $class->SUPER::new(%args);

    assert_ref($self->base_variation_feature_overlap, 'Bio::EnsEMBL::Variation::VariationFeatureOverlap');

    my ($variation_feature_seq) = rearrange([qw(VARIATION_FEATURE_SEQ)], %args);

    # The message is assembled with '.', Perl's string concatenation
    # operator; '+' would add the operands numerically and lose the text.
    throw("Allele sequence required (variation ".$self->variation_feature->variation_name.")")
        unless $variation_feature_seq;

    $self->{variation_feature_seq} = $variation_feature_seq;

    return $self;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
129
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Fast constructor taking a hashref of attributes. A true
# 'variation_feature_overlap' entry is moved to the
# 'base_variation_feature_overlap' key (the caller's hashref is modified in
# place) and construction is then delegated to the superclass new_fast.
sub new_fast {
    my ($class, $hashref) = @_;

    # move the overlap under the key name used by the superclass
    $hashref->{base_variation_feature_overlap} = delete $hashref->{variation_feature_overlap}
        if $hashref->{variation_feature_overlap};

    return $class->SUPER::new_fast($hashref);
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
143
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
144 =head2 dbID
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
145
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
146 Description: Get/set the dbID of this VariationFeatureOverlapAllele
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
147 Returntype : integer
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
148 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
149 Status : At Risk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
150
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
151 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
152
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub dbID {
    my $self   = shift;
    my $new_id = shift;

    # only a defined argument replaces the stored value, so calling with
    # no argument (or undef) acts as a plain getter
    if (defined $new_id) {
        $self->{dbID} = $new_id;
    }

    return $self->{dbID};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
158
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
159 =head2 variation_feature_overlap
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
160
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
161 Description: Get/set the associated VariationFeatureOverlap
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
162 Returntype : Bio::EnsEMBL::Variation::VariationFeatureOverlap
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
163 Exceptions : throws if the argument is the wrong type
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
164 Status : At Risk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
165
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
166 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
167
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub variation_feature_overlap {
    my ($self, $vfo) = @_;

    # type-check only when a (true) value is being set
    assert_ref($vfo, 'Bio::EnsEMBL::Variation::VariationFeatureOverlap') if $vfo;

    # storage and retrieval are handled by the base_variation_feature_overlap
    # accessor, which receives the argument whether or not it was supplied
    return $self->base_variation_feature_overlap($vfo);
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
177
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
178 =head2 variation_feature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
179
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
180 Description: Get the associated VariationFeature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
181 Returntype : Bio::EnsEMBL::Variation::VariationFeature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
182 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
183 Status : At Risk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
184
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
185 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
186
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub variation_feature {
    my ($self) = @_;

    # the VariationFeature is not stored on the allele itself; it is
    # fetched from the associated VariationFeatureOverlap
    my $overlap = $self->variation_feature_overlap;

    return $overlap->variation_feature;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
191
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
192 =head2 feature_seq
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
193
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
194 Description: Get the sequence of this allele relative to the associated Feature.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
195 This will be the same as the variation_feature_seq when the associated
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
196 VariationFeature is on the same strand as the Feature, or the reverse
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
197 complement when the strands differ.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
198 Returntype : string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
199 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
200 Status : At Risk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
201
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
202 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
203
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub feature_seq {
    my ($self) = @_;

    # return the cached value when one (true) has already been computed
    return $self->{feature_seq} if $self->{feature_seq};

    my $strands_differ =
        $self->variation_feature->strand != $self->feature->strand;

    if ($strands_differ && $self->seq_is_dna) {
        # the allele is DNA on the opposite strand to the feature, so it is
        # reverse complemented; reverse_comp works in place on a copy
        my $allele_seq = $self->variation_feature_seq;
        reverse_comp(\$allele_seq);
        $self->{feature_seq} = $allele_seq;
    }
    else {
        # same strand, or not a DNA sequence: use the allele string as is
        $self->{feature_seq} = $self->{variation_feature_seq};
    }

    return $self->{feature_seq};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
223
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
224 =head2 variation_feature_seq
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
225
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
226 Args [1] : The allele sequence relative to the VariationFeature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
227 Description: Get/set the sequence of this allele relative to the associated VariationFeature.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
228 Returntype : string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
229 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
230 Status : At Risk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
231
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
232 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
233
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Get/set the sequence of this allele relative to the variation feature.
# A false argument (undef, '', 0) is ignored, so the method then acts as a
# plain getter. Returns the stored sequence.
sub variation_feature_seq {
    my ($self, $variation_feature_seq) = @_;

    if ($variation_feature_seq) {
        $self->{variation_feature_seq} = $variation_feature_seq;

        # feature_seq, seq_is_dna and seq_is_unambiguous_dna cache values
        # derived from the sequence; drop them so they are recomputed from
        # the new sequence instead of returning stale results
        delete @{$self}{qw(feature_seq seq_is_dna seq_is_unambiguous_dna)};
    }

    return $self->{variation_feature_seq};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
240
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
241 =head2 seq_is_unambiguous_dna
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
242
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
243 Description: identify if the sequence of this allele is unambiguous DNA
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
244 i.e. if we can meaningfully translate it
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
245 Returntype : bool
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
246 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
247 Status : At Risk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
248
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
249 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
250
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub seq_is_unambiguous_dna {
    my ($self) = @_;

    # the answer is computed once and cached on the object as 1 or 0
    if (!defined $self->{seq_is_unambiguous_dna}) {
        my $matched = $self->{variation_feature_seq} =~ /$UNAMBIGUOUS_NUCLEOTIDES/;
        $self->{seq_is_unambiguous_dna} = $matched ? 1 : 0;
    }

    return $self->{seq_is_unambiguous_dna};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
261
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
262 =head2 seq_is_dna
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
263
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
264 Description: identify if the sequence of this allele is DNA including ambiguity
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
265 codes, use seq_is_unambiguous_dna to check for alleles that do not
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
266 include ambiguity codes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
267 Returntype : bool
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
268 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
269 Status : At Risk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
270
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
271 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
272
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub seq_is_dna {
    my ($self) = @_;

    # the answer is computed once and cached on the object as 1 or 0
    if (!defined $self->{seq_is_dna}) {
        my $matched = $self->{variation_feature_seq} =~ /$ALL_NUCLEOTIDES/;
        $self->{seq_is_dna} = $matched ? 1 : 0;
    }

    return $self->{seq_is_dna};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
283
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
284 =head2 seq_length
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
285
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
286 Description: return the length of this allele sequence, this is better than
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
287 just using length($vfoa->feature_seq) because we check if the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
288 sequence is valid DNA, and also look for allele strings like
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
289 "(3 BP INSERTION)" to determine the length
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
290 Returntype : int or undef if we cannot determine the length
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
291 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
292 Status : At Risk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
293
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
294 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
295
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub seq_length {
    my ($self) = @_;

    my $allele_seq = $self->variation_feature_seq;

    # a DNA allele: '-' counts as length 0, otherwise the length is the
    # number of characters in the string
    if ($self->seq_is_dna) {
        return $allele_seq eq '-' ? 0 : length($allele_seq);
    }

    # strings such as "(3 BP INSERTION)" state their own length
    return $1 if $allele_seq =~ /$SPECIFIED_LENGTH/;

    # the length cannot be determined; an explicit undef is returned
    return undef;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
315
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
316 =head2 allele_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
317
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
318 Description: Return a '/' delimited string of the reference allele variation_feature_seq
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
319 and the variation_feature_seq of this allele
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
320 Returntype : string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
321 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
322 Status : At Risk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
323
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
324 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
325
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub allele_string {
    my ($self) = @_;

    my $reference_allele =
        $self->variation_feature_overlap->get_reference_VariationFeatureOverlapAllele;

    my $ref_seq = $reference_allele->variation_feature_seq;
    my $own_seq = $self->variation_feature_seq;

    # when this allele's sequence equals the reference sequence (e.g. HGMDs
    # and CNV probes, whose alleles are artificially set to be the same)
    # only the reference sequence is returned; otherwise "ref/alt"
    return $ref_seq eq $own_seq
        ? $ref_seq
        : $ref_seq.'/'.$own_seq;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
341
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
342
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Attach a 'SARA' ("Same as reference allele") OverlapConsequence to this
# allele via add_OverlapConsequence and return the allele itself.
# NOTE(review): Bio::EnsEMBL::Variation::OverlapConsequence is not loaded by
# any 'use' line visible in this file - presumably it is loaded elsewhere;
# confirm.
sub _convert_to_sara {
    my ($self) = @_;

    my %sara_attributes = (
        'label'        => 'SARA',
        'description'  => 'Same as reference allele',
        'rank'         => '99',
        'display_term' => 'SARA',
        'SO_term'      => 'SARA',
    );

    my $sara_consequence =
        Bio::EnsEMBL::Variation::OverlapConsequence->new_fast(\%sara_attributes);

    $self->add_OverlapConsequence($sara_consequence);

    return $self;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
358
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
359 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
360