0
|
1 # $Id: DNAMutation.pm,v 1.11 2002/10/22 07:38:49 lapp Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Variation::DNAMutation
|
|
4 #
|
|
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
|
|
6 #
|
|
7 # Copyright Heikki Lehvaslaiho
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Variation::DNAMutation - DNA level mutation class
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 $dnamut = Bio::Variation::DNAMutation->new
|
|
20 ('-start' => $start,
|
|
21 '-end' => $end,
|
|
22 '-length' => $len,
|
|
23 '-upStreamSeq' => $upflank,
|
|
24 '-dnStreamSeq' => $dnflank,
|
|
25 '-proof' => $proof,
|
|
26 '-isMutation' => 1,
|
|
27 '-mut_number' => $mut_number
|
|
28 );
|
|
29 $a1 = Bio::Variation::Allele->new;
|
|
30 $a1->seq('a');
|
|
31 $dnamut->allele_ori($a1);
|
|
32 my $a2 = Bio::Variation::Allele->new;
|
|
33 $a2->seq('t');
|
|
34 $dnamut->add_Allele($a2);
|
|
35
|
|
36 print "Restriction changes are ", $dnamut->restriction_changes, "\n";
|
|
37
|
|
38 # add it to a SeqDiff container object
|
|
39 $seqdiff->add_Variant($dnamut);
|
|
40
|
|
41
|
|
42 =head1 DESCRIPTION
|
|
43
|
|
44 The instantiable class Bio::Variation::DNAMutation describes basic
|
|
45 sequence changes in genomic DNA level. It uses methods defined in
|
|
46 superclass Bio::Variation::VariantI. See L<Bio::Variation::VariantI>
|
|
47 for details.
|
|
48
|
|
49 If the variation described by a DNAMutation object is transcibed, link
|
|
50 the corresponding Bio::Variation::RNAChange object to it using
|
|
51 method RNAChange(). See L<Bio::Variation::RNAChange> for more information.
|
|
52
|
|
53 =head1 FEEDBACK
|
|
54
|
|
55 =head2 Mailing Lists
|
|
56
|
|
57 User feedback is an integral part of the evolution of this and other
|
|
58 Bioperl modules. Send your comments and suggestions preferably to the
|
|
59 Bioperl mailing lists Your participation is much appreciated.
|
|
60
|
|
61 bioperl-l@bioperl.org - General discussion
|
|
62 http://bio.perl.org/MailList.html - About the mailing lists
|
|
63
|
|
64 =head2 Reporting Bugs
|
|
65
|
|
66 report bugs to the Bioperl bug tracking system to help us keep track
|
|
67 the bugs and their resolution. Bug reports can be submitted via
|
|
68 email or the web:
|
|
69
|
|
70 bioperl-bugs@bio.perl.org
|
|
71 http://bugzilla.bioperl.org/
|
|
72
|
|
73 =head1 AUTHOR - Heikki Lehvaslaiho
|
|
74
|
|
75 Email: heikki@ebi.ac.uk
|
|
76 Address:
|
|
77
|
|
78 EMBL Outstation, European Bioinformatics Institute
|
|
79 Wellcome Trust Genome Campus, Hinxton
|
|
80 Cambs. CB10 1SD, United Kingdom
|
|
81
|
|
82 =head1 APPENDIX
|
|
83
|
|
84 The rest of the documentation details each of the object
|
|
85 methods. Internal methods are usually preceded with a _
|
|
86
|
|
87 =cut
|
|
88
|
|
89
|
|
90 # Let the code begin...
|
|
91
|
|
92
|
|
93 package Bio::Variation::DNAMutation;
|
|
94 my $VERSION=1.0;
|
|
95 use vars qw(@ISA);
|
|
96 use strict;
|
|
97
|
|
98 # Object preamble - inheritance
|
|
99 use Bio::Variation::VariantI;
|
|
100
|
|
101 @ISA = qw( Bio::Variation::VariantI );
|
|
102
|
|
103 sub new {
|
|
104 my($class,@args) = @_;
|
|
105 my $self = $class->SUPER::new(@args);
|
|
106
|
|
107 my ($start, $end, $length, $strand, $primary, $source,
|
|
108 $frame, $score, $gff_string,
|
|
109 $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq,
|
|
110 $label, $status, $proof, $region, $region_value, $region_dist, $numbering,
|
|
111 $cpg, $mut_number, $ismutation) =
|
|
112 $self->_rearrange([qw(START
|
|
113 END
|
|
114 LENGTH
|
|
115 STRAND
|
|
116 PRIMARY
|
|
117 SOURCE
|
|
118 FRAME
|
|
119 SCORE
|
|
120 GFF_STRING
|
|
121 ALLELE_ORI
|
|
122 ALLELE_MUT
|
|
123 UPSTREAMSEQ
|
|
124 DNSTREAMSEQ
|
|
125 LABEL
|
|
126 STATUS
|
|
127 PROOF
|
|
128 REGION
|
|
129 REGION_VALUE
|
|
130 REGION_DIST
|
|
131 NUMBERING
|
|
132 CPG
|
|
133 MUT_NUMBER
|
|
134 ISMUTATION
|
|
135 )],
|
|
136 @args);
|
|
137
|
|
138 $self->primary_tag("Variation");
|
|
139
|
|
140 $self->{ 'alleles' } = [];
|
|
141
|
|
142 $start && $self->start($start);
|
|
143 $end && $self->end($end);
|
|
144 $length && $self->length($length);
|
|
145 $strand && $self->strand($strand);
|
|
146 $primary && $self->primary_tag($primary);
|
|
147 $source && $self->source_tag($source);
|
|
148 $frame && $self->frame($frame);
|
|
149 $score && $self->score($score);
|
|
150 $gff_string && $self->_from_gff_string($gff_string);
|
|
151
|
|
152 $allele_ori && $self->allele_ori($allele_ori);
|
|
153 $allele_mut && $self->allele_mut($allele_mut);
|
|
154 $upstreamseq && $self->upStreamSeq($upstreamseq);
|
|
155 $dnstreamseq && $self->dnStreamSeq($dnstreamseq);
|
|
156
|
|
157 $label && $self->label($label);
|
|
158 $status && $self->status($status);
|
|
159 $proof && $self->proof($proof);
|
|
160 $region && $self->region($region);
|
|
161 $region_value && $self->region_value($region_value);
|
|
162 $region_dist && $self->region_dist($region_dist);
|
|
163 $numbering && $self->numbering($numbering);
|
|
164 $mut_number && $self->mut_number($mut_number);
|
|
165 $ismutation && $self->isMutation($ismutation);
|
|
166
|
|
167 $cpg && $self->CpG($cpg);
|
|
168
|
|
169 return $self; # success - we hope!
|
|
170 }
|
|
171
|
|
172
|
|
173 =head2 CpG
|
|
174
|
|
175 Title : CpG
|
|
176 Usage : $obj->CpG()
|
|
177 Function: sets and returns boolean values for variation
|
|
178 hitting a CpG site. Unset value return -1.
|
|
179 Example : $obj->CpG()
|
|
180 Returns : boolean
|
|
181 Args : optional true of false value
|
|
182
|
|
183
|
|
184 =cut
|
|
185
|
|
186
|
|
187 sub CpG {
|
|
188 my ($obj,$value) = @_;
|
|
189 if( defined $value) {
|
|
190 $value ? ($value = 1) : ($value = 0);
|
|
191 $obj->{'cpg'} = $value;
|
|
192 }
|
|
193 elsif (not defined $obj->{'label'}) {
|
|
194 $obj->{'cpg'} = $obj->_CpG_value;
|
|
195 }
|
|
196 else {
|
|
197 return $obj->{'cpg'};
|
|
198 }
|
|
199 }
|
|
200
|
|
201
|
|
202
|
|
203 sub _CpG_value {
|
|
204 my ($self) = @_;
|
|
205 if ($self->allele_ori eq $self->allele_mut and length ($self->allele_ori) == 1 ) {
|
|
206
|
|
207 # valid only for point mutations
|
|
208 # CpG methylation-mediated deamination:
|
|
209 # CG -> TG | CG -> CA substitutions
|
|
210 # implementation here is less strict: if CpG dinucleotide was hit
|
|
211
|
|
212 if ( ( ($self->allele_ori eq 'c') && (substr($self->upStreamSeq, 0, 1) eq 'g') ) ||
|
|
213 ( ($self->allele_ori eq 'g') && (substr($self->dnStreamSeq, -1, 1) eq 'c') ) ) {
|
|
214 return 1;
|
|
215 }
|
|
216 else {
|
|
217 return 0;
|
|
218 }
|
|
219 } else {
|
|
220 $self->warn('CpG makes sense only in the context of point mutation');
|
|
221 return undef;
|
|
222 }
|
|
223 }
|
|
224
|
|
225
|
|
226 =head2 RNAChange
|
|
227
|
|
228 Title : RNAChange
|
|
229 Usage : $mutobj = $obj->RNAChange;
|
|
230 : $mutobj = $obj->RNAChange($objref);
|
|
231 Function: Returns or sets the link-reference to a mutation/change object.
|
|
232 If there is no link, it will return undef
|
|
233 Returns : an obj_ref or undef
|
|
234
|
|
235 =cut
|
|
236
|
|
237
|
|
238 sub RNAChange {
|
|
239 my ($self,$value) = @_;
|
|
240 if (defined $value) {
|
|
241 if( ! $value->isa('Bio::Variation::RNAChange') ) {
|
|
242 $self->throw("Is not a Bio::Variation::RNAChange object but a [$self]");
|
|
243 return (undef);
|
|
244 }
|
|
245 else {
|
|
246 $self->{'RNAChange'} = $value;
|
|
247 }
|
|
248 }
|
|
249 unless (exists $self->{'RNAChange'}) {
|
|
250 return (undef);
|
|
251 } else {
|
|
252 return $self->{'RNAChange'};
|
|
253 }
|
|
254 }
|
|
255
|
|
256
|
|
257 =head2 label
|
|
258
|
|
259 Title : label
|
|
260 Usage : $obj->label();
|
|
261 Function:
|
|
262
|
|
263 Sets and returns mutation event label(s). If value is not
|
|
264 set, or no argument is given returns false. Each
|
|
265 instantiable subclass of L<Bio::Variation::VariantI> needs
|
|
266 to implement this method. Valid values are listed in
|
|
267 'Mutation event controlled vocabulary' in
|
|
268 http://www.ebi.ac.uk/mutations/recommendations/mutevent.html.
|
|
269
|
|
270 Example :
|
|
271 Returns : string
|
|
272 Args : string
|
|
273
|
|
274 =cut
|
|
275
|
|
276
|
|
277 sub label {
|
|
278 my ($self, $value) = @_;
|
|
279 my ($o, $m, $type);
|
|
280 $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
|
|
281 $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
|
|
282
|
|
283 if (not $o and not $m ) {
|
|
284 $self->warn("[DNAMutation, label] Both alleles should not be empty!\n");
|
|
285 $type = 'no change'; # is this enough?
|
|
286 }
|
|
287 elsif ($o && $m && length($o) == length($m) && length($o) == 1) {
|
|
288 $type = 'point';
|
|
289 $type .= ", ". _point_type_label($o, $m);
|
|
290 }
|
|
291 elsif (not $o ) {
|
|
292 $type = 'insertion';
|
|
293 }
|
|
294 elsif (not $m ) {
|
|
295 $type = 'deletion';
|
|
296 }
|
|
297 else {
|
|
298 $type = 'complex';
|
|
299 }
|
|
300 $self->{'label'} = $type;
|
|
301 return $self->{'label'};
|
|
302 }
|
|
303
|
|
304
|
|
305 sub _point_type_label {
|
|
306 my ($o, $m) = @_;
|
|
307 my ($type);
|
|
308 my %transition = ('a' => 'g',
|
|
309 'g' => 'a',
|
|
310 'c' => 't',
|
|
311 't' => 'c');
|
|
312 $o = lc $o;
|
|
313 $m = lc $m;
|
|
314 if ($o eq $m) {
|
|
315 $type = 'no change';
|
|
316 }
|
|
317 elsif ($transition{$o} eq $m ) {
|
|
318 $type = 'transition';
|
|
319 }
|
|
320 else {
|
|
321 $type = 'transversion';
|
|
322 }
|
|
323 }
|
|
324
|
|
325
|
|
326 =head2 sysname
|
|
327
|
|
328 Title : sysname
|
|
329 Usage : $self->sysname
|
|
330 Function:
|
|
331
|
|
332 This subroutine creates a string corresponding to the
|
|
333 'systematic name' of the mutation. Systematic name is
|
|
334 specified in Antonorakis & MDI Nomenclature Working Group:
|
|
335 Human Mutation 11:1-3, 1998.
|
|
336 http://www.interscience.wiley.com/jpages/1059-7794/nomenclature.html
|
|
337 Returns : string
|
|
338
|
|
339 =cut
|
|
340
|
|
341
|
|
342 sub sysname {
|
|
343 my ($self,$value) = @_;
|
|
344 if( defined $value) {
|
|
345 $self->{'sysname'} = $value;
|
|
346 } else {
|
|
347 $self->warn('Mutation start position is not defined')
|
|
348 if not defined $self->start;
|
|
349 my $sysname = '';
|
|
350 # show the alphabet only if $self->SeqDiff->alphabet is set;
|
|
351 my $mol = '';
|
|
352 if ($self->SeqDiff->alphabet && $self->SeqDiff->alphabet eq 'dna') {
|
|
353 $mol = 'g.';
|
|
354 }
|
|
355 elsif ($self->SeqDiff->alphabet && $self->SeqDiff->alphabet eq 'rna') {
|
|
356 $mol = 'c.';
|
|
357 }
|
|
358 my $sep;
|
|
359 if ($self->isMutation) {
|
|
360 $sep = '>';
|
|
361 } else {
|
|
362 $sep = '|';
|
|
363 }
|
|
364 my $sign = '+';
|
|
365 $sign = '' if $self->start < 1;
|
|
366 $sysname .= $mol ;#if $mol;
|
|
367 $sysname .= $sign. $self->start;
|
|
368
|
|
369 my @alleles = $self->each_Allele;
|
|
370 $self->allele_mut($alleles[0]);
|
|
371
|
|
372 $sysname .= 'del' if $self->label =~ /deletion/;
|
|
373 $sysname .= 'ins' if $self->label =~ /insertion/;
|
|
374 $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
|
|
375
|
|
376
|
|
377
|
|
378 #push @alleles, $self->allele_mut if $self->allele_mut;
|
|
379 foreach my $allele (@alleles) {
|
|
380 $self->allele_mut($allele);
|
|
381 $sysname .= $sep if $self->label =~ /point/ or $self->label =~ /complex/;
|
|
382 $sysname .= uc $self->allele_mut->seq if $self->allele_mut->seq;
|
|
383 }
|
|
384 $self->{'sysname'} = $sysname;
|
|
385 #$self->{'sysname'} = $sign. $self->start.
|
|
386 # uc $self->allele_ori->seq. $sep. uc $self->allele_mut->seq;
|
|
387 }
|
|
388 return $self->{'sysname'};
|
|
389 }
|
|
390
|
|
391 1;
|