comparison variant_effect_predictor/Bio/Variation/RNAChange.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: RNAChange.pm,v 1.10 2002/10/22 07:38:49 lapp Exp $
2 #
3 # BioPerl module for Bio::Variation::RNAChange
4 #
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
6 #
7 # Copyright Heikki Lehvaslaiho
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Variation::RNAChange - Sequence change class for RNA level
16
17 =head1 SYNOPSIS
18
19 $rnachange = Bio::Variation::RNAChange->new
20 ('-start' => $start,
21 '-end' => $end,
22 '-length' => $len,
23 '-codon_pos' => $cp,
24 '-upStreamSeq' => $upflank,
25 '-dnStreamSeq' => $dnflank,
26 '-proof' => $proof,
27 '-isMutation' => 1,
28 '-mut_number' => $mut_number
29 );
30 $a1 = Bio::Variation::Allele->new;
31 $a1->seq('a');
32 $rnachange->allele_ori($a1);
33 my $a2 = Bio::Variation::Allele->new;
34 $a2->seq('t');
35 $rnachange->add_Allele($a2);
36 $rnachange->allele_mut($a2);
37
38 print "The codon change is ", $rnachange->codon_ori,
39 ">", $rnachange->codon_mut, "\n";
40
41 # add it to a SeqDiff container object
42 $seqdiff->add_Variant($rnachange);
43
44 # and create links to and from DNA level mutation objects
45 $rnachange->DNAMutation($dnamut);
46 $dnamut->RNAChange($rnachange);
47
48 =head1 DESCRIPTION
49
50 The instantiable class Bio::Variation::DNAMutation describes basic
51 sequence changes at RNA molecule level. It uses methods defined in
52 superclass Bio::Variation::VariantI. See L<Bio::Variation::VariantI>
53 for details.
54
55 You are normally expected to create a corresponding
56 Bio::Variation::DNAMutation object even if mutation is defined at
57 RNA level. The numbering follows then cDNA numbering. Link the
58 DNAMutation object to the RNAChange object using the method
59 DNAMutation(). If the variation described by a RNAChange object is
60 translated, link the corresponding Bio::Variation::AAChange object
61 to it using method AAChange(). See L<Bio::Variation::DNAMutation> and
62 L<Bio::Variation::AAChange> for more information.
63
64
65 =head1 FEEDBACK
66
67 =head2 Mailing Lists
68
69 User feedback is an integral part of the evolution of this and other
70 Bioperl modules. Send your comments and suggestions preferably to the
71 Bioperl mailing lists Your participation is much appreciated.
72
73 bioperl-l@bioperl.org - General discussion
74 http://bio.perl.org/MailList.html - About the mailing lists
75
76 =head2 Reporting Bugs
77
78 report bugs to the Bioperl bug tracking system to help us keep track
79 the bugs and their resolution. Bug reports can be submitted via
80 email or the web:
81
82 bioperl-bugs@bio.perl.org
83 http://bugzilla.bioperl.org/
84
85 =head1 AUTHOR - Heikki Lehvaslaiho
86
87 Email: heikki@ebi.ac.uk
88 Address:
89
90 EMBL Outstation, European Bioinformatics Institute
91 Wellcome Trust Genome Campus, Hinxton
92 Cambs. CB10 1SD, United Kingdom
93
94 =head1 APPENDIX
95
96 The rest of the documentation details each of the object
97 methods. Internal methods are usually preceded with a _
98
99 =cut
100
101
102 # Let the code begin...
103
104
105 package Bio::Variation::RNAChange;
106 use vars qw(@ISA);
107 use strict;
108
109 # Object preamble - inheritance
110 my $VERSION=1.0;
111 use Bio::Variation::VariantI;
112 use Bio::Tools::CodonTable;
113
114 @ISA = qw( Bio::Variation::VariantI );
115
116 sub new {
117 my($class,@args) = @_;
118 my $self = $class->SUPER::new(@args);
119
120 my ($start, $end, $length, $strand, $primary, $source,
121 $frame, $score, $gff_string,
122 $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq,
123 $label, $status, $proof, $region, $region_value, $region_dist, $numbering,
124 $mut_number, $isMutation,
125 $codon_ori, $codon_mut, $codon_pos, $codon_table, $cds_end) =
126 $self->_rearrange([qw(START
127 END
128 LENGTH
129 STRAND
130 PRIMARY
131 SOURCE
132 FRAME
133 SCORE
134 GFF_STRING
135 ALLELE_ORI
136 ALLELE_MUT
137 UPSTREAMSEQ
138 DNSTREAMSEQ
139 LABEL
140 STATUS
141 PROOF
142 REGION
143 REGION_VALUE
144 REGION_DIST
145 NUMBERING
146 MUT_NUMBER
147 ISMUTATION
148 CODON_ORI
149 CODON_MUT
150 CODON_POS
151 TRANSLATION_TABLE
152 CDS_END
153 )],@args);
154
155 $self->primary_tag("Variation");
156
157 $self->{ 'alleles' } = [];
158
159 $start && $self->start($start);
160 $end && $self->end($end);
161 $length && $self->length($length);
162 $strand && $self->strand($strand);
163 $primary && $self->primary_tag($primary);
164 $source && $self->source_tag($source);
165 $frame && $self->frame($frame);
166 $score && $self->score($score);
167 $gff_string && $self->_from_gff_string($gff_string);
168
169 $allele_ori && $self->allele_ori($allele_ori);
170 $allele_mut && $self->allele_mut($allele_mut);
171 $upstreamseq && $self->upStreamSeq($upstreamseq);
172 $dnstreamseq && $self->dnStreamSeq($dnstreamseq);
173
174 $label && $self->label($label);
175 $status && $self->status($status);
176 $proof && $self->proof($proof);
177 $region && $self->region($region);
178 $region_value && $self->region_value($region_value);
179 $region_dist && $self->region_dist($region_dist);
180 $numbering && $self->numbering($numbering);
181 $mut_number && $self->mut_number($mut_number);
182 $isMutation && $self->isMutation($isMutation);
183
184 $codon_ori && $self->codon_ori($codon_ori);
185 $codon_mut && $self->codon_mut($codon_mut);
186 $codon_pos && $self->codon_pos($codon_pos);
187 $codon_table && $self->codon_table($codon_table);
188 $cds_end && $self->cds_end($cds_end);
189 return $self; # success - we hope!
190 }
191
192
193 =head2 codon_ori
194
195 Title : codon_ori
196 Usage : $obj->codon_ori();
197 Function:
198
199 Sets and returns codon_ori triplet. If value is not set,
200 creates the codon triplet from the codon position and
201 flanking sequences. The string has to be three characters
202 long. The character content is not checked.
203
204 Example :
205 Returns : string
206 Args : string
207
208 =cut
209
210 sub codon_ori {
211 my ($self,$value) = @_;
212 if (defined $value) {
213 if (length $value != 3) {
214 $self->warn("Codon string \"$value\" is not three characters long");
215 }
216 $self->{'codon_ori'} = $value;
217 }
218 elsif (! $self->{'codon_ori'}) {
219 my $codon_ori = '';
220
221 if ($self->region eq 'coding' && $self->start && $self->start >= 1) {
222
223 $self->warn('Codon position is not defined')
224 if not defined $self->codon_pos;
225 $self->warn('Upstream flanking sequence is not defined')
226 if not defined $self->upStreamSeq;
227 $self->warn('Downstream flanking sequence is not defined')
228 if not defined $self->dnStreamSeq;
229
230 my $cpos = $self->codon_pos;
231 $codon_ori = substr($self->upStreamSeq, -$cpos +1 , $cpos-1);
232 $codon_ori .= substr($self->allele_ori->seq, 0, 4-$cpos)
233 if $self->allele_ori and $self->allele_ori->seq;
234 $codon_ori .= substr($self->dnStreamSeq, 0, 3-length($codon_ori));
235 }
236 $self->{'codon_ori'} = lc $codon_ori;
237 }
238 return $self->{'codon_ori'};
239 }
240
241
242 =head2 codon_mut
243
244 Title : codon_mut
245 Usage : $obj->codon_mut();
246 Function:
247
248 Sets and returns codon_mut triplet. If value is not
249 set, creates the codon triplet from the codon position and
250 flanking sequences. Return undef for other than point mutations.
251
252 Example :
253 Returns : string
254 Args : string
255
256 =cut
257
258
259 sub codon_mut {
260 my ($self,$value) = @_;
261 if (defined $value) {
262 if (length $value != 3 ) {
263 $self->warn("Codon string \"$value\" is not three characters long");
264 }
265 $self->{'codon_mut'} = $value;
266 }
267 else {
268 my $codon_mut = '';
269 if ($self->allele_ori->seq and $self->allele_mut->seq and
270 CORE::length($self->allele_ori->seq) == 1 and
271 CORE::length($self->allele_mut->seq) == 1 and
272 $self->region eq 'coding' and $self->start >= 1) {
273
274 $self->warn('Codon position is not defined')
275 if not defined $self->codon_pos;
276 $self->warn('Upstream flanking sequnce is not defined')
277 if not defined $self->upStreamSeq;
278 $self->warn('Downstream flanking sequnce is not defined')
279 if not defined $self->dnStreamSeq;
280 $self->throw('Mutated allele is not defined')
281 if not defined $self->allele_mut;
282
283 my $cpos = $self->codon_pos;
284 $codon_mut = substr($self->upStreamSeq, -$cpos +1 , $cpos-1);
285 $codon_mut .= substr($self->allele_mut->seq, 0, 4-$cpos)
286 if $self->allele_mut and $self->allele_mut->seq;
287 $codon_mut .= substr($self->dnStreamSeq, 0, 3-length($codon_mut));
288
289 $self->{'codon_mut'} = lc $codon_mut;
290 }
291 }
292 return $self->{'codon_mut'};
293 }
294
295
296 =head2 codon_pos
297
298 Title : codon_pos
299 Usage : $obj->codon_pos();
300 Function:
301
302 Sets and returns the position of the mutation start in the
303 codon. If value is not set, returns false.
304
305 Example :
306 Returns : 1,2,3
307 Args : none if get, the new value if set
308
309 =cut
310
311
312 sub codon_pos {
313 my ($self,$value) = @_;
314 if( defined $value) {
315 if ( $value !~ /[123]/ ) {
316 $self->throw("'$value' is not a valid codon position");
317 }
318 $self->{'codon_pos'} = $value;
319 }
320 return $self->{'codon_pos'};
321 }
322
323
324 =head2 codon_table
325
326 Title : codon_table
327 Usage : $obj->codon_table();
328 Function:
329
330 Sets and returns the codon table id of the RNA
331 If value is not set, returns 1, 'universal' code, as the default.
332
333 Example :
334 Returns : integer
335 Args : none if get, the new value if set
336
337 =cut
338
339
340 sub codon_table {
341 my ($self,$value) = @_;
342 if( defined $value) {
343 if ( not $value =~ /^\d$/ ) {
344 $self->throw("'$value' is not a valid codon table ID\n".
345 "Has to be a positive integer. Defaulting to 1\n");
346 } else {
347 $self->{'codon_table'} = $value;
348 }
349 }
350 if( ! exists $self->{'codon_table'} ) {
351 return 1;
352 } else {
353 return $self->{'codon_table'};
354 }
355 }
356
357
358 =head2 DNAMutation
359
360 Title : DNAMutation
361 Usage : $mutobj = $obj->DNAMutation;
362 : $mutobj = $obj->DNAMutation($objref);
363 Function: Returns or sets the link-reference to a mutation/change object.
364 If there is no link, it will return undef
365 Returns : an obj_ref or undef
366
367 =cut
368
369
370 sub DNAMutation {
371 my ($self,$value) = @_;
372 if (defined $value) {
373 if( ! $value->isa('Bio::Variation::DNAMutation') ) {
374 $self->throw("Is not a Bio::Variation::DNAMutation object but a [$self]");
375 return (undef);
376 }
377 else {
378 $self->{'DNAMutation'} = $value;
379 }
380 }
381 unless (exists $self->{'DNAMutation'}) {
382 return (undef);
383 } else {
384 return $self->{'DNAMutation'};
385 }
386 }
387
388
389 =head2 AAChange
390
391 Title : AAChange
392 Usage : $mutobj = $obj->AAChange;
393 : $mutobj = $obj->AAChange($objref);
394 Function: Returns or sets the link-reference to a mutation/change object.
395 If there is no link, it will return undef
396 Returns : an obj_ref or undef
397
398 =cut
399
400 sub AAChange {
401 my ($self,$value) = @_;
402 if (defined $value) {
403 if( ! $value->isa('Bio::Variation::AAChange') ) {
404 $self->throw("Is not a Bio::Variation::AAChange object but a [$self]");
405 return (undef);
406 }
407 else {
408 $self->{'AAChange'} = $value;
409 }
410 }
411 unless (exists $self->{'AAChange'}) {
412 return (undef);
413 } else {
414 return $self->{'AAChange'};
415 }
416 }
417
418
419 =head2 exons_modified
420
421 Title : exons_modified
422 Usage : $modified = $obj->exons_modified;
423 : $modified = $obj->exons_modified(1);
424 Function: Returns or sets information (example: a simple boolean flag) about
425 the modification of exons as a result of a mutation.
426
427 =cut
428
429 sub exons_modified {
430 my ($self,$value)=@_;
431 if (defined($value)) {
432 $self->{'exons_modified'}=$value;
433 }
434 return ($self->{'exons_modified'});
435 }
436
437 =head2 region
438
439 Title : region
440 Usage : $obj->region();
441 Function:
442
443 Sets and returns the name of the sequence region type or
444 protein domain at this location. If value is not set,
445 returns false.
446
447 Example :
448 Returns : string
449 Args : string
450
451 =cut
452
453
454
455 sub region {
456 my ($self,$value) = @_;
457 if( defined $value) {
458 $self->{'region'} = $value;
459 }
460 elsif (not defined $self->{'region'}) {
461
462 $self->warn('Mutation start position is not defined')
463 if not defined $self->start and $self->verbose;
464 $self->warn('Mutation end position is not defined')
465 if not defined $self->end and $self->verbose;
466 $self->warn('Length of the CDS is not defined, the mutation can be beyond coding region!')
467 if not defined $self->cds_end and $self->verbose;
468
469 $self->region('coding');
470 if ($self->end && $self->end < 0 ){
471 $self->region('5\'UTR');
472 }
473 elsif ($self->start && $self->cds_end && $self->start > $self->cds_end ) {
474 $self->region('3\'UTR');
475 }
476 }
477 return $self->{'region'};
478 }
479
480 =head2 cds_end
481
482 Title : cds_end
483 Usage : $cds_end = $obj->get_cds_end();
484 Function:
485
486 Sets or returns the cds_end from the beginning of the DNA sequence
487 to the coordinate start used to describe variants.
488 Should be the location of the last nucleotide of the
489 terminator codon of the gene.
490
491 Example :
492 Returns : value of cds_end, a scalar
493 Args :
494
495 =cut
496
497
498
499 sub cds_end {
500 my ($self, $value) = @_;
501 if (defined $value) {
502 $self->warn("[$value] is not a good value for sequence position")
503 if not $value =~ /^\d+$/ ;
504 $self->{'cds_end'} = $value;
505 } else {
506 $self->{'cds_end'} = $self->SeqDiff->cds_end if $self->SeqDiff;
507 }
508 return $self->{'cds_end'};
509 }
510
511
512 =head2 label
513
514 Title : label
515 Usage : $obj->label();
516 Function:
517
518 Sets and returns mutation event label(s). If value is not
519 set, or no argument is given returns false. Each
520 instantiable subclass of L<Bio::Variation::VariantI> needs
521 to implement this method. Valid values are listed in
522 'Mutation event controlled vocabulary' in
523 http://www.ebi.ac.uk/mutations/recommendations/mutevent.html.
524
525 Example :
526 Returns : string
527 Args : string
528
529 =cut
530
531 sub label {
532 my ($self) = @_;
533 my ($o, $m, $type);
534 $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
535 $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
536
537 my $ct = Bio::Tools::CodonTable -> new ( -id => $self->codon_table );
538 if ($o and $m and CORE::length($o) == 1 and CORE::length($m) == 1) {
539 if (defined $self->AAChange) {
540 if ($self->start > 0 and $self->start < 4 ) {
541 $type = 'initiation codon';
542 }
543 elsif ($self->codon_ori && $ct->is_ter_codon($self->codon_ori) ) {
544 #AAChange->allele_ori and $self->AAChange->allele_ori->seq eq '*' ) {
545 $type = 'termination codon';
546 }
547 elsif ($self->codon_mut && $ct->is_ter_codon($self->codon_mut) ) {
548 #elsif ($self->AAChange->allele_mut and $self->AAChange->allele_mut->seq eq "*") {
549 $type = 'nonsense';
550 }
551 elsif ($o and $m and ($o eq $m or
552 $self->AAChange->allele_ori->seq eq
553 $self->AAChange->allele_mut->seq)) {
554 $type = 'silent';
555 } else {
556 $type = 'missense';
557 }
558 } else {
559 $type = 'unknown';
560 }
561 } else {
562 my $len = 0;
563 $len = CORE::length($o) if $o;
564 $len -= CORE::length($m) if $m;
565 if ($len%3 == 0 ) {
566 $type = 'inframe';
567 } else {
568 $type = 'frameshift';
569 }
570 if (not $m ) {
571 $type .= ', '. 'deletion';
572 }
573 elsif (not $o ) {
574 $type .= ', '. 'insertion';
575 }
576 else {
577 $type .= ', '. 'complex';
578 }
579 if ($self->codon_ori && $ct->is_ter_codon($self->codon_ori) ) {
580 $type .= ', '. 'termination codon';
581 }
582 }
583
584 $self->{'label'} = $type;
585 return $self->{'label'};
586 }
587
588
589 =head2 _change_codon_pos
590
591 Title : _change_codon_pos
592 Usage : $newCodonPos = _change_codon_pos($myCodonPos, 5)
593 Function:
594
595 Keeps track of the codon position in a changeing sequence
596
597 Returns : codon_pos = integer 1, 2 or 3
598 Args : valid codon position
599 signed integer offset to a new location in sequence
600
601 =cut
602
603
604 sub _change_codon_pos ($$) {
605 my ($cpos, $i) = @_;
606
607 $cpos = ($cpos + $i%3)%3;
608 if ($cpos > 3 ) {
609 $cpos = $cpos - 3;
610 }
611 elsif ($cpos < 1 ) {
612 $cpos = $cpos + 3;
613 }
614 return $cpos;
615 }
616
617 1;