Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Variation/RNAChange.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: RNAChange.pm,v 1.10 2002/10/22 07:38:49 lapp Exp $ | |
2 # | |
3 # BioPerl module for Bio::Variation::RNAChange | |
4 # | |
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> | |
6 # | |
7 # Copyright Heikki Lehvaslaiho | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Variation::RNAChange - Sequence change class for RNA level | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 $rnachange = Bio::Variation::RNAChange->new | |
20 ('-start' => $start, | |
21 '-end' => $end, | |
22 '-length' => $len, | |
23 '-codon_pos' => $cp, | |
24 '-upStreamSeq' => $upflank, | |
25 '-dnStreamSeq' => $dnflank, | |
26 '-proof' => $proof, | |
27 '-isMutation' => 1, | |
28 '-mut_number' => $mut_number | |
29 ); | |
30 $a1 = Bio::Variation::Allele->new; | |
31 $a1->seq('a'); | |
32 $rnachange->allele_ori($a1); | |
33 my $a2 = Bio::Variation::Allele->new; | |
34 $a2->seq('t'); | |
35 $rnachange->add_Allele($a2); | |
36 $rnachange->allele_mut($a2); | |
37 | |
38 print "The codon change is ", $rnachange->codon_ori, | |
39 ">", $rnachange->codon_mut, "\n"; | |
40 | |
41 # add it to a SeqDiff container object | |
42 $seqdiff->add_Variant($rnachange); | |
43 | |
44 # and create links to and from DNA level mutation objects | |
45 $rnachange->DNAMutation($dnamut); | |
46 $dnamut->RNAChange($rnachange); | |
47 | |
48 =head1 DESCRIPTION | |
49 | |
50 The instantiable class Bio::Variation::DNAMutation describes basic | |
51 sequence changes at RNA molecule level. It uses methods defined in | |
52 superclass Bio::Variation::VariantI. See L<Bio::Variation::VariantI> | |
53 for details. | |
54 | |
55 You are normally expected to create a corresponding | |
56 Bio::Variation::DNAMutation object even if mutation is defined at | |
57 RNA level. The numbering follows then cDNA numbering. Link the | |
58 DNAMutation object to the RNAChange object using the method | |
59 DNAMutation(). If the variation described by a RNAChange object is | |
60 translated, link the corresponding Bio::Variation::AAChange object | |
61 to it using method AAChange(). See L<Bio::Variation::DNAMutation> and | |
62 L<Bio::Variation::AAChange> for more information. | |
63 | |
64 | |
65 =head1 FEEDBACK | |
66 | |
67 =head2 Mailing Lists | |
68 | |
69 User feedback is an integral part of the evolution of this and other | |
70 Bioperl modules. Send your comments and suggestions preferably to the | |
71 Bioperl mailing lists Your participation is much appreciated. | |
72 | |
73 bioperl-l@bioperl.org - General discussion | |
74 http://bio.perl.org/MailList.html - About the mailing lists | |
75 | |
76 =head2 Reporting Bugs | |
77 | |
78 report bugs to the Bioperl bug tracking system to help us keep track | |
79 the bugs and their resolution. Bug reports can be submitted via | |
80 email or the web: | |
81 | |
82 bioperl-bugs@bio.perl.org | |
83 http://bugzilla.bioperl.org/ | |
84 | |
85 =head1 AUTHOR - Heikki Lehvaslaiho | |
86 | |
87 Email: heikki@ebi.ac.uk | |
88 Address: | |
89 | |
90 EMBL Outstation, European Bioinformatics Institute | |
91 Wellcome Trust Genome Campus, Hinxton | |
92 Cambs. CB10 1SD, United Kingdom | |
93 | |
94 =head1 APPENDIX | |
95 | |
96 The rest of the documentation details each of the object | |
97 methods. Internal methods are usually preceded with a _ | |
98 | |
99 =cut | |
100 | |
101 | |
102 # Let the code begin... | |
103 | |
104 | |
105 package Bio::Variation::RNAChange; | |
106 use vars qw(@ISA); | |
107 use strict; | |
108 | |
109 # Object preamble - inheritance | |
110 my $VERSION=1.0; | |
111 use Bio::Variation::VariantI; | |
112 use Bio::Tools::CodonTable; | |
113 | |
114 @ISA = qw( Bio::Variation::VariantI ); | |
115 | |
116 sub new { | |
117 my($class,@args) = @_; | |
118 my $self = $class->SUPER::new(@args); | |
119 | |
120 my ($start, $end, $length, $strand, $primary, $source, | |
121 $frame, $score, $gff_string, | |
122 $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq, | |
123 $label, $status, $proof, $region, $region_value, $region_dist, $numbering, | |
124 $mut_number, $isMutation, | |
125 $codon_ori, $codon_mut, $codon_pos, $codon_table, $cds_end) = | |
126 $self->_rearrange([qw(START | |
127 END | |
128 LENGTH | |
129 STRAND | |
130 PRIMARY | |
131 SOURCE | |
132 FRAME | |
133 SCORE | |
134 GFF_STRING | |
135 ALLELE_ORI | |
136 ALLELE_MUT | |
137 UPSTREAMSEQ | |
138 DNSTREAMSEQ | |
139 LABEL | |
140 STATUS | |
141 PROOF | |
142 REGION | |
143 REGION_VALUE | |
144 REGION_DIST | |
145 NUMBERING | |
146 MUT_NUMBER | |
147 ISMUTATION | |
148 CODON_ORI | |
149 CODON_MUT | |
150 CODON_POS | |
151 TRANSLATION_TABLE | |
152 CDS_END | |
153 )],@args); | |
154 | |
155 $self->primary_tag("Variation"); | |
156 | |
157 $self->{ 'alleles' } = []; | |
158 | |
159 $start && $self->start($start); | |
160 $end && $self->end($end); | |
161 $length && $self->length($length); | |
162 $strand && $self->strand($strand); | |
163 $primary && $self->primary_tag($primary); | |
164 $source && $self->source_tag($source); | |
165 $frame && $self->frame($frame); | |
166 $score && $self->score($score); | |
167 $gff_string && $self->_from_gff_string($gff_string); | |
168 | |
169 $allele_ori && $self->allele_ori($allele_ori); | |
170 $allele_mut && $self->allele_mut($allele_mut); | |
171 $upstreamseq && $self->upStreamSeq($upstreamseq); | |
172 $dnstreamseq && $self->dnStreamSeq($dnstreamseq); | |
173 | |
174 $label && $self->label($label); | |
175 $status && $self->status($status); | |
176 $proof && $self->proof($proof); | |
177 $region && $self->region($region); | |
178 $region_value && $self->region_value($region_value); | |
179 $region_dist && $self->region_dist($region_dist); | |
180 $numbering && $self->numbering($numbering); | |
181 $mut_number && $self->mut_number($mut_number); | |
182 $isMutation && $self->isMutation($isMutation); | |
183 | |
184 $codon_ori && $self->codon_ori($codon_ori); | |
185 $codon_mut && $self->codon_mut($codon_mut); | |
186 $codon_pos && $self->codon_pos($codon_pos); | |
187 $codon_table && $self->codon_table($codon_table); | |
188 $cds_end && $self->cds_end($cds_end); | |
189 return $self; # success - we hope! | |
190 } | |
191 | |
192 | |
193 =head2 codon_ori | |
194 | |
195 Title : codon_ori | |
196 Usage : $obj->codon_ori(); | |
197 Function: | |
198 | |
199 Sets and returns codon_ori triplet. If value is not set, | |
200 creates the codon triplet from the codon position and | |
201 flanking sequences. The string has to be three characters | |
202 long. The character content is not checked. | |
203 | |
204 Example : | |
205 Returns : string | |
206 Args : string | |
207 | |
208 =cut | |
209 | |
210 sub codon_ori { | |
211 my ($self,$value) = @_; | |
212 if (defined $value) { | |
213 if (length $value != 3) { | |
214 $self->warn("Codon string \"$value\" is not three characters long"); | |
215 } | |
216 $self->{'codon_ori'} = $value; | |
217 } | |
218 elsif (! $self->{'codon_ori'}) { | |
219 my $codon_ori = ''; | |
220 | |
221 if ($self->region eq 'coding' && $self->start && $self->start >= 1) { | |
222 | |
223 $self->warn('Codon position is not defined') | |
224 if not defined $self->codon_pos; | |
225 $self->warn('Upstream flanking sequence is not defined') | |
226 if not defined $self->upStreamSeq; | |
227 $self->warn('Downstream flanking sequence is not defined') | |
228 if not defined $self->dnStreamSeq; | |
229 | |
230 my $cpos = $self->codon_pos; | |
231 $codon_ori = substr($self->upStreamSeq, -$cpos +1 , $cpos-1); | |
232 $codon_ori .= substr($self->allele_ori->seq, 0, 4-$cpos) | |
233 if $self->allele_ori and $self->allele_ori->seq; | |
234 $codon_ori .= substr($self->dnStreamSeq, 0, 3-length($codon_ori)); | |
235 } | |
236 $self->{'codon_ori'} = lc $codon_ori; | |
237 } | |
238 return $self->{'codon_ori'}; | |
239 } | |
240 | |
241 | |
242 =head2 codon_mut | |
243 | |
244 Title : codon_mut | |
245 Usage : $obj->codon_mut(); | |
246 Function: | |
247 | |
248 Sets and returns codon_mut triplet. If value is not | |
249 set, creates the codon triplet from the codon position and | |
250 flanking sequences. Return undef for other than point mutations. | |
251 | |
252 Example : | |
253 Returns : string | |
254 Args : string | |
255 | |
256 =cut | |
257 | |
258 | |
259 sub codon_mut { | |
260 my ($self,$value) = @_; | |
261 if (defined $value) { | |
262 if (length $value != 3 ) { | |
263 $self->warn("Codon string \"$value\" is not three characters long"); | |
264 } | |
265 $self->{'codon_mut'} = $value; | |
266 } | |
267 else { | |
268 my $codon_mut = ''; | |
269 if ($self->allele_ori->seq and $self->allele_mut->seq and | |
270 CORE::length($self->allele_ori->seq) == 1 and | |
271 CORE::length($self->allele_mut->seq) == 1 and | |
272 $self->region eq 'coding' and $self->start >= 1) { | |
273 | |
274 $self->warn('Codon position is not defined') | |
275 if not defined $self->codon_pos; | |
276 $self->warn('Upstream flanking sequnce is not defined') | |
277 if not defined $self->upStreamSeq; | |
278 $self->warn('Downstream flanking sequnce is not defined') | |
279 if not defined $self->dnStreamSeq; | |
280 $self->throw('Mutated allele is not defined') | |
281 if not defined $self->allele_mut; | |
282 | |
283 my $cpos = $self->codon_pos; | |
284 $codon_mut = substr($self->upStreamSeq, -$cpos +1 , $cpos-1); | |
285 $codon_mut .= substr($self->allele_mut->seq, 0, 4-$cpos) | |
286 if $self->allele_mut and $self->allele_mut->seq; | |
287 $codon_mut .= substr($self->dnStreamSeq, 0, 3-length($codon_mut)); | |
288 | |
289 $self->{'codon_mut'} = lc $codon_mut; | |
290 } | |
291 } | |
292 return $self->{'codon_mut'}; | |
293 } | |
294 | |
295 | |
296 =head2 codon_pos | |
297 | |
298 Title : codon_pos | |
299 Usage : $obj->codon_pos(); | |
300 Function: | |
301 | |
302 Sets and returns the position of the mutation start in the | |
303 codon. If value is not set, returns false. | |
304 | |
305 Example : | |
306 Returns : 1,2,3 | |
307 Args : none if get, the new value if set | |
308 | |
309 =cut | |
310 | |
311 | |
312 sub codon_pos { | |
313 my ($self,$value) = @_; | |
314 if( defined $value) { | |
315 if ( $value !~ /[123]/ ) { | |
316 $self->throw("'$value' is not a valid codon position"); | |
317 } | |
318 $self->{'codon_pos'} = $value; | |
319 } | |
320 return $self->{'codon_pos'}; | |
321 } | |
322 | |
323 | |
324 =head2 codon_table | |
325 | |
326 Title : codon_table | |
327 Usage : $obj->codon_table(); | |
328 Function: | |
329 | |
330 Sets and returns the codon table id of the RNA | |
331 If value is not set, returns 1, 'universal' code, as the default. | |
332 | |
333 Example : | |
334 Returns : integer | |
335 Args : none if get, the new value if set | |
336 | |
337 =cut | |
338 | |
339 | |
340 sub codon_table { | |
341 my ($self,$value) = @_; | |
342 if( defined $value) { | |
343 if ( not $value =~ /^\d$/ ) { | |
344 $self->throw("'$value' is not a valid codon table ID\n". | |
345 "Has to be a positive integer. Defaulting to 1\n"); | |
346 } else { | |
347 $self->{'codon_table'} = $value; | |
348 } | |
349 } | |
350 if( ! exists $self->{'codon_table'} ) { | |
351 return 1; | |
352 } else { | |
353 return $self->{'codon_table'}; | |
354 } | |
355 } | |
356 | |
357 | |
358 =head2 DNAMutation | |
359 | |
360 Title : DNAMutation | |
361 Usage : $mutobj = $obj->DNAMutation; | |
362 : $mutobj = $obj->DNAMutation($objref); | |
363 Function: Returns or sets the link-reference to a mutation/change object. | |
364 If there is no link, it will return undef | |
365 Returns : an obj_ref or undef | |
366 | |
367 =cut | |
368 | |
369 | |
370 sub DNAMutation { | |
371 my ($self,$value) = @_; | |
372 if (defined $value) { | |
373 if( ! $value->isa('Bio::Variation::DNAMutation') ) { | |
374 $self->throw("Is not a Bio::Variation::DNAMutation object but a [$self]"); | |
375 return (undef); | |
376 } | |
377 else { | |
378 $self->{'DNAMutation'} = $value; | |
379 } | |
380 } | |
381 unless (exists $self->{'DNAMutation'}) { | |
382 return (undef); | |
383 } else { | |
384 return $self->{'DNAMutation'}; | |
385 } | |
386 } | |
387 | |
388 | |
389 =head2 AAChange | |
390 | |
391 Title : AAChange | |
392 Usage : $mutobj = $obj->AAChange; | |
393 : $mutobj = $obj->AAChange($objref); | |
394 Function: Returns or sets the link-reference to a mutation/change object. | |
395 If there is no link, it will return undef | |
396 Returns : an obj_ref or undef | |
397 | |
398 =cut | |
399 | |
400 sub AAChange { | |
401 my ($self,$value) = @_; | |
402 if (defined $value) { | |
403 if( ! $value->isa('Bio::Variation::AAChange') ) { | |
404 $self->throw("Is not a Bio::Variation::AAChange object but a [$self]"); | |
405 return (undef); | |
406 } | |
407 else { | |
408 $self->{'AAChange'} = $value; | |
409 } | |
410 } | |
411 unless (exists $self->{'AAChange'}) { | |
412 return (undef); | |
413 } else { | |
414 return $self->{'AAChange'}; | |
415 } | |
416 } | |
417 | |
418 | |
419 =head2 exons_modified | |
420 | |
421 Title : exons_modified | |
422 Usage : $modified = $obj->exons_modified; | |
423 : $modified = $obj->exons_modified(1); | |
424 Function: Returns or sets information (example: a simple boolean flag) about | |
425 the modification of exons as a result of a mutation. | |
426 | |
427 =cut | |
428 | |
429 sub exons_modified { | |
430 my ($self,$value)=@_; | |
431 if (defined($value)) { | |
432 $self->{'exons_modified'}=$value; | |
433 } | |
434 return ($self->{'exons_modified'}); | |
435 } | |
436 | |
437 =head2 region | |
438 | |
439 Title : region | |
440 Usage : $obj->region(); | |
441 Function: | |
442 | |
443 Sets and returns the name of the sequence region type or | |
444 protein domain at this location. If value is not set, | |
445 returns false. | |
446 | |
447 Example : | |
448 Returns : string | |
449 Args : string | |
450 | |
451 =cut | |
452 | |
453 | |
454 | |
455 sub region { | |
456 my ($self,$value) = @_; | |
457 if( defined $value) { | |
458 $self->{'region'} = $value; | |
459 } | |
460 elsif (not defined $self->{'region'}) { | |
461 | |
462 $self->warn('Mutation start position is not defined') | |
463 if not defined $self->start and $self->verbose; | |
464 $self->warn('Mutation end position is not defined') | |
465 if not defined $self->end and $self->verbose; | |
466 $self->warn('Length of the CDS is not defined, the mutation can be beyond coding region!') | |
467 if not defined $self->cds_end and $self->verbose; | |
468 | |
469 $self->region('coding'); | |
470 if ($self->end && $self->end < 0 ){ | |
471 $self->region('5\'UTR'); | |
472 } | |
473 elsif ($self->start && $self->cds_end && $self->start > $self->cds_end ) { | |
474 $self->region('3\'UTR'); | |
475 } | |
476 } | |
477 return $self->{'region'}; | |
478 } | |
479 | |
480 =head2 cds_end | |
481 | |
482 Title : cds_end | |
483 Usage : $cds_end = $obj->get_cds_end(); | |
484 Function: | |
485 | |
486 Sets or returns the cds_end from the beginning of the DNA sequence | |
487 to the coordinate start used to describe variants. | |
488 Should be the location of the last nucleotide of the | |
489 terminator codon of the gene. | |
490 | |
491 Example : | |
492 Returns : value of cds_end, a scalar | |
493 Args : | |
494 | |
495 =cut | |
496 | |
497 | |
498 | |
499 sub cds_end { | |
500 my ($self, $value) = @_; | |
501 if (defined $value) { | |
502 $self->warn("[$value] is not a good value for sequence position") | |
503 if not $value =~ /^\d+$/ ; | |
504 $self->{'cds_end'} = $value; | |
505 } else { | |
506 $self->{'cds_end'} = $self->SeqDiff->cds_end if $self->SeqDiff; | |
507 } | |
508 return $self->{'cds_end'}; | |
509 } | |
510 | |
511 | |
512 =head2 label | |
513 | |
514 Title : label | |
515 Usage : $obj->label(); | |
516 Function: | |
517 | |
518 Sets and returns mutation event label(s). If value is not | |
519 set, or no argument is given returns false. Each | |
520 instantiable subclass of L<Bio::Variation::VariantI> needs | |
521 to implement this method. Valid values are listed in | |
522 'Mutation event controlled vocabulary' in | |
523 http://www.ebi.ac.uk/mutations/recommendations/mutevent.html. | |
524 | |
525 Example : | |
526 Returns : string | |
527 Args : string | |
528 | |
529 =cut | |
530 | |
531 sub label { | |
532 my ($self) = @_; | |
533 my ($o, $m, $type); | |
534 $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq; | |
535 $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq; | |
536 | |
537 my $ct = Bio::Tools::CodonTable -> new ( -id => $self->codon_table ); | |
538 if ($o and $m and CORE::length($o) == 1 and CORE::length($m) == 1) { | |
539 if (defined $self->AAChange) { | |
540 if ($self->start > 0 and $self->start < 4 ) { | |
541 $type = 'initiation codon'; | |
542 } | |
543 elsif ($self->codon_ori && $ct->is_ter_codon($self->codon_ori) ) { | |
544 #AAChange->allele_ori and $self->AAChange->allele_ori->seq eq '*' ) { | |
545 $type = 'termination codon'; | |
546 } | |
547 elsif ($self->codon_mut && $ct->is_ter_codon($self->codon_mut) ) { | |
548 #elsif ($self->AAChange->allele_mut and $self->AAChange->allele_mut->seq eq "*") { | |
549 $type = 'nonsense'; | |
550 } | |
551 elsif ($o and $m and ($o eq $m or | |
552 $self->AAChange->allele_ori->seq eq | |
553 $self->AAChange->allele_mut->seq)) { | |
554 $type = 'silent'; | |
555 } else { | |
556 $type = 'missense'; | |
557 } | |
558 } else { | |
559 $type = 'unknown'; | |
560 } | |
561 } else { | |
562 my $len = 0; | |
563 $len = CORE::length($o) if $o; | |
564 $len -= CORE::length($m) if $m; | |
565 if ($len%3 == 0 ) { | |
566 $type = 'inframe'; | |
567 } else { | |
568 $type = 'frameshift'; | |
569 } | |
570 if (not $m ) { | |
571 $type .= ', '. 'deletion'; | |
572 } | |
573 elsif (not $o ) { | |
574 $type .= ', '. 'insertion'; | |
575 } | |
576 else { | |
577 $type .= ', '. 'complex'; | |
578 } | |
579 if ($self->codon_ori && $ct->is_ter_codon($self->codon_ori) ) { | |
580 $type .= ', '. 'termination codon'; | |
581 } | |
582 } | |
583 | |
584 $self->{'label'} = $type; | |
585 return $self->{'label'}; | |
586 } | |
587 | |
588 | |
589 =head2 _change_codon_pos | |
590 | |
591 Title : _change_codon_pos | |
592 Usage : $newCodonPos = _change_codon_pos($myCodonPos, 5) | |
593 Function: | |
594 | |
595 Keeps track of the codon position in a changeing sequence | |
596 | |
597 Returns : codon_pos = integer 1, 2 or 3 | |
598 Args : valid codon position | |
599 signed integer offset to a new location in sequence | |
600 | |
601 =cut | |
602 | |
603 | |
604 sub _change_codon_pos ($$) { | |
605 my ($cpos, $i) = @_; | |
606 | |
607 $cpos = ($cpos + $i%3)%3; | |
608 if ($cpos > 3 ) { | |
609 $cpos = $cpos - 3; | |
610 } | |
611 elsif ($cpos < 1 ) { | |
612 $cpos = $cpos + 3; | |
613 } | |
614 return $cpos; | |
615 } | |
616 | |
617 1; |