Mercurial > repos > willmclaren > ensembl_vep
comparison variant_effect_predictor/Bio/LiveSeq/Mutator.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:21066c0abaf5 |
---|---|
1 # $Id: Mutator.pm,v 1.26 2002/10/22 07:38:34 lapp Exp $ | |
2 # | |
3 # bioperl module for Bio::LiveSeq::Mutator | |
4 # | |
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> | |
6 # | |
7 # Copyright Joseph Insana | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 # | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::LiveSeq::Mutator - Package mutating LiveSequences | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 # $gene is a Bio::LiveSeq::Gene object | |
20 my $mutate = Bio::LiveSeq::Mutator->new('-gene' => $gene, | |
21 '-numbering' => "coding" | |
22 ); | |
23 # $mut is a Bio::LiveSeq::Mutation object | |
24 $mutate->add_Mutation($mut); | |
25 # $results is a Bio::Variation::SeqDiff object | |
26 my $results=$mutate->change_gene(); | |
27 if ($results) { | |
28 my $out = Bio::Variation::IO->new( '-format' => 'flat'); | |
29 $out->write($results); | |
30 } | |
31 | |
32 =head1 DESCRIPTION | |
33 | |
34 This class mutates Bio::LiveSeq::Gene objects and returns a | |
35 Bio::Variation::SeqDiff object. Mutations are described as | |
36 Bio::LiveSeq::Mutation objects. See L<Bio::LiveSeq::Gene>, | |
37 L<Bio::Variation::SeqDiff>, and L<Bio::LiveSeq::Mutation> for details. | |
38 | |
39 =head1 FEEDBACK | |
40 | |
41 | |
42 User feedback is an integral part of the evolution of this and other | |
43 Bioperl modules. Send your comments and suggestions preferably to the | |
44 Bioperl mailing lists Your participation is much appreciated. | |
45 | |
46 bioperl-l@bioperl.org - General discussion | |
47 http://bio.perl.org/MailList.html - About the mailing lists | |
48 | |
49 =head2 Reporting Bugs | |
50 | |
51 report bugs to the Bioperl bug tracking system to help us keep track | |
52 the bugs and their resolution. Bug reports can be submitted via | |
53 email or the web: | |
54 | |
55 bioperl-bugs@bio.perl.org | |
56 http://bugzilla.bioperl.org/ | |
57 | |
58 =head1 AUTHOR - Heikki Lehvaslaiho & Joseph A.L. Insana | |
59 | |
60 Email: heikki@ebi.ac.uk | |
61 insana@ebi.ac.uk, jinsana@gmx.net | |
62 | |
63 Address: | |
64 | |
65 EMBL Outstation, European Bioinformatics Institute | |
66 Wellcome Trust Genome Campus, Hinxton | |
67 Cambs. CB10 1SD, United Kingdom | |
68 | |
69 =head1 APPENDIX | |
70 | |
71 The rest of the documentation details each of the object | |
72 methods. Internal methods are usually preceded with a _ | |
73 | |
74 =cut | |
75 | |
76 # Let the code begin... | |
77 | |
78 package Bio::LiveSeq::Mutator; | |
79 use vars qw(@ISA); | |
80 use strict; | |
81 | |
82 use vars qw($VERSION @ISA); | |
83 use Bio::Variation::SeqDiff; | |
84 use Bio::Variation::DNAMutation; | |
85 use Bio::Variation::RNAChange; | |
86 use Bio::Variation::AAChange; | |
87 use Bio::Variation::Allele; | |
88 use Bio::LiveSeq::Mutation; | |
89 | |
90 #use integer; | |
91 # Object preamble - inheritance | |
92 | |
93 use Bio::Root::Root; | |
94 | |
95 @ISA = qw( Bio::Root::Root ); | |
96 | |
97 sub new { | |
98 my($class,@args) = @_; | |
99 my $self; | |
100 $self = {}; | |
101 bless $self, $class; | |
102 | |
103 my ($gene, $numbering) = | |
104 $self->_rearrange([qw(GENE | |
105 NUMBERING | |
106 )], | |
107 @args); | |
108 | |
109 $self->{ 'mutations' } = []; | |
110 | |
111 $gene && $self->gene($gene); | |
112 $numbering && $self->numbering($numbering); | |
113 | |
114 #class constant; | |
115 $self->{'flanklen'} = 25; | |
116 return $self; # success - we hope! | |
117 } | |
118 | |
119 =head2 gene | |
120 | |
121 Title : gene | |
122 Usage : $mutobj = $obj->gene; | |
123 : $mutobj = $obj->gene($objref); | |
124 Function: | |
125 | |
126 Returns or sets the link-reference to a | |
127 Bio::LiveSeq::Gene object. If no value has ben set, it | |
128 will return undef | |
129 | |
130 Returns : an object reference or undef | |
131 Args : a Bio::LiveSeq::Gene | |
132 | |
133 See L<Bio::LiveSeq::Gene> for more information. | |
134 | |
135 =cut | |
136 | |
137 sub gene { | |
138 my ($self,$value) = @_; | |
139 if (defined $value) { | |
140 if( ! $value->isa('Bio::LiveSeq::Gene') ) { | |
141 $self->throw("Is not a Bio::LiveSeq::Gene object but a [$value]"); | |
142 return undef; | |
143 } | |
144 else { | |
145 $self->{'gene'} = $value; | |
146 } | |
147 } | |
148 unless (exists $self->{'gene'}) { | |
149 return (undef); | |
150 } else { | |
151 return $self->{'gene'}; | |
152 } | |
153 } | |
154 | |
155 | |
156 =head2 numbering | |
157 | |
158 Title : numbering | |
159 Usage : $obj->numbering(); | |
160 Function: | |
161 | |
162 Sets and returns coordinate system used in positioning the | |
163 mutations. See L<change_gene> for details. | |
164 | |
165 Example : | |
166 Returns : string | |
167 Args : string (coding [transcript number] | gene | entry) | |
168 | |
169 =cut | |
170 | |
171 | |
172 sub numbering { | |
173 my ($self,$value) = @_; | |
174 if( defined $value) { | |
175 if ($value =~ /(coding)( )?(\d+)?/ or $value eq 'entry' or $value eq 'gene') { | |
176 $self->{'numbering'} = $value; | |
177 } else { # defaulting to 'coding' | |
178 $self->{'numbering'} = 'coding'; | |
179 } | |
180 } | |
181 unless (exists $self->{'numbering'}) { | |
182 return 'coding'; | |
183 } else { | |
184 return $self->{'numbering'}; | |
185 } | |
186 } | |
187 | |
188 =head2 add_Mutation | |
189 | |
190 Title : add_Mutation | |
191 Usage : $self->add_Mutation($ref) | |
192 Function: adds a Bio::LiveSeq::Mutation object | |
193 Example : | |
194 Returns : | |
195 Args : a Bio::LiveSeq::Mutation | |
196 | |
197 See L<Bio::LiveSeq::Mutation> for more information. | |
198 | |
199 =cut | |
200 | |
201 sub add_Mutation{ | |
202 my ($self,$value) = @_; | |
203 if( $value->isa('Bio::Liveseq::Mutation') ) { | |
204 my $com = ref $value; | |
205 $self->throw("Is not a Mutation object but a [$com]" ); | |
206 return undef; | |
207 } | |
208 if (! $value->pos) { | |
209 $self->warn("No value for mutation position in the sequence!"); | |
210 return undef; | |
211 } | |
212 if (! $value->seq && ! $value->len) { | |
213 $self->warn("Either mutated sequence or length of the deletion must be given!"); | |
214 return undef; | |
215 } | |
216 push(@{$self->{'mutations'}},$value); | |
217 } | |
218 | |
219 =head2 each_Mutation | |
220 | |
221 Title : each_Mutation | |
222 Usage : foreach $ref ( $a->each_Mutation ) | |
223 Function: gets an array of Bio::LiveSeq::Mutation objects | |
224 Example : | |
225 Returns : array of Mutations | |
226 Args : | |
227 | |
228 See L<Bio::LiveSeq::Mutation> for more information. | |
229 | |
230 =cut | |
231 | |
232 sub each_Mutation{ | |
233 my ($self) = @_; | |
234 return @{$self->{'mutations'}}; | |
235 } | |
236 | |
237 | |
238 =head2 mutation | |
239 | |
240 Title : mutation | |
241 Usage : $mutobj = $obj->mutation; | |
242 : $mutobj = $obj->mutation($objref); | |
243 Function: | |
244 | |
245 Returns or sets the link-reference to the current mutation | |
246 object. If the value is not set, it will return undef. | |
247 Internal method. | |
248 | |
249 Returns : an object reference or undef | |
250 | |
251 =cut | |
252 | |
253 | |
254 sub mutation { | |
255 my ($self,$value) = @_; | |
256 if (defined $value) { | |
257 if( ! $value->isa('Bio::LiveSeq::Mutation') ) { | |
258 $self->throw("Is not a Bio::LiveSeq::Mutation object but a [$value]"); | |
259 return undef; | |
260 } | |
261 else { | |
262 $self->{'mutation'} = $value; | |
263 } | |
264 } | |
265 unless (exists $self->{'mutation'}) { | |
266 return (undef); | |
267 } else { | |
268 return $self->{'mutation'}; | |
269 } | |
270 } | |
271 | |
272 =head2 DNA | |
273 | |
274 Title : DNA | |
275 Usage : $mutobj = $obj->DNA; | |
276 : $mutobj = $obj->DNA($objref); | |
277 Function: | |
278 | |
279 Returns or sets the reference to the LiveSeq object holding | |
280 the reference sequence. If there is no link, it will return | |
281 undef. | |
282 Internal method. | |
283 | |
284 Returns : an object reference or undef | |
285 | |
286 =cut | |
287 | |
288 sub DNA { | |
289 my ($self,$value) = @_; | |
290 if (defined $value) { | |
291 if( ! $value->isa('Bio::LiveSeq::DNA') and ! $value->isa('Bio::LiveSeq::Transcript') ) { | |
292 $self->throw("Is not a Bio::LiveSeq::DNA/Transcript object but a [$value]"); | |
293 return undef; | |
294 } | |
295 else { | |
296 $self->{'DNA'} = $value; | |
297 } | |
298 } | |
299 unless (exists $self->{'DNA'}) { | |
300 return (undef); | |
301 } else { | |
302 return $self->{'DNA'}; | |
303 } | |
304 } | |
305 | |
306 | |
307 =head2 RNA | |
308 | |
309 Title : RNA | |
310 Usage : $mutobj = $obj->RNA; | |
311 : $mutobj = $obj->RNA($objref); | |
312 Function: | |
313 | |
314 Returns or sets the reference to the LiveSeq object holding | |
315 the reference sequence. If the value is not set, it will return | |
316 undef. | |
317 Internal method. | |
318 | |
319 Returns : an object reference or undef | |
320 | |
321 =cut | |
322 | |
323 | |
324 sub RNA { | |
325 my ($self,$value) = @_; | |
326 if (defined $value) { | |
327 if( ! $value->isa('Bio::LiveSeq::Transcript') ) { | |
328 $self->throw("Is not a Bio::LiveSeq::RNA/Transcript object but a [$value]"); | |
329 return undef; | |
330 } | |
331 else { | |
332 $self->{'RNA'} = $value; | |
333 } | |
334 } | |
335 unless (exists $self->{'RNA'}) { | |
336 return (undef); | |
337 } else { | |
338 return $self->{'RNA'}; | |
339 } | |
340 } | |
341 | |
342 | |
343 =head2 dnamut | |
344 | |
345 Title : dnamut | |
346 Usage : $mutobj = $obj->dnamut; | |
347 : $mutobj = $obj->dnamut($objref); | |
348 Function: | |
349 | |
350 Returns or sets the reference to the current DNAMutation object. | |
351 If the value is not set, it will return undef. | |
352 Internal method. | |
353 | |
354 Returns : a Bio::Variation::DNAMutation object or undef | |
355 | |
356 See L<Bio::Variation::DNAMutation> for more information. | |
357 | |
358 =cut | |
359 | |
360 | |
361 sub dnamut { | |
362 my ($self,$value) = @_; | |
363 if (defined $value) { | |
364 if( ! $value->isa('Bio::Variation::DNAMutation') ) { | |
365 $self->throw("Is not a Bio::Variation::DNAMutation object but a [$value]"); | |
366 return undef; | |
367 } | |
368 else { | |
369 $self->{'dnamut'} = $value; | |
370 } | |
371 } | |
372 unless (exists $self->{'dnamut'}) { | |
373 return (undef); | |
374 } else { | |
375 return $self->{'dnamut'}; | |
376 } | |
377 } | |
378 | |
379 | |
380 =head2 rnachange | |
381 | |
382 Title : rnachange | |
383 Usage : $mutobj = $obj->rnachange; | |
384 : $mutobj = $obj->rnachange($objref); | |
385 Function: | |
386 | |
387 Returns or sets the reference to the current RNAChange object. | |
388 If the value is not set, it will return undef. | |
389 Internal method. | |
390 | |
391 Returns : a Bio::Variation::RNAChange object or undef | |
392 | |
393 See L<Bio::Variation::RNAChange> for more information. | |
394 | |
395 =cut | |
396 | |
397 | |
398 sub rnachange { | |
399 my ($self,$value) = @_; | |
400 if (defined $value) { | |
401 if( ! $value->isa('Bio::Variation::RNAChange') ) { | |
402 $self->throw("Is not a Bio::Variation::RNAChange object but a [$value]"); | |
403 return undef; | |
404 } | |
405 else { | |
406 $self->{'rnachange'} = $value; | |
407 } | |
408 } | |
409 unless (exists $self->{'rnachange'}) { | |
410 return (undef); | |
411 } else { | |
412 return $self->{'rnachange'}; | |
413 } | |
414 } | |
415 | |
416 | |
417 =head2 aachange | |
418 | |
419 Title : aachange | |
420 Usage : $mutobj = $obj->aachange; | |
421 : $mutobj = $obj->aachange($objref); | |
422 Function: | |
423 | |
424 Returns or sets the reference to the current AAChange object. | |
425 If the value is not set, it will return undef. | |
426 Internal method. | |
427 | |
428 Returns : a Bio::Variation::AAChange object or undef | |
429 | |
430 See L<Bio::Variation::AAChange> for more information. | |
431 | |
432 =cut | |
433 | |
434 | |
435 sub aachange { | |
436 my ($self,$value) = @_; | |
437 if (defined $value) { | |
438 if( ! $value->isa('Bio::Variation::AAChange') ) { | |
439 $self->throw("Is not a Bio::Variation::AAChange object but a [$value]"); | |
440 return undef; | |
441 } | |
442 else { | |
443 $self->{'aachange'} = $value; | |
444 } | |
445 } | |
446 unless (exists $self->{'aachange'}) { | |
447 return (undef); | |
448 } else { | |
449 return $self->{'aachange'}; | |
450 } | |
451 } | |
452 | |
453 | |
454 =head2 exons | |
455 | |
456 Title : exons | |
457 Usage : $mutobj = $obj->exons; | |
458 : $mutobj = $obj->exons($objref); | |
459 Function: | |
460 | |
461 Returns or sets the reference to a current array of Exons. | |
462 If the value is not set, it will return undef. | |
463 Internal method. | |
464 | |
465 Returns : an array of Bio::LiveSeq::Exon objects or undef | |
466 | |
467 See L<Bio::LiveSeq::Exon> for more information. | |
468 | |
469 =cut | |
470 | |
471 | |
472 sub exons { | |
473 my ($self,$value) = @_; | |
474 if (defined $value) { | |
475 $self->{'exons'} = $value; | |
476 } | |
477 unless (exists $self->{'exons'}) { | |
478 return (undef); | |
479 } else { | |
480 return $self->{'exons'}; | |
481 } | |
482 } | |
483 | |
484 =head2 change_gene_with_alignment | |
485 | |
486 Title : change_gene_with_alignment | |
487 Usage : $results=$mutate->change_gene_with_alignment($aln); | |
488 | |
489 Function: | |
490 | |
491 Returns a Bio::Variation::SeqDiff object containing the | |
492 results of the changes in the alignment. The alignment has | |
493 to be pairwise and have one sequence named 'QUERY', the | |
494 other one is assumed to be a part of the sequence from | |
495 $gene. | |
496 | |
497 This method offers a shortcut to change_gene and | |
498 automates the creation of Bio::LiveSeq::Mutation objects. | |
499 Use it with almost identical sequnces, e.g. to locate a SNP. | |
500 | |
501 Args : Bio::SimpleAlign object representing a short local alignment | |
502 Returns : Bio::Variation::SeqDiff object or 0 on error | |
503 | |
504 See L<Bio::LiveSeq::Mutation>, L<Bio::SimpleAlign>, and | |
505 L<Bio::Variation::SeqDiff> for more information. | |
506 | |
507 =cut | |
508 | |
509 sub change_gene_with_alignment { | |
510 my ($self, $aln) = @_; | |
511 | |
512 # | |
513 # Sanity checks | |
514 # | |
515 | |
516 $self->throw("Argument is not a Bio::SimpleAlign object but a [$aln]") | |
517 unless $aln->isa('Bio::SimpleAlign'); | |
518 $self->throw("'Pairwise alignments only, please") | |
519 if $aln->no_sequences != 2; | |
520 | |
521 # find out the order the two sequences are given | |
522 my $queryseq_pos = 1; #default | |
523 my $refseq_pos = 2; | |
524 unless ($aln->get_seq_by_pos(1)->id eq 'QUERY') { | |
525 carp('Query sequence has to be named QUERY') | |
526 if $aln->get_seq_by_pos(2)->id ne 'QUERY'; | |
527 $queryseq_pos = 2; # alternative | |
528 $refseq_pos = 1; | |
529 } | |
530 | |
531 # trim the alignment | |
532 my $start = $aln->column_from_residue_number('QUERY', 1); | |
533 my $end = $aln->column_from_residue_number('QUERY', | |
534 $aln->get_seq_by_pos($queryseq_pos)->end ); | |
535 | |
536 my $aln2 = $aln->slice($start, $end); | |
537 | |
538 # | |
539 # extracting mutations | |
540 # | |
541 | |
542 my $cs = $aln2->consensus_string(51); | |
543 my $queryseq = $aln2->get_seq_by_pos($queryseq_pos); | |
544 my $refseq = $aln2->get_seq_by_pos($refseq_pos); | |
545 | |
546 while ( $cs =~ /(\?+)/g) { | |
547 # pos in local coordinates | |
548 my $pos = pos($cs) - length($1) + 1; | |
549 my $mutation = create_mutation($self, | |
550 $refseq, | |
551 $queryseq, | |
552 $pos, | |
553 CORE::length($1) | |
554 ); | |
555 # reset pos to refseq coordinates | |
556 $pos += $refseq->start - 1; | |
557 $mutation->pos($pos); | |
558 | |
559 $self->add_Mutation($mutation); | |
560 } | |
561 return $self->change_gene(); | |
562 } | |
563 | |
564 =head2 create_mutation | |
565 | |
566 Title : create_mutation | |
567 Usage : | |
568 Function: | |
569 | |
570 Formats sequence differences from two sequences into | |
571 Bio::LiveSeq::Mutation objects which can be applied to a | |
572 gene. | |
573 | |
574 To keep it generic, sequence arguments need not to be | |
575 Bio::LocatableSeq. Coordinate change to parent sequence | |
576 numbering needs to be done by the calling code. | |
577 | |
578 Called from change_gene_with_alignment | |
579 | |
580 Args : Bio::PrimarySeqI inheriting object for the reference sequence | |
581 Bio::PrimarySeqI inheriting object for the query sequence | |
582 integer for the start position of the local sequence difference | |
583 integer for the length of the sequence difference | |
584 Returns : Bio::LiveSeq::Mutation object | |
585 | |
586 =cut | |
587 | |
588 sub create_mutation { | |
589 my ($self, $refseq, $queryseq, $pos, $len) = @_; | |
590 | |
591 $self->throw("Is not a Bio::PrimarySeqI object but a [$refseq]") | |
592 unless $refseq->isa('Bio::PrimarySeqI'); | |
593 $self->throw("Is not a Bio::PrimarySeqI object but a [$queryseq]") | |
594 unless $queryseq->isa('Bio::PrimarySeqI'); | |
595 $self->throw("Position is not a positive integer but [$pos]") | |
596 unless $pos =~ /^\+?\d+$/; | |
597 $self->throw("Length is not a positive integer but [$len]") | |
598 unless $len =~ /^\+?\d+$/; | |
599 | |
600 my $mutation; | |
601 my $refstring = $refseq->subseq($pos, $pos + $len - 1); | |
602 my $varstring = $queryseq->subseq($pos, $pos + $len - 1); | |
603 | |
604 if ($len == 1 and $refstring =~ /[^\.\-\*\?]/ and | |
605 $varstring =~ /[^\.\-\*\?]/ ) { #point | |
606 $mutation = new Bio::LiveSeq::Mutation (-seq => $varstring, | |
607 -pos => $pos, | |
608 ); | |
609 } | |
610 elsif ( $refstring =~ /^[^\.\-\*\?]+$/ and | |
611 $varstring !~ /^[^\.\-\*\?]+$/ ) { # deletion | |
612 $mutation = new Bio::LiveSeq::Mutation (-pos => $pos, | |
613 -len => $len | |
614 ); | |
615 } | |
616 elsif ( $refstring !~ /^[^\.\-\*\?]+$/ and | |
617 $varstring =~ /^[^\.\-\*\?]+$/ ) { # insertion | |
618 $mutation = new Bio::LiveSeq::Mutation (-seq => $varstring, | |
619 -pos => $pos, | |
620 -len => 0 | |
621 ); | |
622 } else { # complex | |
623 $mutation = new Bio::LiveSeq::Mutation (-seq => $varstring, | |
624 -pos => $pos, | |
625 -len => $len | |
626 ); | |
627 } | |
628 | |
629 return $mutation; | |
630 } | |
631 | |
632 =head2 change_gene | |
633 | |
634 Title : change_gene | |
635 Usage : my $mutate = Bio::LiveSeq::Mutator->new(-gene => $gene, | |
636 numbering => "coding" | |
637 ); | |
638 # $mut is Bio::LiveSeq::Mutation object | |
639 $mutate->add_Mutation($mut); | |
640 my $results=$mutate->change_gene(); | |
641 | |
642 Function: | |
643 | |
644 Returns a Bio::Variation::SeqDiff object containing the | |
645 results of the changes performed according to the | |
646 instructions present in Mutation(s). The -numbering | |
647 argument decides what molecule is being changed and what | |
648 numbering scheme being used: | |
649 | |
650 -numbering => "entry" | |
651 | |
652 determines the DNA level, using the numbering from the | |
653 beginning of the sequence | |
654 | |
655 -numbering => "coding" | |
656 | |
657 determines the RNA level, using the numbering from the | |
658 beginning of the 1st transcript | |
659 | |
660 Alternative transcripts can be used by specifying | |
661 "coding 2" or "coding 3" ... | |
662 | |
663 -numbering => "gene" | |
664 | |
665 determines the DNA level, using the numbering from the | |
666 beginning of the 1st transcript and inluding introns. | |
667 The meaning equals 'coding' if the reference molecule | |
668 is cDNA. | |
669 | |
670 Args : Bio::LiveSeq::Gene object | |
671 Bio::LiveSeq::Mutation object(s) | |
672 string specifying a numbering scheme (defaults to 'coding') | |
673 Returns : Bio::Variation::SeqDiff object or 0 on error | |
674 | |
675 =cut | |
676 | |
677 sub change_gene { | |
678 my ($self) = @_; | |
679 | |
680 # | |
681 # Sanity check | |
682 # | |
683 unless ($self->gene) { | |
684 $self->warn("Input object Bio::LiveSeq::Gene is not given"); | |
685 return 0; | |
686 } | |
687 # | |
688 # Setting the reference sequence based on -numbering | |
689 # | |
690 my @transcripts=@{$self->gene->get_Transcripts}; | |
691 my $refseq; # will hold Bio::LiveSeq:Transcript object or Bio::LiveSeq::DNA | |
692 | |
693 # 'gene' eq 'coding' if reference sequence is cDNA | |
694 $self->numbering ('coding') if $self->gene->get_DNA->alphabet eq 'rna' and $self->numbering eq 'gene'; | |
695 | |
696 if ($self->numbering =~ /(coding)( )?(\d+)?/ ) { | |
697 $self->numbering($1); | |
698 my $transnumber = $3; | |
699 $transnumber-- if $3; # 1 -> 0, 2 -> 1 | |
700 if ($transnumber && $transnumber >= 0 && $transnumber <= $#transcripts) { | |
701 $refseq=$transcripts[$transnumber]; | |
702 } else { | |
703 $transnumber && $self->warn("The alternative transcript number ". $transnumber+1 . | |
704 "- does not exist. Reverting to the 1st transcript\n"); | |
705 $refseq=$transcripts[0]; | |
706 } | |
707 } else { | |
708 $refseq=$transcripts[0]->{'seq'}; | |
709 } | |
710 # | |
711 # Recording the state: SeqDiff object creation ?? transcript no.?? | |
712 # | |
713 my $seqDiff = Bio::Variation::SeqDiff->new(); | |
714 $seqDiff->alphabet($self->gene->get_DNA->alphabet); | |
715 $seqDiff->numbering($self->numbering); | |
716 my ($DNAobj, $RNAobj); | |
717 if ($refseq->isa("Bio::LiveSeq::Transcript")) { | |
718 $self->RNA($refseq); | |
719 $self->DNA($refseq->{'seq'}); | |
720 $seqDiff->rna_ori($refseq->seq ); | |
721 $seqDiff->aa_ori($refseq->get_Translation->seq); | |
722 } else { | |
723 $self->DNA($refseq); | |
724 $self->RNA($transcripts[0]); | |
725 $seqDiff->rna_ori($self->RNA->seq); | |
726 $seqDiff->aa_ori($self->RNA->get_Translation->seq); | |
727 } | |
728 $seqDiff->dna_ori($self->DNA->seq); | |
729 # put the accession number into the SeqDiff object ID | |
730 $seqDiff->id($self->DNA->accession_number); | |
731 | |
732 # the atg_offset takes in account that DNA object could be a subset of the | |
733 # whole entry (via the light_weight loader) | |
734 my $atg_label=$self->RNA->start; | |
735 my $atg_offset=$self->DNA->position($atg_label)+($self->DNA->start)-1; | |
736 $seqDiff->offset($atg_offset - 1); | |
737 $self->DNA->coordinate_start($atg_label); | |
738 | |
739 my @exons = $self->RNA->all_Exons; | |
740 $seqDiff->cds_end($exons[$#exons]->end); | |
741 | |
742 # | |
743 # Converting mutation positions to labels | |
744 # | |
745 $self->warn("no mutations"), return 0 | |
746 unless $self->_mutationpos2label($refseq, $seqDiff); | |
747 | |
748 # need to add more than one rna & aa | |
749 #foreach $transcript (@transcripts) { | |
750 # $seqDiff{"ori_transcript_${i}_seq"}=$transcript->seq; | |
751 # $seqDiff{"ori_translation_${i}_seq"}=$transcript->get_Translation->seq; | |
752 #} | |
753 | |
754 # do changes | |
755 my $k; | |
756 foreach my $mutation ($self->each_Mutation) { | |
757 next unless $mutation->label > 0; | |
758 $self->mutation($mutation); | |
759 | |
760 $mutation->issue(++$k); | |
761 # | |
762 # current position on the transcript | |
763 # | |
764 if ($self->numbering =~ /coding/) { | |
765 $mutation->transpos($mutation->pos); # transpos given by user | |
766 } else { | |
767 #transpos of label / It will be 0 if mutating an intron, negative if upstream of ATG | |
768 $mutation->transpos($self->RNA->position($mutation->label,$atg_label)); | |
769 } | |
770 # | |
771 # Calculate adjacent labels based on the position on the current sequence | |
772 # | |
773 $mutation->prelabel($self->DNA->label(-1, $mutation->label)); # 1 before label | |
774 if ($mutation->len == 0) { | |
775 $mutation->postlabel($mutation->label); | |
776 $mutation->lastlabel($mutation->label); | |
777 } elsif ($mutation->len == 1) { | |
778 $mutation->lastlabel($mutation->label); # last nucleotide affected | |
779 $mutation->postlabel($self->DNA->label(2,$mutation->lastlabel)); # $len after label | |
780 } else { | |
781 $mutation->lastlabel($self->DNA->label($mutation->len,$mutation->label)); | |
782 $mutation->postlabel($self->DNA->label(2,$mutation->lastlabel)); | |
783 } | |
784 my $dnamut = $self->_set_DNAMutation($seqDiff); | |
785 # | |
786 # | |
787 # | |
788 if ($self->_rnaAffected) { | |
789 $self->_set_effects($seqDiff, $dnamut); | |
790 } | |
791 elsif ($seqDiff->offset != 0 and $dnamut->region ne 'intron') { | |
792 $self->_untranslated ($seqDiff, $dnamut); | |
793 } else { | |
794 #$self->warn("Mutation starts outside coding region, RNAChange object not created"); | |
795 } | |
796 | |
797 ######################################################################### | |
798 # Mutations are done here! # | |
799 $refseq->labelchange($mutation->seq, $mutation->label, $mutation->len); # | |
800 ######################################################################### | |
801 | |
802 $self->_post_mutation ($seqDiff); | |
803 | |
804 $self->dnamut(undef); | |
805 $self->rnachange(undef); | |
806 $self->aachange(undef); | |
807 $self->exons(undef); | |
808 } | |
809 # record the final state of all three sequences | |
810 $seqDiff->dna_mut($self->DNA->seq); | |
811 $seqDiff->rna_mut($self->RNA->seq); | |
812 if ($refseq->isa("Bio::LiveSeq::Transcript")) { | |
813 $seqDiff->aa_mut($refseq->get_Translation->seq); | |
814 } else { | |
815 $seqDiff->aa_mut($self->RNA->get_Translation->seq); | |
816 } | |
817 | |
818 #$seqDiff{mut_dna_seq}=$gene->get_DNA->seq; | |
819 #my $i=1; | |
820 #foreach $transcript (@transcripts) { | |
821 # $seqDiff{"mut_transcript_${i}_seq"}=$transcript->seq; | |
822 # $seqDiff{"mut_translation_${i}_seq"}=$transcript->get_Translation->seq; | |
823 #} | |
824 return $seqDiff; | |
825 } | |
826 | |
827 =head2 _mutationpos2label | |
828 | |
829 Title : _mutationpos2label | |
830 Usage : | |
831 Function: converts mutation positions into labels | |
832 Example : | |
833 Returns : number of valid mutations | |
834 Args : LiveSeq sequence object | |
835 | |
836 =cut | |
837 | |
838 sub _mutationpos2label { | |
839 my ($self, $refseq, $SeqDiff) = @_; | |
840 my $count; | |
841 my @bb = @{$self->{'mutations'}}; | |
842 my $cc = scalar @bb; | |
843 foreach my $mut (@{$self->{'mutations'}}) { | |
844 # if ($self->numbering eq 'gene' and $mut->pos < 1) { | |
845 # my $tmp = $mut->pos; | |
846 # print STDERR "pos: ", "$tmp\n"; | |
847 # $tmp++ if $tmp < 1; | |
848 # $tmp += $SeqDiff->offset; | |
849 # print STDERR "pos2: ", "$tmp\n"; | |
850 # $mut->pos($tmp); | |
851 # } | |
852 # elsif ($self->numbering eq 'entry') { | |
853 if ($self->numbering eq 'entry') { | |
854 my $tmp = $mut->pos; | |
855 $tmp -= $SeqDiff->offset; | |
856 $tmp-- if $tmp < 1; | |
857 $mut->pos($tmp); | |
858 } | |
859 | |
860 my $label = $refseq->label($mut->pos); # get the label for the position | |
861 $mut->label($label), $count++ if $label > 0 ; | |
862 #print STDERR "x", $mut->pos,'|' ,$mut->label, "\n"; | |
863 } | |
864 return $count; | |
865 } | |
866 | |
867 # | |
868 # Calculate labels around mutated nucleotide | |
869 # | |
870 | |
871 =head2 _set_DNAMutation | |
872 | |
873 Title : _set_DNAMutation | |
874 Usage : | |
875 Function: | |
876 | |
877 Stores DNA level mutation attributes before mutation into | |
878 Bio::Variation::DNAMutation object. Links it to SeqDiff | |
879 object. | |
880 | |
881 Example : | |
882 Returns : Bio::Variation::DNAMutation object | |
883 Args : Bio::Variation::SeqDiff object | |
884 | |
885 See L<Bio::Variation::DNAMutation> and L<Bio::Variation::SeqDiff>. | |
886 | |
887 =cut | |
888 | |
889 sub _set_DNAMutation { | |
890 my ($self, $seqDiff) = @_; | |
891 | |
892 my $dnamut_start = $self->mutation->label - $seqDiff->offset; | |
893 # if negative DNA positions (before ATG) | |
894 $dnamut_start-- if $dnamut_start <= 0; | |
895 my $dnamut_end; | |
896 ($self->mutation->len == 0 or $self->mutation->len == 1) ? | |
897 ($dnamut_end = $dnamut_start) : | |
898 ($dnamut_end = $dnamut_start+$self->mutation->len); | |
899 #print "start:$dnamut_start, end:$dnamut_end\n"; | |
900 my $dnamut = Bio::Variation::DNAMutation->new(-start => $dnamut_start, | |
901 -end => $dnamut_end, | |
902 ); | |
903 $dnamut->mut_number($self->mutation->issue); | |
904 $dnamut->isMutation(1); | |
905 my $da_m = Bio::Variation::Allele->new; | |
906 $da_m->seq($self->mutation->seq) if $self->mutation->seq; | |
907 $dnamut->allele_mut($da_m); | |
908 $dnamut->add_Allele($da_m); | |
909 # allele_ori | |
910 my $allele_ori = $self->DNA->labelsubseq($self->mutation->prelabel, | |
911 undef, | |
912 $self->mutation->postlabel); # get seq | |
913 chop $allele_ori; # chop the postlabel nucleotide | |
914 $allele_ori=substr($allele_ori,1); # away the prelabel nucleotide | |
915 my $da_o = Bio::Variation::Allele->new; | |
916 $da_o->seq($allele_ori) if $allele_ori; | |
917 $dnamut->allele_ori($da_o); | |
918 ($self->mutation->len == 0) ? | |
919 ($dnamut->length($self->mutation->len)) : ($dnamut->length(CORE::length $allele_ori)); | |
920 #print " --------------- $dnamut_start -$len- $dnamut_end -\n"; | |
921 $seqDiff->add_Variant($dnamut); | |
922 $self->dnamut($dnamut); | |
923 $dnamut->mut_number($self->mutation->issue); | |
924 # setting proof | |
925 if ($seqDiff->numbering eq "entry" or $seqDiff->numbering eq "gene") { | |
926 $dnamut->proof('experimental'); | |
927 } else { | |
928 $dnamut->proof('computed'); | |
929 } | |
930 # how many nucleotides to store upstream and downstream of the change | |
931 my $flanklen = $self->{'flanklen'}; | |
932 #print `date`, " flanking sequences start\n"; | |
933 my $uplabel = $self->DNA->label(1-$flanklen,$self->mutation->prelabel); # this could be unavailable! | |
934 | |
935 my $upstreamseq; | |
936 if ($uplabel > 0) { | |
937 $upstreamseq = | |
938 $self->DNA->labelsubseq($uplabel, undef, $self->mutation->prelabel); | |
939 } else { # from start (less than $flanklen nucleotides) | |
940 $upstreamseq = | |
941 $self->DNA->labelsubseq($self->DNA->start, undef, $self->mutation->prelabel); | |
942 } | |
943 $dnamut->upStreamSeq($upstreamseq); | |
944 my $dnstreamseq = $self->DNA->labelsubseq($self->mutation->postlabel, $flanklen); | |
945 $dnamut->dnStreamSeq($dnstreamseq); # $flanklen or less nucleotides | |
946 return $dnamut; | |
947 } | |
948 | |
949 | |
950 # | |
951 ### Check if mutation propagates to RNA (and AA) level | |
952 # | |
953 # side effect: sets intron/exon information | |
954 # returns a boolean value | |
955 # | |
956 | |
957 sub _rnaAffected { | |
958 my ($self) = @_; | |
959 my @exons=$self->RNA->all_Exons; | |
960 my $RNAstart=$self->RNA->start; | |
961 my $RNAend=$self->RNA->end; | |
962 my ($firstexon,$before,$after,$i); | |
963 my ($rnaAffected) = 0; | |
964 | |
965 # check for inserted labels (that require follows instead of <,>) | |
966 my $DNAend=$self->RNA->{'seq'}->end; | |
967 if ($self->mutation->prelabel > $DNAend or $self->mutation->postlabel > $DNAend) { | |
968 #this means one of the two labels is an inserted one | |
969 #(coming from a previous mutation. This would falsify all <,> | |
970 #checks, so the follow() has to be used | |
971 $self->warn("Attention, workaround not fully tested yet! Expect unpredictable results.\n"); | |
972 if (($self->mutation->postlabel==$RNAstart) or (follows($self->mutation->postlabel,$RNAstart))) { | |
973 $self->warn("RNA not affected because change occurs before RNAstart"); | |
974 } | |
975 elsif (($RNAend==$self->mutation->prelabel) or (follows($RNAend,$self->mutation->prelabel))) { | |
976 $self->warn("RNA not affected because change occurs after RNAend"); | |
977 } | |
978 elsif (scalar @exons == 1) { | |
979 #no introns, just one exon | |
980 $rnaAffected = 1; # then RNA is affected! | |
981 } else { | |
982 # otherwise check for change occurring inside an intron | |
983 $firstexon=shift(@exons); | |
984 $before=$firstexon->end; | |
985 | |
986 foreach $i (0..$#exons) { | |
987 $after=$exons[$i]->start; | |
988 if (follows($self->mutation->prelabel,$before) or | |
989 ($after==$self->mutation->prelabel) or | |
990 follows($after,$self->mutation->prelabel) or | |
991 follows($after,$self->mutation->postlabel)) { | |
992 | |
993 $rnaAffected = 1; | |
994 # $i is number of exon and can be used for proximity check | |
995 } | |
996 $before=$exons[$i]->end; | |
997 } | |
998 | |
999 } | |
1000 } else { | |
1001 my $strand = $exons[0]->strand; | |
1002 if (($strand == 1 and $self->mutation->postlabel <= $RNAstart) or | |
1003 ($strand != 1 and $self->mutation->postlabel >= $RNAstart)) { | |
1004 #$self->warn("RNA not affected because change occurs before RNAstart"); | |
1005 $rnaAffected = 0; | |
1006 } | |
1007 elsif (($strand == 1 and $self->mutation->prelabel >= $RNAend) or | |
1008 ($strand != 1 and $self->mutation->prelabel <= $RNAend)) { | |
1009 #$self->warn("RNA not affected because change occurs after RNAend"); | |
1010 $rnaAffected = 0; | |
1011 my $dist; | |
1012 if ($strand == 1){ | |
1013 $dist = $self->mutation->prelabel - $RNAend; | |
1014 } else { | |
1015 $dist = $RNAend - $self->mutation->prelabel; | |
1016 } | |
1017 $self->dnamut->region_dist($dist); | |
1018 } | |
1019 elsif (scalar @exons == 1) { | |
1020 #if just one exon -> no introns, | |
1021 $rnaAffected = 1; # then RNA is affected! | |
1022 } else { | |
1023 # otherwise check for mutation occurring inside an intron | |
1024 $firstexon=shift(@exons); | |
1025 $before=$firstexon->end; | |
1026 if ( ($strand == 1 and $self->mutation->prelabel < $before) or | |
1027 ($strand == -1 and $self->mutation->prelabel > $before) | |
1028 ) { | |
1029 $rnaAffected = 1 ; | |
1030 | |
1031 #print "Exon 1 : ", $firstexon->start, " - ", $firstexon->end, "<br>\n"; | |
1032 my $afterdist = $self->mutation->prelabel - $firstexon->start; | |
1033 my $beforedist = $firstexon->end - $self->mutation->postlabel; | |
1034 my $exonvalue = $i + 1; | |
1035 $self->dnamut->region('exon'); | |
1036 $self->dnamut->region_value($exonvalue); | |
1037 if ($afterdist < $beforedist) { | |
1038 $afterdist++; | |
1039 $afterdist++; | |
1040 $self->dnamut->region_dist($afterdist); | |
1041 #print "splice site $afterdist nt upstream!<br>"; | |
1042 } else { | |
1043 $self->dnamut->region_dist($beforedist); | |
1044 #print "splice site $beforedist nt downstream!<br>"; | |
1045 } | |
1046 } else { | |
1047 #print "first exon : ", $firstexon->start, " - ", $firstexon->end, "<br>\n"; | |
1048 foreach $i (0..$#exons) { | |
1049 $after=$exons[$i]->start; | |
1050 #proximity test for intronic mutations | |
1051 if ( ($strand == 1 and | |
1052 $self->mutation->prelabel >= $before and | |
1053 $self->mutation->postlabel <= $after) | |
1054 or | |
1055 ($strand == -1 and | |
1056 $self->mutation->prelabel <= $before and | |
1057 $self->mutation->postlabel >= $after) ) { | |
1058 $self->dnamut->region('intron'); | |
1059 #$self->dnamut->region_value($i); | |
1060 my $afterdist = $self->mutation->prelabel - $before; | |
1061 my $beforedist = $after - $self->mutation->postlabel; | |
1062 my $intronvalue = $i + 1; | |
1063 if ($afterdist < $beforedist) { | |
1064 $afterdist++; | |
1065 $self->dnamut->region_value($intronvalue); | |
1066 $self->dnamut->region_dist($afterdist); | |
1067 #print "splice site $afterdist nt upstream!<br>"; | |
1068 } else { | |
1069 $self->dnamut->region_value($intronvalue); | |
1070 $self->dnamut->region_dist($beforedist * -1); | |
1071 #print "splice site $beforedist nt downstream!<br>"; | |
1072 } | |
1073 $self->rnachange(undef); | |
1074 last; | |
1075 } | |
1076 #proximity test for exon mutations | |
1077 elsif ( ( $strand == 1 and | |
1078 $exons[$i]->start <= $self->mutation->prelabel and | |
1079 $exons[$i]->end >= $self->mutation->postlabel) or | |
1080 ( $strand == -1 and | |
1081 $exons[$i]->start >= $self->mutation->prelabel and | |
1082 $exons[$i]->end <= $self->mutation->postlabel) ) { | |
1083 $rnaAffected = 1; | |
1084 | |
1085 my $afterdist = $self->mutation->prelabel - $exons[$i]->start; | |
1086 my $beforedist = $exons[$i]->end - $self->mutation->postlabel; | |
1087 my $exonvalue = $i + 1; | |
1088 $self->dnamut->region('exon'); | |
1089 if ($afterdist < $beforedist) { | |
1090 $afterdist++; | |
1091 $self->dnamut->region_value($exonvalue+1); | |
1092 $self->dnamut->region_dist($afterdist); | |
1093 #print "splice site $afterdist nt upstream!<br>"; | |
1094 } else { | |
1095 #$beforedist; | |
1096 $self->dnamut->region_value($exonvalue+1); | |
1097 $self->dnamut->region_dist($beforedist * -1); | |
1098 #print "splice site $beforedist nt downstream!<br>"; | |
1099 } | |
1100 last; | |
1101 } | |
1102 $before=$exons[$i]->end; | |
1103 } | |
1104 } | |
1105 } | |
1106 } | |
1107 #$self->warn("RNA not affected because change occurs inside an intron"); | |
1108 #return(0); # if still not returned, then not affected, return 0 | |
1109 return $rnaAffected; | |
1110 } | |
1111 | |
1112 # | |
1113 # ### Creation of RNA and AA variation objects | |
1114 # | |
1115 | |
1116 =head2 _set_effects | |
1117 | |
1118 Title : _set_effects | |
1119 Usage : | |
1120 Function: | |
1121 | |
1122 Stores RNA and AA level mutation attributes before mutation | |
1123 into Bio::Variation::RNAChange and | |
1124 Bio::Variation::AAChange objects. Links them to | |
1125 SeqDiff object. | |
1126 | |
1127 Example : | |
1128 Returns : | |
1129 Args : Bio::Variation::SeqDiff object | |
1130 Bio::Variation::DNAMutation object | |
1131 | |
1132 See L<Bio::Variation::RNAChange>, L<Bio::Variation::RNAChange>, | |
1133 L<Bio::Variation::SeqDiff>, and L<Bio::Variation::DNAMutation>. | |
1134 | |
1135 =cut | |
1136 | |
1137 sub _set_effects { | |
1138 my ($self, $seqDiff, $dnamut) = @_; | |
1139 my ($rnapos_end, $upstreamseq, $dnstreamseq); | |
1140 my $flanklen = $self->{'flanklen'}; | |
1141 | |
1142 ($self->mutation->len == 0) ? | |
1143 ($rnapos_end = $self->mutation->transpos) : | |
1144 ($rnapos_end = $self->mutation->transpos + $self->mutation->len -1); | |
1145 my $rnachange = Bio::Variation::RNAChange->new(-start => $self->mutation->transpos, | |
1146 -end => $rnapos_end | |
1147 ); | |
1148 $rnachange->isMutation(1); | |
1149 | |
1150 # setting proof | |
1151 if ($seqDiff->numbering eq "coding") { | |
1152 $rnachange->proof('experimental'); | |
1153 } else { | |
1154 $rnachange->proof('computed'); | |
1155 } | |
1156 | |
1157 $seqDiff->add_Variant($rnachange); | |
1158 $self->rnachange($rnachange); | |
1159 $rnachange->DNAMutation($dnamut); | |
1160 $dnamut->RNAChange($rnachange); | |
1161 $rnachange->mut_number($self->mutation->issue); | |
1162 | |
1163 # setting the codon_position of the "start" nucleotide of the change | |
1164 $rnachange->codon_pos(($self->RNA->frame($self->mutation->label))+1); # codon_pos=frame+1 | |
1165 | |
1166 my @exons=$self->RNA->all_Exons; | |
1167 $self->exons(\@exons); | |
1168 #print `date`, " before flank, after exons. RNAObj query\n"; | |
1169 # if cannot retrieve from Transcript, Transcript::upstream_seq will be used | |
1170 # before "fac7 g 65" bug discovered | |
1171 # $uplabel=$self->RNA->label(1-$flanklen,$prelabel); | |
1172 my $RNAprelabel=$self->RNA->label(-1,$self->mutation->label); # to fix fac7g65 bug | |
1173 # for the fix, all prelabel used in the next block have been changed to RNAprelabel | |
1174 my $uplabel=$self->RNA->label(1-$flanklen,$RNAprelabel); | |
1175 if ($self->RNA->valid($uplabel)) { | |
1176 $upstreamseq = $self->RNA->labelsubseq($uplabel, undef, $RNAprelabel); | |
1177 } else { | |
1178 $upstreamseq = $self->RNA->labelsubseq($self->RNA->start, undef, $RNAprelabel) | |
1179 if $self->RNA->valid($RNAprelabel); | |
1180 my $lacking=$flanklen-length($upstreamseq); # how many missing | |
1181 my $upstream_atg=$exons[0]->subseq(-$lacking,-1); | |
1182 $upstreamseq=$upstream_atg . $upstreamseq; | |
1183 } | |
1184 | |
1185 $rnachange->upStreamSeq($upstreamseq); | |
1186 | |
1187 # won't work OK if postlabel NOT in Transcript | |
1188 # now added RNApostlabel but this has to be /fully tested/ | |
1189 # for the fix, all postlabel used in the next block have been changed to RNApostlabel | |
1190 my $RNApostlabel; # to fix fac7g64 bug | |
1191 if ($self->mutation->len == 0) { | |
1192 $RNApostlabel=$self->mutation->label; | |
1193 } else { | |
1194 my $mutlen = 1 + $self->mutation->len; | |
1195 $RNApostlabel=$self->RNA->label($mutlen,$self->mutation->label); | |
1196 } | |
1197 $dnstreamseq=$self->RNA->labelsubseq($RNApostlabel, $flanklen); | |
1198 if ($dnstreamseq eq '-1') { # if out of transcript was requested | |
1199 my $lastexon=$exons[-1]; | |
1200 my $lastexonlength=$lastexon->length; | |
1201 $dnstreamseq=$self->RNA->labelsubseq($RNApostlabel); # retrieves till RNAend | |
1202 my $lacking=$flanklen-length($dnstreamseq); # how many missing | |
1203 my $downstream_stop=$lastexon->subseq($lastexonlength+1,undef,$lacking); | |
1204 $dnstreamseq .= $downstream_stop; | |
1205 } else { | |
1206 $rnachange->dnStreamSeq($dnstreamseq); | |
1207 } | |
1208 # AAChange creation | |
1209 my $AAobj=$self->RNA->get_Translation; | |
1210 # storage of prelabel here, to be used in create_mut_objs_after | |
1211 my $aachange = Bio::Variation::AAChange->new(-start => $RNAprelabel | |
1212 ); | |
1213 $aachange->isMutation(1); | |
1214 $aachange->proof('computed'); | |
1215 | |
1216 $seqDiff->add_Variant($aachange); | |
1217 $self->aachange($aachange); | |
1218 $rnachange->AAChange($aachange); | |
1219 $aachange->RNAChange($rnachange); | |
1220 | |
1221 $aachange->mut_number($self->mutation->issue); | |
1222 # $before_mutation{aachange}=$aachange; | |
1223 | |
1224 my $ra_o = Bio::Variation::Allele->new; | |
1225 $ra_o->seq($dnamut->allele_ori->seq) if $dnamut->allele_ori->seq; | |
1226 $rnachange->allele_ori($ra_o); | |
1227 | |
1228 $rnachange->length(CORE::length $rnachange->allele_ori->seq); | |
1229 | |
1230 my $ra_m = Bio::Variation::Allele->new; | |
1231 $ra_m->seq($self->mutation->seq) if $self->mutation->seq; | |
1232 $rnachange->allele_mut($ra_m); | |
1233 $rnachange->add_Allele($ra_m); | |
1234 | |
1235 #$rnachange->allele_mut($seq); | |
1236 $rnachange->end($rnachange->start) if $rnachange->length == 0; | |
1237 | |
1238 # this holds the aminoacid sequence that will be affected by the mutation | |
1239 my $aa_allele_ori=$AAobj->labelsubseq($self->mutation->label,undef, | |
1240 $self->mutation->lastlabel); | |
1241 | |
1242 my $aa_o = Bio::Variation::Allele->new; | |
1243 $aa_o->seq($aa_allele_ori) if $aa_allele_ori; | |
1244 $aachange->allele_ori($aa_o); | |
1245 #$aachange->allele_ori($aa_allele_ori); | |
1246 | |
1247 my $aa_length_ori = length($aa_allele_ori); | |
1248 $aachange->length($aa_length_ori); #print "==========$aa_length_ori\n"; | |
1249 $aachange->end($aachange->start + $aa_length_ori - 1 ); | |
1250 } | |
1251 | |
1252 =head2 _untranslated | |
1253 | |
1254 Title : _untranslated | |
1255 Usage : | |
1256 Function: | |
1257 | |
1258 Stores RNA change attributes before mutation | |
1259 into Bio::Variation::RNAChange object. Links it to | |
1260 SeqDiff object. | |
1261 | |
1262 Example : | |
1263 Returns : | |
1264 Args : Bio::Variation::SeqDiff object | |
1265 Bio::Variation::DNAMutation object | |
1266 | |
1267 See L<Bio::Variation::RNAChange>, L<Bio::Variation::SeqDiff> and | |
1268 L<Bio::Variation::DNAMutation> for details. | |
1269 | |
1270 =cut | |
1271 | |
1272 sub _untranslated { | |
1273 my ($self, $seqDiff, $dnamut) = @_; | |
1274 my $rnapos_end; | |
1275 ($self->mutation->len == 0) ? | |
1276 ($rnapos_end = $self->mutation->transpos) : | |
1277 ($rnapos_end = $self->mutation->transpos + $self->mutation->len -1); | |
1278 my $rnachange = Bio::Variation::RNAChange->new(-start => $self->mutation->transpos, | |
1279 -end => $rnapos_end | |
1280 ); | |
1281 #my $rnachange = Bio::Variation::RNAChange->new; | |
1282 | |
1283 $rnachange->isMutation(1); | |
1284 my $ra_o = Bio::Variation::Allele->new; | |
1285 $ra_o->seq($dnamut->allele_ori->seq) if $dnamut->allele_ori->seq; | |
1286 $rnachange->allele_ori($ra_o); | |
1287 my $ra_m = Bio::Variation::Allele->new; | |
1288 $ra_m->seq($dnamut->allele_mut->seq) if $dnamut->allele_mut->seq; | |
1289 $rnachange->allele_mut($ra_m); | |
1290 $rnachange->add_Allele($ra_m); | |
1291 $rnachange->upStreamSeq($dnamut->upStreamSeq); | |
1292 $rnachange->dnStreamSeq($dnamut->dnStreamSeq); | |
1293 $rnachange->length($dnamut->length); | |
1294 $rnachange->mut_number($dnamut->mut_number); | |
1295 # setting proof | |
1296 if ($seqDiff->numbering eq "coding") { | |
1297 $rnachange->proof('experimental'); | |
1298 } else { | |
1299 $rnachange->proof('computed'); | |
1300 } | |
1301 | |
1302 my $dist; | |
1303 if ($rnachange->end < 0) { | |
1304 $rnachange->region('5\'UTR'); | |
1305 $dnamut->region('5\'UTR'); | |
1306 my $dist = $dnamut->end ; | |
1307 $dnamut->region_dist($dist); | |
1308 $dist = $seqDiff->offset - $self->gene->maxtranscript->start + 1 + $dist; | |
1309 $rnachange->region_dist($dist); | |
1310 return if $dist < 1; # if mutation is not in mRNA | |
1311 } else { | |
1312 $rnachange->region('3\'UTR'); | |
1313 $dnamut->region('3\'UTR'); | |
1314 my $dist = $dnamut->start - $seqDiff->cds_end + $seqDiff->offset; | |
1315 $dnamut->region_dist($dist); | |
1316 $dist = $seqDiff->cds_end - $self->gene->maxtranscript->end -1 + $dist; | |
1317 $rnachange->region_dist($dist); | |
1318 return if $dist > 0; # if mutation is not in mRNA | |
1319 } | |
1320 $seqDiff->add_Variant($rnachange); | |
1321 $self->rnachange($rnachange); | |
1322 $rnachange->DNAMutation($dnamut); | |
1323 $dnamut->RNAChange($rnachange); | |
1324 } | |
1325 | |
1326 # args: reference to label changearray, reference to position changearray | |
1327 # Function: take care of the creation of mutation objects, with | |
1328 # information AFTER the change takes place | |
1329 sub _post_mutation { | |
1330 my ($self, $seqDiff) = @_; | |
1331 | |
1332 if ($self->rnachange and $self->rnachange->region eq 'coding') { | |
1333 | |
1334 #$seqDiff->add_Variant($self->rnachange); | |
1335 | |
1336 my $aachange=$self->aachange; | |
1337 my ($AAobj,$aa_start_prelabel,$aa_start,$mut_translation); | |
1338 $AAobj=$self->RNA->get_Translation; | |
1339 $aa_start_prelabel=$aachange->start; | |
1340 $aa_start=$AAobj->position($self->RNA->label(2,$aa_start_prelabel)); | |
1341 $aachange->start($aa_start); | |
1342 $mut_translation=$AAobj->seq; | |
1343 | |
1344 # this now takes in account possible preinsertions | |
1345 my $aa_m = Bio::Variation::Allele->new; | |
1346 $aa_m->seq(substr($mut_translation,$aa_start-1)) if substr($mut_translation,$aa_start-1); | |
1347 $aachange->allele_mut($aa_m); | |
1348 $aachange->add_Allele($aa_m); | |
1349 #$aachange->allele_mut(substr($mut_translation,$aa_start-1)); | |
1350 #$aachange->allele_mut($mut_translation); | |
1351 my ($rlenori, $rlenmut); | |
1352 $rlenori = CORE::length($aachange->RNAChange->allele_ori->seq); | |
1353 $rlenmut = CORE::length($aachange->RNAChange->allele_mut->seq); | |
1354 #point mutation | |
1355 | |
1356 if ($rlenori == 1 and $rlenmut == 1 and $aachange->allele_ori->seq ne '*') { | |
1357 my $alleleseq; | |
1358 if ($aachange->allele_mut->seq) { | |
1359 $alleleseq = substr($aachange->allele_mut->seq, 0, 1); | |
1360 $aachange->allele_mut->seq($alleleseq); | |
1361 } | |
1362 $aachange->end($aachange->start); | |
1363 $aachange->length(1); | |
1364 } | |
1365 elsif ( $rlenori == $rlenmut and | |
1366 $aachange->allele_ori->seq ne '*' ) { #complex inframe mutation | |
1367 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, | |
1368 0, | |
1369 length($aachange->allele_ori->seq)); | |
1370 } | |
1371 #inframe mutation | |
1372 elsif ((int($rlenori-$rlenmut))%3 == 0) { | |
1373 if ($aachange->RNAChange->allele_mut->seq and | |
1374 $aachange->RNAChange->allele_ori->seq ) { | |
1375 # complex | |
1376 my $rna_len = length ($aachange->RNAChange->allele_mut->seq); | |
1377 my $len = $rna_len/3; | |
1378 $len++ unless $rna_len%3 == 0; | |
1379 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0, $len ); | |
1380 } | |
1381 elsif ($aachange->RNAChange->codon_pos == 1){ | |
1382 # deletion | |
1383 if ($aachange->RNAChange->allele_mut->seq eq '') { | |
1384 $aachange->allele_mut->seq(''); | |
1385 $aachange->end($aachange->start + $aachange->length - 1 ); | |
1386 } | |
1387 # insertion | |
1388 elsif ($aachange->RNAChange->allele_ori->seq eq '' ) { | |
1389 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0, | |
1390 length ($aachange->RNAChange->allele_mut->seq) / 3); | |
1391 $aachange->allele_ori->seq(''); | |
1392 $aachange->end($aachange->start + $aachange->length - 1 ); | |
1393 $aachange->length(0); | |
1394 } | |
1395 } else { | |
1396 #elsif ($aachange->RNAChange->codon_pos == 2){ | |
1397 # deletion | |
1398 if (not $aachange->RNAChange->allele_mut->seq ) { | |
1399 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0, 1); | |
1400 } | |
1401 # insertion | |
1402 elsif (not $aachange->RNAChange->allele_ori->seq) { | |
1403 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0, | |
1404 length ($aachange->RNAChange->allele_mut->seq) / 3 +1); | |
1405 } | |
1406 } | |
1407 } else { | |
1408 #frameshift | |
1409 #my $pos = index $aachange->allele_mut | |
1410 #$aachange->allele_mut(substr($aachange->allele_mut, 0, 1)); | |
1411 $aachange->length(CORE::length($aachange->allele_ori->seq)); | |
1412 my $aaend = $aachange->start + $aachange->length -1; | |
1413 $aachange->end($aachange->start); | |
1414 } | |
1415 | |
1416 # splicing site deletion check | |
1417 my @beforeexons=@{$self->exons}; | |
1418 my @afterexons=$self->RNA->all_Exons; | |
1419 my $i; | |
1420 if (scalar(@beforeexons) ne scalar(@afterexons)) { | |
1421 my $mut_number = $self->mutation->issue; | |
1422 $self->warn("Exons have been modified at mutation n.$mut_number!"); | |
1423 $self->rnachange->exons_modified(1); | |
1424 } else { | |
1425 EXONCHECK: | |
1426 foreach $i (0..$#beforeexons) { | |
1427 if ($beforeexons[$i] ne $afterexons[$i]) { | |
1428 my $mut_number = $self->mutation->issue; | |
1429 $self->warn("Exons have been modified at mutation n.$mut_number!"); | |
1430 $self->rnachange->exons_modified(1); | |
1431 last EXONCHECK; | |
1432 } | |
1433 } | |
1434 } | |
1435 } else { | |
1436 #$seqDiff->rnachange(undef); | |
1437 #print "getting here?"; | |
1438 } | |
1439 return 1; | |
1440 } | |
1441 | |
1442 1; |