Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Variation/SeqDiff.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: SeqDiff.pm,v 1.16 2002/10/22 07:38:49 lapp Exp $ | |
2 # bioperl module for Bio::Variation::SeqDiff | |
3 # | |
4 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> | |
5 # | |
6 # Copyright Heikki Lehvaslaiho | |
7 # | |
8 # You may distribute this module under the same terms as perl itself | |
9 # | |
10 # POD documentation - main docs before the code | |
11 | |
12 # cds_end definition? | |
13 | |
14 =head1 NAME | |
15 | |
16 Bio::Variation::SeqDiff - Container class for mutation/variant descriptions | |
17 | |
18 =head1 SYNOPSIS | |
19 | |
20 $seqDiff = Bio::Variation::SeqDiff->new ( | |
21 -id => $M20132, | |
22 -alphabet => 'rna', | |
23 -gene_symbol => 'AR' | |
24 -chromosome => 'X', | |
25 -numbering => 'coding' | |
26 ); | |
27 # get a DNAMutation object somehow | |
28 $seqDiff->add_Variant($dnamut); | |
29 print $seqDiff->sys_name(), "\n"; | |
30 | |
31 =head1 DESCRIPTION | |
32 | |
33 SeqDiff stores Bio::Variation::VariantI object references and | |
34 descriptive information common to all changes in a sequence. Mutations | |
35 are understood to be any kind of sequence markers and are expected to | |
36 occur in the same chromosome. See L<Bio::Variation::VariantI> for details. | |
37 | |
38 The methods of SeqDiff are geared towards describing mutations in | |
39 human genes using gene-based coordinate system where 'A' of the | |
40 initiator codon has number 1 and the one before it -1. This is | |
41 according to conventions of human genetics. | |
42 | |
43 There will be class Bio::Variation::Genotype to describe markers in | |
44 different chromosomes and diploid genototypes. | |
45 | |
46 Classes implementing Bio::Variation::VariantI interface are | |
47 Bio::Variation::DNAMutation, Bio::Variation::RNAChange, and | |
48 Bio::Variation::AAChange. See L<Bio::Variation::VariantI>, | |
49 L<Bio::Variation::DNAMutation>, L<Bio::Variation::RNAChange>, and | |
50 L<Bio::Variation::AAChange> for more information. | |
51 | |
52 Variant objects can be added using two ways: an array passed to the | |
53 constructor or as individual Variant objects with add_Variant | |
54 method. | |
55 | |
56 | |
57 =head1 FEEDBACK | |
58 | |
59 =head2 Mailing Lists | |
60 | |
61 User feedback is an integral part of the evolution of this and other | |
62 Bioperl modules. Send your comments and suggestions preferably to the | |
63 Bioperl mailing lists Your participation is much appreciated. | |
64 | |
65 bioperl-l@bioperl.org - General discussion | |
66 http://bio.perl.org/MailList.html - About the mailing lists | |
67 | |
68 =head2 Reporting Bugs | |
69 | |
70 report bugs to the Bioperl bug tracking system to help us keep track | |
71 the bugs and their resolution. Bug reports can be submitted via | |
72 email or the web: | |
73 | |
74 bioperl-bugs@bio.perl.org | |
75 http://bugzilla.bioperl.org/ | |
76 | |
77 =head1 AUTHOR - Heikki Lehvaslaiho | |
78 | |
79 Email: heikki@ebi.ac.uk | |
80 Address: | |
81 | |
82 EMBL Outstation, European Bioinformatics Institute | |
83 Wellcome Trust Genome Campus, Hinxton | |
84 Cambs. CB10 1SD, United Kingdom | |
85 | |
86 =head1 CONTRIBUTORS | |
87 | |
88 Eckhard Lehmann, ecky@e-lehmann.de | |
89 | |
90 =head1 APPENDIX | |
91 | |
92 The rest of the documentation details each of the object | |
93 methods. Internal methods are usually preceded with a _ | |
94 | |
95 =cut | |
96 | |
97 # Let the code begin... | |
98 | |
99 package Bio::Variation::SeqDiff; | |
100 my $VERSION=1.0; | |
101 | |
102 use strict; | |
103 use vars qw($VERSION @ISA); | |
104 use Bio::Root::Root; | |
105 use Bio::Tools::CodonTable; | |
106 use Bio::PrimarySeq; | |
107 | |
108 @ISA = qw( Bio::Root::Root ); | |
109 | |
110 | |
111 =head2 new | |
112 | |
113 Title : new | |
114 Usage : $seqDiff = Bio::Variation::SeqDiff->new; | |
115 Function: generates a new Bio::Variation::SeqDiff | |
116 Returns : reference to a new object of class SeqDiff | |
117 Args : | |
118 | |
119 =cut | |
120 | |
121 sub new { | |
122 my($class,@args) = @_; | |
123 my $self = $class->SUPER::new(@args); | |
124 | |
125 my($id, $sysname, $trivname, $chr, $gene_symbol, | |
126 $desc, $alphabet, $numbering, $offset, $rna_offset, $rna_id, $cds_end, | |
127 $dna_ori, $dna_mut, $rna_ori, $rna_mut, $aa_ori, $aa_mut | |
128 #@variants, @genes | |
129 ) = | |
130 $self->_rearrange([qw(ID | |
131 SYSNAME | |
132 TRIVNAME | |
133 CHR | |
134 GENE_SYMBOL | |
135 DESC | |
136 ALPHABET | |
137 NUMBERING | |
138 OFFSET | |
139 RNA_OFFSET | |
140 RNA_ID | |
141 CDS_END | |
142 DNA_ORI | |
143 DNA_MUT | |
144 RNA_ORI | |
145 AA_ORI | |
146 AA_MUT | |
147 )], | |
148 @args); | |
149 | |
150 #my $make = $self->SUPER::_initialize(@args); | |
151 | |
152 $id && $self->id($id); | |
153 $sysname && $self->sysname($sysname); | |
154 $trivname && $self->trivname($trivname); | |
155 $chr && $self->chromosome($chr); | |
156 $gene_symbol && $self->gene_symbol($chr); | |
157 $desc && $self->description($desc); | |
158 $alphabet && $self->alphabet($alphabet); | |
159 $numbering && $self->numbering($numbering); | |
160 $offset && $self->offset($offset); | |
161 $rna_offset && $self->rna_offset($rna_offset); | |
162 $rna_id && $self->rna_id($rna_id); | |
163 $cds_end && $self->cds_end($cds_end); | |
164 | |
165 $dna_ori && $self->dna_ori($dna_ori); | |
166 $dna_mut && $self->dna_mut($dna_mut); | |
167 $rna_ori && $self->rna_ori($rna_ori); | |
168 $rna_mut && $self->rna_mut($rna_mut); | |
169 $aa_ori && $self->aa_ori ($aa_ori); | |
170 $aa_mut && $self->aa_mut ($aa_mut); | |
171 | |
172 $self->{ 'variants' } = []; | |
173 #@variants && push(@{$self->{'variants'}},@variants); | |
174 | |
175 $self->{ 'genes' } = []; | |
176 #@genes && push(@{$self->{'genes'}},@genes); | |
177 | |
178 return $self; # success - we hope! | |
179 } | |
180 | |
181 | |
182 =head2 id | |
183 | |
184 Title : id | |
185 Usage : $obj->id(H0001); $id = $obj->id(); | |
186 Function: | |
187 | |
188 Sets or returns the id of the seqDiff. | |
189 Should be used to give the collection of variants a UID | |
190 without semantic associations. | |
191 | |
192 Example : | |
193 Returns : value of id, a scalar | |
194 Args : newvalue (optional) | |
195 | |
196 =cut | |
197 | |
198 | |
199 sub id { | |
200 my ($self,$value) = @_; | |
201 if (defined $value) { | |
202 $self->{'id'} = $value; | |
203 } | |
204 # unless (exists $self->{'id'}) { | |
205 # return "undefined"; | |
206 # } | |
207 else { | |
208 return $self->{'id'}; | |
209 } | |
210 } | |
211 | |
212 | |
213 =head2 sysname | |
214 | |
215 Title : sysname | |
216 Usage : $obj->sysname('5C>G'); $sysname = $obj->sysname(); | |
217 Function: | |
218 | |
219 Sets or returns the systematic name of the seqDiff. The | |
220 name should follow the HUGO Mutation Database Initiative | |
221 approved nomenclature. If called without first setting the | |
222 value, will generate it from L<Bio::Variation::DNAMutation> | |
223 objects attached. | |
224 | |
225 Example : | |
226 Returns : value of sysname, a scalar | |
227 Args : newvalue (optional) | |
228 | |
229 =cut | |
230 | |
231 | |
232 sub sysname { | |
233 my ($self,$value) = @_; | |
234 if (defined $value) { | |
235 $self->{'sysname'} = $value; | |
236 } | |
237 elsif (not defined $self->{'sysname'}) { | |
238 | |
239 my $sysname = ''; | |
240 my $c = 0; | |
241 foreach my $mut ($self->each_Variant) { | |
242 if( $mut->isa('Bio::Variation::DNAMutation') ) { | |
243 $c++; | |
244 if ($c == 1 ) { | |
245 $sysname = $mut->sysname ; | |
246 } | |
247 else { | |
248 $sysname .= ";". $mut->sysname; | |
249 } | |
250 } | |
251 } | |
252 $sysname = "[". $sysname. "]" if $c > 1; | |
253 $self->{'sysname'} = $sysname; | |
254 } | |
255 return $self->{'sysname'}; | |
256 } | |
257 | |
258 | |
259 =head2 trivname | |
260 | |
261 Title : trivname | |
262 Usage : $obj->trivname('[A2G;T56G]'); $trivname = $obj->trivname(); | |
263 Function: | |
264 | |
265 Sets or returns the trivial name of the seqDiff. | |
266 The name should follow the HUGO Mutation Database Initiative | |
267 approved nomenclature. If called without first setting the | |
268 value, will generate it from L<Bio::Variation::AAChange> | |
269 objects attached. | |
270 | |
271 Example : | |
272 Returns : value of trivname, a scalar | |
273 Args : newvalue (optional) | |
274 | |
275 =cut | |
276 | |
277 | |
278 sub trivname { | |
279 my ($self,$value) = @_; | |
280 if (defined $value) { | |
281 $self->{'trivname'} = $value; | |
282 } | |
283 elsif (not defined $self->{'trivname'}) { | |
284 | |
285 my $trivname = ''; | |
286 my $c = 0; | |
287 foreach my $mut ($self->each_Variant) { | |
288 if( $mut->isa('Bio::Variation::AAChange') ) { | |
289 $c++; | |
290 if ($c == 1 ) { | |
291 $trivname = $mut->trivname ; | |
292 } | |
293 else { | |
294 $trivname .= ";". $mut->trivname; | |
295 } | |
296 } | |
297 } | |
298 $trivname = "[". $trivname. "]" if $c > 1; | |
299 $self->{'trivname'} = $trivname; | |
300 } | |
301 | |
302 else { | |
303 return $self->{'trivname'}; | |
304 } | |
305 } | |
306 | |
307 | |
308 =head2 chromosome | |
309 | |
310 Title : chromosome | |
311 Usage : $obj->chromosome('X'); $chromosome = $obj->chromosome(); | |
312 Function: | |
313 | |
314 Sets or returns the chromosome ("linkage group") of the seqDiff. | |
315 | |
316 Example : | |
317 Returns : value of chromosome, a scalar | |
318 Args : newvalue (optional) | |
319 | |
320 =cut | |
321 | |
322 | |
323 sub chromosome { | |
324 my ($self,$value) = @_; | |
325 if (defined $value) { | |
326 $self->{'chromosome'} = $value; | |
327 } | |
328 else { | |
329 return $self->{'chromosome'}; | |
330 } | |
331 } | |
332 | |
333 | |
334 =head2 gene_symbol | |
335 | |
336 Title : gene_symbol | |
337 Usage : $obj->gene_symbol('FOS'); $gene_symbol = $obj->gene_symbol; | |
338 Function: | |
339 | |
340 Sets or returns the gene symbol for the studied CDS. | |
341 | |
342 Example : | |
343 Returns : value of gene_symbol, a scalar | |
344 Args : newvalue (optional) | |
345 | |
346 =cut | |
347 | |
348 | |
349 sub gene_symbol { | |
350 my ($self,$value) = @_; | |
351 if (defined $value) { | |
352 $self->{'gene_symbol'} = $value; | |
353 } | |
354 else { | |
355 return $self->{'gene_symbol'}; | |
356 } | |
357 } | |
358 | |
359 | |
360 | |
361 =head2 description | |
362 | |
363 Title : description | |
364 Usage : $obj->description('short description'); $descr = $obj->description(); | |
365 Function: | |
366 | |
367 Sets or returns the short description of the seqDiff. | |
368 | |
369 Example : | |
370 Returns : value of description, a scalar | |
371 Args : newvalue (optional) | |
372 | |
373 =cut | |
374 | |
375 | |
376 sub description { | |
377 my ($self,$value) = @_; | |
378 if (defined $value) { | |
379 $self->{'description'} = $value; | |
380 } | |
381 else { | |
382 return $self->{'description'}; | |
383 } | |
384 } | |
385 | |
386 | |
387 =head2 alphabet | |
388 | |
389 Title : alphabet | |
390 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ } | |
391 Function: Returns the type of primary reference sequence being one of | |
392 'dna', 'rna' or 'protein'. This is case sensitive. | |
393 | |
394 Returns : a string either 'dna','rna','protein'. | |
395 Args : none | |
396 | |
397 | |
398 =cut | |
399 | |
400 sub alphabet { | |
401 my ($self,$value) = @_; | |
402 my %type = (dna => 1, | |
403 rna => 1, | |
404 protein => 1); | |
405 if( defined $value ) { | |
406 if ($type{$value}) { | |
407 $self->{'alphabet'} = $value; | |
408 } else { | |
409 $self->throw("$value is not valid alphabet value!"); | |
410 } | |
411 } | |
412 return $self->{'alphabet'}; | |
413 } | |
414 | |
415 | |
416 =head2 numbering | |
417 | |
418 Title : numbering | |
419 Usage : $obj->numbering('coding'); $numbering = $obj->numbering(); | |
420 Function: | |
421 | |
422 Sets or returns the string giving the numbering schema used | |
423 to describe the variants. | |
424 | |
425 Example : | |
426 Returns : value of numbering, a scalar | |
427 Args : newvalue (optional) | |
428 | |
429 =cut | |
430 | |
431 | |
432 | |
433 sub numbering { | |
434 my ($self,$value) = @_; | |
435 if (defined $value) { | |
436 $self->{'numbering'} = $value; | |
437 } | |
438 else { | |
439 return $self->{'numbering'}; | |
440 } | |
441 } | |
442 | |
443 | |
444 =head2 offset | |
445 | |
446 Title : offset | |
447 Usage : $obj->offset(124); $offset = $obj->offset(); | |
448 Function: | |
449 | |
450 Sets or returns the offset from the beginning of the DNA sequence | |
451 to the coordinate start used to describe variants. Typically | |
452 the beginning of the coding region of the gene. | |
453 The cds_start should be 1 + offset. | |
454 | |
455 Example : | |
456 Returns : value of offset, a scalar | |
457 Args : newvalue (optional) | |
458 | |
459 =cut | |
460 | |
461 | |
462 | |
463 sub offset { | |
464 my ($self,$value) = @_; | |
465 if (defined $value) { | |
466 $self->{'offset'} = $value; | |
467 } | |
468 elsif (not defined $self->{'offset'} ) { | |
469 return $self->{'offset'} = 0; | |
470 } | |
471 else { | |
472 return $self->{'offset'}; | |
473 } | |
474 } | |
475 | |
476 | |
477 =head2 cds_start | |
478 | |
479 Title : cds_start | |
480 Usage : $obj->cds_start(123); $cds_start = $obj->cds_start(); | |
481 Function: | |
482 | |
483 Sets or returns the cds_start from the beginning of the DNA | |
484 sequence to the coordinate start used to describe | |
485 variants. Typically the beginning of the coding region of | |
486 the gene. Needs to be and is implemented as 1 + offset. | |
487 | |
488 Example : | |
489 Returns : value of cds_start, a scalar | |
490 Args : newvalue (optional) | |
491 | |
492 =cut | |
493 | |
494 | |
495 | |
496 sub cds_start { | |
497 my ($self,$value) = @_; | |
498 if (defined $value) { | |
499 $self->{'offset'} = $value - 1; | |
500 } | |
501 else { | |
502 return $self->{'offset'} + 1; | |
503 } | |
504 } | |
505 | |
506 | |
507 =head2 cds_end | |
508 | |
509 Title : cds_end | |
510 Usage : $obj->cds_end(321); $cds_end = $obj->cds_end(); | |
511 Function: | |
512 | |
513 Sets or returns the position of the last nucleotitide of the | |
514 termination codon. The coordinate system starts from cds_start. | |
515 | |
516 Example : | |
517 Returns : value of cds_end, a scalar | |
518 Args : newvalue (optional) | |
519 | |
520 =cut | |
521 | |
522 | |
523 | |
524 sub cds_end { | |
525 my ($self,$value) = @_; | |
526 if (defined $value) { | |
527 $self->{'cds_end'} = $value; | |
528 } | |
529 else { | |
530 return $self->{'cds_end'}; | |
531 #$self->{'cds_end'} = CORE::length($self->SeqDiff->rna_ori)/3; | |
532 } | |
533 } | |
534 | |
535 | |
536 =head2 rna_offset | |
537 | |
538 Title : rna_offset | |
539 Usage : $obj->rna_offset(124); $rna_offset = $obj->rna_offset(); | |
540 Function: | |
541 | |
542 Sets or returns the rna_offset from the beginning of the RNA sequence | |
543 to the coordinate start used to describe variants. Typically | |
544 the beginning of the coding region of the gene. | |
545 | |
546 Example : | |
547 Returns : value of rna_offset, a scalar | |
548 Args : newvalue (optional) | |
549 | |
550 =cut | |
551 | |
552 | |
553 | |
554 sub rna_offset { | |
555 my ($self,$value) = @_; | |
556 if (defined $value) { | |
557 $self->{'rna_offset'} = $value; | |
558 } | |
559 elsif (not defined $self->{'rna_offset'} ) { | |
560 return $self->{'rna_offset'} = 0; | |
561 } | |
562 else { | |
563 return $self->{'rna_offset'}; | |
564 } | |
565 } | |
566 | |
567 | |
568 =head2 rna_id | |
569 | |
570 Title : rna_id | |
571 Usage : $obj->rna_id('transcript#3'); $rna_id = $obj->rna_id(); | |
572 Function: | |
573 | |
574 Sets or returns the ID for original RNA sequence of the seqDiff. | |
575 | |
576 Example : | |
577 Returns : value of rna_id, a scalar | |
578 Args : newvalue (optional) | |
579 | |
580 =cut | |
581 | |
582 | |
583 sub rna_id { | |
584 my ($self,$value) = @_; | |
585 if (defined $value) { | |
586 $self->{'rna_id'} = $value; | |
587 } | |
588 else { | |
589 return $self->{'rna_id'}; | |
590 } | |
591 } | |
592 | |
593 | |
594 | |
595 =head2 add_Variant | |
596 | |
597 Title : add_Variant | |
598 Usage : $obj->add_Variant($variant) | |
599 Function: | |
600 | |
601 Pushes one Bio::Variation::Variant into the list of variants. | |
602 At the same time, creates a link from the Variant to SeqDiff | |
603 using its SeqDiff method. | |
604 | |
605 Example : | |
606 Returns : 1 when succeeds, 0 for failure. | |
607 Args : Variant object | |
608 | |
609 =cut | |
610 | |
611 sub add_Variant { | |
612 my ($self,$value) = @_; | |
613 if (defined $value) { | |
614 if( ! $value->isa('Bio::Variation::VariantI') ) { | |
615 $self->throw("Is not a VariantI complying object but a [$self]"); | |
616 return 0; | |
617 } | |
618 else { | |
619 push(@{$self->{'variants'}},$value); | |
620 $value->SeqDiff($self); | |
621 return 1; | |
622 } | |
623 } | |
624 else { | |
625 return 0; | |
626 } | |
627 } | |
628 | |
629 | |
630 =head2 each_Variant | |
631 | |
632 Title : each_Variant | |
633 Usage : $obj->each_Variant(); | |
634 Function: | |
635 | |
636 Returns a list of Variants. | |
637 | |
638 Example : | |
639 Returns : list of Variants | |
640 Args : none | |
641 | |
642 =cut | |
643 | |
644 sub each_Variant{ | |
645 my ($self,@args) = @_; | |
646 | |
647 return @{$self->{'variants'}}; | |
648 } | |
649 | |
650 | |
651 | |
652 =head2 add_Gene | |
653 | |
654 Title : add_Gene | |
655 Usage : $obj->add_Gene($gene) | |
656 Function: | |
657 | |
658 Pushes one L<Bio::LiveSeq::Gene> into the list of genes. | |
659 | |
660 Example : | |
661 Returns : 1 when succeeds, 0 for failure. | |
662 Args : Bio::LiveSeq::Gene object | |
663 | |
664 See L<Bio::LiveSeq::Gene> for more information. | |
665 | |
666 =cut | |
667 | |
668 | |
669 sub add_Gene { | |
670 my ($self,$value) = @_; | |
671 if (defined $value) { | |
672 if( ! $value->isa('Bio::LiveSeq::Gene') ) { | |
673 $value->throw("Is not a Bio::LiveSeq::Gene object but a [$value]"); | |
674 return 0; | |
675 } | |
676 else { | |
677 push(@{$self->{'genes'}},$value); | |
678 return 1; | |
679 } | |
680 } | |
681 else { | |
682 return 0; | |
683 } | |
684 } | |
685 | |
686 | |
687 =head2 each_Gene | |
688 | |
689 Title : each_Gene | |
690 Usage : $obj->each_Gene(); | |
691 Function: | |
692 | |
693 Returns a list of L<Bio::LiveSeq::Gene>s. | |
694 | |
695 Example : | |
696 Returns : list of Genes | |
697 Args : none | |
698 | |
699 =cut | |
700 | |
701 sub each_Gene{ | |
702 my ($self,@args) = @_; | |
703 | |
704 return @{$self->{'genes'}}; | |
705 } | |
706 | |
707 | |
708 =head2 dna_ori | |
709 | |
710 Title : dna_ori | |
711 Usage : $obj->dna_ori('atgctgctgctgct'); $dna_ori = $obj->dna_ori(); | |
712 Function: | |
713 | |
714 Sets or returns the original DNA sequence string of the seqDiff. | |
715 | |
716 Example : | |
717 Returns : value of dna_ori, a scalar | |
718 Args : newvalue (optional) | |
719 | |
720 =cut | |
721 | |
722 | |
723 sub dna_ori { | |
724 my ($self,$value) = @_; | |
725 if (defined $value) { | |
726 $self->{'dna_ori'} = $value; | |
727 } | |
728 else { | |
729 return $self->{'dna_ori'}; | |
730 } | |
731 } | |
732 | |
733 | |
734 =head2 dna_mut | |
735 | |
736 Title : dna_mut | |
737 Usage : $obj->dna_mut('atgctggtgctgct'); $dna_mut = $obj->dna_mut(); | |
738 Function: | |
739 | |
740 Sets or returns the mutated DNA sequence of the seqDiff. | |
741 If sequence has not been set generates it from the | |
742 original sequence and DNA mutations. | |
743 | |
744 Example : | |
745 Returns : value of dna_mut, a scalar | |
746 Args : newvalue (optional) | |
747 | |
748 =cut | |
749 | |
750 | |
751 sub dna_mut { | |
752 my ($self,$value) = @_; | |
753 if (defined $value) { | |
754 $self->{'dna_mut'} = $value; | |
755 } | |
756 else { | |
757 $self->_set_dnamut() unless $self->{'dna_mut'}; | |
758 return $self->{'dna_mut'}; | |
759 } | |
760 } | |
761 | |
762 sub _set_dnamut { | |
763 my $self = shift; | |
764 | |
765 return undef unless $self->{'dna_ori'} && $self->each_Variant; | |
766 | |
767 $self->{'dna_mut'} = $self->{'dna_ori'}; | |
768 foreach ($self->each_Variant) { | |
769 next unless $_->isa('Bio::Variation::DNAMutation'); | |
770 next unless $_->isMutation; | |
771 | |
772 my ($s, $la, $le); | |
773 #lies the mutation less than 25 bases after the start of sequence? | |
774 if ($_->start < 25) { | |
775 $s = 0; $la = $_->start - 1; | |
776 } else { | |
777 $s = $_->start - 25; $la = 25; | |
778 } | |
779 | |
780 #is the mutation an insertion? | |
781 $_->end($_->start) unless $_->allele_ori->seq; | |
782 | |
783 #does the mutation end greater than 25 bases before the end of | |
784 #sequence? | |
785 if (($_->end + 25) > length($self->{'dna_mut'})) { | |
786 $le = length($self->{'dna_mut'}) - $_->end; | |
787 } else { | |
788 $le = 25; | |
789 } | |
790 | |
791 $_->dnStreamSeq(substr($self->{'dna_mut'}, $s, $la)); | |
792 $_->upStreamSeq(substr($self->{'dna_mut'}, $_->end, $le)); | |
793 | |
794 my $s_ori = $_->dnStreamSeq . $_->allele_ori->seq . $_->upStreamSeq; | |
795 my $s_mut = $_->dnStreamSeq . $_->allele_mut->seq . $_->upStreamSeq; | |
796 | |
797 (my $str = $self->{'dna_mut'}) =~ s/$s_ori/$s_mut/; | |
798 $self->{'dna_mut'} = $str; | |
799 } | |
800 } | |
801 | |
802 | |
803 =head2 rna_ori | |
804 | |
805 Title : rna_ori | |
806 Usage : $obj->rna_ori('atgctgctgctgct'); $rna_ori = $obj->rna_ori(); | |
807 Function: | |
808 | |
809 Sets or returns the original RNA sequence of the seqDiff. | |
810 | |
811 Example : | |
812 Returns : value of rna_ori, a scalar | |
813 Args : newvalue (optional) | |
814 | |
815 =cut | |
816 | |
817 | |
818 sub rna_ori { | |
819 my ($self,$value) = @_; | |
820 if (defined $value) { | |
821 $self->{'rna_ori'} = $value; | |
822 } | |
823 else { | |
824 return $self->{'rna_ori'}; | |
825 } | |
826 } | |
827 | |
828 | |
829 =head2 rna_mut | |
830 | |
831 Title : rna_mut | |
832 Usage : $obj->rna_mut('atgctggtgctgct'); $rna_mut = $obj->rna_mut(); | |
833 Function: | |
834 | |
835 Sets or returns the mutated RNA sequence of the seqDiff. | |
836 | |
837 Example : | |
838 Returns : value of rna_mut, a scalar | |
839 Args : newvalue (optional) | |
840 | |
841 =cut | |
842 | |
843 | |
844 sub rna_mut { | |
845 my ($self,$value) = @_; | |
846 if (defined $value) { | |
847 $self->{'rna_mut'} = $value; | |
848 } | |
849 else { | |
850 return $self->{'rna_mut'}; | |
851 } | |
852 } | |
853 | |
854 | |
855 =head2 aa_ori | |
856 | |
857 Title : aa_ori | |
858 Usage : $obj->aa_ori('MAGVLL*'); $aa_ori = $obj->aa_ori(); | |
859 Function: | |
860 | |
861 Sets or returns the original protein sequence of the seqDiff. | |
862 | |
863 Example : | |
864 Returns : value of aa_ori, a scalar | |
865 Args : newvalue (optional) | |
866 | |
867 =cut | |
868 | |
869 | |
870 sub aa_ori { | |
871 my ($self,$value) = @_; | |
872 if (defined $value) { | |
873 $self->{'aa_ori'} = $value; | |
874 } | |
875 else { | |
876 return $self->{'aa_ori'}; | |
877 } | |
878 } | |
879 | |
880 | |
881 =head2 aa_mut | |
882 | |
883 Title : aa_mut | |
884 Usage : $obj->aa_mut('MA*'); $aa_mut = $obj->aa_mut(); | |
885 Function: | |
886 | |
887 Sets or returns the mutated protein sequence of the seqDiff. | |
888 | |
889 Example : | |
890 Returns : value of aa_mut, a scalar | |
891 Args : newvalue (optional) | |
892 | |
893 =cut | |
894 | |
895 | |
896 sub aa_mut { | |
897 my ($self,$value) = @_; | |
898 if (defined $value) { | |
899 $self->{'aa_mut'} = $value; | |
900 } | |
901 else { | |
902 return $self->{'aa_mut'}; | |
903 } | |
904 } | |
905 | |
906 | |
907 =head2 seqobj | |
908 | |
909 Title : seqobj | |
910 Usage : $dnaobj = $obj->seqobj('dna_mut'); | |
911 Function: | |
912 | |
913 Returns the any original or mutated sequences as a | |
914 Bio::PrimarySeq object. | |
915 | |
916 Example : | |
917 Returns : Bio::PrimarySeq object for the requested sequence | |
918 Args : string, method name for the sequence requested | |
919 | |
920 See L<Bio::PrimarySeq> for more information. | |
921 | |
922 =cut | |
923 | |
924 sub seqobj { | |
925 my ($self,$value) = @_; | |
926 my $out; | |
927 my %valid_obj = | |
928 map {$_, 1} qw(dna_ori rna_ori aa_ori dna_mut rna_mut aa_mut); | |
929 $valid_obj{$value} || | |
930 $self->throw("Sequence type '$value' is not a valid type (". | |
931 join(',', map "'$_'", sort keys %valid_obj) .") lowercase"); | |
932 my ($alphabet) = $value =~ /([^_]+)/; | |
933 my $id = $self->id; | |
934 $id = $self->rna_id if $self->rna_id; | |
935 $alphabet = 'protein' if $alphabet eq 'aa'; | |
936 $out = Bio::PrimarySeq->new | |
937 ( '-seq' => $self->{$value}, | |
938 '-display_id' => $id, | |
939 '-accession_number' => $self->id, | |
940 '-alphabet' => $alphabet | |
941 ) if $self->{$value} ; | |
942 return $out; | |
943 } | |
944 | |
945 =head2 alignment | |
946 | |
947 Title : alignment | |
948 Usage : $obj->alignment | |
949 Function: | |
950 | |
951 Returns a pretty RNA/AA sequence alignment from linked | |
952 objects. Under construction: Only simple coding region | |
953 point mutations work. | |
954 | |
955 Example : | |
956 Returns : | |
957 Args : none | |
958 | |
959 =cut | |
960 | |
961 | |
962 sub alignment { | |
963 my $self = shift; | |
964 my (@entry, $text); | |
965 | |
966 my $maxflanklen = 12; | |
967 | |
968 foreach my $mut ($self->each_Variant) { | |
969 if( $mut->isa('Bio::Variation::RNAChange') ) { | |
970 | |
971 my $upflank = $mut->upStreamSeq; | |
972 my $dnflank = $mut->dnStreamSeq; | |
973 my $cposd = $mut->codon_pos; | |
974 my $rori = $mut->allele_ori->seq; | |
975 my $rmut = $mut->allele_mut->seq; | |
976 my $rseqoriu = ''; | |
977 my $rseqmutu = ''; | |
978 my $rseqorid = ''; | |
979 my $rseqmutd = ''; | |
980 my $aaseqmutu = ''; | |
981 my (@rseqori, @rseqmut ); | |
982 | |
983 # point | |
984 if ($mut->DNAMutation->label =~ /point/) { | |
985 if ($cposd == 1 ) { | |
986 my $nt2d = substr($dnflank, 0, 2); | |
987 push @rseqori, $rori. $nt2d; | |
988 push @rseqmut, uc ($rmut). $nt2d; | |
989 $dnflank = substr($dnflank, 2); | |
990 } | |
991 elsif ($cposd == 2) { | |
992 my $ntu = chop $upflank; | |
993 my $ntd = substr($dnflank, 0, 1); | |
994 push @rseqori, $ntu. $rori. $ntd; | |
995 push @rseqmut, $ntu. uc ($rmut). $ntd; | |
996 $dnflank = substr($dnflank, 1); | |
997 } | |
998 elsif ($cposd == 3) { | |
999 my $ntu1 = chop $upflank; | |
1000 my $ntu2 = chop $upflank; | |
1001 push (@rseqori, $ntu2. $ntu1. $rori); | |
1002 push (@rseqmut, $ntu2. $ntu1. uc $rmut); | |
1003 } | |
1004 } | |
1005 #deletion | |
1006 elsif ($mut->DNAMutation->label =~ /deletion/) { | |
1007 if ($cposd == 2 ) { | |
1008 $rseqorid = chop $upflank; | |
1009 $rseqmutd = $rseqorid; | |
1010 } | |
1011 for (my $i=1; $i<=$mut->length; $i++) { | |
1012 my $ntd .= substr($mut->allele_ori, $i-1, 1); | |
1013 $rseqorid .= $ntd; | |
1014 if (length($rseqorid) == 3 ) { | |
1015 push (@rseqori, $rseqorid); | |
1016 push (@rseqmut, " "); | |
1017 $rseqorid = ''; | |
1018 } | |
1019 } | |
1020 | |
1021 if ($rseqorid) { | |
1022 $rseqorid .= substr($dnflank, 0, 3-$rseqorid); | |
1023 push (@rseqori, $rseqorid); | |
1024 push (@rseqmut, " "); | |
1025 $dnflank = substr($dnflank,3-$rseqorid); | |
1026 } | |
1027 } | |
1028 $upflank = reverse $upflank; | |
1029 # loop throught the flanks | |
1030 for (my $i=1; $i<=length($dnflank); $i++) { | |
1031 | |
1032 last if $i > $maxflanklen; | |
1033 | |
1034 my $ntd .= substr($dnflank, $i-1, 1); | |
1035 my $ntu .= substr($upflank, $i-1, 1); | |
1036 | |
1037 $rseqmutd .= $ntd; | |
1038 $rseqorid .= $ntd; | |
1039 $rseqmutu = $ntu. $rseqmutu; | |
1040 $rseqoriu = $ntu. $rseqoriu; | |
1041 | |
1042 if (length($rseqorid) == 3 and length($rseqorid) == 3) { | |
1043 push (@rseqori, $rseqorid); | |
1044 push (@rseqmut, $rseqmutd); | |
1045 $rseqorid = $rseqmutd =''; | |
1046 } | |
1047 if (length($rseqoriu) == 3 and length($rseqoriu) == 3) { | |
1048 unshift (@rseqori, $rseqoriu); | |
1049 unshift (@rseqmut, $rseqmutu); | |
1050 $rseqoriu = $rseqmutu =''; | |
1051 } | |
1052 | |
1053 #print "|i=$i, $cposd, $rseqmutd, $rseqorid\n"; | |
1054 #print "|i=$i, $cposu, $rseqmutu, $rseqoriu\n\n"; | |
1055 | |
1056 } | |
1057 | |
1058 push (@rseqori, $rseqorid); | |
1059 unshift (@rseqori, $rseqoriu); | |
1060 push (@rseqmut, $rseqmutd); | |
1061 unshift (@rseqmut, $rseqmutu); | |
1062 | |
1063 return unless $mut->AAChange; | |
1064 #translate | |
1065 my $tr = new Bio::Tools::CodonTable ('-id' => $mut->codon_table); | |
1066 my $apos = $mut->AAChange->start; | |
1067 my $aposmax = CORE::length($self->aa_ori); #terminator codon no | |
1068 my $rseqori; | |
1069 my $rseqmut; | |
1070 my $aaseqori; | |
1071 my $aaseqmut = ""; | |
1072 for (my $i = 0; $i <= $#rseqori; $i++) { | |
1073 my $a = ''; | |
1074 | |
1075 $a = $tr->translate($rseqori[$i]) if length($rseqori[$i]) == 3; | |
1076 | |
1077 if (length($a) != 1 or | |
1078 $apos - ( $maxflanklen/2 -1) + $i < 1 or | |
1079 $apos - ( $maxflanklen/2 -1) + $i > $aposmax ) { | |
1080 $aaseqori .= " "; | |
1081 } else { | |
1082 $aaseqori .= " ". $a. " "; | |
1083 } | |
1084 my $b = ''; | |
1085 if (length($rseqmut[$i]) == 3) { | |
1086 if ($rseqmut[$i] eq ' ') { | |
1087 $b = "_"; | |
1088 } else { | |
1089 $b = $tr->translate($rseqmut[$i]); | |
1090 } | |
1091 } | |
1092 if (( $b ne $a and | |
1093 length($b) == 1 and | |
1094 $apos - ( $maxflanklen/2 -1) + $i >= 1 ) or | |
1095 ( $apos - ( $maxflanklen/2 -1) + $i >= $aposmax and | |
1096 $mut->label =~ 'termination') | |
1097 ) { | |
1098 $aaseqmut .= " ". $b. " "; | |
1099 } else { | |
1100 $aaseqmut .= " "; | |
1101 } | |
1102 | |
1103 if ($i == 0 and length($rseqori[$i]) != 3) { | |
1104 my $l = 3 - length($rseqori[$i]); | |
1105 $rseqori[$i] = (" " x $l). $rseqori[$i]; | |
1106 $rseqmut[$i] = (" " x $l). $rseqmut[$i]; | |
1107 } | |
1108 $rseqori .= $rseqori[$i]. " " if $rseqori[$i] ne ''; | |
1109 $rseqmut .= $rseqmut[$i]. " " if $rseqmut[$i] ne ''; | |
1110 } | |
1111 | |
1112 # collect the results | |
1113 push (@entry, | |
1114 "\n" | |
1115 ); | |
1116 $text = " ". $aaseqmut; | |
1117 push (@entry, | |
1118 $text | |
1119 ); | |
1120 $text = "Variant : ". $rseqmut; | |
1121 push (@entry, | |
1122 $text | |
1123 ); | |
1124 $text = "Reference: ". $rseqori; | |
1125 push (@entry, | |
1126 $text | |
1127 ); | |
1128 $text = " ". $aaseqori; | |
1129 push (@entry, | |
1130 $text | |
1131 ); | |
1132 push (@entry, | |
1133 "\n" | |
1134 ); | |
1135 } | |
1136 | |
1137 } | |
1138 | |
1139 my $res; | |
1140 foreach my $line (@entry) { | |
1141 $res .= "$line\n"; | |
1142 } | |
1143 return $res; | |
1144 } | |
1145 | |
1146 1; |