comparison variant_effect_predictor/Bio/Variation/SeqDiff.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: SeqDiff.pm,v 1.16 2002/10/22 07:38:49 lapp Exp $
2 # bioperl module for Bio::Variation::SeqDiff
3 #
4 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
5 #
6 # Copyright Heikki Lehvaslaiho
7 #
8 # You may distribute this module under the same terms as perl itself
9 #
10 # POD documentation - main docs before the code
11
12 # cds_end definition?
13
14 =head1 NAME
15
16 Bio::Variation::SeqDiff - Container class for mutation/variant descriptions
17
18 =head1 SYNOPSIS
19
20 $seqDiff = Bio::Variation::SeqDiff->new (
21 -id => $M20132,
22 -alphabet => 'rna',
23 -gene_symbol => 'AR'
24 -chromosome => 'X',
25 -numbering => 'coding'
26 );
27 # get a DNAMutation object somehow
28 $seqDiff->add_Variant($dnamut);
29 print $seqDiff->sys_name(), "\n";
30
31 =head1 DESCRIPTION
32
33 SeqDiff stores Bio::Variation::VariantI object references and
34 descriptive information common to all changes in a sequence. Mutations
35 are understood to be any kind of sequence markers and are expected to
36 occur in the same chromosome. See L<Bio::Variation::VariantI> for details.
37
38 The methods of SeqDiff are geared towards describing mutations in
39 human genes using gene-based coordinate system where 'A' of the
40 initiator codon has number 1 and the one before it -1. This is
41 according to conventions of human genetics.
42
43 There will be class Bio::Variation::Genotype to describe markers in
44 different chromosomes and diploid genototypes.
45
46 Classes implementing Bio::Variation::VariantI interface are
47 Bio::Variation::DNAMutation, Bio::Variation::RNAChange, and
48 Bio::Variation::AAChange. See L<Bio::Variation::VariantI>,
49 L<Bio::Variation::DNAMutation>, L<Bio::Variation::RNAChange>, and
50 L<Bio::Variation::AAChange> for more information.
51
52 Variant objects can be added using two ways: an array passed to the
53 constructor or as individual Variant objects with add_Variant
54 method.
55
56
57 =head1 FEEDBACK
58
59 =head2 Mailing Lists
60
61 User feedback is an integral part of the evolution of this and other
62 Bioperl modules. Send your comments and suggestions preferably to the
63 Bioperl mailing lists Your participation is much appreciated.
64
65 bioperl-l@bioperl.org - General discussion
66 http://bio.perl.org/MailList.html - About the mailing lists
67
68 =head2 Reporting Bugs
69
70 report bugs to the Bioperl bug tracking system to help us keep track
71 the bugs and their resolution. Bug reports can be submitted via
72 email or the web:
73
74 bioperl-bugs@bio.perl.org
75 http://bugzilla.bioperl.org/
76
77 =head1 AUTHOR - Heikki Lehvaslaiho
78
79 Email: heikki@ebi.ac.uk
80 Address:
81
82 EMBL Outstation, European Bioinformatics Institute
83 Wellcome Trust Genome Campus, Hinxton
84 Cambs. CB10 1SD, United Kingdom
85
86 =head1 CONTRIBUTORS
87
88 Eckhard Lehmann, ecky@e-lehmann.de
89
90 =head1 APPENDIX
91
92 The rest of the documentation details each of the object
93 methods. Internal methods are usually preceded with a _
94
95 =cut
96
97 # Let the code begin...
98
99 package Bio::Variation::SeqDiff;
100 my $VERSION=1.0;
101
102 use strict;
103 use vars qw($VERSION @ISA);
104 use Bio::Root::Root;
105 use Bio::Tools::CodonTable;
106 use Bio::PrimarySeq;
107
108 @ISA = qw( Bio::Root::Root );
109
110
111 =head2 new
112
113 Title : new
114 Usage : $seqDiff = Bio::Variation::SeqDiff->new;
115 Function: generates a new Bio::Variation::SeqDiff
116 Returns : reference to a new object of class SeqDiff
117 Args :
118
119 =cut
120
121 sub new {
122 my($class,@args) = @_;
123 my $self = $class->SUPER::new(@args);
124
125 my($id, $sysname, $trivname, $chr, $gene_symbol,
126 $desc, $alphabet, $numbering, $offset, $rna_offset, $rna_id, $cds_end,
127 $dna_ori, $dna_mut, $rna_ori, $rna_mut, $aa_ori, $aa_mut
128 #@variants, @genes
129 ) =
130 $self->_rearrange([qw(ID
131 SYSNAME
132 TRIVNAME
133 CHR
134 GENE_SYMBOL
135 DESC
136 ALPHABET
137 NUMBERING
138 OFFSET
139 RNA_OFFSET
140 RNA_ID
141 CDS_END
142 DNA_ORI
143 DNA_MUT
144 RNA_ORI
145 AA_ORI
146 AA_MUT
147 )],
148 @args);
149
150 #my $make = $self->SUPER::_initialize(@args);
151
152 $id && $self->id($id);
153 $sysname && $self->sysname($sysname);
154 $trivname && $self->trivname($trivname);
155 $chr && $self->chromosome($chr);
156 $gene_symbol && $self->gene_symbol($chr);
157 $desc && $self->description($desc);
158 $alphabet && $self->alphabet($alphabet);
159 $numbering && $self->numbering($numbering);
160 $offset && $self->offset($offset);
161 $rna_offset && $self->rna_offset($rna_offset);
162 $rna_id && $self->rna_id($rna_id);
163 $cds_end && $self->cds_end($cds_end);
164
165 $dna_ori && $self->dna_ori($dna_ori);
166 $dna_mut && $self->dna_mut($dna_mut);
167 $rna_ori && $self->rna_ori($rna_ori);
168 $rna_mut && $self->rna_mut($rna_mut);
169 $aa_ori && $self->aa_ori ($aa_ori);
170 $aa_mut && $self->aa_mut ($aa_mut);
171
172 $self->{ 'variants' } = [];
173 #@variants && push(@{$self->{'variants'}},@variants);
174
175 $self->{ 'genes' } = [];
176 #@genes && push(@{$self->{'genes'}},@genes);
177
178 return $self; # success - we hope!
179 }
180
181
182 =head2 id
183
184 Title : id
185 Usage : $obj->id(H0001); $id = $obj->id();
186 Function:
187
188 Sets or returns the id of the seqDiff.
189 Should be used to give the collection of variants a UID
190 without semantic associations.
191
192 Example :
193 Returns : value of id, a scalar
194 Args : newvalue (optional)
195
196 =cut
197
198
199 sub id {
200 my ($self,$value) = @_;
201 if (defined $value) {
202 $self->{'id'} = $value;
203 }
204 # unless (exists $self->{'id'}) {
205 # return "undefined";
206 # }
207 else {
208 return $self->{'id'};
209 }
210 }
211
212
213 =head2 sysname
214
215 Title : sysname
216 Usage : $obj->sysname('5C>G'); $sysname = $obj->sysname();
217 Function:
218
219 Sets or returns the systematic name of the seqDiff. The
220 name should follow the HUGO Mutation Database Initiative
221 approved nomenclature. If called without first setting the
222 value, will generate it from L<Bio::Variation::DNAMutation>
223 objects attached.
224
225 Example :
226 Returns : value of sysname, a scalar
227 Args : newvalue (optional)
228
229 =cut
230
231
232 sub sysname {
233 my ($self,$value) = @_;
234 if (defined $value) {
235 $self->{'sysname'} = $value;
236 }
237 elsif (not defined $self->{'sysname'}) {
238
239 my $sysname = '';
240 my $c = 0;
241 foreach my $mut ($self->each_Variant) {
242 if( $mut->isa('Bio::Variation::DNAMutation') ) {
243 $c++;
244 if ($c == 1 ) {
245 $sysname = $mut->sysname ;
246 }
247 else {
248 $sysname .= ";". $mut->sysname;
249 }
250 }
251 }
252 $sysname = "[". $sysname. "]" if $c > 1;
253 $self->{'sysname'} = $sysname;
254 }
255 return $self->{'sysname'};
256 }
257
258
259 =head2 trivname
260
261 Title : trivname
262 Usage : $obj->trivname('[A2G;T56G]'); $trivname = $obj->trivname();
263 Function:
264
265 Sets or returns the trivial name of the seqDiff.
266 The name should follow the HUGO Mutation Database Initiative
267 approved nomenclature. If called without first setting the
268 value, will generate it from L<Bio::Variation::AAChange>
269 objects attached.
270
271 Example :
272 Returns : value of trivname, a scalar
273 Args : newvalue (optional)
274
275 =cut
276
277
278 sub trivname {
279 my ($self,$value) = @_;
280 if (defined $value) {
281 $self->{'trivname'} = $value;
282 }
283 elsif (not defined $self->{'trivname'}) {
284
285 my $trivname = '';
286 my $c = 0;
287 foreach my $mut ($self->each_Variant) {
288 if( $mut->isa('Bio::Variation::AAChange') ) {
289 $c++;
290 if ($c == 1 ) {
291 $trivname = $mut->trivname ;
292 }
293 else {
294 $trivname .= ";". $mut->trivname;
295 }
296 }
297 }
298 $trivname = "[". $trivname. "]" if $c > 1;
299 $self->{'trivname'} = $trivname;
300 }
301
302 else {
303 return $self->{'trivname'};
304 }
305 }
306
307
308 =head2 chromosome
309
310 Title : chromosome
311 Usage : $obj->chromosome('X'); $chromosome = $obj->chromosome();
312 Function:
313
314 Sets or returns the chromosome ("linkage group") of the seqDiff.
315
316 Example :
317 Returns : value of chromosome, a scalar
318 Args : newvalue (optional)
319
320 =cut
321
322
323 sub chromosome {
324 my ($self,$value) = @_;
325 if (defined $value) {
326 $self->{'chromosome'} = $value;
327 }
328 else {
329 return $self->{'chromosome'};
330 }
331 }
332
333
334 =head2 gene_symbol
335
336 Title : gene_symbol
337 Usage : $obj->gene_symbol('FOS'); $gene_symbol = $obj->gene_symbol;
338 Function:
339
340 Sets or returns the gene symbol for the studied CDS.
341
342 Example :
343 Returns : value of gene_symbol, a scalar
344 Args : newvalue (optional)
345
346 =cut
347
348
349 sub gene_symbol {
350 my ($self,$value) = @_;
351 if (defined $value) {
352 $self->{'gene_symbol'} = $value;
353 }
354 else {
355 return $self->{'gene_symbol'};
356 }
357 }
358
359
360
361 =head2 description
362
363 Title : description
364 Usage : $obj->description('short description'); $descr = $obj->description();
365 Function:
366
367 Sets or returns the short description of the seqDiff.
368
369 Example :
370 Returns : value of description, a scalar
371 Args : newvalue (optional)
372
373 =cut
374
375
376 sub description {
377 my ($self,$value) = @_;
378 if (defined $value) {
379 $self->{'description'} = $value;
380 }
381 else {
382 return $self->{'description'};
383 }
384 }
385
386
387 =head2 alphabet
388
389 Title : alphabet
390 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
391 Function: Returns the type of primary reference sequence being one of
392 'dna', 'rna' or 'protein'. This is case sensitive.
393
394 Returns : a string either 'dna','rna','protein'.
395 Args : none
396
397
398 =cut
399
400 sub alphabet {
401 my ($self,$value) = @_;
402 my %type = (dna => 1,
403 rna => 1,
404 protein => 1);
405 if( defined $value ) {
406 if ($type{$value}) {
407 $self->{'alphabet'} = $value;
408 } else {
409 $self->throw("$value is not valid alphabet value!");
410 }
411 }
412 return $self->{'alphabet'};
413 }
414
415
416 =head2 numbering
417
418 Title : numbering
419 Usage : $obj->numbering('coding'); $numbering = $obj->numbering();
420 Function:
421
422 Sets or returns the string giving the numbering schema used
423 to describe the variants.
424
425 Example :
426 Returns : value of numbering, a scalar
427 Args : newvalue (optional)
428
429 =cut
430
431
432
433 sub numbering {
434 my ($self,$value) = @_;
435 if (defined $value) {
436 $self->{'numbering'} = $value;
437 }
438 else {
439 return $self->{'numbering'};
440 }
441 }
442
443
444 =head2 offset
445
446 Title : offset
447 Usage : $obj->offset(124); $offset = $obj->offset();
448 Function:
449
450 Sets or returns the offset from the beginning of the DNA sequence
451 to the coordinate start used to describe variants. Typically
452 the beginning of the coding region of the gene.
453 The cds_start should be 1 + offset.
454
455 Example :
456 Returns : value of offset, a scalar
457 Args : newvalue (optional)
458
459 =cut
460
461
462
463 sub offset {
464 my ($self,$value) = @_;
465 if (defined $value) {
466 $self->{'offset'} = $value;
467 }
468 elsif (not defined $self->{'offset'} ) {
469 return $self->{'offset'} = 0;
470 }
471 else {
472 return $self->{'offset'};
473 }
474 }
475
476
477 =head2 cds_start
478
479 Title : cds_start
480 Usage : $obj->cds_start(123); $cds_start = $obj->cds_start();
481 Function:
482
483 Sets or returns the cds_start from the beginning of the DNA
484 sequence to the coordinate start used to describe
485 variants. Typically the beginning of the coding region of
486 the gene. Needs to be and is implemented as 1 + offset.
487
488 Example :
489 Returns : value of cds_start, a scalar
490 Args : newvalue (optional)
491
492 =cut
493
494
495
496 sub cds_start {
497 my ($self,$value) = @_;
498 if (defined $value) {
499 $self->{'offset'} = $value - 1;
500 }
501 else {
502 return $self->{'offset'} + 1;
503 }
504 }
505
506
507 =head2 cds_end
508
509 Title : cds_end
510 Usage : $obj->cds_end(321); $cds_end = $obj->cds_end();
511 Function:
512
513 Sets or returns the position of the last nucleotitide of the
514 termination codon. The coordinate system starts from cds_start.
515
516 Example :
517 Returns : value of cds_end, a scalar
518 Args : newvalue (optional)
519
520 =cut
521
522
523
524 sub cds_end {
525 my ($self,$value) = @_;
526 if (defined $value) {
527 $self->{'cds_end'} = $value;
528 }
529 else {
530 return $self->{'cds_end'};
531 #$self->{'cds_end'} = CORE::length($self->SeqDiff->rna_ori)/3;
532 }
533 }
534
535
536 =head2 rna_offset
537
538 Title : rna_offset
539 Usage : $obj->rna_offset(124); $rna_offset = $obj->rna_offset();
540 Function:
541
542 Sets or returns the rna_offset from the beginning of the RNA sequence
543 to the coordinate start used to describe variants. Typically
544 the beginning of the coding region of the gene.
545
546 Example :
547 Returns : value of rna_offset, a scalar
548 Args : newvalue (optional)
549
550 =cut
551
552
553
554 sub rna_offset {
555 my ($self,$value) = @_;
556 if (defined $value) {
557 $self->{'rna_offset'} = $value;
558 }
559 elsif (not defined $self->{'rna_offset'} ) {
560 return $self->{'rna_offset'} = 0;
561 }
562 else {
563 return $self->{'rna_offset'};
564 }
565 }
566
567
568 =head2 rna_id
569
570 Title : rna_id
571 Usage : $obj->rna_id('transcript#3'); $rna_id = $obj->rna_id();
572 Function:
573
574 Sets or returns the ID for original RNA sequence of the seqDiff.
575
576 Example :
577 Returns : value of rna_id, a scalar
578 Args : newvalue (optional)
579
580 =cut
581
582
583 sub rna_id {
584 my ($self,$value) = @_;
585 if (defined $value) {
586 $self->{'rna_id'} = $value;
587 }
588 else {
589 return $self->{'rna_id'};
590 }
591 }
592
593
594
595 =head2 add_Variant
596
597 Title : add_Variant
598 Usage : $obj->add_Variant($variant)
599 Function:
600
601 Pushes one Bio::Variation::Variant into the list of variants.
602 At the same time, creates a link from the Variant to SeqDiff
603 using its SeqDiff method.
604
605 Example :
606 Returns : 1 when succeeds, 0 for failure.
607 Args : Variant object
608
609 =cut
610
611 sub add_Variant {
612 my ($self,$value) = @_;
613 if (defined $value) {
614 if( ! $value->isa('Bio::Variation::VariantI') ) {
615 $self->throw("Is not a VariantI complying object but a [$self]");
616 return 0;
617 }
618 else {
619 push(@{$self->{'variants'}},$value);
620 $value->SeqDiff($self);
621 return 1;
622 }
623 }
624 else {
625 return 0;
626 }
627 }
628
629
630 =head2 each_Variant
631
632 Title : each_Variant
633 Usage : $obj->each_Variant();
634 Function:
635
636 Returns a list of Variants.
637
638 Example :
639 Returns : list of Variants
640 Args : none
641
642 =cut
643
644 sub each_Variant{
645 my ($self,@args) = @_;
646
647 return @{$self->{'variants'}};
648 }
649
650
651
652 =head2 add_Gene
653
654 Title : add_Gene
655 Usage : $obj->add_Gene($gene)
656 Function:
657
658 Pushes one L<Bio::LiveSeq::Gene> into the list of genes.
659
660 Example :
661 Returns : 1 when succeeds, 0 for failure.
662 Args : Bio::LiveSeq::Gene object
663
664 See L<Bio::LiveSeq::Gene> for more information.
665
666 =cut
667
668
669 sub add_Gene {
670 my ($self,$value) = @_;
671 if (defined $value) {
672 if( ! $value->isa('Bio::LiveSeq::Gene') ) {
673 $value->throw("Is not a Bio::LiveSeq::Gene object but a [$value]");
674 return 0;
675 }
676 else {
677 push(@{$self->{'genes'}},$value);
678 return 1;
679 }
680 }
681 else {
682 return 0;
683 }
684 }
685
686
687 =head2 each_Gene
688
689 Title : each_Gene
690 Usage : $obj->each_Gene();
691 Function:
692
693 Returns a list of L<Bio::LiveSeq::Gene>s.
694
695 Example :
696 Returns : list of Genes
697 Args : none
698
699 =cut
700
701 sub each_Gene{
702 my ($self,@args) = @_;
703
704 return @{$self->{'genes'}};
705 }
706
707
708 =head2 dna_ori
709
710 Title : dna_ori
711 Usage : $obj->dna_ori('atgctgctgctgct'); $dna_ori = $obj->dna_ori();
712 Function:
713
714 Sets or returns the original DNA sequence string of the seqDiff.
715
716 Example :
717 Returns : value of dna_ori, a scalar
718 Args : newvalue (optional)
719
720 =cut
721
722
723 sub dna_ori {
724 my ($self,$value) = @_;
725 if (defined $value) {
726 $self->{'dna_ori'} = $value;
727 }
728 else {
729 return $self->{'dna_ori'};
730 }
731 }
732
733
734 =head2 dna_mut
735
736 Title : dna_mut
737 Usage : $obj->dna_mut('atgctggtgctgct'); $dna_mut = $obj->dna_mut();
738 Function:
739
740 Sets or returns the mutated DNA sequence of the seqDiff.
741 If sequence has not been set generates it from the
742 original sequence and DNA mutations.
743
744 Example :
745 Returns : value of dna_mut, a scalar
746 Args : newvalue (optional)
747
748 =cut
749
750
751 sub dna_mut {
752 my ($self,$value) = @_;
753 if (defined $value) {
754 $self->{'dna_mut'} = $value;
755 }
756 else {
757 $self->_set_dnamut() unless $self->{'dna_mut'};
758 return $self->{'dna_mut'};
759 }
760 }
761
762 sub _set_dnamut {
763 my $self = shift;
764
765 return undef unless $self->{'dna_ori'} && $self->each_Variant;
766
767 $self->{'dna_mut'} = $self->{'dna_ori'};
768 foreach ($self->each_Variant) {
769 next unless $_->isa('Bio::Variation::DNAMutation');
770 next unless $_->isMutation;
771
772 my ($s, $la, $le);
773 #lies the mutation less than 25 bases after the start of sequence?
774 if ($_->start < 25) {
775 $s = 0; $la = $_->start - 1;
776 } else {
777 $s = $_->start - 25; $la = 25;
778 }
779
780 #is the mutation an insertion?
781 $_->end($_->start) unless $_->allele_ori->seq;
782
783 #does the mutation end greater than 25 bases before the end of
784 #sequence?
785 if (($_->end + 25) > length($self->{'dna_mut'})) {
786 $le = length($self->{'dna_mut'}) - $_->end;
787 } else {
788 $le = 25;
789 }
790
791 $_->dnStreamSeq(substr($self->{'dna_mut'}, $s, $la));
792 $_->upStreamSeq(substr($self->{'dna_mut'}, $_->end, $le));
793
794 my $s_ori = $_->dnStreamSeq . $_->allele_ori->seq . $_->upStreamSeq;
795 my $s_mut = $_->dnStreamSeq . $_->allele_mut->seq . $_->upStreamSeq;
796
797 (my $str = $self->{'dna_mut'}) =~ s/$s_ori/$s_mut/;
798 $self->{'dna_mut'} = $str;
799 }
800 }
801
802
803 =head2 rna_ori
804
805 Title : rna_ori
806 Usage : $obj->rna_ori('atgctgctgctgct'); $rna_ori = $obj->rna_ori();
807 Function:
808
809 Sets or returns the original RNA sequence of the seqDiff.
810
811 Example :
812 Returns : value of rna_ori, a scalar
813 Args : newvalue (optional)
814
815 =cut
816
817
818 sub rna_ori {
819 my ($self,$value) = @_;
820 if (defined $value) {
821 $self->{'rna_ori'} = $value;
822 }
823 else {
824 return $self->{'rna_ori'};
825 }
826 }
827
828
829 =head2 rna_mut
830
831 Title : rna_mut
832 Usage : $obj->rna_mut('atgctggtgctgct'); $rna_mut = $obj->rna_mut();
833 Function:
834
835 Sets or returns the mutated RNA sequence of the seqDiff.
836
837 Example :
838 Returns : value of rna_mut, a scalar
839 Args : newvalue (optional)
840
841 =cut
842
843
844 sub rna_mut {
845 my ($self,$value) = @_;
846 if (defined $value) {
847 $self->{'rna_mut'} = $value;
848 }
849 else {
850 return $self->{'rna_mut'};
851 }
852 }
853
854
855 =head2 aa_ori
856
857 Title : aa_ori
858 Usage : $obj->aa_ori('MAGVLL*'); $aa_ori = $obj->aa_ori();
859 Function:
860
861 Sets or returns the original protein sequence of the seqDiff.
862
863 Example :
864 Returns : value of aa_ori, a scalar
865 Args : newvalue (optional)
866
867 =cut
868
869
870 sub aa_ori {
871 my ($self,$value) = @_;
872 if (defined $value) {
873 $self->{'aa_ori'} = $value;
874 }
875 else {
876 return $self->{'aa_ori'};
877 }
878 }
879
880
881 =head2 aa_mut
882
883 Title : aa_mut
884 Usage : $obj->aa_mut('MA*'); $aa_mut = $obj->aa_mut();
885 Function:
886
887 Sets or returns the mutated protein sequence of the seqDiff.
888
889 Example :
890 Returns : value of aa_mut, a scalar
891 Args : newvalue (optional)
892
893 =cut
894
895
896 sub aa_mut {
897 my ($self,$value) = @_;
898 if (defined $value) {
899 $self->{'aa_mut'} = $value;
900 }
901 else {
902 return $self->{'aa_mut'};
903 }
904 }
905
906
907 =head2 seqobj
908
909 Title : seqobj
910 Usage : $dnaobj = $obj->seqobj('dna_mut');
911 Function:
912
913 Returns the any original or mutated sequences as a
914 Bio::PrimarySeq object.
915
916 Example :
917 Returns : Bio::PrimarySeq object for the requested sequence
918 Args : string, method name for the sequence requested
919
920 See L<Bio::PrimarySeq> for more information.
921
922 =cut
923
924 sub seqobj {
925 my ($self,$value) = @_;
926 my $out;
927 my %valid_obj =
928 map {$_, 1} qw(dna_ori rna_ori aa_ori dna_mut rna_mut aa_mut);
929 $valid_obj{$value} ||
930 $self->throw("Sequence type '$value' is not a valid type (".
931 join(',', map "'$_'", sort keys %valid_obj) .") lowercase");
932 my ($alphabet) = $value =~ /([^_]+)/;
933 my $id = $self->id;
934 $id = $self->rna_id if $self->rna_id;
935 $alphabet = 'protein' if $alphabet eq 'aa';
936 $out = Bio::PrimarySeq->new
937 ( '-seq' => $self->{$value},
938 '-display_id' => $id,
939 '-accession_number' => $self->id,
940 '-alphabet' => $alphabet
941 ) if $self->{$value} ;
942 return $out;
943 }
944
945 =head2 alignment
946
947 Title : alignment
948 Usage : $obj->alignment
949 Function:
950
951 Returns a pretty RNA/AA sequence alignment from linked
952 objects. Under construction: Only simple coding region
953 point mutations work.
954
955 Example :
956 Returns :
957 Args : none
958
959 =cut
960
961
962 sub alignment {
963 my $self = shift;
964 my (@entry, $text);
965
966 my $maxflanklen = 12;
967
968 foreach my $mut ($self->each_Variant) {
969 if( $mut->isa('Bio::Variation::RNAChange') ) {
970
971 my $upflank = $mut->upStreamSeq;
972 my $dnflank = $mut->dnStreamSeq;
973 my $cposd = $mut->codon_pos;
974 my $rori = $mut->allele_ori->seq;
975 my $rmut = $mut->allele_mut->seq;
976 my $rseqoriu = '';
977 my $rseqmutu = '';
978 my $rseqorid = '';
979 my $rseqmutd = '';
980 my $aaseqmutu = '';
981 my (@rseqori, @rseqmut );
982
983 # point
984 if ($mut->DNAMutation->label =~ /point/) {
985 if ($cposd == 1 ) {
986 my $nt2d = substr($dnflank, 0, 2);
987 push @rseqori, $rori. $nt2d;
988 push @rseqmut, uc ($rmut). $nt2d;
989 $dnflank = substr($dnflank, 2);
990 }
991 elsif ($cposd == 2) {
992 my $ntu = chop $upflank;
993 my $ntd = substr($dnflank, 0, 1);
994 push @rseqori, $ntu. $rori. $ntd;
995 push @rseqmut, $ntu. uc ($rmut). $ntd;
996 $dnflank = substr($dnflank, 1);
997 }
998 elsif ($cposd == 3) {
999 my $ntu1 = chop $upflank;
1000 my $ntu2 = chop $upflank;
1001 push (@rseqori, $ntu2. $ntu1. $rori);
1002 push (@rseqmut, $ntu2. $ntu1. uc $rmut);
1003 }
1004 }
1005 #deletion
1006 elsif ($mut->DNAMutation->label =~ /deletion/) {
1007 if ($cposd == 2 ) {
1008 $rseqorid = chop $upflank;
1009 $rseqmutd = $rseqorid;
1010 }
1011 for (my $i=1; $i<=$mut->length; $i++) {
1012 my $ntd .= substr($mut->allele_ori, $i-1, 1);
1013 $rseqorid .= $ntd;
1014 if (length($rseqorid) == 3 ) {
1015 push (@rseqori, $rseqorid);
1016 push (@rseqmut, " ");
1017 $rseqorid = '';
1018 }
1019 }
1020
1021 if ($rseqorid) {
1022 $rseqorid .= substr($dnflank, 0, 3-$rseqorid);
1023 push (@rseqori, $rseqorid);
1024 push (@rseqmut, " ");
1025 $dnflank = substr($dnflank,3-$rseqorid);
1026 }
1027 }
1028 $upflank = reverse $upflank;
1029 # loop throught the flanks
1030 for (my $i=1; $i<=length($dnflank); $i++) {
1031
1032 last if $i > $maxflanklen;
1033
1034 my $ntd .= substr($dnflank, $i-1, 1);
1035 my $ntu .= substr($upflank, $i-1, 1);
1036
1037 $rseqmutd .= $ntd;
1038 $rseqorid .= $ntd;
1039 $rseqmutu = $ntu. $rseqmutu;
1040 $rseqoriu = $ntu. $rseqoriu;
1041
1042 if (length($rseqorid) == 3 and length($rseqorid) == 3) {
1043 push (@rseqori, $rseqorid);
1044 push (@rseqmut, $rseqmutd);
1045 $rseqorid = $rseqmutd ='';
1046 }
1047 if (length($rseqoriu) == 3 and length($rseqoriu) == 3) {
1048 unshift (@rseqori, $rseqoriu);
1049 unshift (@rseqmut, $rseqmutu);
1050 $rseqoriu = $rseqmutu ='';
1051 }
1052
1053 #print "|i=$i, $cposd, $rseqmutd, $rseqorid\n";
1054 #print "|i=$i, $cposu, $rseqmutu, $rseqoriu\n\n";
1055
1056 }
1057
1058 push (@rseqori, $rseqorid);
1059 unshift (@rseqori, $rseqoriu);
1060 push (@rseqmut, $rseqmutd);
1061 unshift (@rseqmut, $rseqmutu);
1062
1063 return unless $mut->AAChange;
1064 #translate
1065 my $tr = new Bio::Tools::CodonTable ('-id' => $mut->codon_table);
1066 my $apos = $mut->AAChange->start;
1067 my $aposmax = CORE::length($self->aa_ori); #terminator codon no
1068 my $rseqori;
1069 my $rseqmut;
1070 my $aaseqori;
1071 my $aaseqmut = "";
1072 for (my $i = 0; $i <= $#rseqori; $i++) {
1073 my $a = '';
1074
1075 $a = $tr->translate($rseqori[$i]) if length($rseqori[$i]) == 3;
1076
1077 if (length($a) != 1 or
1078 $apos - ( $maxflanklen/2 -1) + $i < 1 or
1079 $apos - ( $maxflanklen/2 -1) + $i > $aposmax ) {
1080 $aaseqori .= " ";
1081 } else {
1082 $aaseqori .= " ". $a. " ";
1083 }
1084 my $b = '';
1085 if (length($rseqmut[$i]) == 3) {
1086 if ($rseqmut[$i] eq ' ') {
1087 $b = "_";
1088 } else {
1089 $b = $tr->translate($rseqmut[$i]);
1090 }
1091 }
1092 if (( $b ne $a and
1093 length($b) == 1 and
1094 $apos - ( $maxflanklen/2 -1) + $i >= 1 ) or
1095 ( $apos - ( $maxflanklen/2 -1) + $i >= $aposmax and
1096 $mut->label =~ 'termination')
1097 ) {
1098 $aaseqmut .= " ". $b. " ";
1099 } else {
1100 $aaseqmut .= " ";
1101 }
1102
1103 if ($i == 0 and length($rseqori[$i]) != 3) {
1104 my $l = 3 - length($rseqori[$i]);
1105 $rseqori[$i] = (" " x $l). $rseqori[$i];
1106 $rseqmut[$i] = (" " x $l). $rseqmut[$i];
1107 }
1108 $rseqori .= $rseqori[$i]. " " if $rseqori[$i] ne '';
1109 $rseqmut .= $rseqmut[$i]. " " if $rseqmut[$i] ne '';
1110 }
1111
1112 # collect the results
1113 push (@entry,
1114 "\n"
1115 );
1116 $text = " ". $aaseqmut;
1117 push (@entry,
1118 $text
1119 );
1120 $text = "Variant : ". $rseqmut;
1121 push (@entry,
1122 $text
1123 );
1124 $text = "Reference: ". $rseqori;
1125 push (@entry,
1126 $text
1127 );
1128 $text = " ". $aaseqori;
1129 push (@entry,
1130 $text
1131 );
1132 push (@entry,
1133 "\n"
1134 );
1135 }
1136
1137 }
1138
1139 my $res;
1140 foreach my $line (@entry) {
1141 $res .= "$line\n";
1142 }
1143 return $res;
1144 }
1145
1146 1;