0
|
1 # $Id: SeqDiff.pm,v 1.16 2002/10/22 07:38:49 lapp Exp $
|
|
2 # bioperl module for Bio::Variation::SeqDiff
|
|
3 #
|
|
4 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
|
|
5 #
|
|
6 # Copyright Heikki Lehvaslaiho
|
|
7 #
|
|
8 # You may distribute this module under the same terms as perl itself
|
|
9 #
|
|
10 # POD documentation - main docs before the code
|
|
11
|
|
12 # cds_end definition?
|
|
13
|
|
14 =head1 NAME
|
|
15
|
|
16 Bio::Variation::SeqDiff - Container class for mutation/variant descriptions
|
|
17
|
|
18 =head1 SYNOPSIS
|
|
19
|
|
20 $seqDiff = Bio::Variation::SeqDiff->new (
|
|
21 -id => $M20132,
|
|
22 -alphabet => 'rna',
|
|
23 -gene_symbol => 'AR'
|
|
24 -chromosome => 'X',
|
|
25 -numbering => 'coding'
|
|
26 );
|
|
27 # get a DNAMutation object somehow
|
|
28 $seqDiff->add_Variant($dnamut);
|
|
29 print $seqDiff->sys_name(), "\n";
|
|
30
|
|
31 =head1 DESCRIPTION
|
|
32
|
|
33 SeqDiff stores Bio::Variation::VariantI object references and
|
|
34 descriptive information common to all changes in a sequence. Mutations
|
|
35 are understood to be any kind of sequence markers and are expected to
|
|
36 occur in the same chromosome. See L<Bio::Variation::VariantI> for details.
|
|
37
|
|
38 The methods of SeqDiff are geared towards describing mutations in
|
|
39 human genes using gene-based coordinate system where 'A' of the
|
|
40 initiator codon has number 1 and the one before it -1. This is
|
|
41 according to conventions of human genetics.
|
|
42
|
|
43 There will be class Bio::Variation::Genotype to describe markers in
|
|
44 different chromosomes and diploid genototypes.
|
|
45
|
|
46 Classes implementing Bio::Variation::VariantI interface are
|
|
47 Bio::Variation::DNAMutation, Bio::Variation::RNAChange, and
|
|
48 Bio::Variation::AAChange. See L<Bio::Variation::VariantI>,
|
|
49 L<Bio::Variation::DNAMutation>, L<Bio::Variation::RNAChange>, and
|
|
50 L<Bio::Variation::AAChange> for more information.
|
|
51
|
|
52 Variant objects can be added using two ways: an array passed to the
|
|
53 constructor or as individual Variant objects with add_Variant
|
|
54 method.
|
|
55
|
|
56
|
|
57 =head1 FEEDBACK
|
|
58
|
|
59 =head2 Mailing Lists
|
|
60
|
|
61 User feedback is an integral part of the evolution of this and other
|
|
62 Bioperl modules. Send your comments and suggestions preferably to the
|
|
63 Bioperl mailing lists Your participation is much appreciated.
|
|
64
|
|
65 bioperl-l@bioperl.org - General discussion
|
|
66 http://bio.perl.org/MailList.html - About the mailing lists
|
|
67
|
|
68 =head2 Reporting Bugs
|
|
69
|
|
70 report bugs to the Bioperl bug tracking system to help us keep track
|
|
71 the bugs and their resolution. Bug reports can be submitted via
|
|
72 email or the web:
|
|
73
|
|
74 bioperl-bugs@bio.perl.org
|
|
75 http://bugzilla.bioperl.org/
|
|
76
|
|
77 =head1 AUTHOR - Heikki Lehvaslaiho
|
|
78
|
|
79 Email: heikki@ebi.ac.uk
|
|
80 Address:
|
|
81
|
|
82 EMBL Outstation, European Bioinformatics Institute
|
|
83 Wellcome Trust Genome Campus, Hinxton
|
|
84 Cambs. CB10 1SD, United Kingdom
|
|
85
|
|
86 =head1 CONTRIBUTORS
|
|
87
|
|
88 Eckhard Lehmann, ecky@e-lehmann.de
|
|
89
|
|
90 =head1 APPENDIX
|
|
91
|
|
92 The rest of the documentation details each of the object
|
|
93 methods. Internal methods are usually preceded with a _
|
|
94
|
|
95 =cut
|
|
96
|
|
97 # Let the code begin...
|
|
98
|
|
99 package Bio::Variation::SeqDiff;
|
|
100 my $VERSION=1.0;
|
|
101
|
|
102 use strict;
|
|
103 use vars qw($VERSION @ISA);
|
|
104 use Bio::Root::Root;
|
|
105 use Bio::Tools::CodonTable;
|
|
106 use Bio::PrimarySeq;
|
|
107
|
|
108 @ISA = qw( Bio::Root::Root );
|
|
109
|
|
110
|
|
111 =head2 new
|
|
112
|
|
113 Title : new
|
|
114 Usage : $seqDiff = Bio::Variation::SeqDiff->new;
|
|
115 Function: generates a new Bio::Variation::SeqDiff
|
|
116 Returns : reference to a new object of class SeqDiff
|
|
117 Args :
|
|
118
|
|
119 =cut
|
|
120
|
|
121 sub new {
|
|
122 my($class,@args) = @_;
|
|
123 my $self = $class->SUPER::new(@args);
|
|
124
|
|
125 my($id, $sysname, $trivname, $chr, $gene_symbol,
|
|
126 $desc, $alphabet, $numbering, $offset, $rna_offset, $rna_id, $cds_end,
|
|
127 $dna_ori, $dna_mut, $rna_ori, $rna_mut, $aa_ori, $aa_mut
|
|
128 #@variants, @genes
|
|
129 ) =
|
|
130 $self->_rearrange([qw(ID
|
|
131 SYSNAME
|
|
132 TRIVNAME
|
|
133 CHR
|
|
134 GENE_SYMBOL
|
|
135 DESC
|
|
136 ALPHABET
|
|
137 NUMBERING
|
|
138 OFFSET
|
|
139 RNA_OFFSET
|
|
140 RNA_ID
|
|
141 CDS_END
|
|
142 DNA_ORI
|
|
143 DNA_MUT
|
|
144 RNA_ORI
|
|
145 AA_ORI
|
|
146 AA_MUT
|
|
147 )],
|
|
148 @args);
|
|
149
|
|
150 #my $make = $self->SUPER::_initialize(@args);
|
|
151
|
|
152 $id && $self->id($id);
|
|
153 $sysname && $self->sysname($sysname);
|
|
154 $trivname && $self->trivname($trivname);
|
|
155 $chr && $self->chromosome($chr);
|
|
156 $gene_symbol && $self->gene_symbol($chr);
|
|
157 $desc && $self->description($desc);
|
|
158 $alphabet && $self->alphabet($alphabet);
|
|
159 $numbering && $self->numbering($numbering);
|
|
160 $offset && $self->offset($offset);
|
|
161 $rna_offset && $self->rna_offset($rna_offset);
|
|
162 $rna_id && $self->rna_id($rna_id);
|
|
163 $cds_end && $self->cds_end($cds_end);
|
|
164
|
|
165 $dna_ori && $self->dna_ori($dna_ori);
|
|
166 $dna_mut && $self->dna_mut($dna_mut);
|
|
167 $rna_ori && $self->rna_ori($rna_ori);
|
|
168 $rna_mut && $self->rna_mut($rna_mut);
|
|
169 $aa_ori && $self->aa_ori ($aa_ori);
|
|
170 $aa_mut && $self->aa_mut ($aa_mut);
|
|
171
|
|
172 $self->{ 'variants' } = [];
|
|
173 #@variants && push(@{$self->{'variants'}},@variants);
|
|
174
|
|
175 $self->{ 'genes' } = [];
|
|
176 #@genes && push(@{$self->{'genes'}},@genes);
|
|
177
|
|
178 return $self; # success - we hope!
|
|
179 }
|
|
180
|
|
181
|
|
182 =head2 id
|
|
183
|
|
184 Title : id
|
|
185 Usage : $obj->id(H0001); $id = $obj->id();
|
|
186 Function:
|
|
187
|
|
188 Sets or returns the id of the seqDiff.
|
|
189 Should be used to give the collection of variants a UID
|
|
190 without semantic associations.
|
|
191
|
|
192 Example :
|
|
193 Returns : value of id, a scalar
|
|
194 Args : newvalue (optional)
|
|
195
|
|
196 =cut
|
|
197
|
|
198
|
|
199 sub id {
|
|
200 my ($self,$value) = @_;
|
|
201 if (defined $value) {
|
|
202 $self->{'id'} = $value;
|
|
203 }
|
|
204 # unless (exists $self->{'id'}) {
|
|
205 # return "undefined";
|
|
206 # }
|
|
207 else {
|
|
208 return $self->{'id'};
|
|
209 }
|
|
210 }
|
|
211
|
|
212
|
|
213 =head2 sysname
|
|
214
|
|
215 Title : sysname
|
|
216 Usage : $obj->sysname('5C>G'); $sysname = $obj->sysname();
|
|
217 Function:
|
|
218
|
|
219 Sets or returns the systematic name of the seqDiff. The
|
|
220 name should follow the HUGO Mutation Database Initiative
|
|
221 approved nomenclature. If called without first setting the
|
|
222 value, will generate it from L<Bio::Variation::DNAMutation>
|
|
223 objects attached.
|
|
224
|
|
225 Example :
|
|
226 Returns : value of sysname, a scalar
|
|
227 Args : newvalue (optional)
|
|
228
|
|
229 =cut
|
|
230
|
|
231
|
|
232 sub sysname {
|
|
233 my ($self,$value) = @_;
|
|
234 if (defined $value) {
|
|
235 $self->{'sysname'} = $value;
|
|
236 }
|
|
237 elsif (not defined $self->{'sysname'}) {
|
|
238
|
|
239 my $sysname = '';
|
|
240 my $c = 0;
|
|
241 foreach my $mut ($self->each_Variant) {
|
|
242 if( $mut->isa('Bio::Variation::DNAMutation') ) {
|
|
243 $c++;
|
|
244 if ($c == 1 ) {
|
|
245 $sysname = $mut->sysname ;
|
|
246 }
|
|
247 else {
|
|
248 $sysname .= ";". $mut->sysname;
|
|
249 }
|
|
250 }
|
|
251 }
|
|
252 $sysname = "[". $sysname. "]" if $c > 1;
|
|
253 $self->{'sysname'} = $sysname;
|
|
254 }
|
|
255 return $self->{'sysname'};
|
|
256 }
|
|
257
|
|
258
|
|
259 =head2 trivname
|
|
260
|
|
261 Title : trivname
|
|
262 Usage : $obj->trivname('[A2G;T56G]'); $trivname = $obj->trivname();
|
|
263 Function:
|
|
264
|
|
265 Sets or returns the trivial name of the seqDiff.
|
|
266 The name should follow the HUGO Mutation Database Initiative
|
|
267 approved nomenclature. If called without first setting the
|
|
268 value, will generate it from L<Bio::Variation::AAChange>
|
|
269 objects attached.
|
|
270
|
|
271 Example :
|
|
272 Returns : value of trivname, a scalar
|
|
273 Args : newvalue (optional)
|
|
274
|
|
275 =cut
|
|
276
|
|
277
|
|
278 sub trivname {
|
|
279 my ($self,$value) = @_;
|
|
280 if (defined $value) {
|
|
281 $self->{'trivname'} = $value;
|
|
282 }
|
|
283 elsif (not defined $self->{'trivname'}) {
|
|
284
|
|
285 my $trivname = '';
|
|
286 my $c = 0;
|
|
287 foreach my $mut ($self->each_Variant) {
|
|
288 if( $mut->isa('Bio::Variation::AAChange') ) {
|
|
289 $c++;
|
|
290 if ($c == 1 ) {
|
|
291 $trivname = $mut->trivname ;
|
|
292 }
|
|
293 else {
|
|
294 $trivname .= ";". $mut->trivname;
|
|
295 }
|
|
296 }
|
|
297 }
|
|
298 $trivname = "[". $trivname. "]" if $c > 1;
|
|
299 $self->{'trivname'} = $trivname;
|
|
300 }
|
|
301
|
|
302 else {
|
|
303 return $self->{'trivname'};
|
|
304 }
|
|
305 }
|
|
306
|
|
307
|
|
308 =head2 chromosome
|
|
309
|
|
310 Title : chromosome
|
|
311 Usage : $obj->chromosome('X'); $chromosome = $obj->chromosome();
|
|
312 Function:
|
|
313
|
|
314 Sets or returns the chromosome ("linkage group") of the seqDiff.
|
|
315
|
|
316 Example :
|
|
317 Returns : value of chromosome, a scalar
|
|
318 Args : newvalue (optional)
|
|
319
|
|
320 =cut
|
|
321
|
|
322
|
|
323 sub chromosome {
|
|
324 my ($self,$value) = @_;
|
|
325 if (defined $value) {
|
|
326 $self->{'chromosome'} = $value;
|
|
327 }
|
|
328 else {
|
|
329 return $self->{'chromosome'};
|
|
330 }
|
|
331 }
|
|
332
|
|
333
|
|
334 =head2 gene_symbol
|
|
335
|
|
336 Title : gene_symbol
|
|
337 Usage : $obj->gene_symbol('FOS'); $gene_symbol = $obj->gene_symbol;
|
|
338 Function:
|
|
339
|
|
340 Sets or returns the gene symbol for the studied CDS.
|
|
341
|
|
342 Example :
|
|
343 Returns : value of gene_symbol, a scalar
|
|
344 Args : newvalue (optional)
|
|
345
|
|
346 =cut
|
|
347
|
|
348
|
|
349 sub gene_symbol {
|
|
350 my ($self,$value) = @_;
|
|
351 if (defined $value) {
|
|
352 $self->{'gene_symbol'} = $value;
|
|
353 }
|
|
354 else {
|
|
355 return $self->{'gene_symbol'};
|
|
356 }
|
|
357 }
|
|
358
|
|
359
|
|
360
|
|
361 =head2 description
|
|
362
|
|
363 Title : description
|
|
364 Usage : $obj->description('short description'); $descr = $obj->description();
|
|
365 Function:
|
|
366
|
|
367 Sets or returns the short description of the seqDiff.
|
|
368
|
|
369 Example :
|
|
370 Returns : value of description, a scalar
|
|
371 Args : newvalue (optional)
|
|
372
|
|
373 =cut
|
|
374
|
|
375
|
|
376 sub description {
|
|
377 my ($self,$value) = @_;
|
|
378 if (defined $value) {
|
|
379 $self->{'description'} = $value;
|
|
380 }
|
|
381 else {
|
|
382 return $self->{'description'};
|
|
383 }
|
|
384 }
|
|
385
|
|
386
|
|
387 =head2 alphabet
|
|
388
|
|
389 Title : alphabet
|
|
390 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
|
|
391 Function: Returns the type of primary reference sequence being one of
|
|
392 'dna', 'rna' or 'protein'. This is case sensitive.
|
|
393
|
|
394 Returns : a string either 'dna','rna','protein'.
|
|
395 Args : none
|
|
396
|
|
397
|
|
398 =cut
|
|
399
|
|
400 sub alphabet {
|
|
401 my ($self,$value) = @_;
|
|
402 my %type = (dna => 1,
|
|
403 rna => 1,
|
|
404 protein => 1);
|
|
405 if( defined $value ) {
|
|
406 if ($type{$value}) {
|
|
407 $self->{'alphabet'} = $value;
|
|
408 } else {
|
|
409 $self->throw("$value is not valid alphabet value!");
|
|
410 }
|
|
411 }
|
|
412 return $self->{'alphabet'};
|
|
413 }
|
|
414
|
|
415
|
|
416 =head2 numbering
|
|
417
|
|
418 Title : numbering
|
|
419 Usage : $obj->numbering('coding'); $numbering = $obj->numbering();
|
|
420 Function:
|
|
421
|
|
422 Sets or returns the string giving the numbering schema used
|
|
423 to describe the variants.
|
|
424
|
|
425 Example :
|
|
426 Returns : value of numbering, a scalar
|
|
427 Args : newvalue (optional)
|
|
428
|
|
429 =cut
|
|
430
|
|
431
|
|
432
|
|
433 sub numbering {
|
|
434 my ($self,$value) = @_;
|
|
435 if (defined $value) {
|
|
436 $self->{'numbering'} = $value;
|
|
437 }
|
|
438 else {
|
|
439 return $self->{'numbering'};
|
|
440 }
|
|
441 }
|
|
442
|
|
443
|
|
444 =head2 offset
|
|
445
|
|
446 Title : offset
|
|
447 Usage : $obj->offset(124); $offset = $obj->offset();
|
|
448 Function:
|
|
449
|
|
450 Sets or returns the offset from the beginning of the DNA sequence
|
|
451 to the coordinate start used to describe variants. Typically
|
|
452 the beginning of the coding region of the gene.
|
|
453 The cds_start should be 1 + offset.
|
|
454
|
|
455 Example :
|
|
456 Returns : value of offset, a scalar
|
|
457 Args : newvalue (optional)
|
|
458
|
|
459 =cut
|
|
460
|
|
461
|
|
462
|
|
463 sub offset {
|
|
464 my ($self,$value) = @_;
|
|
465 if (defined $value) {
|
|
466 $self->{'offset'} = $value;
|
|
467 }
|
|
468 elsif (not defined $self->{'offset'} ) {
|
|
469 return $self->{'offset'} = 0;
|
|
470 }
|
|
471 else {
|
|
472 return $self->{'offset'};
|
|
473 }
|
|
474 }
|
|
475
|
|
476
|
|
477 =head2 cds_start
|
|
478
|
|
479 Title : cds_start
|
|
480 Usage : $obj->cds_start(123); $cds_start = $obj->cds_start();
|
|
481 Function:
|
|
482
|
|
483 Sets or returns the cds_start from the beginning of the DNA
|
|
484 sequence to the coordinate start used to describe
|
|
485 variants. Typically the beginning of the coding region of
|
|
486 the gene. Needs to be and is implemented as 1 + offset.
|
|
487
|
|
488 Example :
|
|
489 Returns : value of cds_start, a scalar
|
|
490 Args : newvalue (optional)
|
|
491
|
|
492 =cut
|
|
493
|
|
494
|
|
495
|
|
496 sub cds_start {
|
|
497 my ($self,$value) = @_;
|
|
498 if (defined $value) {
|
|
499 $self->{'offset'} = $value - 1;
|
|
500 }
|
|
501 else {
|
|
502 return $self->{'offset'} + 1;
|
|
503 }
|
|
504 }
|
|
505
|
|
506
|
|
507 =head2 cds_end
|
|
508
|
|
509 Title : cds_end
|
|
510 Usage : $obj->cds_end(321); $cds_end = $obj->cds_end();
|
|
511 Function:
|
|
512
|
|
513 Sets or returns the position of the last nucleotitide of the
|
|
514 termination codon. The coordinate system starts from cds_start.
|
|
515
|
|
516 Example :
|
|
517 Returns : value of cds_end, a scalar
|
|
518 Args : newvalue (optional)
|
|
519
|
|
520 =cut
|
|
521
|
|
522
|
|
523
|
|
524 sub cds_end {
|
|
525 my ($self,$value) = @_;
|
|
526 if (defined $value) {
|
|
527 $self->{'cds_end'} = $value;
|
|
528 }
|
|
529 else {
|
|
530 return $self->{'cds_end'};
|
|
531 #$self->{'cds_end'} = CORE::length($self->SeqDiff->rna_ori)/3;
|
|
532 }
|
|
533 }
|
|
534
|
|
535
|
|
536 =head2 rna_offset
|
|
537
|
|
538 Title : rna_offset
|
|
539 Usage : $obj->rna_offset(124); $rna_offset = $obj->rna_offset();
|
|
540 Function:
|
|
541
|
|
542 Sets or returns the rna_offset from the beginning of the RNA sequence
|
|
543 to the coordinate start used to describe variants. Typically
|
|
544 the beginning of the coding region of the gene.
|
|
545
|
|
546 Example :
|
|
547 Returns : value of rna_offset, a scalar
|
|
548 Args : newvalue (optional)
|
|
549
|
|
550 =cut
|
|
551
|
|
552
|
|
553
|
|
554 sub rna_offset {
|
|
555 my ($self,$value) = @_;
|
|
556 if (defined $value) {
|
|
557 $self->{'rna_offset'} = $value;
|
|
558 }
|
|
559 elsif (not defined $self->{'rna_offset'} ) {
|
|
560 return $self->{'rna_offset'} = 0;
|
|
561 }
|
|
562 else {
|
|
563 return $self->{'rna_offset'};
|
|
564 }
|
|
565 }
|
|
566
|
|
567
|
|
568 =head2 rna_id
|
|
569
|
|
570 Title : rna_id
|
|
571 Usage : $obj->rna_id('transcript#3'); $rna_id = $obj->rna_id();
|
|
572 Function:
|
|
573
|
|
574 Sets or returns the ID for original RNA sequence of the seqDiff.
|
|
575
|
|
576 Example :
|
|
577 Returns : value of rna_id, a scalar
|
|
578 Args : newvalue (optional)
|
|
579
|
|
580 =cut
|
|
581
|
|
582
|
|
583 sub rna_id {
|
|
584 my ($self,$value) = @_;
|
|
585 if (defined $value) {
|
|
586 $self->{'rna_id'} = $value;
|
|
587 }
|
|
588 else {
|
|
589 return $self->{'rna_id'};
|
|
590 }
|
|
591 }
|
|
592
|
|
593
|
|
594
|
|
595 =head2 add_Variant
|
|
596
|
|
597 Title : add_Variant
|
|
598 Usage : $obj->add_Variant($variant)
|
|
599 Function:
|
|
600
|
|
601 Pushes one Bio::Variation::Variant into the list of variants.
|
|
602 At the same time, creates a link from the Variant to SeqDiff
|
|
603 using its SeqDiff method.
|
|
604
|
|
605 Example :
|
|
606 Returns : 1 when succeeds, 0 for failure.
|
|
607 Args : Variant object
|
|
608
|
|
609 =cut
|
|
610
|
|
611 sub add_Variant {
|
|
612 my ($self,$value) = @_;
|
|
613 if (defined $value) {
|
|
614 if( ! $value->isa('Bio::Variation::VariantI') ) {
|
|
615 $self->throw("Is not a VariantI complying object but a [$self]");
|
|
616 return 0;
|
|
617 }
|
|
618 else {
|
|
619 push(@{$self->{'variants'}},$value);
|
|
620 $value->SeqDiff($self);
|
|
621 return 1;
|
|
622 }
|
|
623 }
|
|
624 else {
|
|
625 return 0;
|
|
626 }
|
|
627 }
|
|
628
|
|
629
|
|
630 =head2 each_Variant
|
|
631
|
|
632 Title : each_Variant
|
|
633 Usage : $obj->each_Variant();
|
|
634 Function:
|
|
635
|
|
636 Returns a list of Variants.
|
|
637
|
|
638 Example :
|
|
639 Returns : list of Variants
|
|
640 Args : none
|
|
641
|
|
642 =cut
|
|
643
|
|
644 sub each_Variant{
|
|
645 my ($self,@args) = @_;
|
|
646
|
|
647 return @{$self->{'variants'}};
|
|
648 }
|
|
649
|
|
650
|
|
651
|
|
652 =head2 add_Gene
|
|
653
|
|
654 Title : add_Gene
|
|
655 Usage : $obj->add_Gene($gene)
|
|
656 Function:
|
|
657
|
|
658 Pushes one L<Bio::LiveSeq::Gene> into the list of genes.
|
|
659
|
|
660 Example :
|
|
661 Returns : 1 when succeeds, 0 for failure.
|
|
662 Args : Bio::LiveSeq::Gene object
|
|
663
|
|
664 See L<Bio::LiveSeq::Gene> for more information.
|
|
665
|
|
666 =cut
|
|
667
|
|
668
|
|
669 sub add_Gene {
|
|
670 my ($self,$value) = @_;
|
|
671 if (defined $value) {
|
|
672 if( ! $value->isa('Bio::LiveSeq::Gene') ) {
|
|
673 $value->throw("Is not a Bio::LiveSeq::Gene object but a [$value]");
|
|
674 return 0;
|
|
675 }
|
|
676 else {
|
|
677 push(@{$self->{'genes'}},$value);
|
|
678 return 1;
|
|
679 }
|
|
680 }
|
|
681 else {
|
|
682 return 0;
|
|
683 }
|
|
684 }
|
|
685
|
|
686
|
|
687 =head2 each_Gene
|
|
688
|
|
689 Title : each_Gene
|
|
690 Usage : $obj->each_Gene();
|
|
691 Function:
|
|
692
|
|
693 Returns a list of L<Bio::LiveSeq::Gene>s.
|
|
694
|
|
695 Example :
|
|
696 Returns : list of Genes
|
|
697 Args : none
|
|
698
|
|
699 =cut
|
|
700
|
|
701 sub each_Gene{
|
|
702 my ($self,@args) = @_;
|
|
703
|
|
704 return @{$self->{'genes'}};
|
|
705 }
|
|
706
|
|
707
|
|
708 =head2 dna_ori
|
|
709
|
|
710 Title : dna_ori
|
|
711 Usage : $obj->dna_ori('atgctgctgctgct'); $dna_ori = $obj->dna_ori();
|
|
712 Function:
|
|
713
|
|
714 Sets or returns the original DNA sequence string of the seqDiff.
|
|
715
|
|
716 Example :
|
|
717 Returns : value of dna_ori, a scalar
|
|
718 Args : newvalue (optional)
|
|
719
|
|
720 =cut
|
|
721
|
|
722
|
|
723 sub dna_ori {
|
|
724 my ($self,$value) = @_;
|
|
725 if (defined $value) {
|
|
726 $self->{'dna_ori'} = $value;
|
|
727 }
|
|
728 else {
|
|
729 return $self->{'dna_ori'};
|
|
730 }
|
|
731 }
|
|
732
|
|
733
|
|
734 =head2 dna_mut
|
|
735
|
|
736 Title : dna_mut
|
|
737 Usage : $obj->dna_mut('atgctggtgctgct'); $dna_mut = $obj->dna_mut();
|
|
738 Function:
|
|
739
|
|
740 Sets or returns the mutated DNA sequence of the seqDiff.
|
|
741 If sequence has not been set generates it from the
|
|
742 original sequence and DNA mutations.
|
|
743
|
|
744 Example :
|
|
745 Returns : value of dna_mut, a scalar
|
|
746 Args : newvalue (optional)
|
|
747
|
|
748 =cut
|
|
749
|
|
750
|
|
751 sub dna_mut {
|
|
752 my ($self,$value) = @_;
|
|
753 if (defined $value) {
|
|
754 $self->{'dna_mut'} = $value;
|
|
755 }
|
|
756 else {
|
|
757 $self->_set_dnamut() unless $self->{'dna_mut'};
|
|
758 return $self->{'dna_mut'};
|
|
759 }
|
|
760 }
|
|
761
|
|
762 sub _set_dnamut {
|
|
763 my $self = shift;
|
|
764
|
|
765 return undef unless $self->{'dna_ori'} && $self->each_Variant;
|
|
766
|
|
767 $self->{'dna_mut'} = $self->{'dna_ori'};
|
|
768 foreach ($self->each_Variant) {
|
|
769 next unless $_->isa('Bio::Variation::DNAMutation');
|
|
770 next unless $_->isMutation;
|
|
771
|
|
772 my ($s, $la, $le);
|
|
773 #lies the mutation less than 25 bases after the start of sequence?
|
|
774 if ($_->start < 25) {
|
|
775 $s = 0; $la = $_->start - 1;
|
|
776 } else {
|
|
777 $s = $_->start - 25; $la = 25;
|
|
778 }
|
|
779
|
|
780 #is the mutation an insertion?
|
|
781 $_->end($_->start) unless $_->allele_ori->seq;
|
|
782
|
|
783 #does the mutation end greater than 25 bases before the end of
|
|
784 #sequence?
|
|
785 if (($_->end + 25) > length($self->{'dna_mut'})) {
|
|
786 $le = length($self->{'dna_mut'}) - $_->end;
|
|
787 } else {
|
|
788 $le = 25;
|
|
789 }
|
|
790
|
|
791 $_->dnStreamSeq(substr($self->{'dna_mut'}, $s, $la));
|
|
792 $_->upStreamSeq(substr($self->{'dna_mut'}, $_->end, $le));
|
|
793
|
|
794 my $s_ori = $_->dnStreamSeq . $_->allele_ori->seq . $_->upStreamSeq;
|
|
795 my $s_mut = $_->dnStreamSeq . $_->allele_mut->seq . $_->upStreamSeq;
|
|
796
|
|
797 (my $str = $self->{'dna_mut'}) =~ s/$s_ori/$s_mut/;
|
|
798 $self->{'dna_mut'} = $str;
|
|
799 }
|
|
800 }
|
|
801
|
|
802
|
|
803 =head2 rna_ori
|
|
804
|
|
805 Title : rna_ori
|
|
806 Usage : $obj->rna_ori('atgctgctgctgct'); $rna_ori = $obj->rna_ori();
|
|
807 Function:
|
|
808
|
|
809 Sets or returns the original RNA sequence of the seqDiff.
|
|
810
|
|
811 Example :
|
|
812 Returns : value of rna_ori, a scalar
|
|
813 Args : newvalue (optional)
|
|
814
|
|
815 =cut
|
|
816
|
|
817
|
|
818 sub rna_ori {
|
|
819 my ($self,$value) = @_;
|
|
820 if (defined $value) {
|
|
821 $self->{'rna_ori'} = $value;
|
|
822 }
|
|
823 else {
|
|
824 return $self->{'rna_ori'};
|
|
825 }
|
|
826 }
|
|
827
|
|
828
|
|
829 =head2 rna_mut
|
|
830
|
|
831 Title : rna_mut
|
|
832 Usage : $obj->rna_mut('atgctggtgctgct'); $rna_mut = $obj->rna_mut();
|
|
833 Function:
|
|
834
|
|
835 Sets or returns the mutated RNA sequence of the seqDiff.
|
|
836
|
|
837 Example :
|
|
838 Returns : value of rna_mut, a scalar
|
|
839 Args : newvalue (optional)
|
|
840
|
|
841 =cut
|
|
842
|
|
843
|
|
844 sub rna_mut {
|
|
845 my ($self,$value) = @_;
|
|
846 if (defined $value) {
|
|
847 $self->{'rna_mut'} = $value;
|
|
848 }
|
|
849 else {
|
|
850 return $self->{'rna_mut'};
|
|
851 }
|
|
852 }
|
|
853
|
|
854
|
|
855 =head2 aa_ori
|
|
856
|
|
857 Title : aa_ori
|
|
858 Usage : $obj->aa_ori('MAGVLL*'); $aa_ori = $obj->aa_ori();
|
|
859 Function:
|
|
860
|
|
861 Sets or returns the original protein sequence of the seqDiff.
|
|
862
|
|
863 Example :
|
|
864 Returns : value of aa_ori, a scalar
|
|
865 Args : newvalue (optional)
|
|
866
|
|
867 =cut
|
|
868
|
|
869
|
|
870 sub aa_ori {
|
|
871 my ($self,$value) = @_;
|
|
872 if (defined $value) {
|
|
873 $self->{'aa_ori'} = $value;
|
|
874 }
|
|
875 else {
|
|
876 return $self->{'aa_ori'};
|
|
877 }
|
|
878 }
|
|
879
|
|
880
|
|
881 =head2 aa_mut
|
|
882
|
|
883 Title : aa_mut
|
|
884 Usage : $obj->aa_mut('MA*'); $aa_mut = $obj->aa_mut();
|
|
885 Function:
|
|
886
|
|
887 Sets or returns the mutated protein sequence of the seqDiff.
|
|
888
|
|
889 Example :
|
|
890 Returns : value of aa_mut, a scalar
|
|
891 Args : newvalue (optional)
|
|
892
|
|
893 =cut
|
|
894
|
|
895
|
|
896 sub aa_mut {
|
|
897 my ($self,$value) = @_;
|
|
898 if (defined $value) {
|
|
899 $self->{'aa_mut'} = $value;
|
|
900 }
|
|
901 else {
|
|
902 return $self->{'aa_mut'};
|
|
903 }
|
|
904 }
|
|
905
|
|
906
|
|
907 =head2 seqobj
|
|
908
|
|
909 Title : seqobj
|
|
910 Usage : $dnaobj = $obj->seqobj('dna_mut');
|
|
911 Function:
|
|
912
|
|
913 Returns the any original or mutated sequences as a
|
|
914 Bio::PrimarySeq object.
|
|
915
|
|
916 Example :
|
|
917 Returns : Bio::PrimarySeq object for the requested sequence
|
|
918 Args : string, method name for the sequence requested
|
|
919
|
|
920 See L<Bio::PrimarySeq> for more information.
|
|
921
|
|
922 =cut
|
|
923
|
|
924 sub seqobj {
|
|
925 my ($self,$value) = @_;
|
|
926 my $out;
|
|
927 my %valid_obj =
|
|
928 map {$_, 1} qw(dna_ori rna_ori aa_ori dna_mut rna_mut aa_mut);
|
|
929 $valid_obj{$value} ||
|
|
930 $self->throw("Sequence type '$value' is not a valid type (".
|
|
931 join(',', map "'$_'", sort keys %valid_obj) .") lowercase");
|
|
932 my ($alphabet) = $value =~ /([^_]+)/;
|
|
933 my $id = $self->id;
|
|
934 $id = $self->rna_id if $self->rna_id;
|
|
935 $alphabet = 'protein' if $alphabet eq 'aa';
|
|
936 $out = Bio::PrimarySeq->new
|
|
937 ( '-seq' => $self->{$value},
|
|
938 '-display_id' => $id,
|
|
939 '-accession_number' => $self->id,
|
|
940 '-alphabet' => $alphabet
|
|
941 ) if $self->{$value} ;
|
|
942 return $out;
|
|
943 }
|
|
944
|
|
945 =head2 alignment
|
|
946
|
|
947 Title : alignment
|
|
948 Usage : $obj->alignment
|
|
949 Function:
|
|
950
|
|
951 Returns a pretty RNA/AA sequence alignment from linked
|
|
952 objects. Under construction: Only simple coding region
|
|
953 point mutations work.
|
|
954
|
|
955 Example :
|
|
956 Returns :
|
|
957 Args : none
|
|
958
|
|
959 =cut
|
|
960
|
|
961
|
|
962 sub alignment {
|
|
963 my $self = shift;
|
|
964 my (@entry, $text);
|
|
965
|
|
966 my $maxflanklen = 12;
|
|
967
|
|
968 foreach my $mut ($self->each_Variant) {
|
|
969 if( $mut->isa('Bio::Variation::RNAChange') ) {
|
|
970
|
|
971 my $upflank = $mut->upStreamSeq;
|
|
972 my $dnflank = $mut->dnStreamSeq;
|
|
973 my $cposd = $mut->codon_pos;
|
|
974 my $rori = $mut->allele_ori->seq;
|
|
975 my $rmut = $mut->allele_mut->seq;
|
|
976 my $rseqoriu = '';
|
|
977 my $rseqmutu = '';
|
|
978 my $rseqorid = '';
|
|
979 my $rseqmutd = '';
|
|
980 my $aaseqmutu = '';
|
|
981 my (@rseqori, @rseqmut );
|
|
982
|
|
983 # point
|
|
984 if ($mut->DNAMutation->label =~ /point/) {
|
|
985 if ($cposd == 1 ) {
|
|
986 my $nt2d = substr($dnflank, 0, 2);
|
|
987 push @rseqori, $rori. $nt2d;
|
|
988 push @rseqmut, uc ($rmut). $nt2d;
|
|
989 $dnflank = substr($dnflank, 2);
|
|
990 }
|
|
991 elsif ($cposd == 2) {
|
|
992 my $ntu = chop $upflank;
|
|
993 my $ntd = substr($dnflank, 0, 1);
|
|
994 push @rseqori, $ntu. $rori. $ntd;
|
|
995 push @rseqmut, $ntu. uc ($rmut). $ntd;
|
|
996 $dnflank = substr($dnflank, 1);
|
|
997 }
|
|
998 elsif ($cposd == 3) {
|
|
999 my $ntu1 = chop $upflank;
|
|
1000 my $ntu2 = chop $upflank;
|
|
1001 push (@rseqori, $ntu2. $ntu1. $rori);
|
|
1002 push (@rseqmut, $ntu2. $ntu1. uc $rmut);
|
|
1003 }
|
|
1004 }
|
|
1005 #deletion
|
|
1006 elsif ($mut->DNAMutation->label =~ /deletion/) {
|
|
1007 if ($cposd == 2 ) {
|
|
1008 $rseqorid = chop $upflank;
|
|
1009 $rseqmutd = $rseqorid;
|
|
1010 }
|
|
1011 for (my $i=1; $i<=$mut->length; $i++) {
|
|
1012 my $ntd .= substr($mut->allele_ori, $i-1, 1);
|
|
1013 $rseqorid .= $ntd;
|
|
1014 if (length($rseqorid) == 3 ) {
|
|
1015 push (@rseqori, $rseqorid);
|
|
1016 push (@rseqmut, " ");
|
|
1017 $rseqorid = '';
|
|
1018 }
|
|
1019 }
|
|
1020
|
|
1021 if ($rseqorid) {
|
|
1022 $rseqorid .= substr($dnflank, 0, 3-$rseqorid);
|
|
1023 push (@rseqori, $rseqorid);
|
|
1024 push (@rseqmut, " ");
|
|
1025 $dnflank = substr($dnflank,3-$rseqorid);
|
|
1026 }
|
|
1027 }
|
|
1028 $upflank = reverse $upflank;
|
|
1029 # loop throught the flanks
|
|
1030 for (my $i=1; $i<=length($dnflank); $i++) {
|
|
1031
|
|
1032 last if $i > $maxflanklen;
|
|
1033
|
|
1034 my $ntd .= substr($dnflank, $i-1, 1);
|
|
1035 my $ntu .= substr($upflank, $i-1, 1);
|
|
1036
|
|
1037 $rseqmutd .= $ntd;
|
|
1038 $rseqorid .= $ntd;
|
|
1039 $rseqmutu = $ntu. $rseqmutu;
|
|
1040 $rseqoriu = $ntu. $rseqoriu;
|
|
1041
|
|
1042 if (length($rseqorid) == 3 and length($rseqorid) == 3) {
|
|
1043 push (@rseqori, $rseqorid);
|
|
1044 push (@rseqmut, $rseqmutd);
|
|
1045 $rseqorid = $rseqmutd ='';
|
|
1046 }
|
|
1047 if (length($rseqoriu) == 3 and length($rseqoriu) == 3) {
|
|
1048 unshift (@rseqori, $rseqoriu);
|
|
1049 unshift (@rseqmut, $rseqmutu);
|
|
1050 $rseqoriu = $rseqmutu ='';
|
|
1051 }
|
|
1052
|
|
1053 #print "|i=$i, $cposd, $rseqmutd, $rseqorid\n";
|
|
1054 #print "|i=$i, $cposu, $rseqmutu, $rseqoriu\n\n";
|
|
1055
|
|
1056 }
|
|
1057
|
|
1058 push (@rseqori, $rseqorid);
|
|
1059 unshift (@rseqori, $rseqoriu);
|
|
1060 push (@rseqmut, $rseqmutd);
|
|
1061 unshift (@rseqmut, $rseqmutu);
|
|
1062
|
|
1063 return unless $mut->AAChange;
|
|
1064 #translate
|
|
1065 my $tr = new Bio::Tools::CodonTable ('-id' => $mut->codon_table);
|
|
1066 my $apos = $mut->AAChange->start;
|
|
1067 my $aposmax = CORE::length($self->aa_ori); #terminator codon no
|
|
1068 my $rseqori;
|
|
1069 my $rseqmut;
|
|
1070 my $aaseqori;
|
|
1071 my $aaseqmut = "";
|
|
1072 for (my $i = 0; $i <= $#rseqori; $i++) {
|
|
1073 my $a = '';
|
|
1074
|
|
1075 $a = $tr->translate($rseqori[$i]) if length($rseqori[$i]) == 3;
|
|
1076
|
|
1077 if (length($a) != 1 or
|
|
1078 $apos - ( $maxflanklen/2 -1) + $i < 1 or
|
|
1079 $apos - ( $maxflanklen/2 -1) + $i > $aposmax ) {
|
|
1080 $aaseqori .= " ";
|
|
1081 } else {
|
|
1082 $aaseqori .= " ". $a. " ";
|
|
1083 }
|
|
1084 my $b = '';
|
|
1085 if (length($rseqmut[$i]) == 3) {
|
|
1086 if ($rseqmut[$i] eq ' ') {
|
|
1087 $b = "_";
|
|
1088 } else {
|
|
1089 $b = $tr->translate($rseqmut[$i]);
|
|
1090 }
|
|
1091 }
|
|
1092 if (( $b ne $a and
|
|
1093 length($b) == 1 and
|
|
1094 $apos - ( $maxflanklen/2 -1) + $i >= 1 ) or
|
|
1095 ( $apos - ( $maxflanklen/2 -1) + $i >= $aposmax and
|
|
1096 $mut->label =~ 'termination')
|
|
1097 ) {
|
|
1098 $aaseqmut .= " ". $b. " ";
|
|
1099 } else {
|
|
1100 $aaseqmut .= " ";
|
|
1101 }
|
|
1102
|
|
1103 if ($i == 0 and length($rseqori[$i]) != 3) {
|
|
1104 my $l = 3 - length($rseqori[$i]);
|
|
1105 $rseqori[$i] = (" " x $l). $rseqori[$i];
|
|
1106 $rseqmut[$i] = (" " x $l). $rseqmut[$i];
|
|
1107 }
|
|
1108 $rseqori .= $rseqori[$i]. " " if $rseqori[$i] ne '';
|
|
1109 $rseqmut .= $rseqmut[$i]. " " if $rseqmut[$i] ne '';
|
|
1110 }
|
|
1111
|
|
1112 # collect the results
|
|
1113 push (@entry,
|
|
1114 "\n"
|
|
1115 );
|
|
1116 $text = " ". $aaseqmut;
|
|
1117 push (@entry,
|
|
1118 $text
|
|
1119 );
|
|
1120 $text = "Variant : ". $rseqmut;
|
|
1121 push (@entry,
|
|
1122 $text
|
|
1123 );
|
|
1124 $text = "Reference: ". $rseqori;
|
|
1125 push (@entry,
|
|
1126 $text
|
|
1127 );
|
|
1128 $text = " ". $aaseqori;
|
|
1129 push (@entry,
|
|
1130 $text
|
|
1131 );
|
|
1132 push (@entry,
|
|
1133 "\n"
|
|
1134 );
|
|
1135 }
|
|
1136
|
|
1137 }
|
|
1138
|
|
1139 my $res;
|
|
1140 foreach my $line (@entry) {
|
|
1141 $res .= "$line\n";
|
|
1142 }
|
|
1143 return $res;
|
|
1144 }
|
|
1145
|
|
1146 1;
|