0
|
1 # $Id: Mutator.pm,v 1.26 2002/10/22 07:38:34 lapp Exp $
|
|
2 #
|
|
3 # bioperl module for Bio::LiveSeq::Mutator
|
|
4 #
|
|
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
|
|
6 #
|
|
7 # Copyright Joseph Insana
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10 #
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::LiveSeq::Mutator - Package mutating LiveSequences
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 # $gene is a Bio::LiveSeq::Gene object
|
|
20 my $mutate = Bio::LiveSeq::Mutator->new('-gene' => $gene,
|
|
21 '-numbering' => "coding"
|
|
22 );
|
|
23 # $mut is a Bio::LiveSeq::Mutation object
|
|
24 $mutate->add_Mutation($mut);
|
|
25 # $results is a Bio::Variation::SeqDiff object
|
|
26 my $results=$mutate->change_gene();
|
|
27 if ($results) {
|
|
28 my $out = Bio::Variation::IO->new( '-format' => 'flat');
|
|
29 $out->write($results);
|
|
30 }
|
|
31
|
|
32 =head1 DESCRIPTION
|
|
33
|
|
34 This class mutates Bio::LiveSeq::Gene objects and returns a
|
|
35 Bio::Variation::SeqDiff object. Mutations are described as
|
|
36 Bio::LiveSeq::Mutation objects. See L<Bio::LiveSeq::Gene>,
|
|
37 L<Bio::Variation::SeqDiff>, and L<Bio::LiveSeq::Mutation> for details.
|
|
38
|
|
39 =head1 FEEDBACK
|
|
40
|
|
41
|
|
42 User feedback is an integral part of the evolution of this and other
|
|
43 Bioperl modules. Send your comments and suggestions preferably to the
|
|
44 Bioperl mailing lists Your participation is much appreciated.
|
|
45
|
|
46 bioperl-l@bioperl.org - General discussion
|
|
47 http://bio.perl.org/MailList.html - About the mailing lists
|
|
48
|
|
49 =head2 Reporting Bugs
|
|
50
|
|
51 report bugs to the Bioperl bug tracking system to help us keep track
|
|
52 the bugs and their resolution. Bug reports can be submitted via
|
|
53 email or the web:
|
|
54
|
|
55 bioperl-bugs@bio.perl.org
|
|
56 http://bugzilla.bioperl.org/
|
|
57
|
|
58 =head1 AUTHOR - Heikki Lehvaslaiho & Joseph A.L. Insana
|
|
59
|
|
60 Email: heikki@ebi.ac.uk
|
|
61 insana@ebi.ac.uk, jinsana@gmx.net
|
|
62
|
|
63 Address:
|
|
64
|
|
65 EMBL Outstation, European Bioinformatics Institute
|
|
66 Wellcome Trust Genome Campus, Hinxton
|
|
67 Cambs. CB10 1SD, United Kingdom
|
|
68
|
|
69 =head1 APPENDIX
|
|
70
|
|
71 The rest of the documentation details each of the object
|
|
72 methods. Internal methods are usually preceded with a _
|
|
73
|
|
74 =cut
|
|
75
|
|
76 # Let the code begin...
|
|
77
|
|
78 package Bio::LiveSeq::Mutator;
|
|
79 use vars qw(@ISA);
|
|
80 use strict;
|
|
81
|
|
82 use vars qw($VERSION @ISA);
|
|
83 use Bio::Variation::SeqDiff;
|
|
84 use Bio::Variation::DNAMutation;
|
|
85 use Bio::Variation::RNAChange;
|
|
86 use Bio::Variation::AAChange;
|
|
87 use Bio::Variation::Allele;
|
|
88 use Bio::LiveSeq::Mutation;
|
|
89
|
|
90 #use integer;
|
|
91 # Object preamble - inheritance
|
|
92
|
|
93 use Bio::Root::Root;
|
|
94
|
|
95 @ISA = qw( Bio::Root::Root );
|
|
96
|
|
97 sub new {
|
|
98 my($class,@args) = @_;
|
|
99 my $self;
|
|
100 $self = {};
|
|
101 bless $self, $class;
|
|
102
|
|
103 my ($gene, $numbering) =
|
|
104 $self->_rearrange([qw(GENE
|
|
105 NUMBERING
|
|
106 )],
|
|
107 @args);
|
|
108
|
|
109 $self->{ 'mutations' } = [];
|
|
110
|
|
111 $gene && $self->gene($gene);
|
|
112 $numbering && $self->numbering($numbering);
|
|
113
|
|
114 #class constant;
|
|
115 $self->{'flanklen'} = 25;
|
|
116 return $self; # success - we hope!
|
|
117 }
|
|
118
|
|
119 =head2 gene
|
|
120
|
|
121 Title : gene
|
|
122 Usage : $mutobj = $obj->gene;
|
|
123 : $mutobj = $obj->gene($objref);
|
|
124 Function:
|
|
125
|
|
126 Returns or sets the link-reference to a
|
|
127 Bio::LiveSeq::Gene object. If no value has ben set, it
|
|
128 will return undef
|
|
129
|
|
130 Returns : an object reference or undef
|
|
131 Args : a Bio::LiveSeq::Gene
|
|
132
|
|
133 See L<Bio::LiveSeq::Gene> for more information.
|
|
134
|
|
135 =cut
|
|
136
|
|
137 sub gene {
|
|
138 my ($self,$value) = @_;
|
|
139 if (defined $value) {
|
|
140 if( ! $value->isa('Bio::LiveSeq::Gene') ) {
|
|
141 $self->throw("Is not a Bio::LiveSeq::Gene object but a [$value]");
|
|
142 return undef;
|
|
143 }
|
|
144 else {
|
|
145 $self->{'gene'} = $value;
|
|
146 }
|
|
147 }
|
|
148 unless (exists $self->{'gene'}) {
|
|
149 return (undef);
|
|
150 } else {
|
|
151 return $self->{'gene'};
|
|
152 }
|
|
153 }
|
|
154
|
|
155
|
|
156 =head2 numbering
|
|
157
|
|
158 Title : numbering
|
|
159 Usage : $obj->numbering();
|
|
160 Function:
|
|
161
|
|
162 Sets and returns coordinate system used in positioning the
|
|
163 mutations. See L<change_gene> for details.
|
|
164
|
|
165 Example :
|
|
166 Returns : string
|
|
167 Args : string (coding [transcript number] | gene | entry)
|
|
168
|
|
169 =cut
|
|
170
|
|
171
|
|
172 sub numbering {
|
|
173 my ($self,$value) = @_;
|
|
174 if( defined $value) {
|
|
175 if ($value =~ /(coding)( )?(\d+)?/ or $value eq 'entry' or $value eq 'gene') {
|
|
176 $self->{'numbering'} = $value;
|
|
177 } else { # defaulting to 'coding'
|
|
178 $self->{'numbering'} = 'coding';
|
|
179 }
|
|
180 }
|
|
181 unless (exists $self->{'numbering'}) {
|
|
182 return 'coding';
|
|
183 } else {
|
|
184 return $self->{'numbering'};
|
|
185 }
|
|
186 }
|
|
187
|
|
188 =head2 add_Mutation
|
|
189
|
|
190 Title : add_Mutation
|
|
191 Usage : $self->add_Mutation($ref)
|
|
192 Function: adds a Bio::LiveSeq::Mutation object
|
|
193 Example :
|
|
194 Returns :
|
|
195 Args : a Bio::LiveSeq::Mutation
|
|
196
|
|
197 See L<Bio::LiveSeq::Mutation> for more information.
|
|
198
|
|
199 =cut
|
|
200
|
|
201 sub add_Mutation{
|
|
202 my ($self,$value) = @_;
|
|
203 if( $value->isa('Bio::Liveseq::Mutation') ) {
|
|
204 my $com = ref $value;
|
|
205 $self->throw("Is not a Mutation object but a [$com]" );
|
|
206 return undef;
|
|
207 }
|
|
208 if (! $value->pos) {
|
|
209 $self->warn("No value for mutation position in the sequence!");
|
|
210 return undef;
|
|
211 }
|
|
212 if (! $value->seq && ! $value->len) {
|
|
213 $self->warn("Either mutated sequence or length of the deletion must be given!");
|
|
214 return undef;
|
|
215 }
|
|
216 push(@{$self->{'mutations'}},$value);
|
|
217 }
|
|
218
|
|
219 =head2 each_Mutation
|
|
220
|
|
221 Title : each_Mutation
|
|
222 Usage : foreach $ref ( $a->each_Mutation )
|
|
223 Function: gets an array of Bio::LiveSeq::Mutation objects
|
|
224 Example :
|
|
225 Returns : array of Mutations
|
|
226 Args :
|
|
227
|
|
228 See L<Bio::LiveSeq::Mutation> for more information.
|
|
229
|
|
230 =cut
|
|
231
|
|
232 sub each_Mutation{
|
|
233 my ($self) = @_;
|
|
234 return @{$self->{'mutations'}};
|
|
235 }
|
|
236
|
|
237
|
|
238 =head2 mutation
|
|
239
|
|
240 Title : mutation
|
|
241 Usage : $mutobj = $obj->mutation;
|
|
242 : $mutobj = $obj->mutation($objref);
|
|
243 Function:
|
|
244
|
|
245 Returns or sets the link-reference to the current mutation
|
|
246 object. If the value is not set, it will return undef.
|
|
247 Internal method.
|
|
248
|
|
249 Returns : an object reference or undef
|
|
250
|
|
251 =cut
|
|
252
|
|
253
|
|
254 sub mutation {
|
|
255 my ($self,$value) = @_;
|
|
256 if (defined $value) {
|
|
257 if( ! $value->isa('Bio::LiveSeq::Mutation') ) {
|
|
258 $self->throw("Is not a Bio::LiveSeq::Mutation object but a [$value]");
|
|
259 return undef;
|
|
260 }
|
|
261 else {
|
|
262 $self->{'mutation'} = $value;
|
|
263 }
|
|
264 }
|
|
265 unless (exists $self->{'mutation'}) {
|
|
266 return (undef);
|
|
267 } else {
|
|
268 return $self->{'mutation'};
|
|
269 }
|
|
270 }
|
|
271
|
|
272 =head2 DNA
|
|
273
|
|
274 Title : DNA
|
|
275 Usage : $mutobj = $obj->DNA;
|
|
276 : $mutobj = $obj->DNA($objref);
|
|
277 Function:
|
|
278
|
|
279 Returns or sets the reference to the LiveSeq object holding
|
|
280 the reference sequence. If there is no link, it will return
|
|
281 undef.
|
|
282 Internal method.
|
|
283
|
|
284 Returns : an object reference or undef
|
|
285
|
|
286 =cut
|
|
287
|
|
288 sub DNA {
|
|
289 my ($self,$value) = @_;
|
|
290 if (defined $value) {
|
|
291 if( ! $value->isa('Bio::LiveSeq::DNA') and ! $value->isa('Bio::LiveSeq::Transcript') ) {
|
|
292 $self->throw("Is not a Bio::LiveSeq::DNA/Transcript object but a [$value]");
|
|
293 return undef;
|
|
294 }
|
|
295 else {
|
|
296 $self->{'DNA'} = $value;
|
|
297 }
|
|
298 }
|
|
299 unless (exists $self->{'DNA'}) {
|
|
300 return (undef);
|
|
301 } else {
|
|
302 return $self->{'DNA'};
|
|
303 }
|
|
304 }
|
|
305
|
|
306
|
|
307 =head2 RNA
|
|
308
|
|
309 Title : RNA
|
|
310 Usage : $mutobj = $obj->RNA;
|
|
311 : $mutobj = $obj->RNA($objref);
|
|
312 Function:
|
|
313
|
|
314 Returns or sets the reference to the LiveSeq object holding
|
|
315 the reference sequence. If the value is not set, it will return
|
|
316 undef.
|
|
317 Internal method.
|
|
318
|
|
319 Returns : an object reference or undef
|
|
320
|
|
321 =cut
|
|
322
|
|
323
|
|
324 sub RNA {
|
|
325 my ($self,$value) = @_;
|
|
326 if (defined $value) {
|
|
327 if( ! $value->isa('Bio::LiveSeq::Transcript') ) {
|
|
328 $self->throw("Is not a Bio::LiveSeq::RNA/Transcript object but a [$value]");
|
|
329 return undef;
|
|
330 }
|
|
331 else {
|
|
332 $self->{'RNA'} = $value;
|
|
333 }
|
|
334 }
|
|
335 unless (exists $self->{'RNA'}) {
|
|
336 return (undef);
|
|
337 } else {
|
|
338 return $self->{'RNA'};
|
|
339 }
|
|
340 }
|
|
341
|
|
342
|
|
343 =head2 dnamut
|
|
344
|
|
345 Title : dnamut
|
|
346 Usage : $mutobj = $obj->dnamut;
|
|
347 : $mutobj = $obj->dnamut($objref);
|
|
348 Function:
|
|
349
|
|
350 Returns or sets the reference to the current DNAMutation object.
|
|
351 If the value is not set, it will return undef.
|
|
352 Internal method.
|
|
353
|
|
354 Returns : a Bio::Variation::DNAMutation object or undef
|
|
355
|
|
356 See L<Bio::Variation::DNAMutation> for more information.
|
|
357
|
|
358 =cut
|
|
359
|
|
360
|
|
361 sub dnamut {
|
|
362 my ($self,$value) = @_;
|
|
363 if (defined $value) {
|
|
364 if( ! $value->isa('Bio::Variation::DNAMutation') ) {
|
|
365 $self->throw("Is not a Bio::Variation::DNAMutation object but a [$value]");
|
|
366 return undef;
|
|
367 }
|
|
368 else {
|
|
369 $self->{'dnamut'} = $value;
|
|
370 }
|
|
371 }
|
|
372 unless (exists $self->{'dnamut'}) {
|
|
373 return (undef);
|
|
374 } else {
|
|
375 return $self->{'dnamut'};
|
|
376 }
|
|
377 }
|
|
378
|
|
379
|
|
380 =head2 rnachange
|
|
381
|
|
382 Title : rnachange
|
|
383 Usage : $mutobj = $obj->rnachange;
|
|
384 : $mutobj = $obj->rnachange($objref);
|
|
385 Function:
|
|
386
|
|
387 Returns or sets the reference to the current RNAChange object.
|
|
388 If the value is not set, it will return undef.
|
|
389 Internal method.
|
|
390
|
|
391 Returns : a Bio::Variation::RNAChange object or undef
|
|
392
|
|
393 See L<Bio::Variation::RNAChange> for more information.
|
|
394
|
|
395 =cut
|
|
396
|
|
397
|
|
398 sub rnachange {
|
|
399 my ($self,$value) = @_;
|
|
400 if (defined $value) {
|
|
401 if( ! $value->isa('Bio::Variation::RNAChange') ) {
|
|
402 $self->throw("Is not a Bio::Variation::RNAChange object but a [$value]");
|
|
403 return undef;
|
|
404 }
|
|
405 else {
|
|
406 $self->{'rnachange'} = $value;
|
|
407 }
|
|
408 }
|
|
409 unless (exists $self->{'rnachange'}) {
|
|
410 return (undef);
|
|
411 } else {
|
|
412 return $self->{'rnachange'};
|
|
413 }
|
|
414 }
|
|
415
|
|
416
|
|
417 =head2 aachange
|
|
418
|
|
419 Title : aachange
|
|
420 Usage : $mutobj = $obj->aachange;
|
|
421 : $mutobj = $obj->aachange($objref);
|
|
422 Function:
|
|
423
|
|
424 Returns or sets the reference to the current AAChange object.
|
|
425 If the value is not set, it will return undef.
|
|
426 Internal method.
|
|
427
|
|
428 Returns : a Bio::Variation::AAChange object or undef
|
|
429
|
|
430 See L<Bio::Variation::AAChange> for more information.
|
|
431
|
|
432 =cut
|
|
433
|
|
434
|
|
435 sub aachange {
|
|
436 my ($self,$value) = @_;
|
|
437 if (defined $value) {
|
|
438 if( ! $value->isa('Bio::Variation::AAChange') ) {
|
|
439 $self->throw("Is not a Bio::Variation::AAChange object but a [$value]");
|
|
440 return undef;
|
|
441 }
|
|
442 else {
|
|
443 $self->{'aachange'} = $value;
|
|
444 }
|
|
445 }
|
|
446 unless (exists $self->{'aachange'}) {
|
|
447 return (undef);
|
|
448 } else {
|
|
449 return $self->{'aachange'};
|
|
450 }
|
|
451 }
|
|
452
|
|
453
|
|
454 =head2 exons
|
|
455
|
|
456 Title : exons
|
|
457 Usage : $mutobj = $obj->exons;
|
|
458 : $mutobj = $obj->exons($objref);
|
|
459 Function:
|
|
460
|
|
461 Returns or sets the reference to a current array of Exons.
|
|
462 If the value is not set, it will return undef.
|
|
463 Internal method.
|
|
464
|
|
465 Returns : an array of Bio::LiveSeq::Exon objects or undef
|
|
466
|
|
467 See L<Bio::LiveSeq::Exon> for more information.
|
|
468
|
|
469 =cut
|
|
470
|
|
471
|
|
472 sub exons {
|
|
473 my ($self,$value) = @_;
|
|
474 if (defined $value) {
|
|
475 $self->{'exons'} = $value;
|
|
476 }
|
|
477 unless (exists $self->{'exons'}) {
|
|
478 return (undef);
|
|
479 } else {
|
|
480 return $self->{'exons'};
|
|
481 }
|
|
482 }
|
|
483
|
|
484 =head2 change_gene_with_alignment
|
|
485
|
|
486 Title : change_gene_with_alignment
|
|
487 Usage : $results=$mutate->change_gene_with_alignment($aln);
|
|
488
|
|
489 Function:
|
|
490
|
|
491 Returns a Bio::Variation::SeqDiff object containing the
|
|
492 results of the changes in the alignment. The alignment has
|
|
493 to be pairwise and have one sequence named 'QUERY', the
|
|
494 other one is assumed to be a part of the sequence from
|
|
495 $gene.
|
|
496
|
|
497 This method offers a shortcut to change_gene and
|
|
498 automates the creation of Bio::LiveSeq::Mutation objects.
|
|
499 Use it with almost identical sequnces, e.g. to locate a SNP.
|
|
500
|
|
501 Args : Bio::SimpleAlign object representing a short local alignment
|
|
502 Returns : Bio::Variation::SeqDiff object or 0 on error
|
|
503
|
|
504 See L<Bio::LiveSeq::Mutation>, L<Bio::SimpleAlign>, and
|
|
505 L<Bio::Variation::SeqDiff> for more information.
|
|
506
|
|
507 =cut
|
|
508
|
|
509 sub change_gene_with_alignment {
|
|
510 my ($self, $aln) = @_;
|
|
511
|
|
512 #
|
|
513 # Sanity checks
|
|
514 #
|
|
515
|
|
516 $self->throw("Argument is not a Bio::SimpleAlign object but a [$aln]")
|
|
517 unless $aln->isa('Bio::SimpleAlign');
|
|
518 $self->throw("'Pairwise alignments only, please")
|
|
519 if $aln->no_sequences != 2;
|
|
520
|
|
521 # find out the order the two sequences are given
|
|
522 my $queryseq_pos = 1; #default
|
|
523 my $refseq_pos = 2;
|
|
524 unless ($aln->get_seq_by_pos(1)->id eq 'QUERY') {
|
|
525 carp('Query sequence has to be named QUERY')
|
|
526 if $aln->get_seq_by_pos(2)->id ne 'QUERY';
|
|
527 $queryseq_pos = 2; # alternative
|
|
528 $refseq_pos = 1;
|
|
529 }
|
|
530
|
|
531 # trim the alignment
|
|
532 my $start = $aln->column_from_residue_number('QUERY', 1);
|
|
533 my $end = $aln->column_from_residue_number('QUERY',
|
|
534 $aln->get_seq_by_pos($queryseq_pos)->end );
|
|
535
|
|
536 my $aln2 = $aln->slice($start, $end);
|
|
537
|
|
538 #
|
|
539 # extracting mutations
|
|
540 #
|
|
541
|
|
542 my $cs = $aln2->consensus_string(51);
|
|
543 my $queryseq = $aln2->get_seq_by_pos($queryseq_pos);
|
|
544 my $refseq = $aln2->get_seq_by_pos($refseq_pos);
|
|
545
|
|
546 while ( $cs =~ /(\?+)/g) {
|
|
547 # pos in local coordinates
|
|
548 my $pos = pos($cs) - length($1) + 1;
|
|
549 my $mutation = create_mutation($self,
|
|
550 $refseq,
|
|
551 $queryseq,
|
|
552 $pos,
|
|
553 CORE::length($1)
|
|
554 );
|
|
555 # reset pos to refseq coordinates
|
|
556 $pos += $refseq->start - 1;
|
|
557 $mutation->pos($pos);
|
|
558
|
|
559 $self->add_Mutation($mutation);
|
|
560 }
|
|
561 return $self->change_gene();
|
|
562 }
|
|
563
|
|
564 =head2 create_mutation
|
|
565
|
|
566 Title : create_mutation
|
|
567 Usage :
|
|
568 Function:
|
|
569
|
|
570 Formats sequence differences from two sequences into
|
|
571 Bio::LiveSeq::Mutation objects which can be applied to a
|
|
572 gene.
|
|
573
|
|
574 To keep it generic, sequence arguments need not to be
|
|
575 Bio::LocatableSeq. Coordinate change to parent sequence
|
|
576 numbering needs to be done by the calling code.
|
|
577
|
|
578 Called from change_gene_with_alignment
|
|
579
|
|
580 Args : Bio::PrimarySeqI inheriting object for the reference sequence
|
|
581 Bio::PrimarySeqI inheriting object for the query sequence
|
|
582 integer for the start position of the local sequence difference
|
|
583 integer for the length of the sequence difference
|
|
584 Returns : Bio::LiveSeq::Mutation object
|
|
585
|
|
586 =cut
|
|
587
|
|
588 sub create_mutation {
|
|
589 my ($self, $refseq, $queryseq, $pos, $len) = @_;
|
|
590
|
|
591 $self->throw("Is not a Bio::PrimarySeqI object but a [$refseq]")
|
|
592 unless $refseq->isa('Bio::PrimarySeqI');
|
|
593 $self->throw("Is not a Bio::PrimarySeqI object but a [$queryseq]")
|
|
594 unless $queryseq->isa('Bio::PrimarySeqI');
|
|
595 $self->throw("Position is not a positive integer but [$pos]")
|
|
596 unless $pos =~ /^\+?\d+$/;
|
|
597 $self->throw("Length is not a positive integer but [$len]")
|
|
598 unless $len =~ /^\+?\d+$/;
|
|
599
|
|
600 my $mutation;
|
|
601 my $refstring = $refseq->subseq($pos, $pos + $len - 1);
|
|
602 my $varstring = $queryseq->subseq($pos, $pos + $len - 1);
|
|
603
|
|
604 if ($len == 1 and $refstring =~ /[^\.\-\*\?]/ and
|
|
605 $varstring =~ /[^\.\-\*\?]/ ) { #point
|
|
606 $mutation = new Bio::LiveSeq::Mutation (-seq => $varstring,
|
|
607 -pos => $pos,
|
|
608 );
|
|
609 }
|
|
610 elsif ( $refstring =~ /^[^\.\-\*\?]+$/ and
|
|
611 $varstring !~ /^[^\.\-\*\?]+$/ ) { # deletion
|
|
612 $mutation = new Bio::LiveSeq::Mutation (-pos => $pos,
|
|
613 -len => $len
|
|
614 );
|
|
615 }
|
|
616 elsif ( $refstring !~ /^[^\.\-\*\?]+$/ and
|
|
617 $varstring =~ /^[^\.\-\*\?]+$/ ) { # insertion
|
|
618 $mutation = new Bio::LiveSeq::Mutation (-seq => $varstring,
|
|
619 -pos => $pos,
|
|
620 -len => 0
|
|
621 );
|
|
622 } else { # complex
|
|
623 $mutation = new Bio::LiveSeq::Mutation (-seq => $varstring,
|
|
624 -pos => $pos,
|
|
625 -len => $len
|
|
626 );
|
|
627 }
|
|
628
|
|
629 return $mutation;
|
|
630 }
|
|
631
|
|
632 =head2 change_gene
|
|
633
|
|
634 Title : change_gene
|
|
635 Usage : my $mutate = Bio::LiveSeq::Mutator->new(-gene => $gene,
|
|
636 numbering => "coding"
|
|
637 );
|
|
638 # $mut is Bio::LiveSeq::Mutation object
|
|
639 $mutate->add_Mutation($mut);
|
|
640 my $results=$mutate->change_gene();
|
|
641
|
|
642 Function:
|
|
643
|
|
644 Returns a Bio::Variation::SeqDiff object containing the
|
|
645 results of the changes performed according to the
|
|
646 instructions present in Mutation(s). The -numbering
|
|
647 argument decides what molecule is being changed and what
|
|
648 numbering scheme being used:
|
|
649
|
|
650 -numbering => "entry"
|
|
651
|
|
652 determines the DNA level, using the numbering from the
|
|
653 beginning of the sequence
|
|
654
|
|
655 -numbering => "coding"
|
|
656
|
|
657 determines the RNA level, using the numbering from the
|
|
658 beginning of the 1st transcript
|
|
659
|
|
660 Alternative transcripts can be used by specifying
|
|
661 "coding 2" or "coding 3" ...
|
|
662
|
|
663 -numbering => "gene"
|
|
664
|
|
665 determines the DNA level, using the numbering from the
|
|
666 beginning of the 1st transcript and inluding introns.
|
|
667 The meaning equals 'coding' if the reference molecule
|
|
668 is cDNA.
|
|
669
|
|
670 Args : Bio::LiveSeq::Gene object
|
|
671 Bio::LiveSeq::Mutation object(s)
|
|
672 string specifying a numbering scheme (defaults to 'coding')
|
|
673 Returns : Bio::Variation::SeqDiff object or 0 on error
|
|
674
|
|
675 =cut
|
|
676
|
|
677 sub change_gene {
|
|
678 my ($self) = @_;
|
|
679
|
|
680 #
|
|
681 # Sanity check
|
|
682 #
|
|
683 unless ($self->gene) {
|
|
684 $self->warn("Input object Bio::LiveSeq::Gene is not given");
|
|
685 return 0;
|
|
686 }
|
|
687 #
|
|
688 # Setting the reference sequence based on -numbering
|
|
689 #
|
|
690 my @transcripts=@{$self->gene->get_Transcripts};
|
|
691 my $refseq; # will hold Bio::LiveSeq:Transcript object or Bio::LiveSeq::DNA
|
|
692
|
|
693 # 'gene' eq 'coding' if reference sequence is cDNA
|
|
694 $self->numbering ('coding') if $self->gene->get_DNA->alphabet eq 'rna' and $self->numbering eq 'gene';
|
|
695
|
|
696 if ($self->numbering =~ /(coding)( )?(\d+)?/ ) {
|
|
697 $self->numbering($1);
|
|
698 my $transnumber = $3;
|
|
699 $transnumber-- if $3; # 1 -> 0, 2 -> 1
|
|
700 if ($transnumber && $transnumber >= 0 && $transnumber <= $#transcripts) {
|
|
701 $refseq=$transcripts[$transnumber];
|
|
702 } else {
|
|
703 $transnumber && $self->warn("The alternative transcript number ". $transnumber+1 .
|
|
704 "- does not exist. Reverting to the 1st transcript\n");
|
|
705 $refseq=$transcripts[0];
|
|
706 }
|
|
707 } else {
|
|
708 $refseq=$transcripts[0]->{'seq'};
|
|
709 }
|
|
710 #
|
|
711 # Recording the state: SeqDiff object creation ?? transcript no.??
|
|
712 #
|
|
713 my $seqDiff = Bio::Variation::SeqDiff->new();
|
|
714 $seqDiff->alphabet($self->gene->get_DNA->alphabet);
|
|
715 $seqDiff->numbering($self->numbering);
|
|
716 my ($DNAobj, $RNAobj);
|
|
717 if ($refseq->isa("Bio::LiveSeq::Transcript")) {
|
|
718 $self->RNA($refseq);
|
|
719 $self->DNA($refseq->{'seq'});
|
|
720 $seqDiff->rna_ori($refseq->seq );
|
|
721 $seqDiff->aa_ori($refseq->get_Translation->seq);
|
|
722 } else {
|
|
723 $self->DNA($refseq);
|
|
724 $self->RNA($transcripts[0]);
|
|
725 $seqDiff->rna_ori($self->RNA->seq);
|
|
726 $seqDiff->aa_ori($self->RNA->get_Translation->seq);
|
|
727 }
|
|
728 $seqDiff->dna_ori($self->DNA->seq);
|
|
729 # put the accession number into the SeqDiff object ID
|
|
730 $seqDiff->id($self->DNA->accession_number);
|
|
731
|
|
732 # the atg_offset takes in account that DNA object could be a subset of the
|
|
733 # whole entry (via the light_weight loader)
|
|
734 my $atg_label=$self->RNA->start;
|
|
735 my $atg_offset=$self->DNA->position($atg_label)+($self->DNA->start)-1;
|
|
736 $seqDiff->offset($atg_offset - 1);
|
|
737 $self->DNA->coordinate_start($atg_label);
|
|
738
|
|
739 my @exons = $self->RNA->all_Exons;
|
|
740 $seqDiff->cds_end($exons[$#exons]->end);
|
|
741
|
|
742 #
|
|
743 # Converting mutation positions to labels
|
|
744 #
|
|
745 $self->warn("no mutations"), return 0
|
|
746 unless $self->_mutationpos2label($refseq, $seqDiff);
|
|
747
|
|
748 # need to add more than one rna & aa
|
|
749 #foreach $transcript (@transcripts) {
|
|
750 # $seqDiff{"ori_transcript_${i}_seq"}=$transcript->seq;
|
|
751 # $seqDiff{"ori_translation_${i}_seq"}=$transcript->get_Translation->seq;
|
|
752 #}
|
|
753
|
|
754 # do changes
|
|
755 my $k;
|
|
756 foreach my $mutation ($self->each_Mutation) {
|
|
757 next unless $mutation->label > 0;
|
|
758 $self->mutation($mutation);
|
|
759
|
|
760 $mutation->issue(++$k);
|
|
761 #
|
|
762 # current position on the transcript
|
|
763 #
|
|
764 if ($self->numbering =~ /coding/) {
|
|
765 $mutation->transpos($mutation->pos); # transpos given by user
|
|
766 } else {
|
|
767 #transpos of label / It will be 0 if mutating an intron, negative if upstream of ATG
|
|
768 $mutation->transpos($self->RNA->position($mutation->label,$atg_label));
|
|
769 }
|
|
770 #
|
|
771 # Calculate adjacent labels based on the position on the current sequence
|
|
772 #
|
|
773 $mutation->prelabel($self->DNA->label(-1, $mutation->label)); # 1 before label
|
|
774 if ($mutation->len == 0) {
|
|
775 $mutation->postlabel($mutation->label);
|
|
776 $mutation->lastlabel($mutation->label);
|
|
777 } elsif ($mutation->len == 1) {
|
|
778 $mutation->lastlabel($mutation->label); # last nucleotide affected
|
|
779 $mutation->postlabel($self->DNA->label(2,$mutation->lastlabel)); # $len after label
|
|
780 } else {
|
|
781 $mutation->lastlabel($self->DNA->label($mutation->len,$mutation->label));
|
|
782 $mutation->postlabel($self->DNA->label(2,$mutation->lastlabel));
|
|
783 }
|
|
784 my $dnamut = $self->_set_DNAMutation($seqDiff);
|
|
785 #
|
|
786 #
|
|
787 #
|
|
788 if ($self->_rnaAffected) {
|
|
789 $self->_set_effects($seqDiff, $dnamut);
|
|
790 }
|
|
791 elsif ($seqDiff->offset != 0 and $dnamut->region ne 'intron') {
|
|
792 $self->_untranslated ($seqDiff, $dnamut);
|
|
793 } else {
|
|
794 #$self->warn("Mutation starts outside coding region, RNAChange object not created");
|
|
795 }
|
|
796
|
|
797 #########################################################################
|
|
798 # Mutations are done here! #
|
|
799 $refseq->labelchange($mutation->seq, $mutation->label, $mutation->len); #
|
|
800 #########################################################################
|
|
801
|
|
802 $self->_post_mutation ($seqDiff);
|
|
803
|
|
804 $self->dnamut(undef);
|
|
805 $self->rnachange(undef);
|
|
806 $self->aachange(undef);
|
|
807 $self->exons(undef);
|
|
808 }
|
|
809 # record the final state of all three sequences
|
|
810 $seqDiff->dna_mut($self->DNA->seq);
|
|
811 $seqDiff->rna_mut($self->RNA->seq);
|
|
812 if ($refseq->isa("Bio::LiveSeq::Transcript")) {
|
|
813 $seqDiff->aa_mut($refseq->get_Translation->seq);
|
|
814 } else {
|
|
815 $seqDiff->aa_mut($self->RNA->get_Translation->seq);
|
|
816 }
|
|
817
|
|
818 #$seqDiff{mut_dna_seq}=$gene->get_DNA->seq;
|
|
819 #my $i=1;
|
|
820 #foreach $transcript (@transcripts) {
|
|
821 # $seqDiff{"mut_transcript_${i}_seq"}=$transcript->seq;
|
|
822 # $seqDiff{"mut_translation_${i}_seq"}=$transcript->get_Translation->seq;
|
|
823 #}
|
|
824 return $seqDiff;
|
|
825 }
|
|
826
|
|
827 =head2 _mutationpos2label
|
|
828
|
|
829 Title : _mutationpos2label
|
|
830 Usage :
|
|
831 Function: converts mutation positions into labels
|
|
832 Example :
|
|
833 Returns : number of valid mutations
|
|
834 Args : LiveSeq sequence object
|
|
835
|
|
836 =cut
|
|
837
|
|
838 sub _mutationpos2label {
|
|
839 my ($self, $refseq, $SeqDiff) = @_;
|
|
840 my $count;
|
|
841 my @bb = @{$self->{'mutations'}};
|
|
842 my $cc = scalar @bb;
|
|
843 foreach my $mut (@{$self->{'mutations'}}) {
|
|
844 # if ($self->numbering eq 'gene' and $mut->pos < 1) {
|
|
845 # my $tmp = $mut->pos;
|
|
846 # print STDERR "pos: ", "$tmp\n";
|
|
847 # $tmp++ if $tmp < 1;
|
|
848 # $tmp += $SeqDiff->offset;
|
|
849 # print STDERR "pos2: ", "$tmp\n";
|
|
850 # $mut->pos($tmp);
|
|
851 # }
|
|
852 # elsif ($self->numbering eq 'entry') {
|
|
853 if ($self->numbering eq 'entry') {
|
|
854 my $tmp = $mut->pos;
|
|
855 $tmp -= $SeqDiff->offset;
|
|
856 $tmp-- if $tmp < 1;
|
|
857 $mut->pos($tmp);
|
|
858 }
|
|
859
|
|
860 my $label = $refseq->label($mut->pos); # get the label for the position
|
|
861 $mut->label($label), $count++ if $label > 0 ;
|
|
862 #print STDERR "x", $mut->pos,'|' ,$mut->label, "\n";
|
|
863 }
|
|
864 return $count;
|
|
865 }
|
|
866
|
|
867 #
|
|
868 # Calculate labels around mutated nucleotide
|
|
869 #
|
|
870
|
|
871 =head2 _set_DNAMutation
|
|
872
|
|
873 Title : _set_DNAMutation
|
|
874 Usage :
|
|
875 Function:
|
|
876
|
|
877 Stores DNA level mutation attributes before mutation into
|
|
878 Bio::Variation::DNAMutation object. Links it to SeqDiff
|
|
879 object.
|
|
880
|
|
881 Example :
|
|
882 Returns : Bio::Variation::DNAMutation object
|
|
883 Args : Bio::Variation::SeqDiff object
|
|
884
|
|
885 See L<Bio::Variation::DNAMutation> and L<Bio::Variation::SeqDiff>.
|
|
886
|
|
887 =cut
|
|
888
|
|
889 sub _set_DNAMutation {
|
|
890 my ($self, $seqDiff) = @_;
|
|
891
|
|
892 my $dnamut_start = $self->mutation->label - $seqDiff->offset;
|
|
893 # if negative DNA positions (before ATG)
|
|
894 $dnamut_start-- if $dnamut_start <= 0;
|
|
895 my $dnamut_end;
|
|
896 ($self->mutation->len == 0 or $self->mutation->len == 1) ?
|
|
897 ($dnamut_end = $dnamut_start) :
|
|
898 ($dnamut_end = $dnamut_start+$self->mutation->len);
|
|
899 #print "start:$dnamut_start, end:$dnamut_end\n";
|
|
900 my $dnamut = Bio::Variation::DNAMutation->new(-start => $dnamut_start,
|
|
901 -end => $dnamut_end,
|
|
902 );
|
|
903 $dnamut->mut_number($self->mutation->issue);
|
|
904 $dnamut->isMutation(1);
|
|
905 my $da_m = Bio::Variation::Allele->new;
|
|
906 $da_m->seq($self->mutation->seq) if $self->mutation->seq;
|
|
907 $dnamut->allele_mut($da_m);
|
|
908 $dnamut->add_Allele($da_m);
|
|
909 # allele_ori
|
|
910 my $allele_ori = $self->DNA->labelsubseq($self->mutation->prelabel,
|
|
911 undef,
|
|
912 $self->mutation->postlabel); # get seq
|
|
913 chop $allele_ori; # chop the postlabel nucleotide
|
|
914 $allele_ori=substr($allele_ori,1); # away the prelabel nucleotide
|
|
915 my $da_o = Bio::Variation::Allele->new;
|
|
916 $da_o->seq($allele_ori) if $allele_ori;
|
|
917 $dnamut->allele_ori($da_o);
|
|
918 ($self->mutation->len == 0) ?
|
|
919 ($dnamut->length($self->mutation->len)) : ($dnamut->length(CORE::length $allele_ori));
|
|
920 #print " --------------- $dnamut_start -$len- $dnamut_end -\n";
|
|
921 $seqDiff->add_Variant($dnamut);
|
|
922 $self->dnamut($dnamut);
|
|
923 $dnamut->mut_number($self->mutation->issue);
|
|
924 # setting proof
|
|
925 if ($seqDiff->numbering eq "entry" or $seqDiff->numbering eq "gene") {
|
|
926 $dnamut->proof('experimental');
|
|
927 } else {
|
|
928 $dnamut->proof('computed');
|
|
929 }
|
|
930 # how many nucleotides to store upstream and downstream of the change
|
|
931 my $flanklen = $self->{'flanklen'};
|
|
932 #print `date`, " flanking sequences start\n";
|
|
933 my $uplabel = $self->DNA->label(1-$flanklen,$self->mutation->prelabel); # this could be unavailable!
|
|
934
|
|
935 my $upstreamseq;
|
|
936 if ($uplabel > 0) {
|
|
937 $upstreamseq =
|
|
938 $self->DNA->labelsubseq($uplabel, undef, $self->mutation->prelabel);
|
|
939 } else { # from start (less than $flanklen nucleotides)
|
|
940 $upstreamseq =
|
|
941 $self->DNA->labelsubseq($self->DNA->start, undef, $self->mutation->prelabel);
|
|
942 }
|
|
943 $dnamut->upStreamSeq($upstreamseq);
|
|
944 my $dnstreamseq = $self->DNA->labelsubseq($self->mutation->postlabel, $flanklen);
|
|
945 $dnamut->dnStreamSeq($dnstreamseq); # $flanklen or less nucleotides
|
|
946 return $dnamut;
|
|
947 }
|
|
948
|
|
949
|
|
950 #
|
|
951 ### Check if mutation propagates to RNA (and AA) level
|
|
952 #
|
|
953 # side effect: sets intron/exon information
|
|
954 # returns a boolean value
|
|
955 #
|
|
956
|
|
957 sub _rnaAffected {
|
|
958 my ($self) = @_;
|
|
959 my @exons=$self->RNA->all_Exons;
|
|
960 my $RNAstart=$self->RNA->start;
|
|
961 my $RNAend=$self->RNA->end;
|
|
962 my ($firstexon,$before,$after,$i);
|
|
963 my ($rnaAffected) = 0;
|
|
964
|
|
965 # check for inserted labels (that require follows instead of <,>)
|
|
966 my $DNAend=$self->RNA->{'seq'}->end;
|
|
967 if ($self->mutation->prelabel > $DNAend or $self->mutation->postlabel > $DNAend) {
|
|
968 #this means one of the two labels is an inserted one
|
|
969 #(coming from a previous mutation. This would falsify all <,>
|
|
970 #checks, so the follow() has to be used
|
|
971 $self->warn("Attention, workaround not fully tested yet! Expect unpredictable results.\n");
|
|
972 if (($self->mutation->postlabel==$RNAstart) or (follows($self->mutation->postlabel,$RNAstart))) {
|
|
973 $self->warn("RNA not affected because change occurs before RNAstart");
|
|
974 }
|
|
975 elsif (($RNAend==$self->mutation->prelabel) or (follows($RNAend,$self->mutation->prelabel))) {
|
|
976 $self->warn("RNA not affected because change occurs after RNAend");
|
|
977 }
|
|
978 elsif (scalar @exons == 1) {
|
|
979 #no introns, just one exon
|
|
980 $rnaAffected = 1; # then RNA is affected!
|
|
981 } else {
|
|
982 # otherwise check for change occurring inside an intron
|
|
983 $firstexon=shift(@exons);
|
|
984 $before=$firstexon->end;
|
|
985
|
|
986 foreach $i (0..$#exons) {
|
|
987 $after=$exons[$i]->start;
|
|
988 if (follows($self->mutation->prelabel,$before) or
|
|
989 ($after==$self->mutation->prelabel) or
|
|
990 follows($after,$self->mutation->prelabel) or
|
|
991 follows($after,$self->mutation->postlabel)) {
|
|
992
|
|
993 $rnaAffected = 1;
|
|
994 # $i is number of exon and can be used for proximity check
|
|
995 }
|
|
996 $before=$exons[$i]->end;
|
|
997 }
|
|
998
|
|
999 }
|
|
1000 } else {
|
|
1001 my $strand = $exons[0]->strand;
|
|
1002 if (($strand == 1 and $self->mutation->postlabel <= $RNAstart) or
|
|
1003 ($strand != 1 and $self->mutation->postlabel >= $RNAstart)) {
|
|
1004 #$self->warn("RNA not affected because change occurs before RNAstart");
|
|
1005 $rnaAffected = 0;
|
|
1006 }
|
|
1007 elsif (($strand == 1 and $self->mutation->prelabel >= $RNAend) or
|
|
1008 ($strand != 1 and $self->mutation->prelabel <= $RNAend)) {
|
|
1009 #$self->warn("RNA not affected because change occurs after RNAend");
|
|
1010 $rnaAffected = 0;
|
|
1011 my $dist;
|
|
1012 if ($strand == 1){
|
|
1013 $dist = $self->mutation->prelabel - $RNAend;
|
|
1014 } else {
|
|
1015 $dist = $RNAend - $self->mutation->prelabel;
|
|
1016 }
|
|
1017 $self->dnamut->region_dist($dist);
|
|
1018 }
|
|
1019 elsif (scalar @exons == 1) {
|
|
1020 #if just one exon -> no introns,
|
|
1021 $rnaAffected = 1; # then RNA is affected!
|
|
1022 } else {
|
|
1023 # otherwise check for mutation occurring inside an intron
|
|
1024 $firstexon=shift(@exons);
|
|
1025 $before=$firstexon->end;
|
|
1026 if ( ($strand == 1 and $self->mutation->prelabel < $before) or
|
|
1027 ($strand == -1 and $self->mutation->prelabel > $before)
|
|
1028 ) {
|
|
1029 $rnaAffected = 1 ;
|
|
1030
|
|
1031 #print "Exon 1 : ", $firstexon->start, " - ", $firstexon->end, "<br>\n";
|
|
1032 my $afterdist = $self->mutation->prelabel - $firstexon->start;
|
|
1033 my $beforedist = $firstexon->end - $self->mutation->postlabel;
|
|
1034 my $exonvalue = $i + 1;
|
|
1035 $self->dnamut->region('exon');
|
|
1036 $self->dnamut->region_value($exonvalue);
|
|
1037 if ($afterdist < $beforedist) {
|
|
1038 $afterdist++;
|
|
1039 $afterdist++;
|
|
1040 $self->dnamut->region_dist($afterdist);
|
|
1041 #print "splice site $afterdist nt upstream!<br>";
|
|
1042 } else {
|
|
1043 $self->dnamut->region_dist($beforedist);
|
|
1044 #print "splice site $beforedist nt downstream!<br>";
|
|
1045 }
|
|
1046 } else {
|
|
1047 #print "first exon : ", $firstexon->start, " - ", $firstexon->end, "<br>\n";
|
|
1048 foreach $i (0..$#exons) {
|
|
1049 $after=$exons[$i]->start;
|
|
1050 #proximity test for intronic mutations
|
|
1051 if ( ($strand == 1 and
|
|
1052 $self->mutation->prelabel >= $before and
|
|
1053 $self->mutation->postlabel <= $after)
|
|
1054 or
|
|
1055 ($strand == -1 and
|
|
1056 $self->mutation->prelabel <= $before and
|
|
1057 $self->mutation->postlabel >= $after) ) {
|
|
1058 $self->dnamut->region('intron');
|
|
1059 #$self->dnamut->region_value($i);
|
|
1060 my $afterdist = $self->mutation->prelabel - $before;
|
|
1061 my $beforedist = $after - $self->mutation->postlabel;
|
|
1062 my $intronvalue = $i + 1;
|
|
1063 if ($afterdist < $beforedist) {
|
|
1064 $afterdist++;
|
|
1065 $self->dnamut->region_value($intronvalue);
|
|
1066 $self->dnamut->region_dist($afterdist);
|
|
1067 #print "splice site $afterdist nt upstream!<br>";
|
|
1068 } else {
|
|
1069 $self->dnamut->region_value($intronvalue);
|
|
1070 $self->dnamut->region_dist($beforedist * -1);
|
|
1071 #print "splice site $beforedist nt downstream!<br>";
|
|
1072 }
|
|
1073 $self->rnachange(undef);
|
|
1074 last;
|
|
1075 }
|
|
1076 #proximity test for exon mutations
|
|
1077 elsif ( ( $strand == 1 and
|
|
1078 $exons[$i]->start <= $self->mutation->prelabel and
|
|
1079 $exons[$i]->end >= $self->mutation->postlabel) or
|
|
1080 ( $strand == -1 and
|
|
1081 $exons[$i]->start >= $self->mutation->prelabel and
|
|
1082 $exons[$i]->end <= $self->mutation->postlabel) ) {
|
|
1083 $rnaAffected = 1;
|
|
1084
|
|
1085 my $afterdist = $self->mutation->prelabel - $exons[$i]->start;
|
|
1086 my $beforedist = $exons[$i]->end - $self->mutation->postlabel;
|
|
1087 my $exonvalue = $i + 1;
|
|
1088 $self->dnamut->region('exon');
|
|
1089 if ($afterdist < $beforedist) {
|
|
1090 $afterdist++;
|
|
1091 $self->dnamut->region_value($exonvalue+1);
|
|
1092 $self->dnamut->region_dist($afterdist);
|
|
1093 #print "splice site $afterdist nt upstream!<br>";
|
|
1094 } else {
|
|
1095 #$beforedist;
|
|
1096 $self->dnamut->region_value($exonvalue+1);
|
|
1097 $self->dnamut->region_dist($beforedist * -1);
|
|
1098 #print "splice site $beforedist nt downstream!<br>";
|
|
1099 }
|
|
1100 last;
|
|
1101 }
|
|
1102 $before=$exons[$i]->end;
|
|
1103 }
|
|
1104 }
|
|
1105 }
|
|
1106 }
|
|
1107 #$self->warn("RNA not affected because change occurs inside an intron");
|
|
1108 #return(0); # if still not returned, then not affected, return 0
|
|
1109 return $rnaAffected;
|
|
1110 }
|
|
1111
|
|
1112 #
|
|
1113 # ### Creation of RNA and AA variation objects
|
|
1114 #
|
|
1115
|
|
1116 =head2 _set_effects
|
|
1117
|
|
1118 Title : _set_effects
|
|
1119 Usage :
|
|
1120 Function:
|
|
1121
|
|
1122 Stores RNA and AA level mutation attributes before mutation
|
|
1123 into Bio::Variation::RNAChange and
|
|
1124 Bio::Variation::AAChange objects. Links them to
|
|
1125 SeqDiff object.
|
|
1126
|
|
1127 Example :
|
|
1128 Returns :
|
|
1129 Args : Bio::Variation::SeqDiff object
|
|
1130 Bio::Variation::DNAMutation object
|
|
1131
|
|
1132 See L<Bio::Variation::RNAChange>, L<Bio::Variation::RNAChange>,
|
|
1133 L<Bio::Variation::SeqDiff>, and L<Bio::Variation::DNAMutation>.
|
|
1134
|
|
1135 =cut
|
|
1136
|
|
1137 sub _set_effects {
|
|
1138 my ($self, $seqDiff, $dnamut) = @_;
|
|
1139 my ($rnapos_end, $upstreamseq, $dnstreamseq);
|
|
1140 my $flanklen = $self->{'flanklen'};
|
|
1141
|
|
1142 ($self->mutation->len == 0) ?
|
|
1143 ($rnapos_end = $self->mutation->transpos) :
|
|
1144 ($rnapos_end = $self->mutation->transpos + $self->mutation->len -1);
|
|
1145 my $rnachange = Bio::Variation::RNAChange->new(-start => $self->mutation->transpos,
|
|
1146 -end => $rnapos_end
|
|
1147 );
|
|
1148 $rnachange->isMutation(1);
|
|
1149
|
|
1150 # setting proof
|
|
1151 if ($seqDiff->numbering eq "coding") {
|
|
1152 $rnachange->proof('experimental');
|
|
1153 } else {
|
|
1154 $rnachange->proof('computed');
|
|
1155 }
|
|
1156
|
|
1157 $seqDiff->add_Variant($rnachange);
|
|
1158 $self->rnachange($rnachange);
|
|
1159 $rnachange->DNAMutation($dnamut);
|
|
1160 $dnamut->RNAChange($rnachange);
|
|
1161 $rnachange->mut_number($self->mutation->issue);
|
|
1162
|
|
1163 # setting the codon_position of the "start" nucleotide of the change
|
|
1164 $rnachange->codon_pos(($self->RNA->frame($self->mutation->label))+1); # codon_pos=frame+1
|
|
1165
|
|
1166 my @exons=$self->RNA->all_Exons;
|
|
1167 $self->exons(\@exons);
|
|
1168 #print `date`, " before flank, after exons. RNAObj query\n";
|
|
1169 # if cannot retrieve from Transcript, Transcript::upstream_seq will be used
|
|
1170 # before "fac7 g 65" bug discovered
|
|
1171 # $uplabel=$self->RNA->label(1-$flanklen,$prelabel);
|
|
1172 my $RNAprelabel=$self->RNA->label(-1,$self->mutation->label); # to fix fac7g65 bug
|
|
1173 # for the fix, all prelabel used in the next block have been changed to RNAprelabel
|
|
1174 my $uplabel=$self->RNA->label(1-$flanklen,$RNAprelabel);
|
|
1175 if ($self->RNA->valid($uplabel)) {
|
|
1176 $upstreamseq = $self->RNA->labelsubseq($uplabel, undef, $RNAprelabel);
|
|
1177 } else {
|
|
1178 $upstreamseq = $self->RNA->labelsubseq($self->RNA->start, undef, $RNAprelabel)
|
|
1179 if $self->RNA->valid($RNAprelabel);
|
|
1180 my $lacking=$flanklen-length($upstreamseq); # how many missing
|
|
1181 my $upstream_atg=$exons[0]->subseq(-$lacking,-1);
|
|
1182 $upstreamseq=$upstream_atg . $upstreamseq;
|
|
1183 }
|
|
1184
|
|
1185 $rnachange->upStreamSeq($upstreamseq);
|
|
1186
|
|
1187 # won't work OK if postlabel NOT in Transcript
|
|
1188 # now added RNApostlabel but this has to be /fully tested/
|
|
1189 # for the fix, all postlabel used in the next block have been changed to RNApostlabel
|
|
1190 my $RNApostlabel; # to fix fac7g64 bug
|
|
1191 if ($self->mutation->len == 0) {
|
|
1192 $RNApostlabel=$self->mutation->label;
|
|
1193 } else {
|
|
1194 my $mutlen = 1 + $self->mutation->len;
|
|
1195 $RNApostlabel=$self->RNA->label($mutlen,$self->mutation->label);
|
|
1196 }
|
|
1197 $dnstreamseq=$self->RNA->labelsubseq($RNApostlabel, $flanklen);
|
|
1198 if ($dnstreamseq eq '-1') { # if out of transcript was requested
|
|
1199 my $lastexon=$exons[-1];
|
|
1200 my $lastexonlength=$lastexon->length;
|
|
1201 $dnstreamseq=$self->RNA->labelsubseq($RNApostlabel); # retrieves till RNAend
|
|
1202 my $lacking=$flanklen-length($dnstreamseq); # how many missing
|
|
1203 my $downstream_stop=$lastexon->subseq($lastexonlength+1,undef,$lacking);
|
|
1204 $dnstreamseq .= $downstream_stop;
|
|
1205 } else {
|
|
1206 $rnachange->dnStreamSeq($dnstreamseq);
|
|
1207 }
|
|
1208 # AAChange creation
|
|
1209 my $AAobj=$self->RNA->get_Translation;
|
|
1210 # storage of prelabel here, to be used in create_mut_objs_after
|
|
1211 my $aachange = Bio::Variation::AAChange->new(-start => $RNAprelabel
|
|
1212 );
|
|
1213 $aachange->isMutation(1);
|
|
1214 $aachange->proof('computed');
|
|
1215
|
|
1216 $seqDiff->add_Variant($aachange);
|
|
1217 $self->aachange($aachange);
|
|
1218 $rnachange->AAChange($aachange);
|
|
1219 $aachange->RNAChange($rnachange);
|
|
1220
|
|
1221 $aachange->mut_number($self->mutation->issue);
|
|
1222 # $before_mutation{aachange}=$aachange;
|
|
1223
|
|
1224 my $ra_o = Bio::Variation::Allele->new;
|
|
1225 $ra_o->seq($dnamut->allele_ori->seq) if $dnamut->allele_ori->seq;
|
|
1226 $rnachange->allele_ori($ra_o);
|
|
1227
|
|
1228 $rnachange->length(CORE::length $rnachange->allele_ori->seq);
|
|
1229
|
|
1230 my $ra_m = Bio::Variation::Allele->new;
|
|
1231 $ra_m->seq($self->mutation->seq) if $self->mutation->seq;
|
|
1232 $rnachange->allele_mut($ra_m);
|
|
1233 $rnachange->add_Allele($ra_m);
|
|
1234
|
|
1235 #$rnachange->allele_mut($seq);
|
|
1236 $rnachange->end($rnachange->start) if $rnachange->length == 0;
|
|
1237
|
|
1238 # this holds the aminoacid sequence that will be affected by the mutation
|
|
1239 my $aa_allele_ori=$AAobj->labelsubseq($self->mutation->label,undef,
|
|
1240 $self->mutation->lastlabel);
|
|
1241
|
|
1242 my $aa_o = Bio::Variation::Allele->new;
|
|
1243 $aa_o->seq($aa_allele_ori) if $aa_allele_ori;
|
|
1244 $aachange->allele_ori($aa_o);
|
|
1245 #$aachange->allele_ori($aa_allele_ori);
|
|
1246
|
|
1247 my $aa_length_ori = length($aa_allele_ori);
|
|
1248 $aachange->length($aa_length_ori); #print "==========$aa_length_ori\n";
|
|
1249 $aachange->end($aachange->start + $aa_length_ori - 1 );
|
|
1250 }
|
|
1251
|
|
1252 =head2 _untranslated
|
|
1253
|
|
1254 Title : _untranslated
|
|
1255 Usage :
|
|
1256 Function:
|
|
1257
|
|
1258 Stores RNA change attributes before mutation
|
|
1259 into Bio::Variation::RNAChange object. Links it to
|
|
1260 SeqDiff object.
|
|
1261
|
|
1262 Example :
|
|
1263 Returns :
|
|
1264 Args : Bio::Variation::SeqDiff object
|
|
1265 Bio::Variation::DNAMutation object
|
|
1266
|
|
1267 See L<Bio::Variation::RNAChange>, L<Bio::Variation::SeqDiff> and
|
|
1268 L<Bio::Variation::DNAMutation> for details.
|
|
1269
|
|
1270 =cut
|
|
1271
|
|
1272 sub _untranslated {
|
|
1273 my ($self, $seqDiff, $dnamut) = @_;
|
|
1274 my $rnapos_end;
|
|
1275 ($self->mutation->len == 0) ?
|
|
1276 ($rnapos_end = $self->mutation->transpos) :
|
|
1277 ($rnapos_end = $self->mutation->transpos + $self->mutation->len -1);
|
|
1278 my $rnachange = Bio::Variation::RNAChange->new(-start => $self->mutation->transpos,
|
|
1279 -end => $rnapos_end
|
|
1280 );
|
|
1281 #my $rnachange = Bio::Variation::RNAChange->new;
|
|
1282
|
|
1283 $rnachange->isMutation(1);
|
|
1284 my $ra_o = Bio::Variation::Allele->new;
|
|
1285 $ra_o->seq($dnamut->allele_ori->seq) if $dnamut->allele_ori->seq;
|
|
1286 $rnachange->allele_ori($ra_o);
|
|
1287 my $ra_m = Bio::Variation::Allele->new;
|
|
1288 $ra_m->seq($dnamut->allele_mut->seq) if $dnamut->allele_mut->seq;
|
|
1289 $rnachange->allele_mut($ra_m);
|
|
1290 $rnachange->add_Allele($ra_m);
|
|
1291 $rnachange->upStreamSeq($dnamut->upStreamSeq);
|
|
1292 $rnachange->dnStreamSeq($dnamut->dnStreamSeq);
|
|
1293 $rnachange->length($dnamut->length);
|
|
1294 $rnachange->mut_number($dnamut->mut_number);
|
|
1295 # setting proof
|
|
1296 if ($seqDiff->numbering eq "coding") {
|
|
1297 $rnachange->proof('experimental');
|
|
1298 } else {
|
|
1299 $rnachange->proof('computed');
|
|
1300 }
|
|
1301
|
|
1302 my $dist;
|
|
1303 if ($rnachange->end < 0) {
|
|
1304 $rnachange->region('5\'UTR');
|
|
1305 $dnamut->region('5\'UTR');
|
|
1306 my $dist = $dnamut->end ;
|
|
1307 $dnamut->region_dist($dist);
|
|
1308 $dist = $seqDiff->offset - $self->gene->maxtranscript->start + 1 + $dist;
|
|
1309 $rnachange->region_dist($dist);
|
|
1310 return if $dist < 1; # if mutation is not in mRNA
|
|
1311 } else {
|
|
1312 $rnachange->region('3\'UTR');
|
|
1313 $dnamut->region('3\'UTR');
|
|
1314 my $dist = $dnamut->start - $seqDiff->cds_end + $seqDiff->offset;
|
|
1315 $dnamut->region_dist($dist);
|
|
1316 $dist = $seqDiff->cds_end - $self->gene->maxtranscript->end -1 + $dist;
|
|
1317 $rnachange->region_dist($dist);
|
|
1318 return if $dist > 0; # if mutation is not in mRNA
|
|
1319 }
|
|
1320 $seqDiff->add_Variant($rnachange);
|
|
1321 $self->rnachange($rnachange);
|
|
1322 $rnachange->DNAMutation($dnamut);
|
|
1323 $dnamut->RNAChange($rnachange);
|
|
1324 }
|
|
1325
|
|
1326 # args: reference to label changearray, reference to position changearray
|
|
1327 # Function: take care of the creation of mutation objects, with
|
|
1328 # information AFTER the change takes place
|
|
1329 sub _post_mutation {
|
|
1330 my ($self, $seqDiff) = @_;
|
|
1331
|
|
1332 if ($self->rnachange and $self->rnachange->region eq 'coding') {
|
|
1333
|
|
1334 #$seqDiff->add_Variant($self->rnachange);
|
|
1335
|
|
1336 my $aachange=$self->aachange;
|
|
1337 my ($AAobj,$aa_start_prelabel,$aa_start,$mut_translation);
|
|
1338 $AAobj=$self->RNA->get_Translation;
|
|
1339 $aa_start_prelabel=$aachange->start;
|
|
1340 $aa_start=$AAobj->position($self->RNA->label(2,$aa_start_prelabel));
|
|
1341 $aachange->start($aa_start);
|
|
1342 $mut_translation=$AAobj->seq;
|
|
1343
|
|
1344 # this now takes in account possible preinsertions
|
|
1345 my $aa_m = Bio::Variation::Allele->new;
|
|
1346 $aa_m->seq(substr($mut_translation,$aa_start-1)) if substr($mut_translation,$aa_start-1);
|
|
1347 $aachange->allele_mut($aa_m);
|
|
1348 $aachange->add_Allele($aa_m);
|
|
1349 #$aachange->allele_mut(substr($mut_translation,$aa_start-1));
|
|
1350 #$aachange->allele_mut($mut_translation);
|
|
1351 my ($rlenori, $rlenmut);
|
|
1352 $rlenori = CORE::length($aachange->RNAChange->allele_ori->seq);
|
|
1353 $rlenmut = CORE::length($aachange->RNAChange->allele_mut->seq);
|
|
1354 #point mutation
|
|
1355
|
|
1356 if ($rlenori == 1 and $rlenmut == 1 and $aachange->allele_ori->seq ne '*') {
|
|
1357 my $alleleseq;
|
|
1358 if ($aachange->allele_mut->seq) {
|
|
1359 $alleleseq = substr($aachange->allele_mut->seq, 0, 1);
|
|
1360 $aachange->allele_mut->seq($alleleseq);
|
|
1361 }
|
|
1362 $aachange->end($aachange->start);
|
|
1363 $aachange->length(1);
|
|
1364 }
|
|
1365 elsif ( $rlenori == $rlenmut and
|
|
1366 $aachange->allele_ori->seq ne '*' ) { #complex inframe mutation
|
|
1367 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq,
|
|
1368 0,
|
|
1369 length($aachange->allele_ori->seq));
|
|
1370 }
|
|
1371 #inframe mutation
|
|
1372 elsif ((int($rlenori-$rlenmut))%3 == 0) {
|
|
1373 if ($aachange->RNAChange->allele_mut->seq and
|
|
1374 $aachange->RNAChange->allele_ori->seq ) {
|
|
1375 # complex
|
|
1376 my $rna_len = length ($aachange->RNAChange->allele_mut->seq);
|
|
1377 my $len = $rna_len/3;
|
|
1378 $len++ unless $rna_len%3 == 0;
|
|
1379 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0, $len );
|
|
1380 }
|
|
1381 elsif ($aachange->RNAChange->codon_pos == 1){
|
|
1382 # deletion
|
|
1383 if ($aachange->RNAChange->allele_mut->seq eq '') {
|
|
1384 $aachange->allele_mut->seq('');
|
|
1385 $aachange->end($aachange->start + $aachange->length - 1 );
|
|
1386 }
|
|
1387 # insertion
|
|
1388 elsif ($aachange->RNAChange->allele_ori->seq eq '' ) {
|
|
1389 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0,
|
|
1390 length ($aachange->RNAChange->allele_mut->seq) / 3);
|
|
1391 $aachange->allele_ori->seq('');
|
|
1392 $aachange->end($aachange->start + $aachange->length - 1 );
|
|
1393 $aachange->length(0);
|
|
1394 }
|
|
1395 } else {
|
|
1396 #elsif ($aachange->RNAChange->codon_pos == 2){
|
|
1397 # deletion
|
|
1398 if (not $aachange->RNAChange->allele_mut->seq ) {
|
|
1399 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0, 1);
|
|
1400 }
|
|
1401 # insertion
|
|
1402 elsif (not $aachange->RNAChange->allele_ori->seq) {
|
|
1403 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0,
|
|
1404 length ($aachange->RNAChange->allele_mut->seq) / 3 +1);
|
|
1405 }
|
|
1406 }
|
|
1407 } else {
|
|
1408 #frameshift
|
|
1409 #my $pos = index $aachange->allele_mut
|
|
1410 #$aachange->allele_mut(substr($aachange->allele_mut, 0, 1));
|
|
1411 $aachange->length(CORE::length($aachange->allele_ori->seq));
|
|
1412 my $aaend = $aachange->start + $aachange->length -1;
|
|
1413 $aachange->end($aachange->start);
|
|
1414 }
|
|
1415
|
|
1416 # splicing site deletion check
|
|
1417 my @beforeexons=@{$self->exons};
|
|
1418 my @afterexons=$self->RNA->all_Exons;
|
|
1419 my $i;
|
|
1420 if (scalar(@beforeexons) ne scalar(@afterexons)) {
|
|
1421 my $mut_number = $self->mutation->issue;
|
|
1422 $self->warn("Exons have been modified at mutation n.$mut_number!");
|
|
1423 $self->rnachange->exons_modified(1);
|
|
1424 } else {
|
|
1425 EXONCHECK:
|
|
1426 foreach $i (0..$#beforeexons) {
|
|
1427 if ($beforeexons[$i] ne $afterexons[$i]) {
|
|
1428 my $mut_number = $self->mutation->issue;
|
|
1429 $self->warn("Exons have been modified at mutation n.$mut_number!");
|
|
1430 $self->rnachange->exons_modified(1);
|
|
1431 last EXONCHECK;
|
|
1432 }
|
|
1433 }
|
|
1434 }
|
|
1435 } else {
|
|
1436 #$seqDiff->rnachange(undef);
|
|
1437 #print "getting here?";
|
|
1438 }
|
|
1439 return 1;
|
|
1440 }
|
|
1441
|
|
1442 1;
|