Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/LiveSeq/Mutator.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: Mutator.pm,v 1.26 2002/10/22 07:38:34 lapp Exp $ | |
| 2 # | |
| 3 # bioperl module for Bio::LiveSeq::Mutator | |
| 4 # | |
| 5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> | |
| 6 # | |
| 7 # Copyright Joseph Insana | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 # | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::LiveSeq::Mutator - Package mutating LiveSequences | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 # $gene is a Bio::LiveSeq::Gene object | |
| 20 my $mutate = Bio::LiveSeq::Mutator->new('-gene' => $gene, | |
| 21 '-numbering' => "coding" | |
| 22 ); | |
| 23 # $mut is a Bio::LiveSeq::Mutation object | |
| 24 $mutate->add_Mutation($mut); | |
| 25 # $results is a Bio::Variation::SeqDiff object | |
| 26 my $results=$mutate->change_gene(); | |
| 27 if ($results) { | |
| 28 my $out = Bio::Variation::IO->new( '-format' => 'flat'); | |
| 29 $out->write($results); | |
| 30 } | |
| 31 | |
| 32 =head1 DESCRIPTION | |
| 33 | |
| 34 This class mutates Bio::LiveSeq::Gene objects and returns a | |
| 35 Bio::Variation::SeqDiff object. Mutations are described as | |
| 36 Bio::LiveSeq::Mutation objects. See L<Bio::LiveSeq::Gene>, | |
| 37 L<Bio::Variation::SeqDiff>, and L<Bio::LiveSeq::Mutation> for details. | |
| 38 | |
| 39 =head1 FEEDBACK | |
| 40 | |
| 41 | |
| 42 User feedback is an integral part of the evolution of this and other | |
| 43 Bioperl modules. Send your comments and suggestions preferably to the | |
| 44 Bioperl mailing lists Your participation is much appreciated. | |
| 45 | |
| 46 bioperl-l@bioperl.org - General discussion | |
| 47 http://bio.perl.org/MailList.html - About the mailing lists | |
| 48 | |
| 49 =head2 Reporting Bugs | |
| 50 | |
| 51 report bugs to the Bioperl bug tracking system to help us keep track | |
| 52 the bugs and their resolution. Bug reports can be submitted via | |
| 53 email or the web: | |
| 54 | |
| 55 bioperl-bugs@bio.perl.org | |
| 56 http://bugzilla.bioperl.org/ | |
| 57 | |
| 58 =head1 AUTHOR - Heikki Lehvaslaiho & Joseph A.L. Insana | |
| 59 | |
| 60 Email: heikki@ebi.ac.uk | |
| 61 insana@ebi.ac.uk, jinsana@gmx.net | |
| 62 | |
| 63 Address: | |
| 64 | |
| 65 EMBL Outstation, European Bioinformatics Institute | |
| 66 Wellcome Trust Genome Campus, Hinxton | |
| 67 Cambs. CB10 1SD, United Kingdom | |
| 68 | |
| 69 =head1 APPENDIX | |
| 70 | |
| 71 The rest of the documentation details each of the object | |
| 72 methods. Internal methods are usually preceded with a _ | |
| 73 | |
| 74 =cut | |
| 75 | |
| 76 # Let the code begin... | |
| 77 | |
| 78 package Bio::LiveSeq::Mutator; | |
| 79 use vars qw(@ISA); | |
| 80 use strict; | |
| 81 | |
| 82 use vars qw($VERSION @ISA); | |
| 83 use Bio::Variation::SeqDiff; | |
| 84 use Bio::Variation::DNAMutation; | |
| 85 use Bio::Variation::RNAChange; | |
| 86 use Bio::Variation::AAChange; | |
| 87 use Bio::Variation::Allele; | |
| 88 use Bio::LiveSeq::Mutation; | |
| 89 | |
| 90 #use integer; | |
| 91 # Object preamble - inheritance | |
| 92 | |
| 93 use Bio::Root::Root; | |
| 94 | |
| 95 @ISA = qw( Bio::Root::Root ); | |
| 96 | |
| 97 sub new { | |
| 98 my($class,@args) = @_; | |
| 99 my $self; | |
| 100 $self = {}; | |
| 101 bless $self, $class; | |
| 102 | |
| 103 my ($gene, $numbering) = | |
| 104 $self->_rearrange([qw(GENE | |
| 105 NUMBERING | |
| 106 )], | |
| 107 @args); | |
| 108 | |
| 109 $self->{ 'mutations' } = []; | |
| 110 | |
| 111 $gene && $self->gene($gene); | |
| 112 $numbering && $self->numbering($numbering); | |
| 113 | |
| 114 #class constant; | |
| 115 $self->{'flanklen'} = 25; | |
| 116 return $self; # success - we hope! | |
| 117 } | |
| 118 | |
| 119 =head2 gene | |
| 120 | |
| 121 Title : gene | |
| 122 Usage : $mutobj = $obj->gene; | |
| 123 : $mutobj = $obj->gene($objref); | |
| 124 Function: | |
| 125 | |
| 126 Returns or sets the link-reference to a | |
| 127 Bio::LiveSeq::Gene object. If no value has ben set, it | |
| 128 will return undef | |
| 129 | |
| 130 Returns : an object reference or undef | |
| 131 Args : a Bio::LiveSeq::Gene | |
| 132 | |
| 133 See L<Bio::LiveSeq::Gene> for more information. | |
| 134 | |
| 135 =cut | |
| 136 | |
| 137 sub gene { | |
| 138 my ($self,$value) = @_; | |
| 139 if (defined $value) { | |
| 140 if( ! $value->isa('Bio::LiveSeq::Gene') ) { | |
| 141 $self->throw("Is not a Bio::LiveSeq::Gene object but a [$value]"); | |
| 142 return undef; | |
| 143 } | |
| 144 else { | |
| 145 $self->{'gene'} = $value; | |
| 146 } | |
| 147 } | |
| 148 unless (exists $self->{'gene'}) { | |
| 149 return (undef); | |
| 150 } else { | |
| 151 return $self->{'gene'}; | |
| 152 } | |
| 153 } | |
| 154 | |
| 155 | |
| 156 =head2 numbering | |
| 157 | |
| 158 Title : numbering | |
| 159 Usage : $obj->numbering(); | |
| 160 Function: | |
| 161 | |
| 162 Sets and returns coordinate system used in positioning the | |
| 163 mutations. See L<change_gene> for details. | |
| 164 | |
| 165 Example : | |
| 166 Returns : string | |
| 167 Args : string (coding [transcript number] | gene | entry) | |
| 168 | |
| 169 =cut | |
| 170 | |
| 171 | |
| 172 sub numbering { | |
| 173 my ($self,$value) = @_; | |
| 174 if( defined $value) { | |
| 175 if ($value =~ /(coding)( )?(\d+)?/ or $value eq 'entry' or $value eq 'gene') { | |
| 176 $self->{'numbering'} = $value; | |
| 177 } else { # defaulting to 'coding' | |
| 178 $self->{'numbering'} = 'coding'; | |
| 179 } | |
| 180 } | |
| 181 unless (exists $self->{'numbering'}) { | |
| 182 return 'coding'; | |
| 183 } else { | |
| 184 return $self->{'numbering'}; | |
| 185 } | |
| 186 } | |
| 187 | |
| 188 =head2 add_Mutation | |
| 189 | |
| 190 Title : add_Mutation | |
| 191 Usage : $self->add_Mutation($ref) | |
| 192 Function: adds a Bio::LiveSeq::Mutation object | |
| 193 Example : | |
| 194 Returns : | |
| 195 Args : a Bio::LiveSeq::Mutation | |
| 196 | |
| 197 See L<Bio::LiveSeq::Mutation> for more information. | |
| 198 | |
| 199 =cut | |
| 200 | |
| 201 sub add_Mutation{ | |
| 202 my ($self,$value) = @_; | |
| 203 if( $value->isa('Bio::Liveseq::Mutation') ) { | |
| 204 my $com = ref $value; | |
| 205 $self->throw("Is not a Mutation object but a [$com]" ); | |
| 206 return undef; | |
| 207 } | |
| 208 if (! $value->pos) { | |
| 209 $self->warn("No value for mutation position in the sequence!"); | |
| 210 return undef; | |
| 211 } | |
| 212 if (! $value->seq && ! $value->len) { | |
| 213 $self->warn("Either mutated sequence or length of the deletion must be given!"); | |
| 214 return undef; | |
| 215 } | |
| 216 push(@{$self->{'mutations'}},$value); | |
| 217 } | |
| 218 | |
| 219 =head2 each_Mutation | |
| 220 | |
| 221 Title : each_Mutation | |
| 222 Usage : foreach $ref ( $a->each_Mutation ) | |
| 223 Function: gets an array of Bio::LiveSeq::Mutation objects | |
| 224 Example : | |
| 225 Returns : array of Mutations | |
| 226 Args : | |
| 227 | |
| 228 See L<Bio::LiveSeq::Mutation> for more information. | |
| 229 | |
| 230 =cut | |
| 231 | |
| 232 sub each_Mutation{ | |
| 233 my ($self) = @_; | |
| 234 return @{$self->{'mutations'}}; | |
| 235 } | |
| 236 | |
| 237 | |
| 238 =head2 mutation | |
| 239 | |
| 240 Title : mutation | |
| 241 Usage : $mutobj = $obj->mutation; | |
| 242 : $mutobj = $obj->mutation($objref); | |
| 243 Function: | |
| 244 | |
| 245 Returns or sets the link-reference to the current mutation | |
| 246 object. If the value is not set, it will return undef. | |
| 247 Internal method. | |
| 248 | |
| 249 Returns : an object reference or undef | |
| 250 | |
| 251 =cut | |
| 252 | |
| 253 | |
| 254 sub mutation { | |
| 255 my ($self,$value) = @_; | |
| 256 if (defined $value) { | |
| 257 if( ! $value->isa('Bio::LiveSeq::Mutation') ) { | |
| 258 $self->throw("Is not a Bio::LiveSeq::Mutation object but a [$value]"); | |
| 259 return undef; | |
| 260 } | |
| 261 else { | |
| 262 $self->{'mutation'} = $value; | |
| 263 } | |
| 264 } | |
| 265 unless (exists $self->{'mutation'}) { | |
| 266 return (undef); | |
| 267 } else { | |
| 268 return $self->{'mutation'}; | |
| 269 } | |
| 270 } | |
| 271 | |
| 272 =head2 DNA | |
| 273 | |
| 274 Title : DNA | |
| 275 Usage : $mutobj = $obj->DNA; | |
| 276 : $mutobj = $obj->DNA($objref); | |
| 277 Function: | |
| 278 | |
| 279 Returns or sets the reference to the LiveSeq object holding | |
| 280 the reference sequence. If there is no link, it will return | |
| 281 undef. | |
| 282 Internal method. | |
| 283 | |
| 284 Returns : an object reference or undef | |
| 285 | |
| 286 =cut | |
| 287 | |
| 288 sub DNA { | |
| 289 my ($self,$value) = @_; | |
| 290 if (defined $value) { | |
| 291 if( ! $value->isa('Bio::LiveSeq::DNA') and ! $value->isa('Bio::LiveSeq::Transcript') ) { | |
| 292 $self->throw("Is not a Bio::LiveSeq::DNA/Transcript object but a [$value]"); | |
| 293 return undef; | |
| 294 } | |
| 295 else { | |
| 296 $self->{'DNA'} = $value; | |
| 297 } | |
| 298 } | |
| 299 unless (exists $self->{'DNA'}) { | |
| 300 return (undef); | |
| 301 } else { | |
| 302 return $self->{'DNA'}; | |
| 303 } | |
| 304 } | |
| 305 | |
| 306 | |
| 307 =head2 RNA | |
| 308 | |
| 309 Title : RNA | |
| 310 Usage : $mutobj = $obj->RNA; | |
| 311 : $mutobj = $obj->RNA($objref); | |
| 312 Function: | |
| 313 | |
| 314 Returns or sets the reference to the LiveSeq object holding | |
| 315 the reference sequence. If the value is not set, it will return | |
| 316 undef. | |
| 317 Internal method. | |
| 318 | |
| 319 Returns : an object reference or undef | |
| 320 | |
| 321 =cut | |
| 322 | |
| 323 | |
| 324 sub RNA { | |
| 325 my ($self,$value) = @_; | |
| 326 if (defined $value) { | |
| 327 if( ! $value->isa('Bio::LiveSeq::Transcript') ) { | |
| 328 $self->throw("Is not a Bio::LiveSeq::RNA/Transcript object but a [$value]"); | |
| 329 return undef; | |
| 330 } | |
| 331 else { | |
| 332 $self->{'RNA'} = $value; | |
| 333 } | |
| 334 } | |
| 335 unless (exists $self->{'RNA'}) { | |
| 336 return (undef); | |
| 337 } else { | |
| 338 return $self->{'RNA'}; | |
| 339 } | |
| 340 } | |
| 341 | |
| 342 | |
| 343 =head2 dnamut | |
| 344 | |
| 345 Title : dnamut | |
| 346 Usage : $mutobj = $obj->dnamut; | |
| 347 : $mutobj = $obj->dnamut($objref); | |
| 348 Function: | |
| 349 | |
| 350 Returns or sets the reference to the current DNAMutation object. | |
| 351 If the value is not set, it will return undef. | |
| 352 Internal method. | |
| 353 | |
| 354 Returns : a Bio::Variation::DNAMutation object or undef | |
| 355 | |
| 356 See L<Bio::Variation::DNAMutation> for more information. | |
| 357 | |
| 358 =cut | |
| 359 | |
| 360 | |
| 361 sub dnamut { | |
| 362 my ($self,$value) = @_; | |
| 363 if (defined $value) { | |
| 364 if( ! $value->isa('Bio::Variation::DNAMutation') ) { | |
| 365 $self->throw("Is not a Bio::Variation::DNAMutation object but a [$value]"); | |
| 366 return undef; | |
| 367 } | |
| 368 else { | |
| 369 $self->{'dnamut'} = $value; | |
| 370 } | |
| 371 } | |
| 372 unless (exists $self->{'dnamut'}) { | |
| 373 return (undef); | |
| 374 } else { | |
| 375 return $self->{'dnamut'}; | |
| 376 } | |
| 377 } | |
| 378 | |
| 379 | |
| 380 =head2 rnachange | |
| 381 | |
| 382 Title : rnachange | |
| 383 Usage : $mutobj = $obj->rnachange; | |
| 384 : $mutobj = $obj->rnachange($objref); | |
| 385 Function: | |
| 386 | |
| 387 Returns or sets the reference to the current RNAChange object. | |
| 388 If the value is not set, it will return undef. | |
| 389 Internal method. | |
| 390 | |
| 391 Returns : a Bio::Variation::RNAChange object or undef | |
| 392 | |
| 393 See L<Bio::Variation::RNAChange> for more information. | |
| 394 | |
| 395 =cut | |
| 396 | |
| 397 | |
| 398 sub rnachange { | |
| 399 my ($self,$value) = @_; | |
| 400 if (defined $value) { | |
| 401 if( ! $value->isa('Bio::Variation::RNAChange') ) { | |
| 402 $self->throw("Is not a Bio::Variation::RNAChange object but a [$value]"); | |
| 403 return undef; | |
| 404 } | |
| 405 else { | |
| 406 $self->{'rnachange'} = $value; | |
| 407 } | |
| 408 } | |
| 409 unless (exists $self->{'rnachange'}) { | |
| 410 return (undef); | |
| 411 } else { | |
| 412 return $self->{'rnachange'}; | |
| 413 } | |
| 414 } | |
| 415 | |
| 416 | |
| 417 =head2 aachange | |
| 418 | |
| 419 Title : aachange | |
| 420 Usage : $mutobj = $obj->aachange; | |
| 421 : $mutobj = $obj->aachange($objref); | |
| 422 Function: | |
| 423 | |
| 424 Returns or sets the reference to the current AAChange object. | |
| 425 If the value is not set, it will return undef. | |
| 426 Internal method. | |
| 427 | |
| 428 Returns : a Bio::Variation::AAChange object or undef | |
| 429 | |
| 430 See L<Bio::Variation::AAChange> for more information. | |
| 431 | |
| 432 =cut | |
| 433 | |
| 434 | |
| 435 sub aachange { | |
| 436 my ($self,$value) = @_; | |
| 437 if (defined $value) { | |
| 438 if( ! $value->isa('Bio::Variation::AAChange') ) { | |
| 439 $self->throw("Is not a Bio::Variation::AAChange object but a [$value]"); | |
| 440 return undef; | |
| 441 } | |
| 442 else { | |
| 443 $self->{'aachange'} = $value; | |
| 444 } | |
| 445 } | |
| 446 unless (exists $self->{'aachange'}) { | |
| 447 return (undef); | |
| 448 } else { | |
| 449 return $self->{'aachange'}; | |
| 450 } | |
| 451 } | |
| 452 | |
| 453 | |
| 454 =head2 exons | |
| 455 | |
| 456 Title : exons | |
| 457 Usage : $mutobj = $obj->exons; | |
| 458 : $mutobj = $obj->exons($objref); | |
| 459 Function: | |
| 460 | |
| 461 Returns or sets the reference to a current array of Exons. | |
| 462 If the value is not set, it will return undef. | |
| 463 Internal method. | |
| 464 | |
| 465 Returns : an array of Bio::LiveSeq::Exon objects or undef | |
| 466 | |
| 467 See L<Bio::LiveSeq::Exon> for more information. | |
| 468 | |
| 469 =cut | |
| 470 | |
| 471 | |
| 472 sub exons { | |
| 473 my ($self,$value) = @_; | |
| 474 if (defined $value) { | |
| 475 $self->{'exons'} = $value; | |
| 476 } | |
| 477 unless (exists $self->{'exons'}) { | |
| 478 return (undef); | |
| 479 } else { | |
| 480 return $self->{'exons'}; | |
| 481 } | |
| 482 } | |
| 483 | |
| 484 =head2 change_gene_with_alignment | |
| 485 | |
| 486 Title : change_gene_with_alignment | |
| 487 Usage : $results=$mutate->change_gene_with_alignment($aln); | |
| 488 | |
| 489 Function: | |
| 490 | |
| 491 Returns a Bio::Variation::SeqDiff object containing the | |
| 492 results of the changes in the alignment. The alignment has | |
| 493 to be pairwise and have one sequence named 'QUERY', the | |
| 494 other one is assumed to be a part of the sequence from | |
| 495 $gene. | |
| 496 | |
| 497 This method offers a shortcut to change_gene and | |
| 498 automates the creation of Bio::LiveSeq::Mutation objects. | |
| 499 Use it with almost identical sequnces, e.g. to locate a SNP. | |
| 500 | |
| 501 Args : Bio::SimpleAlign object representing a short local alignment | |
| 502 Returns : Bio::Variation::SeqDiff object or 0 on error | |
| 503 | |
| 504 See L<Bio::LiveSeq::Mutation>, L<Bio::SimpleAlign>, and | |
| 505 L<Bio::Variation::SeqDiff> for more information. | |
| 506 | |
| 507 =cut | |
| 508 | |
| 509 sub change_gene_with_alignment { | |
| 510 my ($self, $aln) = @_; | |
| 511 | |
| 512 # | |
| 513 # Sanity checks | |
| 514 # | |
| 515 | |
| 516 $self->throw("Argument is not a Bio::SimpleAlign object but a [$aln]") | |
| 517 unless $aln->isa('Bio::SimpleAlign'); | |
| 518 $self->throw("'Pairwise alignments only, please") | |
| 519 if $aln->no_sequences != 2; | |
| 520 | |
| 521 # find out the order the two sequences are given | |
| 522 my $queryseq_pos = 1; #default | |
| 523 my $refseq_pos = 2; | |
| 524 unless ($aln->get_seq_by_pos(1)->id eq 'QUERY') { | |
| 525 carp('Query sequence has to be named QUERY') | |
| 526 if $aln->get_seq_by_pos(2)->id ne 'QUERY'; | |
| 527 $queryseq_pos = 2; # alternative | |
| 528 $refseq_pos = 1; | |
| 529 } | |
| 530 | |
| 531 # trim the alignment | |
| 532 my $start = $aln->column_from_residue_number('QUERY', 1); | |
| 533 my $end = $aln->column_from_residue_number('QUERY', | |
| 534 $aln->get_seq_by_pos($queryseq_pos)->end ); | |
| 535 | |
| 536 my $aln2 = $aln->slice($start, $end); | |
| 537 | |
| 538 # | |
| 539 # extracting mutations | |
| 540 # | |
| 541 | |
| 542 my $cs = $aln2->consensus_string(51); | |
| 543 my $queryseq = $aln2->get_seq_by_pos($queryseq_pos); | |
| 544 my $refseq = $aln2->get_seq_by_pos($refseq_pos); | |
| 545 | |
| 546 while ( $cs =~ /(\?+)/g) { | |
| 547 # pos in local coordinates | |
| 548 my $pos = pos($cs) - length($1) + 1; | |
| 549 my $mutation = create_mutation($self, | |
| 550 $refseq, | |
| 551 $queryseq, | |
| 552 $pos, | |
| 553 CORE::length($1) | |
| 554 ); | |
| 555 # reset pos to refseq coordinates | |
| 556 $pos += $refseq->start - 1; | |
| 557 $mutation->pos($pos); | |
| 558 | |
| 559 $self->add_Mutation($mutation); | |
| 560 } | |
| 561 return $self->change_gene(); | |
| 562 } | |
| 563 | |
| 564 =head2 create_mutation | |
| 565 | |
| 566 Title : create_mutation | |
| 567 Usage : | |
| 568 Function: | |
| 569 | |
| 570 Formats sequence differences from two sequences into | |
| 571 Bio::LiveSeq::Mutation objects which can be applied to a | |
| 572 gene. | |
| 573 | |
| 574 To keep it generic, sequence arguments need not to be | |
| 575 Bio::LocatableSeq. Coordinate change to parent sequence | |
| 576 numbering needs to be done by the calling code. | |
| 577 | |
| 578 Called from change_gene_with_alignment | |
| 579 | |
| 580 Args : Bio::PrimarySeqI inheriting object for the reference sequence | |
| 581 Bio::PrimarySeqI inheriting object for the query sequence | |
| 582 integer for the start position of the local sequence difference | |
| 583 integer for the length of the sequence difference | |
| 584 Returns : Bio::LiveSeq::Mutation object | |
| 585 | |
| 586 =cut | |
| 587 | |
| 588 sub create_mutation { | |
| 589 my ($self, $refseq, $queryseq, $pos, $len) = @_; | |
| 590 | |
| 591 $self->throw("Is not a Bio::PrimarySeqI object but a [$refseq]") | |
| 592 unless $refseq->isa('Bio::PrimarySeqI'); | |
| 593 $self->throw("Is not a Bio::PrimarySeqI object but a [$queryseq]") | |
| 594 unless $queryseq->isa('Bio::PrimarySeqI'); | |
| 595 $self->throw("Position is not a positive integer but [$pos]") | |
| 596 unless $pos =~ /^\+?\d+$/; | |
| 597 $self->throw("Length is not a positive integer but [$len]") | |
| 598 unless $len =~ /^\+?\d+$/; | |
| 599 | |
| 600 my $mutation; | |
| 601 my $refstring = $refseq->subseq($pos, $pos + $len - 1); | |
| 602 my $varstring = $queryseq->subseq($pos, $pos + $len - 1); | |
| 603 | |
| 604 if ($len == 1 and $refstring =~ /[^\.\-\*\?]/ and | |
| 605 $varstring =~ /[^\.\-\*\?]/ ) { #point | |
| 606 $mutation = new Bio::LiveSeq::Mutation (-seq => $varstring, | |
| 607 -pos => $pos, | |
| 608 ); | |
| 609 } | |
| 610 elsif ( $refstring =~ /^[^\.\-\*\?]+$/ and | |
| 611 $varstring !~ /^[^\.\-\*\?]+$/ ) { # deletion | |
| 612 $mutation = new Bio::LiveSeq::Mutation (-pos => $pos, | |
| 613 -len => $len | |
| 614 ); | |
| 615 } | |
| 616 elsif ( $refstring !~ /^[^\.\-\*\?]+$/ and | |
| 617 $varstring =~ /^[^\.\-\*\?]+$/ ) { # insertion | |
| 618 $mutation = new Bio::LiveSeq::Mutation (-seq => $varstring, | |
| 619 -pos => $pos, | |
| 620 -len => 0 | |
| 621 ); | |
| 622 } else { # complex | |
| 623 $mutation = new Bio::LiveSeq::Mutation (-seq => $varstring, | |
| 624 -pos => $pos, | |
| 625 -len => $len | |
| 626 ); | |
| 627 } | |
| 628 | |
| 629 return $mutation; | |
| 630 } | |
| 631 | |
| 632 =head2 change_gene | |
| 633 | |
| 634 Title : change_gene | |
| 635 Usage : my $mutate = Bio::LiveSeq::Mutator->new(-gene => $gene, | |
| 636 numbering => "coding" | |
| 637 ); | |
| 638 # $mut is Bio::LiveSeq::Mutation object | |
| 639 $mutate->add_Mutation($mut); | |
| 640 my $results=$mutate->change_gene(); | |
| 641 | |
| 642 Function: | |
| 643 | |
| 644 Returns a Bio::Variation::SeqDiff object containing the | |
| 645 results of the changes performed according to the | |
| 646 instructions present in Mutation(s). The -numbering | |
| 647 argument decides what molecule is being changed and what | |
| 648 numbering scheme being used: | |
| 649 | |
| 650 -numbering => "entry" | |
| 651 | |
| 652 determines the DNA level, using the numbering from the | |
| 653 beginning of the sequence | |
| 654 | |
| 655 -numbering => "coding" | |
| 656 | |
| 657 determines the RNA level, using the numbering from the | |
| 658 beginning of the 1st transcript | |
| 659 | |
| 660 Alternative transcripts can be used by specifying | |
| 661 "coding 2" or "coding 3" ... | |
| 662 | |
| 663 -numbering => "gene" | |
| 664 | |
| 665 determines the DNA level, using the numbering from the | |
| 666 beginning of the 1st transcript and inluding introns. | |
| 667 The meaning equals 'coding' if the reference molecule | |
| 668 is cDNA. | |
| 669 | |
| 670 Args : Bio::LiveSeq::Gene object | |
| 671 Bio::LiveSeq::Mutation object(s) | |
| 672 string specifying a numbering scheme (defaults to 'coding') | |
| 673 Returns : Bio::Variation::SeqDiff object or 0 on error | |
| 674 | |
| 675 =cut | |
| 676 | |
| 677 sub change_gene { | |
| 678 my ($self) = @_; | |
| 679 | |
| 680 # | |
| 681 # Sanity check | |
| 682 # | |
| 683 unless ($self->gene) { | |
| 684 $self->warn("Input object Bio::LiveSeq::Gene is not given"); | |
| 685 return 0; | |
| 686 } | |
| 687 # | |
| 688 # Setting the reference sequence based on -numbering | |
| 689 # | |
| 690 my @transcripts=@{$self->gene->get_Transcripts}; | |
| 691 my $refseq; # will hold Bio::LiveSeq:Transcript object or Bio::LiveSeq::DNA | |
| 692 | |
| 693 # 'gene' eq 'coding' if reference sequence is cDNA | |
| 694 $self->numbering ('coding') if $self->gene->get_DNA->alphabet eq 'rna' and $self->numbering eq 'gene'; | |
| 695 | |
| 696 if ($self->numbering =~ /(coding)( )?(\d+)?/ ) { | |
| 697 $self->numbering($1); | |
| 698 my $transnumber = $3; | |
| 699 $transnumber-- if $3; # 1 -> 0, 2 -> 1 | |
| 700 if ($transnumber && $transnumber >= 0 && $transnumber <= $#transcripts) { | |
| 701 $refseq=$transcripts[$transnumber]; | |
| 702 } else { | |
| 703 $transnumber && $self->warn("The alternative transcript number ". $transnumber+1 . | |
| 704 "- does not exist. Reverting to the 1st transcript\n"); | |
| 705 $refseq=$transcripts[0]; | |
| 706 } | |
| 707 } else { | |
| 708 $refseq=$transcripts[0]->{'seq'}; | |
| 709 } | |
| 710 # | |
| 711 # Recording the state: SeqDiff object creation ?? transcript no.?? | |
| 712 # | |
| 713 my $seqDiff = Bio::Variation::SeqDiff->new(); | |
| 714 $seqDiff->alphabet($self->gene->get_DNA->alphabet); | |
| 715 $seqDiff->numbering($self->numbering); | |
| 716 my ($DNAobj, $RNAobj); | |
| 717 if ($refseq->isa("Bio::LiveSeq::Transcript")) { | |
| 718 $self->RNA($refseq); | |
| 719 $self->DNA($refseq->{'seq'}); | |
| 720 $seqDiff->rna_ori($refseq->seq ); | |
| 721 $seqDiff->aa_ori($refseq->get_Translation->seq); | |
| 722 } else { | |
| 723 $self->DNA($refseq); | |
| 724 $self->RNA($transcripts[0]); | |
| 725 $seqDiff->rna_ori($self->RNA->seq); | |
| 726 $seqDiff->aa_ori($self->RNA->get_Translation->seq); | |
| 727 } | |
| 728 $seqDiff->dna_ori($self->DNA->seq); | |
| 729 # put the accession number into the SeqDiff object ID | |
| 730 $seqDiff->id($self->DNA->accession_number); | |
| 731 | |
| 732 # the atg_offset takes in account that DNA object could be a subset of the | |
| 733 # whole entry (via the light_weight loader) | |
| 734 my $atg_label=$self->RNA->start; | |
| 735 my $atg_offset=$self->DNA->position($atg_label)+($self->DNA->start)-1; | |
| 736 $seqDiff->offset($atg_offset - 1); | |
| 737 $self->DNA->coordinate_start($atg_label); | |
| 738 | |
| 739 my @exons = $self->RNA->all_Exons; | |
| 740 $seqDiff->cds_end($exons[$#exons]->end); | |
| 741 | |
| 742 # | |
| 743 # Converting mutation positions to labels | |
| 744 # | |
| 745 $self->warn("no mutations"), return 0 | |
| 746 unless $self->_mutationpos2label($refseq, $seqDiff); | |
| 747 | |
| 748 # need to add more than one rna & aa | |
| 749 #foreach $transcript (@transcripts) { | |
| 750 # $seqDiff{"ori_transcript_${i}_seq"}=$transcript->seq; | |
| 751 # $seqDiff{"ori_translation_${i}_seq"}=$transcript->get_Translation->seq; | |
| 752 #} | |
| 753 | |
| 754 # do changes | |
| 755 my $k; | |
| 756 foreach my $mutation ($self->each_Mutation) { | |
| 757 next unless $mutation->label > 0; | |
| 758 $self->mutation($mutation); | |
| 759 | |
| 760 $mutation->issue(++$k); | |
| 761 # | |
| 762 # current position on the transcript | |
| 763 # | |
| 764 if ($self->numbering =~ /coding/) { | |
| 765 $mutation->transpos($mutation->pos); # transpos given by user | |
| 766 } else { | |
| 767 #transpos of label / It will be 0 if mutating an intron, negative if upstream of ATG | |
| 768 $mutation->transpos($self->RNA->position($mutation->label,$atg_label)); | |
| 769 } | |
| 770 # | |
| 771 # Calculate adjacent labels based on the position on the current sequence | |
| 772 # | |
| 773 $mutation->prelabel($self->DNA->label(-1, $mutation->label)); # 1 before label | |
| 774 if ($mutation->len == 0) { | |
| 775 $mutation->postlabel($mutation->label); | |
| 776 $mutation->lastlabel($mutation->label); | |
| 777 } elsif ($mutation->len == 1) { | |
| 778 $mutation->lastlabel($mutation->label); # last nucleotide affected | |
| 779 $mutation->postlabel($self->DNA->label(2,$mutation->lastlabel)); # $len after label | |
| 780 } else { | |
| 781 $mutation->lastlabel($self->DNA->label($mutation->len,$mutation->label)); | |
| 782 $mutation->postlabel($self->DNA->label(2,$mutation->lastlabel)); | |
| 783 } | |
| 784 my $dnamut = $self->_set_DNAMutation($seqDiff); | |
| 785 # | |
| 786 # | |
| 787 # | |
| 788 if ($self->_rnaAffected) { | |
| 789 $self->_set_effects($seqDiff, $dnamut); | |
| 790 } | |
| 791 elsif ($seqDiff->offset != 0 and $dnamut->region ne 'intron') { | |
| 792 $self->_untranslated ($seqDiff, $dnamut); | |
| 793 } else { | |
| 794 #$self->warn("Mutation starts outside coding region, RNAChange object not created"); | |
| 795 } | |
| 796 | |
| 797 ######################################################################### | |
| 798 # Mutations are done here! # | |
| 799 $refseq->labelchange($mutation->seq, $mutation->label, $mutation->len); # | |
| 800 ######################################################################### | |
| 801 | |
| 802 $self->_post_mutation ($seqDiff); | |
| 803 | |
| 804 $self->dnamut(undef); | |
| 805 $self->rnachange(undef); | |
| 806 $self->aachange(undef); | |
| 807 $self->exons(undef); | |
| 808 } | |
| 809 # record the final state of all three sequences | |
| 810 $seqDiff->dna_mut($self->DNA->seq); | |
| 811 $seqDiff->rna_mut($self->RNA->seq); | |
| 812 if ($refseq->isa("Bio::LiveSeq::Transcript")) { | |
| 813 $seqDiff->aa_mut($refseq->get_Translation->seq); | |
| 814 } else { | |
| 815 $seqDiff->aa_mut($self->RNA->get_Translation->seq); | |
| 816 } | |
| 817 | |
| 818 #$seqDiff{mut_dna_seq}=$gene->get_DNA->seq; | |
| 819 #my $i=1; | |
| 820 #foreach $transcript (@transcripts) { | |
| 821 # $seqDiff{"mut_transcript_${i}_seq"}=$transcript->seq; | |
| 822 # $seqDiff{"mut_translation_${i}_seq"}=$transcript->get_Translation->seq; | |
| 823 #} | |
| 824 return $seqDiff; | |
| 825 } | |
| 826 | |
| 827 =head2 _mutationpos2label | |
| 828 | |
| 829 Title : _mutationpos2label | |
| 830 Usage : | |
| 831 Function: converts mutation positions into labels | |
| 832 Example : | |
| 833 Returns : number of valid mutations | |
| 834 Args : LiveSeq sequence object | |
| 835 | |
| 836 =cut | |
| 837 | |
| 838 sub _mutationpos2label { | |
| 839 my ($self, $refseq, $SeqDiff) = @_; | |
| 840 my $count; | |
| 841 my @bb = @{$self->{'mutations'}}; | |
| 842 my $cc = scalar @bb; | |
| 843 foreach my $mut (@{$self->{'mutations'}}) { | |
| 844 # if ($self->numbering eq 'gene' and $mut->pos < 1) { | |
| 845 # my $tmp = $mut->pos; | |
| 846 # print STDERR "pos: ", "$tmp\n"; | |
| 847 # $tmp++ if $tmp < 1; | |
| 848 # $tmp += $SeqDiff->offset; | |
| 849 # print STDERR "pos2: ", "$tmp\n"; | |
| 850 # $mut->pos($tmp); | |
| 851 # } | |
| 852 # elsif ($self->numbering eq 'entry') { | |
| 853 if ($self->numbering eq 'entry') { | |
| 854 my $tmp = $mut->pos; | |
| 855 $tmp -= $SeqDiff->offset; | |
| 856 $tmp-- if $tmp < 1; | |
| 857 $mut->pos($tmp); | |
| 858 } | |
| 859 | |
| 860 my $label = $refseq->label($mut->pos); # get the label for the position | |
| 861 $mut->label($label), $count++ if $label > 0 ; | |
| 862 #print STDERR "x", $mut->pos,'|' ,$mut->label, "\n"; | |
| 863 } | |
| 864 return $count; | |
| 865 } | |
| 866 | |
| 867 # | |
| 868 # Calculate labels around mutated nucleotide | |
| 869 # | |
| 870 | |
| 871 =head2 _set_DNAMutation | |
| 872 | |
| 873 Title : _set_DNAMutation | |
| 874 Usage : | |
| 875 Function: | |
| 876 | |
| 877 Stores DNA level mutation attributes before mutation into | |
| 878 Bio::Variation::DNAMutation object. Links it to SeqDiff | |
| 879 object. | |
| 880 | |
| 881 Example : | |
| 882 Returns : Bio::Variation::DNAMutation object | |
| 883 Args : Bio::Variation::SeqDiff object | |
| 884 | |
| 885 See L<Bio::Variation::DNAMutation> and L<Bio::Variation::SeqDiff>. | |
| 886 | |
| 887 =cut | |
| 888 | |
| 889 sub _set_DNAMutation { | |
| 890 my ($self, $seqDiff) = @_; | |
| 891 | |
| 892 my $dnamut_start = $self->mutation->label - $seqDiff->offset; | |
| 893 # if negative DNA positions (before ATG) | |
| 894 $dnamut_start-- if $dnamut_start <= 0; | |
| 895 my $dnamut_end; | |
| 896 ($self->mutation->len == 0 or $self->mutation->len == 1) ? | |
| 897 ($dnamut_end = $dnamut_start) : | |
| 898 ($dnamut_end = $dnamut_start+$self->mutation->len); | |
| 899 #print "start:$dnamut_start, end:$dnamut_end\n"; | |
| 900 my $dnamut = Bio::Variation::DNAMutation->new(-start => $dnamut_start, | |
| 901 -end => $dnamut_end, | |
| 902 ); | |
| 903 $dnamut->mut_number($self->mutation->issue); | |
| 904 $dnamut->isMutation(1); | |
| 905 my $da_m = Bio::Variation::Allele->new; | |
| 906 $da_m->seq($self->mutation->seq) if $self->mutation->seq; | |
| 907 $dnamut->allele_mut($da_m); | |
| 908 $dnamut->add_Allele($da_m); | |
| 909 # allele_ori | |
| 910 my $allele_ori = $self->DNA->labelsubseq($self->mutation->prelabel, | |
| 911 undef, | |
| 912 $self->mutation->postlabel); # get seq | |
| 913 chop $allele_ori; # chop the postlabel nucleotide | |
| 914 $allele_ori=substr($allele_ori,1); # away the prelabel nucleotide | |
| 915 my $da_o = Bio::Variation::Allele->new; | |
| 916 $da_o->seq($allele_ori) if $allele_ori; | |
| 917 $dnamut->allele_ori($da_o); | |
| 918 ($self->mutation->len == 0) ? | |
| 919 ($dnamut->length($self->mutation->len)) : ($dnamut->length(CORE::length $allele_ori)); | |
| 920 #print " --------------- $dnamut_start -$len- $dnamut_end -\n"; | |
| 921 $seqDiff->add_Variant($dnamut); | |
| 922 $self->dnamut($dnamut); | |
| 923 $dnamut->mut_number($self->mutation->issue); | |
| 924 # setting proof | |
| 925 if ($seqDiff->numbering eq "entry" or $seqDiff->numbering eq "gene") { | |
| 926 $dnamut->proof('experimental'); | |
| 927 } else { | |
| 928 $dnamut->proof('computed'); | |
| 929 } | |
| 930 # how many nucleotides to store upstream and downstream of the change | |
| 931 my $flanklen = $self->{'flanklen'}; | |
| 932 #print `date`, " flanking sequences start\n"; | |
| 933 my $uplabel = $self->DNA->label(1-$flanklen,$self->mutation->prelabel); # this could be unavailable! | |
| 934 | |
| 935 my $upstreamseq; | |
| 936 if ($uplabel > 0) { | |
| 937 $upstreamseq = | |
| 938 $self->DNA->labelsubseq($uplabel, undef, $self->mutation->prelabel); | |
| 939 } else { # from start (less than $flanklen nucleotides) | |
| 940 $upstreamseq = | |
| 941 $self->DNA->labelsubseq($self->DNA->start, undef, $self->mutation->prelabel); | |
| 942 } | |
| 943 $dnamut->upStreamSeq($upstreamseq); | |
| 944 my $dnstreamseq = $self->DNA->labelsubseq($self->mutation->postlabel, $flanklen); | |
| 945 $dnamut->dnStreamSeq($dnstreamseq); # $flanklen or less nucleotides | |
| 946 return $dnamut; | |
| 947 } | |
| 948 | |
| 949 | |
| 950 # | |
| 951 ### Check if mutation propagates to RNA (and AA) level | |
| 952 # | |
| 953 # side effect: sets intron/exon information | |
| 954 # returns a boolean value | |
| 955 # | |
| 956 | |
| 957 sub _rnaAffected { | |
| 958 my ($self) = @_; | |
| 959 my @exons=$self->RNA->all_Exons; | |
| 960 my $RNAstart=$self->RNA->start; | |
| 961 my $RNAend=$self->RNA->end; | |
| 962 my ($firstexon,$before,$after,$i); | |
| 963 my ($rnaAffected) = 0; | |
| 964 | |
| 965 # check for inserted labels (that require follows instead of <,>) | |
| 966 my $DNAend=$self->RNA->{'seq'}->end; | |
| 967 if ($self->mutation->prelabel > $DNAend or $self->mutation->postlabel > $DNAend) { | |
| 968 #this means one of the two labels is an inserted one | |
| 969 #(coming from a previous mutation. This would falsify all <,> | |
| 970 #checks, so the follow() has to be used | |
| 971 $self->warn("Attention, workaround not fully tested yet! Expect unpredictable results.\n"); | |
| 972 if (($self->mutation->postlabel==$RNAstart) or (follows($self->mutation->postlabel,$RNAstart))) { | |
| 973 $self->warn("RNA not affected because change occurs before RNAstart"); | |
| 974 } | |
| 975 elsif (($RNAend==$self->mutation->prelabel) or (follows($RNAend,$self->mutation->prelabel))) { | |
| 976 $self->warn("RNA not affected because change occurs after RNAend"); | |
| 977 } | |
| 978 elsif (scalar @exons == 1) { | |
| 979 #no introns, just one exon | |
| 980 $rnaAffected = 1; # then RNA is affected! | |
| 981 } else { | |
| 982 # otherwise check for change occurring inside an intron | |
| 983 $firstexon=shift(@exons); | |
| 984 $before=$firstexon->end; | |
| 985 | |
| 986 foreach $i (0..$#exons) { | |
| 987 $after=$exons[$i]->start; | |
| 988 if (follows($self->mutation->prelabel,$before) or | |
| 989 ($after==$self->mutation->prelabel) or | |
| 990 follows($after,$self->mutation->prelabel) or | |
| 991 follows($after,$self->mutation->postlabel)) { | |
| 992 | |
| 993 $rnaAffected = 1; | |
| 994 # $i is number of exon and can be used for proximity check | |
| 995 } | |
| 996 $before=$exons[$i]->end; | |
| 997 } | |
| 998 | |
| 999 } | |
| 1000 } else { | |
| 1001 my $strand = $exons[0]->strand; | |
| 1002 if (($strand == 1 and $self->mutation->postlabel <= $RNAstart) or | |
| 1003 ($strand != 1 and $self->mutation->postlabel >= $RNAstart)) { | |
| 1004 #$self->warn("RNA not affected because change occurs before RNAstart"); | |
| 1005 $rnaAffected = 0; | |
| 1006 } | |
| 1007 elsif (($strand == 1 and $self->mutation->prelabel >= $RNAend) or | |
| 1008 ($strand != 1 and $self->mutation->prelabel <= $RNAend)) { | |
| 1009 #$self->warn("RNA not affected because change occurs after RNAend"); | |
| 1010 $rnaAffected = 0; | |
| 1011 my $dist; | |
| 1012 if ($strand == 1){ | |
| 1013 $dist = $self->mutation->prelabel - $RNAend; | |
| 1014 } else { | |
| 1015 $dist = $RNAend - $self->mutation->prelabel; | |
| 1016 } | |
| 1017 $self->dnamut->region_dist($dist); | |
| 1018 } | |
| 1019 elsif (scalar @exons == 1) { | |
| 1020 #if just one exon -> no introns, | |
| 1021 $rnaAffected = 1; # then RNA is affected! | |
| 1022 } else { | |
| 1023 # otherwise check for mutation occurring inside an intron | |
| 1024 $firstexon=shift(@exons); | |
| 1025 $before=$firstexon->end; | |
| 1026 if ( ($strand == 1 and $self->mutation->prelabel < $before) or | |
| 1027 ($strand == -1 and $self->mutation->prelabel > $before) | |
| 1028 ) { | |
| 1029 $rnaAffected = 1 ; | |
| 1030 | |
| 1031 #print "Exon 1 : ", $firstexon->start, " - ", $firstexon->end, "<br>\n"; | |
| 1032 my $afterdist = $self->mutation->prelabel - $firstexon->start; | |
| 1033 my $beforedist = $firstexon->end - $self->mutation->postlabel; | |
| 1034 my $exonvalue = $i + 1; | |
| 1035 $self->dnamut->region('exon'); | |
| 1036 $self->dnamut->region_value($exonvalue); | |
| 1037 if ($afterdist < $beforedist) { | |
| 1038 $afterdist++; | |
| 1039 $afterdist++; | |
| 1040 $self->dnamut->region_dist($afterdist); | |
| 1041 #print "splice site $afterdist nt upstream!<br>"; | |
| 1042 } else { | |
| 1043 $self->dnamut->region_dist($beforedist); | |
| 1044 #print "splice site $beforedist nt downstream!<br>"; | |
| 1045 } | |
| 1046 } else { | |
| 1047 #print "first exon : ", $firstexon->start, " - ", $firstexon->end, "<br>\n"; | |
| 1048 foreach $i (0..$#exons) { | |
| 1049 $after=$exons[$i]->start; | |
| 1050 #proximity test for intronic mutations | |
| 1051 if ( ($strand == 1 and | |
| 1052 $self->mutation->prelabel >= $before and | |
| 1053 $self->mutation->postlabel <= $after) | |
| 1054 or | |
| 1055 ($strand == -1 and | |
| 1056 $self->mutation->prelabel <= $before and | |
| 1057 $self->mutation->postlabel >= $after) ) { | |
| 1058 $self->dnamut->region('intron'); | |
| 1059 #$self->dnamut->region_value($i); | |
| 1060 my $afterdist = $self->mutation->prelabel - $before; | |
| 1061 my $beforedist = $after - $self->mutation->postlabel; | |
| 1062 my $intronvalue = $i + 1; | |
| 1063 if ($afterdist < $beforedist) { | |
| 1064 $afterdist++; | |
| 1065 $self->dnamut->region_value($intronvalue); | |
| 1066 $self->dnamut->region_dist($afterdist); | |
| 1067 #print "splice site $afterdist nt upstream!<br>"; | |
| 1068 } else { | |
| 1069 $self->dnamut->region_value($intronvalue); | |
| 1070 $self->dnamut->region_dist($beforedist * -1); | |
| 1071 #print "splice site $beforedist nt downstream!<br>"; | |
| 1072 } | |
| 1073 $self->rnachange(undef); | |
| 1074 last; | |
| 1075 } | |
| 1076 #proximity test for exon mutations | |
| 1077 elsif ( ( $strand == 1 and | |
| 1078 $exons[$i]->start <= $self->mutation->prelabel and | |
| 1079 $exons[$i]->end >= $self->mutation->postlabel) or | |
| 1080 ( $strand == -1 and | |
| 1081 $exons[$i]->start >= $self->mutation->prelabel and | |
| 1082 $exons[$i]->end <= $self->mutation->postlabel) ) { | |
| 1083 $rnaAffected = 1; | |
| 1084 | |
| 1085 my $afterdist = $self->mutation->prelabel - $exons[$i]->start; | |
| 1086 my $beforedist = $exons[$i]->end - $self->mutation->postlabel; | |
| 1087 my $exonvalue = $i + 1; | |
| 1088 $self->dnamut->region('exon'); | |
| 1089 if ($afterdist < $beforedist) { | |
| 1090 $afterdist++; | |
| 1091 $self->dnamut->region_value($exonvalue+1); | |
| 1092 $self->dnamut->region_dist($afterdist); | |
| 1093 #print "splice site $afterdist nt upstream!<br>"; | |
| 1094 } else { | |
| 1095 #$beforedist; | |
| 1096 $self->dnamut->region_value($exonvalue+1); | |
| 1097 $self->dnamut->region_dist($beforedist * -1); | |
| 1098 #print "splice site $beforedist nt downstream!<br>"; | |
| 1099 } | |
| 1100 last; | |
| 1101 } | |
| 1102 $before=$exons[$i]->end; | |
| 1103 } | |
| 1104 } | |
| 1105 } | |
| 1106 } | |
| 1107 #$self->warn("RNA not affected because change occurs inside an intron"); | |
| 1108 #return(0); # if still not returned, then not affected, return 0 | |
| 1109 return $rnaAffected; | |
| 1110 } | |
| 1111 | |
| 1112 # | |
| 1113 # ### Creation of RNA and AA variation objects | |
| 1114 # | |
| 1115 | |
| 1116 =head2 _set_effects | |
| 1117 | |
| 1118 Title : _set_effects | |
| 1119 Usage : | |
| 1120 Function: | |
| 1121 | |
| 1122 Stores RNA and AA level mutation attributes before mutation | |
| 1123 into Bio::Variation::RNAChange and | |
| 1124 Bio::Variation::AAChange objects. Links them to | |
| 1125 SeqDiff object. | |
| 1126 | |
| 1127 Example : | |
| 1128 Returns : | |
| 1129 Args : Bio::Variation::SeqDiff object | |
| 1130 Bio::Variation::DNAMutation object | |
| 1131 | |
| 1132 See L<Bio::Variation::RNAChange>, L<Bio::Variation::RNAChange>, | |
| 1133 L<Bio::Variation::SeqDiff>, and L<Bio::Variation::DNAMutation>. | |
| 1134 | |
| 1135 =cut | |
| 1136 | |
| 1137 sub _set_effects { | |
| 1138 my ($self, $seqDiff, $dnamut) = @_; | |
| 1139 my ($rnapos_end, $upstreamseq, $dnstreamseq); | |
| 1140 my $flanklen = $self->{'flanklen'}; | |
| 1141 | |
| 1142 ($self->mutation->len == 0) ? | |
| 1143 ($rnapos_end = $self->mutation->transpos) : | |
| 1144 ($rnapos_end = $self->mutation->transpos + $self->mutation->len -1); | |
| 1145 my $rnachange = Bio::Variation::RNAChange->new(-start => $self->mutation->transpos, | |
| 1146 -end => $rnapos_end | |
| 1147 ); | |
| 1148 $rnachange->isMutation(1); | |
| 1149 | |
| 1150 # setting proof | |
| 1151 if ($seqDiff->numbering eq "coding") { | |
| 1152 $rnachange->proof('experimental'); | |
| 1153 } else { | |
| 1154 $rnachange->proof('computed'); | |
| 1155 } | |
| 1156 | |
| 1157 $seqDiff->add_Variant($rnachange); | |
| 1158 $self->rnachange($rnachange); | |
| 1159 $rnachange->DNAMutation($dnamut); | |
| 1160 $dnamut->RNAChange($rnachange); | |
| 1161 $rnachange->mut_number($self->mutation->issue); | |
| 1162 | |
| 1163 # setting the codon_position of the "start" nucleotide of the change | |
| 1164 $rnachange->codon_pos(($self->RNA->frame($self->mutation->label))+1); # codon_pos=frame+1 | |
| 1165 | |
| 1166 my @exons=$self->RNA->all_Exons; | |
| 1167 $self->exons(\@exons); | |
| 1168 #print `date`, " before flank, after exons. RNAObj query\n"; | |
| 1169 # if cannot retrieve from Transcript, Transcript::upstream_seq will be used | |
| 1170 # before "fac7 g 65" bug discovered | |
| 1171 # $uplabel=$self->RNA->label(1-$flanklen,$prelabel); | |
| 1172 my $RNAprelabel=$self->RNA->label(-1,$self->mutation->label); # to fix fac7g65 bug | |
| 1173 # for the fix, all prelabel used in the next block have been changed to RNAprelabel | |
| 1174 my $uplabel=$self->RNA->label(1-$flanklen,$RNAprelabel); | |
| 1175 if ($self->RNA->valid($uplabel)) { | |
| 1176 $upstreamseq = $self->RNA->labelsubseq($uplabel, undef, $RNAprelabel); | |
| 1177 } else { | |
| 1178 $upstreamseq = $self->RNA->labelsubseq($self->RNA->start, undef, $RNAprelabel) | |
| 1179 if $self->RNA->valid($RNAprelabel); | |
| 1180 my $lacking=$flanklen-length($upstreamseq); # how many missing | |
| 1181 my $upstream_atg=$exons[0]->subseq(-$lacking,-1); | |
| 1182 $upstreamseq=$upstream_atg . $upstreamseq; | |
| 1183 } | |
| 1184 | |
| 1185 $rnachange->upStreamSeq($upstreamseq); | |
| 1186 | |
| 1187 # won't work OK if postlabel NOT in Transcript | |
| 1188 # now added RNApostlabel but this has to be /fully tested/ | |
| 1189 # for the fix, all postlabel used in the next block have been changed to RNApostlabel | |
| 1190 my $RNApostlabel; # to fix fac7g64 bug | |
| 1191 if ($self->mutation->len == 0) { | |
| 1192 $RNApostlabel=$self->mutation->label; | |
| 1193 } else { | |
| 1194 my $mutlen = 1 + $self->mutation->len; | |
| 1195 $RNApostlabel=$self->RNA->label($mutlen,$self->mutation->label); | |
| 1196 } | |
| 1197 $dnstreamseq=$self->RNA->labelsubseq($RNApostlabel, $flanklen); | |
| 1198 if ($dnstreamseq eq '-1') { # if out of transcript was requested | |
| 1199 my $lastexon=$exons[-1]; | |
| 1200 my $lastexonlength=$lastexon->length; | |
| 1201 $dnstreamseq=$self->RNA->labelsubseq($RNApostlabel); # retrieves till RNAend | |
| 1202 my $lacking=$flanklen-length($dnstreamseq); # how many missing | |
| 1203 my $downstream_stop=$lastexon->subseq($lastexonlength+1,undef,$lacking); | |
| 1204 $dnstreamseq .= $downstream_stop; | |
| 1205 } else { | |
| 1206 $rnachange->dnStreamSeq($dnstreamseq); | |
| 1207 } | |
| 1208 # AAChange creation | |
| 1209 my $AAobj=$self->RNA->get_Translation; | |
| 1210 # storage of prelabel here, to be used in create_mut_objs_after | |
| 1211 my $aachange = Bio::Variation::AAChange->new(-start => $RNAprelabel | |
| 1212 ); | |
| 1213 $aachange->isMutation(1); | |
| 1214 $aachange->proof('computed'); | |
| 1215 | |
| 1216 $seqDiff->add_Variant($aachange); | |
| 1217 $self->aachange($aachange); | |
| 1218 $rnachange->AAChange($aachange); | |
| 1219 $aachange->RNAChange($rnachange); | |
| 1220 | |
| 1221 $aachange->mut_number($self->mutation->issue); | |
| 1222 # $before_mutation{aachange}=$aachange; | |
| 1223 | |
| 1224 my $ra_o = Bio::Variation::Allele->new; | |
| 1225 $ra_o->seq($dnamut->allele_ori->seq) if $dnamut->allele_ori->seq; | |
| 1226 $rnachange->allele_ori($ra_o); | |
| 1227 | |
| 1228 $rnachange->length(CORE::length $rnachange->allele_ori->seq); | |
| 1229 | |
| 1230 my $ra_m = Bio::Variation::Allele->new; | |
| 1231 $ra_m->seq($self->mutation->seq) if $self->mutation->seq; | |
| 1232 $rnachange->allele_mut($ra_m); | |
| 1233 $rnachange->add_Allele($ra_m); | |
| 1234 | |
| 1235 #$rnachange->allele_mut($seq); | |
| 1236 $rnachange->end($rnachange->start) if $rnachange->length == 0; | |
| 1237 | |
| 1238 # this holds the aminoacid sequence that will be affected by the mutation | |
| 1239 my $aa_allele_ori=$AAobj->labelsubseq($self->mutation->label,undef, | |
| 1240 $self->mutation->lastlabel); | |
| 1241 | |
| 1242 my $aa_o = Bio::Variation::Allele->new; | |
| 1243 $aa_o->seq($aa_allele_ori) if $aa_allele_ori; | |
| 1244 $aachange->allele_ori($aa_o); | |
| 1245 #$aachange->allele_ori($aa_allele_ori); | |
| 1246 | |
| 1247 my $aa_length_ori = length($aa_allele_ori); | |
| 1248 $aachange->length($aa_length_ori); #print "==========$aa_length_ori\n"; | |
| 1249 $aachange->end($aachange->start + $aa_length_ori - 1 ); | |
| 1250 } | |
| 1251 | |
| 1252 =head2 _untranslated | |
| 1253 | |
| 1254 Title : _untranslated | |
| 1255 Usage : | |
| 1256 Function: | |
| 1257 | |
| 1258 Stores RNA change attributes before mutation | |
| 1259 into Bio::Variation::RNAChange object. Links it to | |
| 1260 SeqDiff object. | |
| 1261 | |
| 1262 Example : | |
| 1263 Returns : | |
| 1264 Args : Bio::Variation::SeqDiff object | |
| 1265 Bio::Variation::DNAMutation object | |
| 1266 | |
| 1267 See L<Bio::Variation::RNAChange>, L<Bio::Variation::SeqDiff> and | |
| 1268 L<Bio::Variation::DNAMutation> for details. | |
| 1269 | |
| 1270 =cut | |
| 1271 | |
| 1272 sub _untranslated { | |
| 1273 my ($self, $seqDiff, $dnamut) = @_; | |
| 1274 my $rnapos_end; | |
| 1275 ($self->mutation->len == 0) ? | |
| 1276 ($rnapos_end = $self->mutation->transpos) : | |
| 1277 ($rnapos_end = $self->mutation->transpos + $self->mutation->len -1); | |
| 1278 my $rnachange = Bio::Variation::RNAChange->new(-start => $self->mutation->transpos, | |
| 1279 -end => $rnapos_end | |
| 1280 ); | |
| 1281 #my $rnachange = Bio::Variation::RNAChange->new; | |
| 1282 | |
| 1283 $rnachange->isMutation(1); | |
| 1284 my $ra_o = Bio::Variation::Allele->new; | |
| 1285 $ra_o->seq($dnamut->allele_ori->seq) if $dnamut->allele_ori->seq; | |
| 1286 $rnachange->allele_ori($ra_o); | |
| 1287 my $ra_m = Bio::Variation::Allele->new; | |
| 1288 $ra_m->seq($dnamut->allele_mut->seq) if $dnamut->allele_mut->seq; | |
| 1289 $rnachange->allele_mut($ra_m); | |
| 1290 $rnachange->add_Allele($ra_m); | |
| 1291 $rnachange->upStreamSeq($dnamut->upStreamSeq); | |
| 1292 $rnachange->dnStreamSeq($dnamut->dnStreamSeq); | |
| 1293 $rnachange->length($dnamut->length); | |
| 1294 $rnachange->mut_number($dnamut->mut_number); | |
| 1295 # setting proof | |
| 1296 if ($seqDiff->numbering eq "coding") { | |
| 1297 $rnachange->proof('experimental'); | |
| 1298 } else { | |
| 1299 $rnachange->proof('computed'); | |
| 1300 } | |
| 1301 | |
| 1302 my $dist; | |
| 1303 if ($rnachange->end < 0) { | |
| 1304 $rnachange->region('5\'UTR'); | |
| 1305 $dnamut->region('5\'UTR'); | |
| 1306 my $dist = $dnamut->end ; | |
| 1307 $dnamut->region_dist($dist); | |
| 1308 $dist = $seqDiff->offset - $self->gene->maxtranscript->start + 1 + $dist; | |
| 1309 $rnachange->region_dist($dist); | |
| 1310 return if $dist < 1; # if mutation is not in mRNA | |
| 1311 } else { | |
| 1312 $rnachange->region('3\'UTR'); | |
| 1313 $dnamut->region('3\'UTR'); | |
| 1314 my $dist = $dnamut->start - $seqDiff->cds_end + $seqDiff->offset; | |
| 1315 $dnamut->region_dist($dist); | |
| 1316 $dist = $seqDiff->cds_end - $self->gene->maxtranscript->end -1 + $dist; | |
| 1317 $rnachange->region_dist($dist); | |
| 1318 return if $dist > 0; # if mutation is not in mRNA | |
| 1319 } | |
| 1320 $seqDiff->add_Variant($rnachange); | |
| 1321 $self->rnachange($rnachange); | |
| 1322 $rnachange->DNAMutation($dnamut); | |
| 1323 $dnamut->RNAChange($rnachange); | |
| 1324 } | |
| 1325 | |
| 1326 # args: reference to label changearray, reference to position changearray | |
| 1327 # Function: take care of the creation of mutation objects, with | |
| 1328 # information AFTER the change takes place | |
| 1329 sub _post_mutation { | |
| 1330 my ($self, $seqDiff) = @_; | |
| 1331 | |
| 1332 if ($self->rnachange and $self->rnachange->region eq 'coding') { | |
| 1333 | |
| 1334 #$seqDiff->add_Variant($self->rnachange); | |
| 1335 | |
| 1336 my $aachange=$self->aachange; | |
| 1337 my ($AAobj,$aa_start_prelabel,$aa_start,$mut_translation); | |
| 1338 $AAobj=$self->RNA->get_Translation; | |
| 1339 $aa_start_prelabel=$aachange->start; | |
| 1340 $aa_start=$AAobj->position($self->RNA->label(2,$aa_start_prelabel)); | |
| 1341 $aachange->start($aa_start); | |
| 1342 $mut_translation=$AAobj->seq; | |
| 1343 | |
| 1344 # this now takes in account possible preinsertions | |
| 1345 my $aa_m = Bio::Variation::Allele->new; | |
| 1346 $aa_m->seq(substr($mut_translation,$aa_start-1)) if substr($mut_translation,$aa_start-1); | |
| 1347 $aachange->allele_mut($aa_m); | |
| 1348 $aachange->add_Allele($aa_m); | |
| 1349 #$aachange->allele_mut(substr($mut_translation,$aa_start-1)); | |
| 1350 #$aachange->allele_mut($mut_translation); | |
| 1351 my ($rlenori, $rlenmut); | |
| 1352 $rlenori = CORE::length($aachange->RNAChange->allele_ori->seq); | |
| 1353 $rlenmut = CORE::length($aachange->RNAChange->allele_mut->seq); | |
| 1354 #point mutation | |
| 1355 | |
| 1356 if ($rlenori == 1 and $rlenmut == 1 and $aachange->allele_ori->seq ne '*') { | |
| 1357 my $alleleseq; | |
| 1358 if ($aachange->allele_mut->seq) { | |
| 1359 $alleleseq = substr($aachange->allele_mut->seq, 0, 1); | |
| 1360 $aachange->allele_mut->seq($alleleseq); | |
| 1361 } | |
| 1362 $aachange->end($aachange->start); | |
| 1363 $aachange->length(1); | |
| 1364 } | |
| 1365 elsif ( $rlenori == $rlenmut and | |
| 1366 $aachange->allele_ori->seq ne '*' ) { #complex inframe mutation | |
| 1367 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, | |
| 1368 0, | |
| 1369 length($aachange->allele_ori->seq)); | |
| 1370 } | |
| 1371 #inframe mutation | |
| 1372 elsif ((int($rlenori-$rlenmut))%3 == 0) { | |
| 1373 if ($aachange->RNAChange->allele_mut->seq and | |
| 1374 $aachange->RNAChange->allele_ori->seq ) { | |
| 1375 # complex | |
| 1376 my $rna_len = length ($aachange->RNAChange->allele_mut->seq); | |
| 1377 my $len = $rna_len/3; | |
| 1378 $len++ unless $rna_len%3 == 0; | |
| 1379 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0, $len ); | |
| 1380 } | |
| 1381 elsif ($aachange->RNAChange->codon_pos == 1){ | |
| 1382 # deletion | |
| 1383 if ($aachange->RNAChange->allele_mut->seq eq '') { | |
| 1384 $aachange->allele_mut->seq(''); | |
| 1385 $aachange->end($aachange->start + $aachange->length - 1 ); | |
| 1386 } | |
| 1387 # insertion | |
| 1388 elsif ($aachange->RNAChange->allele_ori->seq eq '' ) { | |
| 1389 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0, | |
| 1390 length ($aachange->RNAChange->allele_mut->seq) / 3); | |
| 1391 $aachange->allele_ori->seq(''); | |
| 1392 $aachange->end($aachange->start + $aachange->length - 1 ); | |
| 1393 $aachange->length(0); | |
| 1394 } | |
| 1395 } else { | |
| 1396 #elsif ($aachange->RNAChange->codon_pos == 2){ | |
| 1397 # deletion | |
| 1398 if (not $aachange->RNAChange->allele_mut->seq ) { | |
| 1399 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0, 1); | |
| 1400 } | |
| 1401 # insertion | |
| 1402 elsif (not $aachange->RNAChange->allele_ori->seq) { | |
| 1403 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0, | |
| 1404 length ($aachange->RNAChange->allele_mut->seq) / 3 +1); | |
| 1405 } | |
| 1406 } | |
| 1407 } else { | |
| 1408 #frameshift | |
| 1409 #my $pos = index $aachange->allele_mut | |
| 1410 #$aachange->allele_mut(substr($aachange->allele_mut, 0, 1)); | |
| 1411 $aachange->length(CORE::length($aachange->allele_ori->seq)); | |
| 1412 my $aaend = $aachange->start + $aachange->length -1; | |
| 1413 $aachange->end($aachange->start); | |
| 1414 } | |
| 1415 | |
| 1416 # splicing site deletion check | |
| 1417 my @beforeexons=@{$self->exons}; | |
| 1418 my @afterexons=$self->RNA->all_Exons; | |
| 1419 my $i; | |
| 1420 if (scalar(@beforeexons) ne scalar(@afterexons)) { | |
| 1421 my $mut_number = $self->mutation->issue; | |
| 1422 $self->warn("Exons have been modified at mutation n.$mut_number!"); | |
| 1423 $self->rnachange->exons_modified(1); | |
| 1424 } else { | |
| 1425 EXONCHECK: | |
| 1426 foreach $i (0..$#beforeexons) { | |
| 1427 if ($beforeexons[$i] ne $afterexons[$i]) { | |
| 1428 my $mut_number = $self->mutation->issue; | |
| 1429 $self->warn("Exons have been modified at mutation n.$mut_number!"); | |
| 1430 $self->rnachange->exons_modified(1); | |
| 1431 last EXONCHECK; | |
| 1432 } | |
| 1433 } | |
| 1434 } | |
| 1435 } else { | |
| 1436 #$seqDiff->rnachange(undef); | |
| 1437 #print "getting here?"; | |
| 1438 } | |
| 1439 return 1; | |
| 1440 } | |
| 1441 | |
| 1442 1; |
