Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Variation/SeqDiff.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: SeqDiff.pm,v 1.16 2002/10/22 07:38:49 lapp Exp $ | |
| 2 # bioperl module for Bio::Variation::SeqDiff | |
| 3 # | |
| 4 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> | |
| 5 # | |
| 6 # Copyright Heikki Lehvaslaiho | |
| 7 # | |
| 8 # You may distribute this module under the same terms as perl itself | |
| 9 # | |
| 10 # POD documentation - main docs before the code | |
| 11 | |
| 12 # cds_end definition? | |
| 13 | |
| 14 =head1 NAME | |
| 15 | |
| 16 Bio::Variation::SeqDiff - Container class for mutation/variant descriptions | |
| 17 | |
| 18 =head1 SYNOPSIS | |
| 19 | |
| 20 $seqDiff = Bio::Variation::SeqDiff->new ( | |
| 21 -id => $M20132, | |
| 22 -alphabet => 'rna', | |
| 23 -gene_symbol => 'AR' | |
| 24 -chromosome => 'X', | |
| 25 -numbering => 'coding' | |
| 26 ); | |
| 27 # get a DNAMutation object somehow | |
| 28 $seqDiff->add_Variant($dnamut); | |
| 29 print $seqDiff->sys_name(), "\n"; | |
| 30 | |
| 31 =head1 DESCRIPTION | |
| 32 | |
| 33 SeqDiff stores Bio::Variation::VariantI object references and | |
| 34 descriptive information common to all changes in a sequence. Mutations | |
| 35 are understood to be any kind of sequence markers and are expected to | |
| 36 occur in the same chromosome. See L<Bio::Variation::VariantI> for details. | |
| 37 | |
| 38 The methods of SeqDiff are geared towards describing mutations in | |
| 39 human genes using gene-based coordinate system where 'A' of the | |
| 40 initiator codon has number 1 and the one before it -1. This is | |
| 41 according to conventions of human genetics. | |
| 42 | |
| 43 There will be class Bio::Variation::Genotype to describe markers in | |
| 44 different chromosomes and diploid genototypes. | |
| 45 | |
| 46 Classes implementing Bio::Variation::VariantI interface are | |
| 47 Bio::Variation::DNAMutation, Bio::Variation::RNAChange, and | |
| 48 Bio::Variation::AAChange. See L<Bio::Variation::VariantI>, | |
| 49 L<Bio::Variation::DNAMutation>, L<Bio::Variation::RNAChange>, and | |
| 50 L<Bio::Variation::AAChange> for more information. | |
| 51 | |
| 52 Variant objects can be added using two ways: an array passed to the | |
| 53 constructor or as individual Variant objects with add_Variant | |
| 54 method. | |
| 55 | |
| 56 | |
| 57 =head1 FEEDBACK | |
| 58 | |
| 59 =head2 Mailing Lists | |
| 60 | |
| 61 User feedback is an integral part of the evolution of this and other | |
| 62 Bioperl modules. Send your comments and suggestions preferably to the | |
| 63 Bioperl mailing lists Your participation is much appreciated. | |
| 64 | |
| 65 bioperl-l@bioperl.org - General discussion | |
| 66 http://bio.perl.org/MailList.html - About the mailing lists | |
| 67 | |
| 68 =head2 Reporting Bugs | |
| 69 | |
| 70 report bugs to the Bioperl bug tracking system to help us keep track | |
| 71 the bugs and their resolution. Bug reports can be submitted via | |
| 72 email or the web: | |
| 73 | |
| 74 bioperl-bugs@bio.perl.org | |
| 75 http://bugzilla.bioperl.org/ | |
| 76 | |
| 77 =head1 AUTHOR - Heikki Lehvaslaiho | |
| 78 | |
| 79 Email: heikki@ebi.ac.uk | |
| 80 Address: | |
| 81 | |
| 82 EMBL Outstation, European Bioinformatics Institute | |
| 83 Wellcome Trust Genome Campus, Hinxton | |
| 84 Cambs. CB10 1SD, United Kingdom | |
| 85 | |
| 86 =head1 CONTRIBUTORS | |
| 87 | |
| 88 Eckhard Lehmann, ecky@e-lehmann.de | |
| 89 | |
| 90 =head1 APPENDIX | |
| 91 | |
| 92 The rest of the documentation details each of the object | |
| 93 methods. Internal methods are usually preceded with a _ | |
| 94 | |
| 95 =cut | |
| 96 | |
| 97 # Let the code begin... | |
| 98 | |
| 99 package Bio::Variation::SeqDiff; | |
| 100 my $VERSION=1.0; | |
| 101 | |
| 102 use strict; | |
| 103 use vars qw($VERSION @ISA); | |
| 104 use Bio::Root::Root; | |
| 105 use Bio::Tools::CodonTable; | |
| 106 use Bio::PrimarySeq; | |
| 107 | |
| 108 @ISA = qw( Bio::Root::Root ); | |
| 109 | |
| 110 | |
| 111 =head2 new | |
| 112 | |
| 113 Title : new | |
| 114 Usage : $seqDiff = Bio::Variation::SeqDiff->new; | |
| 115 Function: generates a new Bio::Variation::SeqDiff | |
| 116 Returns : reference to a new object of class SeqDiff | |
| 117 Args : | |
| 118 | |
| 119 =cut | |
| 120 | |
| 121 sub new { | |
| 122 my($class,@args) = @_; | |
| 123 my $self = $class->SUPER::new(@args); | |
| 124 | |
| 125 my($id, $sysname, $trivname, $chr, $gene_symbol, | |
| 126 $desc, $alphabet, $numbering, $offset, $rna_offset, $rna_id, $cds_end, | |
| 127 $dna_ori, $dna_mut, $rna_ori, $rna_mut, $aa_ori, $aa_mut | |
| 128 #@variants, @genes | |
| 129 ) = | |
| 130 $self->_rearrange([qw(ID | |
| 131 SYSNAME | |
| 132 TRIVNAME | |
| 133 CHR | |
| 134 GENE_SYMBOL | |
| 135 DESC | |
| 136 ALPHABET | |
| 137 NUMBERING | |
| 138 OFFSET | |
| 139 RNA_OFFSET | |
| 140 RNA_ID | |
| 141 CDS_END | |
| 142 DNA_ORI | |
| 143 DNA_MUT | |
| 144 RNA_ORI | |
| 145 AA_ORI | |
| 146 AA_MUT | |
| 147 )], | |
| 148 @args); | |
| 149 | |
| 150 #my $make = $self->SUPER::_initialize(@args); | |
| 151 | |
| 152 $id && $self->id($id); | |
| 153 $sysname && $self->sysname($sysname); | |
| 154 $trivname && $self->trivname($trivname); | |
| 155 $chr && $self->chromosome($chr); | |
| 156 $gene_symbol && $self->gene_symbol($chr); | |
| 157 $desc && $self->description($desc); | |
| 158 $alphabet && $self->alphabet($alphabet); | |
| 159 $numbering && $self->numbering($numbering); | |
| 160 $offset && $self->offset($offset); | |
| 161 $rna_offset && $self->rna_offset($rna_offset); | |
| 162 $rna_id && $self->rna_id($rna_id); | |
| 163 $cds_end && $self->cds_end($cds_end); | |
| 164 | |
| 165 $dna_ori && $self->dna_ori($dna_ori); | |
| 166 $dna_mut && $self->dna_mut($dna_mut); | |
| 167 $rna_ori && $self->rna_ori($rna_ori); | |
| 168 $rna_mut && $self->rna_mut($rna_mut); | |
| 169 $aa_ori && $self->aa_ori ($aa_ori); | |
| 170 $aa_mut && $self->aa_mut ($aa_mut); | |
| 171 | |
| 172 $self->{ 'variants' } = []; | |
| 173 #@variants && push(@{$self->{'variants'}},@variants); | |
| 174 | |
| 175 $self->{ 'genes' } = []; | |
| 176 #@genes && push(@{$self->{'genes'}},@genes); | |
| 177 | |
| 178 return $self; # success - we hope! | |
| 179 } | |
| 180 | |
| 181 | |
| 182 =head2 id | |
| 183 | |
| 184 Title : id | |
| 185 Usage : $obj->id(H0001); $id = $obj->id(); | |
| 186 Function: | |
| 187 | |
| 188 Sets or returns the id of the seqDiff. | |
| 189 Should be used to give the collection of variants a UID | |
| 190 without semantic associations. | |
| 191 | |
| 192 Example : | |
| 193 Returns : value of id, a scalar | |
| 194 Args : newvalue (optional) | |
| 195 | |
| 196 =cut | |
| 197 | |
| 198 | |
| 199 sub id { | |
| 200 my ($self,$value) = @_; | |
| 201 if (defined $value) { | |
| 202 $self->{'id'} = $value; | |
| 203 } | |
| 204 # unless (exists $self->{'id'}) { | |
| 205 # return "undefined"; | |
| 206 # } | |
| 207 else { | |
| 208 return $self->{'id'}; | |
| 209 } | |
| 210 } | |
| 211 | |
| 212 | |
| 213 =head2 sysname | |
| 214 | |
| 215 Title : sysname | |
| 216 Usage : $obj->sysname('5C>G'); $sysname = $obj->sysname(); | |
| 217 Function: | |
| 218 | |
| 219 Sets or returns the systematic name of the seqDiff. The | |
| 220 name should follow the HUGO Mutation Database Initiative | |
| 221 approved nomenclature. If called without first setting the | |
| 222 value, will generate it from L<Bio::Variation::DNAMutation> | |
| 223 objects attached. | |
| 224 | |
| 225 Example : | |
| 226 Returns : value of sysname, a scalar | |
| 227 Args : newvalue (optional) | |
| 228 | |
| 229 =cut | |
| 230 | |
| 231 | |
| 232 sub sysname { | |
| 233 my ($self,$value) = @_; | |
| 234 if (defined $value) { | |
| 235 $self->{'sysname'} = $value; | |
| 236 } | |
| 237 elsif (not defined $self->{'sysname'}) { | |
| 238 | |
| 239 my $sysname = ''; | |
| 240 my $c = 0; | |
| 241 foreach my $mut ($self->each_Variant) { | |
| 242 if( $mut->isa('Bio::Variation::DNAMutation') ) { | |
| 243 $c++; | |
| 244 if ($c == 1 ) { | |
| 245 $sysname = $mut->sysname ; | |
| 246 } | |
| 247 else { | |
| 248 $sysname .= ";". $mut->sysname; | |
| 249 } | |
| 250 } | |
| 251 } | |
| 252 $sysname = "[". $sysname. "]" if $c > 1; | |
| 253 $self->{'sysname'} = $sysname; | |
| 254 } | |
| 255 return $self->{'sysname'}; | |
| 256 } | |
| 257 | |
| 258 | |
| 259 =head2 trivname | |
| 260 | |
| 261 Title : trivname | |
| 262 Usage : $obj->trivname('[A2G;T56G]'); $trivname = $obj->trivname(); | |
| 263 Function: | |
| 264 | |
| 265 Sets or returns the trivial name of the seqDiff. | |
| 266 The name should follow the HUGO Mutation Database Initiative | |
| 267 approved nomenclature. If called without first setting the | |
| 268 value, will generate it from L<Bio::Variation::AAChange> | |
| 269 objects attached. | |
| 270 | |
| 271 Example : | |
| 272 Returns : value of trivname, a scalar | |
| 273 Args : newvalue (optional) | |
| 274 | |
| 275 =cut | |
| 276 | |
| 277 | |
| 278 sub trivname { | |
| 279 my ($self,$value) = @_; | |
| 280 if (defined $value) { | |
| 281 $self->{'trivname'} = $value; | |
| 282 } | |
| 283 elsif (not defined $self->{'trivname'}) { | |
| 284 | |
| 285 my $trivname = ''; | |
| 286 my $c = 0; | |
| 287 foreach my $mut ($self->each_Variant) { | |
| 288 if( $mut->isa('Bio::Variation::AAChange') ) { | |
| 289 $c++; | |
| 290 if ($c == 1 ) { | |
| 291 $trivname = $mut->trivname ; | |
| 292 } | |
| 293 else { | |
| 294 $trivname .= ";". $mut->trivname; | |
| 295 } | |
| 296 } | |
| 297 } | |
| 298 $trivname = "[". $trivname. "]" if $c > 1; | |
| 299 $self->{'trivname'} = $trivname; | |
| 300 } | |
| 301 | |
| 302 else { | |
| 303 return $self->{'trivname'}; | |
| 304 } | |
| 305 } | |
| 306 | |
| 307 | |
| 308 =head2 chromosome | |
| 309 | |
| 310 Title : chromosome | |
| 311 Usage : $obj->chromosome('X'); $chromosome = $obj->chromosome(); | |
| 312 Function: | |
| 313 | |
| 314 Sets or returns the chromosome ("linkage group") of the seqDiff. | |
| 315 | |
| 316 Example : | |
| 317 Returns : value of chromosome, a scalar | |
| 318 Args : newvalue (optional) | |
| 319 | |
| 320 =cut | |
| 321 | |
| 322 | |
| 323 sub chromosome { | |
| 324 my ($self,$value) = @_; | |
| 325 if (defined $value) { | |
| 326 $self->{'chromosome'} = $value; | |
| 327 } | |
| 328 else { | |
| 329 return $self->{'chromosome'}; | |
| 330 } | |
| 331 } | |
| 332 | |
| 333 | |
| 334 =head2 gene_symbol | |
| 335 | |
| 336 Title : gene_symbol | |
| 337 Usage : $obj->gene_symbol('FOS'); $gene_symbol = $obj->gene_symbol; | |
| 338 Function: | |
| 339 | |
| 340 Sets or returns the gene symbol for the studied CDS. | |
| 341 | |
| 342 Example : | |
| 343 Returns : value of gene_symbol, a scalar | |
| 344 Args : newvalue (optional) | |
| 345 | |
| 346 =cut | |
| 347 | |
| 348 | |
| 349 sub gene_symbol { | |
| 350 my ($self,$value) = @_; | |
| 351 if (defined $value) { | |
| 352 $self->{'gene_symbol'} = $value; | |
| 353 } | |
| 354 else { | |
| 355 return $self->{'gene_symbol'}; | |
| 356 } | |
| 357 } | |
| 358 | |
| 359 | |
| 360 | |
| 361 =head2 description | |
| 362 | |
| 363 Title : description | |
| 364 Usage : $obj->description('short description'); $descr = $obj->description(); | |
| 365 Function: | |
| 366 | |
| 367 Sets or returns the short description of the seqDiff. | |
| 368 | |
| 369 Example : | |
| 370 Returns : value of description, a scalar | |
| 371 Args : newvalue (optional) | |
| 372 | |
| 373 =cut | |
| 374 | |
| 375 | |
| 376 sub description { | |
| 377 my ($self,$value) = @_; | |
| 378 if (defined $value) { | |
| 379 $self->{'description'} = $value; | |
| 380 } | |
| 381 else { | |
| 382 return $self->{'description'}; | |
| 383 } | |
| 384 } | |
| 385 | |
| 386 | |
| 387 =head2 alphabet | |
| 388 | |
| 389 Title : alphabet | |
| 390 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ } | |
| 391 Function: Returns the type of primary reference sequence being one of | |
| 392 'dna', 'rna' or 'protein'. This is case sensitive. | |
| 393 | |
| 394 Returns : a string either 'dna','rna','protein'. | |
| 395 Args : none | |
| 396 | |
| 397 | |
| 398 =cut | |
| 399 | |
| 400 sub alphabet { | |
| 401 my ($self,$value) = @_; | |
| 402 my %type = (dna => 1, | |
| 403 rna => 1, | |
| 404 protein => 1); | |
| 405 if( defined $value ) { | |
| 406 if ($type{$value}) { | |
| 407 $self->{'alphabet'} = $value; | |
| 408 } else { | |
| 409 $self->throw("$value is not valid alphabet value!"); | |
| 410 } | |
| 411 } | |
| 412 return $self->{'alphabet'}; | |
| 413 } | |
| 414 | |
| 415 | |
| 416 =head2 numbering | |
| 417 | |
| 418 Title : numbering | |
| 419 Usage : $obj->numbering('coding'); $numbering = $obj->numbering(); | |
| 420 Function: | |
| 421 | |
| 422 Sets or returns the string giving the numbering schema used | |
| 423 to describe the variants. | |
| 424 | |
| 425 Example : | |
| 426 Returns : value of numbering, a scalar | |
| 427 Args : newvalue (optional) | |
| 428 | |
| 429 =cut | |
| 430 | |
| 431 | |
| 432 | |
| 433 sub numbering { | |
| 434 my ($self,$value) = @_; | |
| 435 if (defined $value) { | |
| 436 $self->{'numbering'} = $value; | |
| 437 } | |
| 438 else { | |
| 439 return $self->{'numbering'}; | |
| 440 } | |
| 441 } | |
| 442 | |
| 443 | |
| 444 =head2 offset | |
| 445 | |
| 446 Title : offset | |
| 447 Usage : $obj->offset(124); $offset = $obj->offset(); | |
| 448 Function: | |
| 449 | |
| 450 Sets or returns the offset from the beginning of the DNA sequence | |
| 451 to the coordinate start used to describe variants. Typically | |
| 452 the beginning of the coding region of the gene. | |
| 453 The cds_start should be 1 + offset. | |
| 454 | |
| 455 Example : | |
| 456 Returns : value of offset, a scalar | |
| 457 Args : newvalue (optional) | |
| 458 | |
| 459 =cut | |
| 460 | |
| 461 | |
| 462 | |
| 463 sub offset { | |
| 464 my ($self,$value) = @_; | |
| 465 if (defined $value) { | |
| 466 $self->{'offset'} = $value; | |
| 467 } | |
| 468 elsif (not defined $self->{'offset'} ) { | |
| 469 return $self->{'offset'} = 0; | |
| 470 } | |
| 471 else { | |
| 472 return $self->{'offset'}; | |
| 473 } | |
| 474 } | |
| 475 | |
| 476 | |
| 477 =head2 cds_start | |
| 478 | |
| 479 Title : cds_start | |
| 480 Usage : $obj->cds_start(123); $cds_start = $obj->cds_start(); | |
| 481 Function: | |
| 482 | |
| 483 Sets or returns the cds_start from the beginning of the DNA | |
| 484 sequence to the coordinate start used to describe | |
| 485 variants. Typically the beginning of the coding region of | |
| 486 the gene. Needs to be and is implemented as 1 + offset. | |
| 487 | |
| 488 Example : | |
| 489 Returns : value of cds_start, a scalar | |
| 490 Args : newvalue (optional) | |
| 491 | |
| 492 =cut | |
| 493 | |
| 494 | |
| 495 | |
| 496 sub cds_start { | |
| 497 my ($self,$value) = @_; | |
| 498 if (defined $value) { | |
| 499 $self->{'offset'} = $value - 1; | |
| 500 } | |
| 501 else { | |
| 502 return $self->{'offset'} + 1; | |
| 503 } | |
| 504 } | |
| 505 | |
| 506 | |
| 507 =head2 cds_end | |
| 508 | |
| 509 Title : cds_end | |
| 510 Usage : $obj->cds_end(321); $cds_end = $obj->cds_end(); | |
| 511 Function: | |
| 512 | |
| 513 Sets or returns the position of the last nucleotitide of the | |
| 514 termination codon. The coordinate system starts from cds_start. | |
| 515 | |
| 516 Example : | |
| 517 Returns : value of cds_end, a scalar | |
| 518 Args : newvalue (optional) | |
| 519 | |
| 520 =cut | |
| 521 | |
| 522 | |
| 523 | |
| 524 sub cds_end { | |
| 525 my ($self,$value) = @_; | |
| 526 if (defined $value) { | |
| 527 $self->{'cds_end'} = $value; | |
| 528 } | |
| 529 else { | |
| 530 return $self->{'cds_end'}; | |
| 531 #$self->{'cds_end'} = CORE::length($self->SeqDiff->rna_ori)/3; | |
| 532 } | |
| 533 } | |
| 534 | |
| 535 | |
| 536 =head2 rna_offset | |
| 537 | |
| 538 Title : rna_offset | |
| 539 Usage : $obj->rna_offset(124); $rna_offset = $obj->rna_offset(); | |
| 540 Function: | |
| 541 | |
| 542 Sets or returns the rna_offset from the beginning of the RNA sequence | |
| 543 to the coordinate start used to describe variants. Typically | |
| 544 the beginning of the coding region of the gene. | |
| 545 | |
| 546 Example : | |
| 547 Returns : value of rna_offset, a scalar | |
| 548 Args : newvalue (optional) | |
| 549 | |
| 550 =cut | |
| 551 | |
| 552 | |
| 553 | |
| 554 sub rna_offset { | |
| 555 my ($self,$value) = @_; | |
| 556 if (defined $value) { | |
| 557 $self->{'rna_offset'} = $value; | |
| 558 } | |
| 559 elsif (not defined $self->{'rna_offset'} ) { | |
| 560 return $self->{'rna_offset'} = 0; | |
| 561 } | |
| 562 else { | |
| 563 return $self->{'rna_offset'}; | |
| 564 } | |
| 565 } | |
| 566 | |
| 567 | |
| 568 =head2 rna_id | |
| 569 | |
| 570 Title : rna_id | |
| 571 Usage : $obj->rna_id('transcript#3'); $rna_id = $obj->rna_id(); | |
| 572 Function: | |
| 573 | |
| 574 Sets or returns the ID for original RNA sequence of the seqDiff. | |
| 575 | |
| 576 Example : | |
| 577 Returns : value of rna_id, a scalar | |
| 578 Args : newvalue (optional) | |
| 579 | |
| 580 =cut | |
| 581 | |
| 582 | |
| 583 sub rna_id { | |
| 584 my ($self,$value) = @_; | |
| 585 if (defined $value) { | |
| 586 $self->{'rna_id'} = $value; | |
| 587 } | |
| 588 else { | |
| 589 return $self->{'rna_id'}; | |
| 590 } | |
| 591 } | |
| 592 | |
| 593 | |
| 594 | |
| 595 =head2 add_Variant | |
| 596 | |
| 597 Title : add_Variant | |
| 598 Usage : $obj->add_Variant($variant) | |
| 599 Function: | |
| 600 | |
| 601 Pushes one Bio::Variation::Variant into the list of variants. | |
| 602 At the same time, creates a link from the Variant to SeqDiff | |
| 603 using its SeqDiff method. | |
| 604 | |
| 605 Example : | |
| 606 Returns : 1 when succeeds, 0 for failure. | |
| 607 Args : Variant object | |
| 608 | |
| 609 =cut | |
| 610 | |
| 611 sub add_Variant { | |
| 612 my ($self,$value) = @_; | |
| 613 if (defined $value) { | |
| 614 if( ! $value->isa('Bio::Variation::VariantI') ) { | |
| 615 $self->throw("Is not a VariantI complying object but a [$self]"); | |
| 616 return 0; | |
| 617 } | |
| 618 else { | |
| 619 push(@{$self->{'variants'}},$value); | |
| 620 $value->SeqDiff($self); | |
| 621 return 1; | |
| 622 } | |
| 623 } | |
| 624 else { | |
| 625 return 0; | |
| 626 } | |
| 627 } | |
| 628 | |
| 629 | |
| 630 =head2 each_Variant | |
| 631 | |
| 632 Title : each_Variant | |
| 633 Usage : $obj->each_Variant(); | |
| 634 Function: | |
| 635 | |
| 636 Returns a list of Variants. | |
| 637 | |
| 638 Example : | |
| 639 Returns : list of Variants | |
| 640 Args : none | |
| 641 | |
| 642 =cut | |
| 643 | |
| 644 sub each_Variant{ | |
| 645 my ($self,@args) = @_; | |
| 646 | |
| 647 return @{$self->{'variants'}}; | |
| 648 } | |
| 649 | |
| 650 | |
| 651 | |
| 652 =head2 add_Gene | |
| 653 | |
| 654 Title : add_Gene | |
| 655 Usage : $obj->add_Gene($gene) | |
| 656 Function: | |
| 657 | |
| 658 Pushes one L<Bio::LiveSeq::Gene> into the list of genes. | |
| 659 | |
| 660 Example : | |
| 661 Returns : 1 when succeeds, 0 for failure. | |
| 662 Args : Bio::LiveSeq::Gene object | |
| 663 | |
| 664 See L<Bio::LiveSeq::Gene> for more information. | |
| 665 | |
| 666 =cut | |
| 667 | |
| 668 | |
| 669 sub add_Gene { | |
| 670 my ($self,$value) = @_; | |
| 671 if (defined $value) { | |
| 672 if( ! $value->isa('Bio::LiveSeq::Gene') ) { | |
| 673 $value->throw("Is not a Bio::LiveSeq::Gene object but a [$value]"); | |
| 674 return 0; | |
| 675 } | |
| 676 else { | |
| 677 push(@{$self->{'genes'}},$value); | |
| 678 return 1; | |
| 679 } | |
| 680 } | |
| 681 else { | |
| 682 return 0; | |
| 683 } | |
| 684 } | |
| 685 | |
| 686 | |
| 687 =head2 each_Gene | |
| 688 | |
| 689 Title : each_Gene | |
| 690 Usage : $obj->each_Gene(); | |
| 691 Function: | |
| 692 | |
| 693 Returns a list of L<Bio::LiveSeq::Gene>s. | |
| 694 | |
| 695 Example : | |
| 696 Returns : list of Genes | |
| 697 Args : none | |
| 698 | |
| 699 =cut | |
| 700 | |
| 701 sub each_Gene{ | |
| 702 my ($self,@args) = @_; | |
| 703 | |
| 704 return @{$self->{'genes'}}; | |
| 705 } | |
| 706 | |
| 707 | |
| 708 =head2 dna_ori | |
| 709 | |
| 710 Title : dna_ori | |
| 711 Usage : $obj->dna_ori('atgctgctgctgct'); $dna_ori = $obj->dna_ori(); | |
| 712 Function: | |
| 713 | |
| 714 Sets or returns the original DNA sequence string of the seqDiff. | |
| 715 | |
| 716 Example : | |
| 717 Returns : value of dna_ori, a scalar | |
| 718 Args : newvalue (optional) | |
| 719 | |
| 720 =cut | |
| 721 | |
| 722 | |
| 723 sub dna_ori { | |
| 724 my ($self,$value) = @_; | |
| 725 if (defined $value) { | |
| 726 $self->{'dna_ori'} = $value; | |
| 727 } | |
| 728 else { | |
| 729 return $self->{'dna_ori'}; | |
| 730 } | |
| 731 } | |
| 732 | |
| 733 | |
| 734 =head2 dna_mut | |
| 735 | |
| 736 Title : dna_mut | |
| 737 Usage : $obj->dna_mut('atgctggtgctgct'); $dna_mut = $obj->dna_mut(); | |
| 738 Function: | |
| 739 | |
| 740 Sets or returns the mutated DNA sequence of the seqDiff. | |
| 741 If sequence has not been set generates it from the | |
| 742 original sequence and DNA mutations. | |
| 743 | |
| 744 Example : | |
| 745 Returns : value of dna_mut, a scalar | |
| 746 Args : newvalue (optional) | |
| 747 | |
| 748 =cut | |
| 749 | |
| 750 | |
| 751 sub dna_mut { | |
| 752 my ($self,$value) = @_; | |
| 753 if (defined $value) { | |
| 754 $self->{'dna_mut'} = $value; | |
| 755 } | |
| 756 else { | |
| 757 $self->_set_dnamut() unless $self->{'dna_mut'}; | |
| 758 return $self->{'dna_mut'}; | |
| 759 } | |
| 760 } | |
| 761 | |
| 762 sub _set_dnamut { | |
| 763 my $self = shift; | |
| 764 | |
| 765 return undef unless $self->{'dna_ori'} && $self->each_Variant; | |
| 766 | |
| 767 $self->{'dna_mut'} = $self->{'dna_ori'}; | |
| 768 foreach ($self->each_Variant) { | |
| 769 next unless $_->isa('Bio::Variation::DNAMutation'); | |
| 770 next unless $_->isMutation; | |
| 771 | |
| 772 my ($s, $la, $le); | |
| 773 #lies the mutation less than 25 bases after the start of sequence? | |
| 774 if ($_->start < 25) { | |
| 775 $s = 0; $la = $_->start - 1; | |
| 776 } else { | |
| 777 $s = $_->start - 25; $la = 25; | |
| 778 } | |
| 779 | |
| 780 #is the mutation an insertion? | |
| 781 $_->end($_->start) unless $_->allele_ori->seq; | |
| 782 | |
| 783 #does the mutation end greater than 25 bases before the end of | |
| 784 #sequence? | |
| 785 if (($_->end + 25) > length($self->{'dna_mut'})) { | |
| 786 $le = length($self->{'dna_mut'}) - $_->end; | |
| 787 } else { | |
| 788 $le = 25; | |
| 789 } | |
| 790 | |
| 791 $_->dnStreamSeq(substr($self->{'dna_mut'}, $s, $la)); | |
| 792 $_->upStreamSeq(substr($self->{'dna_mut'}, $_->end, $le)); | |
| 793 | |
| 794 my $s_ori = $_->dnStreamSeq . $_->allele_ori->seq . $_->upStreamSeq; | |
| 795 my $s_mut = $_->dnStreamSeq . $_->allele_mut->seq . $_->upStreamSeq; | |
| 796 | |
| 797 (my $str = $self->{'dna_mut'}) =~ s/$s_ori/$s_mut/; | |
| 798 $self->{'dna_mut'} = $str; | |
| 799 } | |
| 800 } | |
| 801 | |
| 802 | |
| 803 =head2 rna_ori | |
| 804 | |
| 805 Title : rna_ori | |
| 806 Usage : $obj->rna_ori('atgctgctgctgct'); $rna_ori = $obj->rna_ori(); | |
| 807 Function: | |
| 808 | |
| 809 Sets or returns the original RNA sequence of the seqDiff. | |
| 810 | |
| 811 Example : | |
| 812 Returns : value of rna_ori, a scalar | |
| 813 Args : newvalue (optional) | |
| 814 | |
| 815 =cut | |
| 816 | |
| 817 | |
| 818 sub rna_ori { | |
| 819 my ($self,$value) = @_; | |
| 820 if (defined $value) { | |
| 821 $self->{'rna_ori'} = $value; | |
| 822 } | |
| 823 else { | |
| 824 return $self->{'rna_ori'}; | |
| 825 } | |
| 826 } | |
| 827 | |
| 828 | |
| 829 =head2 rna_mut | |
| 830 | |
| 831 Title : rna_mut | |
| 832 Usage : $obj->rna_mut('atgctggtgctgct'); $rna_mut = $obj->rna_mut(); | |
| 833 Function: | |
| 834 | |
| 835 Sets or returns the mutated RNA sequence of the seqDiff. | |
| 836 | |
| 837 Example : | |
| 838 Returns : value of rna_mut, a scalar | |
| 839 Args : newvalue (optional) | |
| 840 | |
| 841 =cut | |
| 842 | |
| 843 | |
| 844 sub rna_mut { | |
| 845 my ($self,$value) = @_; | |
| 846 if (defined $value) { | |
| 847 $self->{'rna_mut'} = $value; | |
| 848 } | |
| 849 else { | |
| 850 return $self->{'rna_mut'}; | |
| 851 } | |
| 852 } | |
| 853 | |
| 854 | |
| 855 =head2 aa_ori | |
| 856 | |
| 857 Title : aa_ori | |
| 858 Usage : $obj->aa_ori('MAGVLL*'); $aa_ori = $obj->aa_ori(); | |
| 859 Function: | |
| 860 | |
| 861 Sets or returns the original protein sequence of the seqDiff. | |
| 862 | |
| 863 Example : | |
| 864 Returns : value of aa_ori, a scalar | |
| 865 Args : newvalue (optional) | |
| 866 | |
| 867 =cut | |
| 868 | |
| 869 | |
| 870 sub aa_ori { | |
| 871 my ($self,$value) = @_; | |
| 872 if (defined $value) { | |
| 873 $self->{'aa_ori'} = $value; | |
| 874 } | |
| 875 else { | |
| 876 return $self->{'aa_ori'}; | |
| 877 } | |
| 878 } | |
| 879 | |
| 880 | |
| 881 =head2 aa_mut | |
| 882 | |
| 883 Title : aa_mut | |
| 884 Usage : $obj->aa_mut('MA*'); $aa_mut = $obj->aa_mut(); | |
| 885 Function: | |
| 886 | |
| 887 Sets or returns the mutated protein sequence of the seqDiff. | |
| 888 | |
| 889 Example : | |
| 890 Returns : value of aa_mut, a scalar | |
| 891 Args : newvalue (optional) | |
| 892 | |
| 893 =cut | |
| 894 | |
| 895 | |
| 896 sub aa_mut { | |
| 897 my ($self,$value) = @_; | |
| 898 if (defined $value) { | |
| 899 $self->{'aa_mut'} = $value; | |
| 900 } | |
| 901 else { | |
| 902 return $self->{'aa_mut'}; | |
| 903 } | |
| 904 } | |
| 905 | |
| 906 | |
| 907 =head2 seqobj | |
| 908 | |
| 909 Title : seqobj | |
| 910 Usage : $dnaobj = $obj->seqobj('dna_mut'); | |
| 911 Function: | |
| 912 | |
| 913 Returns the any original or mutated sequences as a | |
| 914 Bio::PrimarySeq object. | |
| 915 | |
| 916 Example : | |
| 917 Returns : Bio::PrimarySeq object for the requested sequence | |
| 918 Args : string, method name for the sequence requested | |
| 919 | |
| 920 See L<Bio::PrimarySeq> for more information. | |
| 921 | |
| 922 =cut | |
| 923 | |
| 924 sub seqobj { | |
| 925 my ($self,$value) = @_; | |
| 926 my $out; | |
| 927 my %valid_obj = | |
| 928 map {$_, 1} qw(dna_ori rna_ori aa_ori dna_mut rna_mut aa_mut); | |
| 929 $valid_obj{$value} || | |
| 930 $self->throw("Sequence type '$value' is not a valid type (". | |
| 931 join(',', map "'$_'", sort keys %valid_obj) .") lowercase"); | |
| 932 my ($alphabet) = $value =~ /([^_]+)/; | |
| 933 my $id = $self->id; | |
| 934 $id = $self->rna_id if $self->rna_id; | |
| 935 $alphabet = 'protein' if $alphabet eq 'aa'; | |
| 936 $out = Bio::PrimarySeq->new | |
| 937 ( '-seq' => $self->{$value}, | |
| 938 '-display_id' => $id, | |
| 939 '-accession_number' => $self->id, | |
| 940 '-alphabet' => $alphabet | |
| 941 ) if $self->{$value} ; | |
| 942 return $out; | |
| 943 } | |
| 944 | |
| 945 =head2 alignment | |
| 946 | |
| 947 Title : alignment | |
| 948 Usage : $obj->alignment | |
| 949 Function: | |
| 950 | |
| 951 Returns a pretty RNA/AA sequence alignment from linked | |
| 952 objects. Under construction: Only simple coding region | |
| 953 point mutations work. | |
| 954 | |
| 955 Example : | |
| 956 Returns : | |
| 957 Args : none | |
| 958 | |
| 959 =cut | |
| 960 | |
| 961 | |
| 962 sub alignment { | |
| 963 my $self = shift; | |
| 964 my (@entry, $text); | |
| 965 | |
| 966 my $maxflanklen = 12; | |
| 967 | |
| 968 foreach my $mut ($self->each_Variant) { | |
| 969 if( $mut->isa('Bio::Variation::RNAChange') ) { | |
| 970 | |
| 971 my $upflank = $mut->upStreamSeq; | |
| 972 my $dnflank = $mut->dnStreamSeq; | |
| 973 my $cposd = $mut->codon_pos; | |
| 974 my $rori = $mut->allele_ori->seq; | |
| 975 my $rmut = $mut->allele_mut->seq; | |
| 976 my $rseqoriu = ''; | |
| 977 my $rseqmutu = ''; | |
| 978 my $rseqorid = ''; | |
| 979 my $rseqmutd = ''; | |
| 980 my $aaseqmutu = ''; | |
| 981 my (@rseqori, @rseqmut ); | |
| 982 | |
| 983 # point | |
| 984 if ($mut->DNAMutation->label =~ /point/) { | |
| 985 if ($cposd == 1 ) { | |
| 986 my $nt2d = substr($dnflank, 0, 2); | |
| 987 push @rseqori, $rori. $nt2d; | |
| 988 push @rseqmut, uc ($rmut). $nt2d; | |
| 989 $dnflank = substr($dnflank, 2); | |
| 990 } | |
| 991 elsif ($cposd == 2) { | |
| 992 my $ntu = chop $upflank; | |
| 993 my $ntd = substr($dnflank, 0, 1); | |
| 994 push @rseqori, $ntu. $rori. $ntd; | |
| 995 push @rseqmut, $ntu. uc ($rmut). $ntd; | |
| 996 $dnflank = substr($dnflank, 1); | |
| 997 } | |
| 998 elsif ($cposd == 3) { | |
| 999 my $ntu1 = chop $upflank; | |
| 1000 my $ntu2 = chop $upflank; | |
| 1001 push (@rseqori, $ntu2. $ntu1. $rori); | |
| 1002 push (@rseqmut, $ntu2. $ntu1. uc $rmut); | |
| 1003 } | |
| 1004 } | |
| 1005 #deletion | |
| 1006 elsif ($mut->DNAMutation->label =~ /deletion/) { | |
| 1007 if ($cposd == 2 ) { | |
| 1008 $rseqorid = chop $upflank; | |
| 1009 $rseqmutd = $rseqorid; | |
| 1010 } | |
| 1011 for (my $i=1; $i<=$mut->length; $i++) { | |
| 1012 my $ntd .= substr($mut->allele_ori, $i-1, 1); | |
| 1013 $rseqorid .= $ntd; | |
| 1014 if (length($rseqorid) == 3 ) { | |
| 1015 push (@rseqori, $rseqorid); | |
| 1016 push (@rseqmut, " "); | |
| 1017 $rseqorid = ''; | |
| 1018 } | |
| 1019 } | |
| 1020 | |
| 1021 if ($rseqorid) { | |
| 1022 $rseqorid .= substr($dnflank, 0, 3-$rseqorid); | |
| 1023 push (@rseqori, $rseqorid); | |
| 1024 push (@rseqmut, " "); | |
| 1025 $dnflank = substr($dnflank,3-$rseqorid); | |
| 1026 } | |
| 1027 } | |
| 1028 $upflank = reverse $upflank; | |
| 1029 # loop throught the flanks | |
| 1030 for (my $i=1; $i<=length($dnflank); $i++) { | |
| 1031 | |
| 1032 last if $i > $maxflanklen; | |
| 1033 | |
| 1034 my $ntd .= substr($dnflank, $i-1, 1); | |
| 1035 my $ntu .= substr($upflank, $i-1, 1); | |
| 1036 | |
| 1037 $rseqmutd .= $ntd; | |
| 1038 $rseqorid .= $ntd; | |
| 1039 $rseqmutu = $ntu. $rseqmutu; | |
| 1040 $rseqoriu = $ntu. $rseqoriu; | |
| 1041 | |
| 1042 if (length($rseqorid) == 3 and length($rseqorid) == 3) { | |
| 1043 push (@rseqori, $rseqorid); | |
| 1044 push (@rseqmut, $rseqmutd); | |
| 1045 $rseqorid = $rseqmutd =''; | |
| 1046 } | |
| 1047 if (length($rseqoriu) == 3 and length($rseqoriu) == 3) { | |
| 1048 unshift (@rseqori, $rseqoriu); | |
| 1049 unshift (@rseqmut, $rseqmutu); | |
| 1050 $rseqoriu = $rseqmutu =''; | |
| 1051 } | |
| 1052 | |
| 1053 #print "|i=$i, $cposd, $rseqmutd, $rseqorid\n"; | |
| 1054 #print "|i=$i, $cposu, $rseqmutu, $rseqoriu\n\n"; | |
| 1055 | |
| 1056 } | |
| 1057 | |
| 1058 push (@rseqori, $rseqorid); | |
| 1059 unshift (@rseqori, $rseqoriu); | |
| 1060 push (@rseqmut, $rseqmutd); | |
| 1061 unshift (@rseqmut, $rseqmutu); | |
| 1062 | |
| 1063 return unless $mut->AAChange; | |
| 1064 #translate | |
| 1065 my $tr = new Bio::Tools::CodonTable ('-id' => $mut->codon_table); | |
| 1066 my $apos = $mut->AAChange->start; | |
| 1067 my $aposmax = CORE::length($self->aa_ori); #terminator codon no | |
| 1068 my $rseqori; | |
| 1069 my $rseqmut; | |
| 1070 my $aaseqori; | |
| 1071 my $aaseqmut = ""; | |
| 1072 for (my $i = 0; $i <= $#rseqori; $i++) { | |
| 1073 my $a = ''; | |
| 1074 | |
| 1075 $a = $tr->translate($rseqori[$i]) if length($rseqori[$i]) == 3; | |
| 1076 | |
| 1077 if (length($a) != 1 or | |
| 1078 $apos - ( $maxflanklen/2 -1) + $i < 1 or | |
| 1079 $apos - ( $maxflanklen/2 -1) + $i > $aposmax ) { | |
| 1080 $aaseqori .= " "; | |
| 1081 } else { | |
| 1082 $aaseqori .= " ". $a. " "; | |
| 1083 } | |
| 1084 my $b = ''; | |
| 1085 if (length($rseqmut[$i]) == 3) { | |
| 1086 if ($rseqmut[$i] eq ' ') { | |
| 1087 $b = "_"; | |
| 1088 } else { | |
| 1089 $b = $tr->translate($rseqmut[$i]); | |
| 1090 } | |
| 1091 } | |
| 1092 if (( $b ne $a and | |
| 1093 length($b) == 1 and | |
| 1094 $apos - ( $maxflanklen/2 -1) + $i >= 1 ) or | |
| 1095 ( $apos - ( $maxflanklen/2 -1) + $i >= $aposmax and | |
| 1096 $mut->label =~ 'termination') | |
| 1097 ) { | |
| 1098 $aaseqmut .= " ". $b. " "; | |
| 1099 } else { | |
| 1100 $aaseqmut .= " "; | |
| 1101 } | |
| 1102 | |
| 1103 if ($i == 0 and length($rseqori[$i]) != 3) { | |
| 1104 my $l = 3 - length($rseqori[$i]); | |
| 1105 $rseqori[$i] = (" " x $l). $rseqori[$i]; | |
| 1106 $rseqmut[$i] = (" " x $l). $rseqmut[$i]; | |
| 1107 } | |
| 1108 $rseqori .= $rseqori[$i]. " " if $rseqori[$i] ne ''; | |
| 1109 $rseqmut .= $rseqmut[$i]. " " if $rseqmut[$i] ne ''; | |
| 1110 } | |
| 1111 | |
| 1112 # collect the results | |
| 1113 push (@entry, | |
| 1114 "\n" | |
| 1115 ); | |
| 1116 $text = " ". $aaseqmut; | |
| 1117 push (@entry, | |
| 1118 $text | |
| 1119 ); | |
| 1120 $text = "Variant : ". $rseqmut; | |
| 1121 push (@entry, | |
| 1122 $text | |
| 1123 ); | |
| 1124 $text = "Reference: ". $rseqori; | |
| 1125 push (@entry, | |
| 1126 $text | |
| 1127 ); | |
| 1128 $text = " ". $aaseqori; | |
| 1129 push (@entry, | |
| 1130 $text | |
| 1131 ); | |
| 1132 push (@entry, | |
| 1133 "\n" | |
| 1134 ); | |
| 1135 } | |
| 1136 | |
| 1137 } | |
| 1138 | |
| 1139 my $res; | |
| 1140 foreach my $line (@entry) { | |
| 1141 $res .= "$line\n"; | |
| 1142 } | |
| 1143 return $res; | |
| 1144 } | |
| 1145 | |
| 1146 1; |
