Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/Variation/RNAChange.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Variation/RNAChange.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,617 @@ +# $Id: RNAChange.pm,v 1.10 2002/10/22 07:38:49 lapp Exp $ +# +# BioPerl module for Bio::Variation::RNAChange +# +# Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> +# +# Copyright Heikki Lehvaslaiho +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Variation::RNAChange - Sequence change class for RNA level + +=head1 SYNOPSIS + + $rnachange = Bio::Variation::RNAChange->new + ('-start' => $start, + '-end' => $end, + '-length' => $len, + '-codon_pos' => $cp, + '-upStreamSeq' => $upflank, + '-dnStreamSeq' => $dnflank, + '-proof' => $proof, + '-isMutation' => 1, + '-mut_number' => $mut_number + ); + $a1 = Bio::Variation::Allele->new; + $a1->seq('a'); + $rnachange->allele_ori($a1); + my $a2 = Bio::Variation::Allele->new; + $a2->seq('t'); + $rnachange->add_Allele($a2); + $rnachange->allele_mut($a2); + + print "The codon change is ", $rnachange->codon_ori, + ">", $rnachange->codon_mut, "\n"; + + # add it to a SeqDiff container object + $seqdiff->add_Variant($rnachange); + + # and create links to and from DNA level mutation objects + $rnachange->DNAMutation($dnamut); + $dnamut->RNAChange($rnachange); + +=head1 DESCRIPTION + +The instantiable class Bio::Variation::DNAMutation describes basic +sequence changes at RNA molecule level. It uses methods defined in +superclass Bio::Variation::VariantI. See L<Bio::Variation::VariantI> +for details. + +You are normally expected to create a corresponding +Bio::Variation::DNAMutation object even if mutation is defined at +RNA level. The numbering follows then cDNA numbering. Link the +DNAMutation object to the RNAChange object using the method +DNAMutation(). If the variation described by a RNAChange object is +translated, link the corresponding Bio::Variation::AAChange object +to it using method AAChange(). See L<Bio::Variation::DNAMutation> and +L<Bio::Variation::AAChange> for more information. + + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to the +Bioperl mailing lists Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bio.perl.org/MailList.html - About the mailing lists + +=head2 Reporting Bugs + +report bugs to the Bioperl bug tracking system to help us keep track + the bugs and their resolution. Bug reports can be submitted via + email or the web: + + bioperl-bugs@bio.perl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Heikki Lehvaslaiho + +Email: heikki@ebi.ac.uk +Address: + + EMBL Outstation, European Bioinformatics Institute + Wellcome Trust Genome Campus, Hinxton + Cambs. CB10 1SD, United Kingdom + +=head1 APPENDIX + +The rest of the documentation details each of the object +methods. Internal methods are usually preceded with a _ + +=cut + + +# Let the code begin... + + +package Bio::Variation::RNAChange; +use vars qw(@ISA); +use strict; + +# Object preamble - inheritance +my $VERSION=1.0; +use Bio::Variation::VariantI; +use Bio::Tools::CodonTable; + +@ISA = qw( Bio::Variation::VariantI ); + +sub new { + my($class,@args) = @_; + my $self = $class->SUPER::new(@args); + + my ($start, $end, $length, $strand, $primary, $source, + $frame, $score, $gff_string, + $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq, + $label, $status, $proof, $region, $region_value, $region_dist, $numbering, + $mut_number, $isMutation, + $codon_ori, $codon_mut, $codon_pos, $codon_table, $cds_end) = + $self->_rearrange([qw(START + END + LENGTH + STRAND + PRIMARY + SOURCE + FRAME + SCORE + GFF_STRING + ALLELE_ORI + ALLELE_MUT + UPSTREAMSEQ + DNSTREAMSEQ + LABEL + STATUS + PROOF + REGION + REGION_VALUE + REGION_DIST + NUMBERING + MUT_NUMBER + ISMUTATION + CODON_ORI + CODON_MUT + CODON_POS + TRANSLATION_TABLE + CDS_END + )],@args); + + $self->primary_tag("Variation"); + + $self->{ 'alleles' } = []; + + $start && $self->start($start); + $end && $self->end($end); + $length && $self->length($length); + $strand && $self->strand($strand); + $primary && $self->primary_tag($primary); + $source && $self->source_tag($source); + $frame && $self->frame($frame); + $score && $self->score($score); + $gff_string && $self->_from_gff_string($gff_string); + + $allele_ori && $self->allele_ori($allele_ori); + $allele_mut && $self->allele_mut($allele_mut); + $upstreamseq && $self->upStreamSeq($upstreamseq); + $dnstreamseq && $self->dnStreamSeq($dnstreamseq); + + $label && $self->label($label); + $status && $self->status($status); + $proof && $self->proof($proof); + $region && $self->region($region); + $region_value && $self->region_value($region_value); + $region_dist && $self->region_dist($region_dist); + $numbering && $self->numbering($numbering); + $mut_number && $self->mut_number($mut_number); + $isMutation && $self->isMutation($isMutation); + + $codon_ori && $self->codon_ori($codon_ori); + $codon_mut && $self->codon_mut($codon_mut); + $codon_pos && $self->codon_pos($codon_pos); + $codon_table && $self->codon_table($codon_table); + $cds_end && $self->cds_end($cds_end); + return $self; # success - we hope! +} + + +=head2 codon_ori + + Title : codon_ori + Usage : $obj->codon_ori(); + Function: + + Sets and returns codon_ori triplet. If value is not set, + creates the codon triplet from the codon position and + flanking sequences. The string has to be three characters + long. The character content is not checked. + + Example : + Returns : string + Args : string + +=cut + +sub codon_ori { + my ($self,$value) = @_; + if (defined $value) { + if (length $value != 3) { + $self->warn("Codon string \"$value\" is not three characters long"); + } + $self->{'codon_ori'} = $value; + } + elsif (! $self->{'codon_ori'}) { + my $codon_ori = ''; + + if ($self->region eq 'coding' && $self->start && $self->start >= 1) { + + $self->warn('Codon position is not defined') + if not defined $self->codon_pos; + $self->warn('Upstream flanking sequence is not defined') + if not defined $self->upStreamSeq; + $self->warn('Downstream flanking sequence is not defined') + if not defined $self->dnStreamSeq; + + my $cpos = $self->codon_pos; + $codon_ori = substr($self->upStreamSeq, -$cpos +1 , $cpos-1); + $codon_ori .= substr($self->allele_ori->seq, 0, 4-$cpos) + if $self->allele_ori and $self->allele_ori->seq; + $codon_ori .= substr($self->dnStreamSeq, 0, 3-length($codon_ori)); + } + $self->{'codon_ori'} = lc $codon_ori; + } + return $self->{'codon_ori'}; +} + + +=head2 codon_mut + + Title : codon_mut + Usage : $obj->codon_mut(); + Function: + + Sets and returns codon_mut triplet. If value is not + set, creates the codon triplet from the codon position and + flanking sequences. Return undef for other than point mutations. + + Example : + Returns : string + Args : string + +=cut + + +sub codon_mut { + my ($self,$value) = @_; + if (defined $value) { + if (length $value != 3 ) { + $self->warn("Codon string \"$value\" is not three characters long"); + } + $self->{'codon_mut'} = $value; + } + else { + my $codon_mut = ''; + if ($self->allele_ori->seq and $self->allele_mut->seq and + CORE::length($self->allele_ori->seq) == 1 and + CORE::length($self->allele_mut->seq) == 1 and + $self->region eq 'coding' and $self->start >= 1) { + + $self->warn('Codon position is not defined') + if not defined $self->codon_pos; + $self->warn('Upstream flanking sequnce is not defined') + if not defined $self->upStreamSeq; + $self->warn('Downstream flanking sequnce is not defined') + if not defined $self->dnStreamSeq; + $self->throw('Mutated allele is not defined') + if not defined $self->allele_mut; + + my $cpos = $self->codon_pos; + $codon_mut = substr($self->upStreamSeq, -$cpos +1 , $cpos-1); + $codon_mut .= substr($self->allele_mut->seq, 0, 4-$cpos) + if $self->allele_mut and $self->allele_mut->seq; + $codon_mut .= substr($self->dnStreamSeq, 0, 3-length($codon_mut)); + + $self->{'codon_mut'} = lc $codon_mut; + } + } + return $self->{'codon_mut'}; +} + + +=head2 codon_pos + + Title : codon_pos + Usage : $obj->codon_pos(); + Function: + + Sets and returns the position of the mutation start in the + codon. If value is not set, returns false. + + Example : + Returns : 1,2,3 + Args : none if get, the new value if set + +=cut + + +sub codon_pos { + my ($self,$value) = @_; + if( defined $value) { + if ( $value !~ /[123]/ ) { + $self->throw("'$value' is not a valid codon position"); + } + $self->{'codon_pos'} = $value; + } + return $self->{'codon_pos'}; +} + + +=head2 codon_table + + Title : codon_table + Usage : $obj->codon_table(); + Function: + + Sets and returns the codon table id of the RNA + If value is not set, returns 1, 'universal' code, as the default. + + Example : + Returns : integer + Args : none if get, the new value if set + +=cut + + +sub codon_table { + my ($self,$value) = @_; + if( defined $value) { + if ( not $value =~ /^\d$/ ) { + $self->throw("'$value' is not a valid codon table ID\n". + "Has to be a positive integer. Defaulting to 1\n"); + } else { + $self->{'codon_table'} = $value; + } + } + if( ! exists $self->{'codon_table'} ) { + return 1; + } else { + return $self->{'codon_table'}; + } +} + + +=head2 DNAMutation + + Title : DNAMutation + Usage : $mutobj = $obj->DNAMutation; + : $mutobj = $obj->DNAMutation($objref); + Function: Returns or sets the link-reference to a mutation/change object. + If there is no link, it will return undef + Returns : an obj_ref or undef + +=cut + + +sub DNAMutation { + my ($self,$value) = @_; + if (defined $value) { + if( ! $value->isa('Bio::Variation::DNAMutation') ) { + $self->throw("Is not a Bio::Variation::DNAMutation object but a [$self]"); + return (undef); + } + else { + $self->{'DNAMutation'} = $value; + } + } + unless (exists $self->{'DNAMutation'}) { + return (undef); + } else { + return $self->{'DNAMutation'}; + } +} + + +=head2 AAChange + + Title : AAChange + Usage : $mutobj = $obj->AAChange; + : $mutobj = $obj->AAChange($objref); + Function: Returns or sets the link-reference to a mutation/change object. + If there is no link, it will return undef + Returns : an obj_ref or undef + +=cut + +sub AAChange { + my ($self,$value) = @_; + if (defined $value) { + if( ! $value->isa('Bio::Variation::AAChange') ) { + $self->throw("Is not a Bio::Variation::AAChange object but a [$self]"); + return (undef); + } + else { + $self->{'AAChange'} = $value; + } + } + unless (exists $self->{'AAChange'}) { + return (undef); + } else { + return $self->{'AAChange'}; + } +} + + +=head2 exons_modified + + Title : exons_modified + Usage : $modified = $obj->exons_modified; + : $modified = $obj->exons_modified(1); + Function: Returns or sets information (example: a simple boolean flag) about + the modification of exons as a result of a mutation. + +=cut + +sub exons_modified { + my ($self,$value)=@_; + if (defined($value)) { + $self->{'exons_modified'}=$value; + } + return ($self->{'exons_modified'}); +} + +=head2 region + + Title : region + Usage : $obj->region(); + Function: + + Sets and returns the name of the sequence region type or + protein domain at this location. If value is not set, + returns false. + + Example : + Returns : string + Args : string + +=cut + + + +sub region { + my ($self,$value) = @_; + if( defined $value) { + $self->{'region'} = $value; + } + elsif (not defined $self->{'region'}) { + + $self->warn('Mutation start position is not defined') + if not defined $self->start and $self->verbose; + $self->warn('Mutation end position is not defined') + if not defined $self->end and $self->verbose; + $self->warn('Length of the CDS is not defined, the mutation can be beyond coding region!') + if not defined $self->cds_end and $self->verbose; + + $self->region('coding'); + if ($self->end && $self->end < 0 ){ + $self->region('5\'UTR'); + } + elsif ($self->start && $self->cds_end && $self->start > $self->cds_end ) { + $self->region('3\'UTR'); + } + } + return $self->{'region'}; +} + +=head2 cds_end + + Title : cds_end + Usage : $cds_end = $obj->get_cds_end(); + Function: + + Sets or returns the cds_end from the beginning of the DNA sequence + to the coordinate start used to describe variants. + Should be the location of the last nucleotide of the + terminator codon of the gene. + + Example : + Returns : value of cds_end, a scalar + Args : + +=cut + + + +sub cds_end { + my ($self, $value) = @_; + if (defined $value) { + $self->warn("[$value] is not a good value for sequence position") + if not $value =~ /^\d+$/ ; + $self->{'cds_end'} = $value; + } else { + $self->{'cds_end'} = $self->SeqDiff->cds_end if $self->SeqDiff; + } + return $self->{'cds_end'}; +} + + +=head2 label + + Title : label + Usage : $obj->label(); + Function: + + Sets and returns mutation event label(s). If value is not + set, or no argument is given returns false. Each + instantiable subclass of L<Bio::Variation::VariantI> needs + to implement this method. Valid values are listed in + 'Mutation event controlled vocabulary' in + http://www.ebi.ac.uk/mutations/recommendations/mutevent.html. + + Example : + Returns : string + Args : string + +=cut + +sub label { + my ($self) = @_; + my ($o, $m, $type); + $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq; + $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq; + + my $ct = Bio::Tools::CodonTable -> new ( -id => $self->codon_table ); + if ($o and $m and CORE::length($o) == 1 and CORE::length($m) == 1) { + if (defined $self->AAChange) { + if ($self->start > 0 and $self->start < 4 ) { + $type = 'initiation codon'; + } + elsif ($self->codon_ori && $ct->is_ter_codon($self->codon_ori) ) { + #AAChange->allele_ori and $self->AAChange->allele_ori->seq eq '*' ) { + $type = 'termination codon'; + } + elsif ($self->codon_mut && $ct->is_ter_codon($self->codon_mut) ) { + #elsif ($self->AAChange->allele_mut and $self->AAChange->allele_mut->seq eq "*") { + $type = 'nonsense'; + } + elsif ($o and $m and ($o eq $m or + $self->AAChange->allele_ori->seq eq + $self->AAChange->allele_mut->seq)) { + $type = 'silent'; + } else { + $type = 'missense'; + } + } else { + $type = 'unknown'; + } + } else { + my $len = 0; + $len = CORE::length($o) if $o; + $len -= CORE::length($m) if $m; + if ($len%3 == 0 ) { + $type = 'inframe'; + } else { + $type = 'frameshift'; + } + if (not $m ) { + $type .= ', '. 'deletion'; + } + elsif (not $o ) { + $type .= ', '. 'insertion'; + } + else { + $type .= ', '. 'complex'; + } + if ($self->codon_ori && $ct->is_ter_codon($self->codon_ori) ) { + $type .= ', '. 'termination codon'; + } + } + + $self->{'label'} = $type; + return $self->{'label'}; +} + + +=head2 _change_codon_pos + + Title : _change_codon_pos + Usage : $newCodonPos = _change_codon_pos($myCodonPos, 5) + Function: + + Keeps track of the codon position in a changeing sequence + + Returns : codon_pos = integer 1, 2 or 3 + Args : valid codon position + signed integer offset to a new location in sequence + +=cut + + +sub _change_codon_pos ($$) { + my ($cpos, $i) = @_; + + $cpos = ($cpos + $i%3)%3; + if ($cpos > 3 ) { + $cpos = $cpos - 3; + } + elsif ($cpos < 1 ) { + $cpos = $cpos + 3; + } + return $cpos; +} + +1;