Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/LiveSeq/SeqI.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/LiveSeq/SeqI.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,1231 @@ +# $Id: SeqI.pm,v 1.25 2002/10/22 07:38:34 lapp Exp $ +# +# bioperl module for Bio::LiveSeq::SeqI +# +# Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net> +# +# Copyright Joseph Insana +# +# You may distribute this module under the same terms as perl itself +# +# POD documentation - main docs before the code + +=head1 NAME + +Bio::LiveSeq::SeqI - Abstract sequence interface class for LiveSeq + +=head1 SYNOPSIS + + # documentation needed + +=head1 DESCRIPTION + +This class implements BioPerl PrimarySeqI interface for Live Seq objects. + +One of the main difference in LiveSequence compared to traditional +"string" sequences is that coordinate systems are flexible. Typically +gene nucleotide numbering starts from 1 at the first character of the +initiator codon (A in ATG). This means that negative positions are +possible and common! + +Secondly, the sequence manipulation methods do not return a new +sequence object but change the current object. The current status can +be written out to BioPerl sequence objects. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to one +of the Bioperl mailing lists. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bio.perl.org/MailList.html - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +the bugs and their resolution. Bug reports can be submitted via email +or the web: + + bioperl-bugs@bio.perl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Joseph A.L. Insana + +Email: Insana@ebi.ac.uk, jinsana@gmx.net + +Address: + + EMBL Outstation, European Bioinformatics Institute + Wellcome Trust Genome Campus, Hinxton + Cambs. CB10 1SD, United Kingdom + +=head1 APPENDIX + +The rest of the documentation details each of the object +methods. Internal methods are usually preceded with a _ + +Some note on the terminology/notation of method names: + label: a unique pointer to a single nucleotide + position: the position of a nucleotide according to a particular coordinate + system (e.g. counting downstream from a particular label taken as + number 1) + base: the one letter code for a nucleotide (i.e.: "a" "t" "c" "g") + + a base is the "value" that an "element" of a "chain" can assume + (see documentation on the Chain datastructure if interested) + +=cut + +#' +# Let the code begin... + +package Bio::LiveSeq::SeqI; +$VERSION=3.3; +# Version history: +# Thu Mar 16 18:11:18 GMT 2000 v.1.0 Started implementation, interface/inheritance from ChainI.pm +# Thu Mar 16 20:05:51 GMT 2000 v 1.2 implemented up to splice_out +# Fri Mar 17 05:37:37 GMT 2000 v 1.3 implemented lot of new methods and written their documentation / in sync with ChainI 1.6 and Chain 2.4 +# Fri Mar 17 17:17:24 GMT 2000 v 1.7 in sync with ChainI 1.7 +# Fri Mar 17 20:12:27 GMT 2000 v 1.8 NAMING change: index->label everywhere +# Mon Mar 20 19:19:21 GMT 2000 v 2.0 renamed from DNA to SeqI and begun +# working on methods defined with Heikki +# Tue Mar 21 01:37:52 GMT 2000 v 2.1 created strand(), seq() +# Tue Mar 21 02:43:21 GMT 2000 v 2.11 seq() prints correctly also for exons +# Wed Mar 22 19:41:45 GMT 2000 v 2.22 translate, alphabet, length, all_labels +# Thu Mar 23 21:03:42 GMT 2000 v 2.3 follows() label() position() +# Fri Mar 24 18:33:18 GMT 2000 v 2.33 rewritten position(), now works with diverse coordinate_starts +# Sat Mar 25 06:11:55 GMT 2000 v 2.4 started subseq +# Mon Mar 27 19:22:32 BST 2000 v 2.45 subseq should be ok but the thing about reverse strand has to be checked!! +# Tue Mar 28 01:53:31 BST 2000 v 2.46 changed strand behaviour in subseq +# Wed Mar 29 00:05:21 BST 2000 v 2.5 change() begun +# Wed Mar 29 02:06:20 BST 2000 v 2.53 _delete _mutate _praeinsert coded +# Wed Mar 29 02:29:01 BST 2000 v 2.531 _mutate changed to make it more general +# Wed Mar 29 03:38:21 BST 2000 v 2.54 tested and corrected change +# Wed Mar 29 16:23:39 BST 2000 v 2.55 change deals with complex now +# Fri Mar 31 18:26:54 BST 2000 v 2.56 translate_string added +# Sat Apr 1 19:02:28 BST 2000 v 2.57 labelchange() created +# Fri Apr 7 03:31:35 BST 2000 v 2.6 labelsubseq() created +# Sat Apr 8 13:01:09 BST 2000 v 2.61 obj_valid() created +# Wed Apr 12 16:23:21 BST 2000 v 2.7 _deletecheck call added in _delete +# Wed Apr 19 16:21:33 BST 2000 v 2.72 name() source() description() added +# Thu Apr 20 14:42:57 BST 2000 v 2.8 added or rewritten much pod documentation +# Thu Apr 27 16:18:55 BST 2000 v 2.82 translate now accounts for ttable info +# Thu Jun 22 20:02:39 BST 2000 v 2.9 valid() from Transcript now moved here, as the general for all objects inheriting from SeqI +# Thu Jun 22 20:17:32 BST 2000 v 2.91 _unsecure_labelsubseq() added +# Sat Jun 24 00:10:31 BST 2000 v 2.92 unsecure is an option of labelsubseq() now +# Thu Jun 29 16:38:45 BST 2000 v 3.0 labelchange() now calls itself again for the DNAobj if the label for the change is not valid for the object requested but valid for the DNAobj +# Tue Jan 30 14:16:22 EST 2001 v 3.1 delete_Obj added, to flush circular references +# Wed Mar 28 15:16:38 BST 2001 v 3.2 functions warn, verbose, throw, stack_trace, stack_trace_dump added +# Wed Apr 4 13:34:29 BST 2001 v 3.3 moved from carp to warn + +use strict; +use vars qw($VERSION @ISA); +use Bio::LiveSeq::ChainI 1.9; # to inherit from it +use Bio::Tools::CodonTable; # for the translate() function +use Bio::PrimarySeqI; + +@ISA=qw(Bio::Root::Root Bio::LiveSeq::ChainI Bio::PrimarySeqI ); # inherit from ChainI + +=head2 seq + + Title : seq + Usage : $string = $obj->seq() + Function: Returns the complete sequence of an object as a string of letters. + Suggested cases are upper case for proteins and lower case for + DNA sequence (IUPAC standard), + Returns : a string + + +=cut + +sub seq { + my $self = shift; + my ($start,$end) = ($self->start(),$self->end()); + if ($self->strand() == 1) { + return $self->{'seq'}->down_chain2string($start,undef,$end); + } else { # reverse strand + my $str = $self->{'seq'}->up_chain2string($start,undef,$end); + $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; + return $str; + } +} + +=head2 all_labels + + Title : all_labels + Usage : @labels = $obj->all_labels() + Function: all the labels of every nucleotide an object is composed of + Returns : an array of labels + Args : none + +=cut + +sub all_labels { + my $self = shift; + my ($start,$end) = ($self->start(),$self->end()); + my $labels; + if ($self->strand() == 1) { + $labels=$self->{'seq'}->down_labels($start,$end); + } else { + $labels=$self->{'seq'}->up_labels($start,$end); + } + return (@{$labels}); +} + +=head2 labelsubseq + + Title : labelsubseq + Usage : $dna->labelsubseq(); + : $dna->labelsubseq($startlabel); + : $dna->labelsubseq($startlabel,$length); + : $dna->labelsubseq($startlabel,undef,$endlabel); + e.g. : $dna->labelsubseq(4,undef,8); + Function: prints the sequence as string. The difference between labelsubseq + and normal subseq is that it uses /labels/ as arguments, instead + than positions. This allows for faster and more efficient lookup, + skipping the (usually) lengthy conversion of positions into labels. + This is expecially useful for manipulating with high power + LiveSeq objects, knowing the labels and exploiting their + usefulness. + Returns : a string + Errorcode -1 + Args : without arguments it returns the entire sequence + with a startlabel it returns the sequence downstream that label + if a length is specified, it returns only that number of bases + if an endlabel is specified, it overrides the length argument + and prints instead up to that label (included) + Defaults: $startlabel defaults to the beginning of the entire sequence + $endlabel defaults to the end of the entire sequence + +=cut + +# NOTE: unsecuremode is to be used /ONLY/ if sure of the start and end labels, expecially that they follow each other in the correct order!!!! + +sub labelsubseq { + my ($self,$start,$length,$end,$unsecuremode) = @_; + if (defined $unsecuremode && $unsecuremode eq "unsecuremoderequested") + { # to skip security checks (faster) + unless ($start) { + $start=$self->start; + } + if ($end) { + if ($end == $start) { + $length=1; + undef $end; + } else { + undef $length; + } + } else { + unless ($length) { + $end=$self->end; + } + } + } else { + if ($start) { + unless ($self->{'seq'}->valid($start)) { + $self->warn("Start label not valid"); return (-1); + } + } + if ($end) { + if ($end == $start) { + $length=1; + undef $end; + } else { + unless ($self->{'seq'}->valid($end)) { + $self->warn("End label not valid"); return (-1); + } + unless ($self->follows($start,$end) == 1) { + $self->warn("End label does not follow Start label!"); return (-1); + } + undef $length; + } + } + } + if ($self->strand() == 1) { + return $self->{'seq'}->down_chain2string($start,$length,$end); + } else { # reverse strand + my $str = $self->{'seq'}->up_chain2string($start,$length,$end); + $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; + return $str; + } +} + +=head2 subseq + + Title : subseq + Usage : $substring = $obj->subseq(10,40); + : $substring = $obj->subseq(10,undef,4); + Function: returns the subseq from start to end, where the first base + is 1 and the number is inclusive, ie 1-2 are the first two + bases of the sequence + + Start cannot be larger than end but can be equal. + + Allows for negative numbers $obj->subseq(-10,-1). By + definition, there is no 0! + -5 -1 1 5 + gctagcgcccaac atggctcgctg + + This allows to retrieve sequences upstream from given position. + + The precedence is from left to right: if END is given LENGTH is + ignored. + + Examples: $obj->subseq(-10,undef,10) returns 10 elements before position 1 + $obj->subseq(4,8) returns elements from the 4th to the 8th, inclusive + + Returns : a string + Errorcode: -1 + Args : start, integer, defaults to start of the sequence + end, integer, '' or undef, defaults to end of the sequence + length, integer, '' or undef + an optional strand (1 or -1) 4th argument + if strand argument is not given, it will default to the object + argment. This argument is useful when a call is issued from a child + of a parent object containing the subseq method + +=cut + +#' +# check the fact about reverse strand! +# is it feasible? Is it correct? Should we do it? How about exons? Does it +# work when you ask subseq of an exon? +# eliminated now (Mon night) +sub subseq { + ##my ($self,$pos1,$pos2,$length,$strand) = @_; + my ($self,$pos1,$pos2,$length,$strand) = @_; + ##unless (defined ($strand)) { # if optional [strand] argument not given + ## $strand=$self->strand; + ##} + $strand=$self->strand; + my ($str,$startlabel,$endlabel); + if (defined ($length)) { + if ($length < 1) { + $self->warn("No sense asking for a subseq of length < 1"); + return (-1); + } + } + unless (defined ($pos1)) { + #print "\n##### DEBUG pos1 not defined\n"; + $startlabel=$self->start; + } else { + if ($pos1 == 0) { # if position = 0 complain + $self->warn("Position cannot be 0!"); return (-1); + } + ##if ($strand == 1) { # CHECK THIS! + if ((defined ($pos2))&&($pos1>$pos2)) { + $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1); + } + ##} else { # CHECK THIS! + ## if ((defined ($pos2))&&($pos1<$pos2)) { +## $self->warn("1st position($pos1) cannot be < 2nd position($pos2) on reverse strand!)"; return (-1); + ## } + ##} + $startlabel=$self->label($pos1); + if ($startlabel < 1) { + $self->warn("position $pos1 not valid as start of subseq!"); return (-1); + } + } + unless (defined ($pos2)) { + #print "\n##### pos2 not defined\n"; + unless (defined ($length)) { + $endlabel=$self->end; + } + } else { + if ($pos2 == 0) { # if position = 0 complain + $self->warn("Position cannot be 0!"); return (-1); + } + undef $length; + ##if ($strand == 1) { # CHECK THIS! + if ((defined ($pos1))&&($pos1>$pos2)) { + $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1); + } + ##} else { # CHECK THIS! + ## if ((defined ($pos1))&&($pos1<$pos2)) { +## $self->warn("1st position($pos1) cannot be < 2nd position($pos2) on reverse strand!"); return (-1); + ## } + ##} + $endlabel=$self->label($pos2); + if ($endlabel < 1) { + $self->warn("position $pos2 not valid as end of subseq!"); return (-1); + } + } + #print "\n ####DEBUG: start $startlabel end $endlabel length $length strand $strand\n"; + + if ($strand == 1) { + $str = $self->{'seq'}->down_chain2string($startlabel,$length,$endlabel); + } else { # reverse strand + $str = $self->{'seq'}->up_chain2string($startlabel,$length,$endlabel); + $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; + } + return $str; +} + +=head2 length + + Title : length + Usage : $seq->length(); + Function: returns the number of nucleotides (or the number of aminoacids) + in the entire sequence + Returns : an integer + Errorcode -1 + Args : none + +=cut + +sub length { + my $self=shift; + my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand()); + if ($strand == 1) { + return $self->{'seq'}->down_subchain_length($start,$end); + } else { + return $self->{'seq'}->up_subchain_length($start,$end); + } +} + +=head2 display_id + + Title : display_id + Usage : $id_string = $obj->display_id(); + Function: returns the display id, alias the common name of the object + + The semantics of this is that it is the most likely string + to be used as an identifier of the sequence, and likely to + have "human" readability. The id is equivalent to the ID + field of the GenBank/EMBL databanks and the id field of the + Swissprot/sptrembl database. In fasta format, the >(\S+) is + presumed to be the id, though some people overload the id + to embed other information. + + See also: accession_number + Returns : a string + Args : none + +=cut + +sub display_id { + my ($self,$value) = @_; + if(defined $value) { + $self->{'display_id'} = $value; + } + return $self->{'display_id'}; +} + + +=head2 accession_number + + Title : accession_number + Usage : $unique_biological_key = $obj->accession_number; + Function: Returns the unique biological id for a sequence, commonly + called the accession_number. + Notice that primary_id() provides the unique id for the + implemetation, allowing multiple objects to have the same accession + number in a particular implementation. + + For objects with no accession_number this method returns "unknown". + Returns : a string + Args : none + +=cut + +sub accession_number { + my ($self,$value) = @_; + if (defined $value) { + $self->{'accession_number'} = $value; + } + unless (exists $self->{'accession_number'}) { + return "unknown"; + } else { + return $self->{'accession_number'}; + } +} + +=head2 primary_id + + Title : primary_id + Usage : $unique_implementation_key = $obj->primary_id; + Function: Returns the unique id for this object in this + implementation. This allows implementations to manage their own + object ids in a way the implementation can control. Clients can + expect one id to map to one object. + + For sequences with no primary_id, this method returns + a stringified memory location. + + Returns : A string + Args : None + +=cut + + +sub primary_id { + my ($self,$value) = @_; + if(defined $value) { + $self->{'primary_id'} = $value; + } + unless (exists $self->{'primary_id'}) { + return "$self"; + } else { + return $self->{'primary_id'}; + } +} + +=head2 change + + Title : change + Usage : $substring = $obj->change('AA', 10); + Function: changes, modifies, mutates the LiveSequence + Examples: + $obj->change('', 10); delete nucleotide #10 + $obj->change('', 10, 2); delete two nucleotides starting from #10 + $obj->change('G', 10); change nuc #10 to 'G' + $obj->change('GA', 10, 4); replace #10 and 3 following with 'GA' + $obj->change('GA', 10, 2)); is same as $obj->change('GA', 10); + $obj->change('GA', 10, 0 ); insert 'GA' before nucleotide at #10 + $obj->change('GA', 10, 1); GA inserted before #10, #10 deleted + $obj->change('GATC', 10, 2); GATC inserted before #10, #10 deleted + $obj->change('GATC', 10, 6); GATC inserted before #10, #10-#15 deleted + + + Returns : a string of deleted bases (if any) or 1 (everything OK) + Errorcode: -1 + Args : seq, string, or '' ('' = undef = 0 = deletion) + start, integer + length, integer (optional) + +=cut + +sub change { + &positionchange; +} + +=head2 positionchange + + Title : positionchange + Function: Exactly like change. I.e. change() defaults to positionchange() + +=cut + +sub positionchange { + my ($self,$newseq,$position,$length)=@_; + unless ($position) { + $self->warn("Position not given or position 0"); + return (-1); + } + my $label=$self->label($position); + unless ($label > 0) { # label not found or error + $self->warn("No valid label found at that position!"); + return (-1); + } + return ($self->labelchange($newseq,$label,$length)); +} + +=head2 labelchange + + Title : labelchange + Function: Exactly like change but uses a /label/ instead than a position + as second argument. This allows for multiple changes in a LiveSeq + without the burden of recomputing positions. I.e. for a multiple + change in two different points of the LiveSeq, the approach would + be the following: fetch the correct labels out of the two different + positions (method: label($position)) and then use the labelchange() + method to modify the sequence using those labels instead than + relying on the positions (that would have modified after the + first change). + +=cut + +sub labelchange { + my ($self,$newseq,$label,$length)=@_; + unless ($self->valid($label)) { + if ($self->{'seq'}->valid($label)) { + #$self->warn("Label \'$label\' not valid for executing a LiveSeq change for the object asked but it's ok for DNAlevel change, reverting to that"); + shift @_; + return($self->{'seq'}->labelchange(@_)); + } else { + $self->warn("Label \'$label\' not valid for executing a LiveSeq change"); + return (-1); + } + } + unless ($newseq) { # it means this is a simple deletion + if (defined($length)) { + unless ($length >= 0) { + $self->warn("No sense having length < 0 in a deletion"); + return (-1); + } + } else { + $self->warn("Length not defined for deletion!"); + return (-1); + } + return $self->_delete($label,$length); + } + my $newseqlength=CORE::length($newseq); + if (defined($length)) { + unless ($length >= 0) { + $self->warn("No sense having length < 0 in a change()"); + return (-1); + } + } else { + $length=$newseqlength; # defaults to pointmutation(s) + } + if ($length == 0) { # it means this is a simple insertion, length def&==0 + my ($insertbegin,$insertend)=$self->_praeinsert($label,$newseq); + if ($insertbegin == -1) { + return (-1); + } else { + return (1); + } + } + if ($newseqlength == $length) { # it means this is simple pointmutation(s) + return $self->_mutate($label,$newseq,$length); + } + # if we arrived here then change is complex mixture + my $strand=$self->strand(); + my $afterendlabel=$self->label($length+1,$label,$strand); # get the label at $length+1 positions after $label + unless ($afterendlabel > 0) { # label not found or error + $self->warn("No valid afterendlabel found for executing the complex mutation!"); + return (-1); + } + my $deleted=$self->_delete($label,$length); # first delete length nucs + if ($deleted == -1) { # if errors + return (-1); + } else { # then insert the newsequence + my ($insertbegin,$insertend)=$self->_praeinsert($afterendlabel,$newseq); + if ($insertbegin == -1) { + return (-1); + } else { + return (1); + } + } +} + +# internal methods for change() + +# arguments: label for beginning of deletion, new sequence to insert +# returns: labels of beginning and end of the inserted sequence +# errorcode: -1 +sub _praeinsert { + my ($self,$label,$newseq)=@_; + my ($insertbegin,$insertend); + my $strand=$self->strand(); + if ($strand == 1) { + ($insertbegin,$insertend)=($self->{'seq'}->praeinsert_string($newseq,$label)); + } else { # since it's reverse strand and we insert in forward direction.... + $newseq=reverse($newseq); + $newseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases + ($insertend,$insertbegin)=($self->{'seq'}->postinsert_string($newseq,$label)); + } + if (($insertbegin==0)||($insertend==0)) { + $self->warn("Some error occurred while inserting!"); + return (-1); + } else { + return ($insertbegin,$insertend); + } +} + +# arguments: label for beginning of deletion, length of deletion +# returns: string of deleted bases +# errorcode: -1 +sub _delete { + my ($self,$label,$length)=@_; + my $strand=$self->strand(); + my $endlabel=$self->label($length,$label,$strand); # get the label at $length positions after $label + unless ($endlabel > 0) { # label not found or error + $self->warn("No valid endlabel found for executing the deletion!"); + return (-1); + } + # this is important in Transcript to fix exon structure + $self->_deletecheck($label,$endlabel); + my $deletedseq; + if ($strand == 1) { + $deletedseq=$self->{'seq'}->splice_chain($label,undef,$endlabel); + } else { + $deletedseq=$self->{'seq'}->splice_chain($endlabel,undef,$label); + $deletedseq=reverse($deletedseq); # because we are on reverse strand and we cut anyway + # in forward direction + $deletedseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases + } + return ($deletedseq); +} + +# empty function, overridden in Transcript, not useful here +sub _deletecheck { +} + +# arguments: label for beginning of mutation, newsequence, number of mutations +# returns: 1 all OK +# errorcode: -1 +sub _mutate { + my ($self,$label,$newseq,$length)=@_; # length is equal to length(newseq) + my ($i,$base,$nextlabel); + my @labels; # array of labels + my $strand=$self->strand(); + if ($length == 1) { # special cases first + @labels=($label); + } else { + my $endlabel=$self->label($length,$label,$strand); # get the label at $length positions after $label + unless ($endlabel > 0) { # label not found or error + $self->warn("No valid endlabel found for executing the mutation!"); + return (-1); + } + if ($length == 2) { # another special case + @labels=($label,$endlabel); + } else { # more than 3 bases changed + # this wouldn't work for Transcript + #my $labelsarrayref; + #if ($strand == 1) { + #$labelsarrayref=$self->{'seq'}->down_labels($label,$endlabel); + #} else { + #$labelsarrayref=$self->{'seq'}->up_labels($label,$endlabel); + #} + #@labels=@{$labelsarrayref}; + #if ($length != scalar(@labels)) { # not enough labels returned + #$self->warn("Not enough valid labels found for executing the mutation!"); + #return (-1); + #} + + # this should be more general + @labels=($label); # put the first one + while ($label != $endlabel) { + $nextlabel=$self->label(2,$label,$strand); # retrieve the next label + push (@labels,$nextlabel); + $label=$nextlabel; # move on reference + } + } + } + if ($strand == -1) { # only for reverse strand + $newseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases + } + my $errorcheck; # if not equal to $length after summing for all changes, error did occurr + $i = 0; + foreach $base (split(//,$newseq)) { + $errorcheck += $self->{'seq'}->set_value_at_label($base,$labels[$i]); + $i++; + } + if ($errorcheck != $length) { + $self->warn("Some error occurred while mutating!"); + return (-1); + } else { + return (1); + } +} + +=head2 valid + + Title : valid + Usage : $boolean = $obj->valid($label) + Function: tests if a label exists inside the object + Returns : boolean + Args : label + +=cut + +# argument: label +# returns: 1 YES 0 NO +sub valid { + my ($self,$label)=@_; + my $checkme; + my @labels=$self->all_labels; + foreach $checkme (@labels) { + if ($label == $checkme) { + return (1); # found + } + } + return (0); # not found +} + + +=head2 start + + Title : start + Usage : $startlabel=$obj->start() + Function: returns the label of the first nucleotide of the object (exon, CDS) + Returns : label + Args : none + +=cut + +sub start { + my ($self) = @_; + return $self->{'start'}; # common for all classes BUT DNA (which redefines it) and Transcript (that takes the information from the Exons) +} + +=head2 end + + Title : end + Usage : $endlabel=$obj->end() + Function: returns the label of the last nucleotide of the object (exon, CDS) + Returns : label + Args : none + +=cut + +sub end { + my ($self) = @_; + return $self->{'end'}; +} + +=head2 strand + + Title : strand + Usage : $strand=$obj->strand() + $obj->strand($strand) + Function: gets or sets strand information, being 1 or -1 (forward or reverse) + Returns : -1 or 1 + Args : none OR -1 or 1 + +=cut + +sub strand { + my ($self,$strand) = @_; + if ($strand) { + if (($strand != 1)&&($strand != -1)) { + $self->warn("strand information not changed because strand identifier not valid"); + } else { + $self->{'strand'} = $strand; + } + } + return $self->{'strand'}; +} + +=head2 alphabet + + Title : alphabet + Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ } + Function: Returns the type of sequence being one of + 'dna', 'rna' or 'protein'. This is case sensitive. + + Returns : a string either 'dna','rna','protein'. + Args : none + Note : "circular dna" is set as dna + +=cut + + +sub alphabet { + my %valid_type = map {$_, 1} qw( dna rna protein ); + my ($self,$value) = @_; + if (defined $value) { + $value =~ s/circular dna/dna/; + unless ( $valid_type{$value} ) { + $self->warn("Molecular type '$value' is not a valid type"); + } + $self->{'alphabet'} = $value; + } + return $self->{'alphabet'}; +} + +=head2 coordinate_start + + Title : coordinate_start + Usage : $coordstartlabel=$obj->coordinate_start() + : $coordstartlabel=$obj->coordinate_start($label) + Function: returns and optionally sets the first label of the coordinate + system used + For some objects only labels inside the object or in frame (for + Translation objects) will be allowed to get set as coordinate start + + Returns : label. It returns 0 if label not found. + Errorcode -1 + Args : an optional reference $label that is position 1 + +=cut + + +sub coordinate_start { + my ($self,$label) = @_; + if ($label) { + if ($self->valid($label)) { + $self->{'coordinate_start'} = $label; + } else { + $self->warn("The label you are trying to set as coordinate_start is not valid for this object"); + } + } + my $coord_start = $self->{'coordinate_start'}; + if ($coord_start) { + return $coord_start; + } else { + return $self->start(); + } +} + +=head2 label + + Title : label + Usage : $seq->label($position) + : $seq->label($position,$firstlabel) + Examples: $nextlabel=$seq->label(2,$label) -> retrieves the following label + : $prevlabel=$seq->label(-1,$label) -> retrieves the preceding label + + Function: returns the label of the nucleotide at $position from current + coordinate start + Returns : a label. It returns 0 if label not found. + Errorcode -1 + Args : a position, + an optional reference $firstlabel that is to be used as position 1 + an optional strand (1 or -1) argument + if strand argument is not given, it will default to the object + argument. This argument is useful when a call is issued from a child + of a parent object containing the subseq method + +=cut + + +sub label { + my ($self,$position,$firstlabel,$strand)=@_; + my $label; + unless (defined ($firstlabel)) { + $firstlabel=$self->coordinate_start; + } + unless ($position) { # if position = 0 complain ? + $self->warn("Position not given or position 0"); + return (-1); + } + unless (defined ($strand)) { # if optional [strand] argument not given + $strand=$self->strand; + } + if ($strand == 1) { + if ($position > 0) { + $label=$self->{'seq'}->down_get_label_at_pos($position,$firstlabel) + } else { # if < 0 + $label=$self->{'seq'}->up_get_label_at_pos(1 - $position,$firstlabel) + } + } else { + if ($position > 0) { + $label=$self->{'seq'}->up_get_label_at_pos($position,$firstlabel) + } else { # if < 0 + $label=$self->{'seq'}->down_get_label_at_pos(1 - $position,$firstlabel) + } + } + return $label; +} + + +=head2 position + + Title : position + Usage : $seq->position($label) + : $seq->position($label,$firstlabel) + Function: returns the position of nucleotide at $label + Returns : the position of the label from current coordinate start + Errorcode 0 + Args : a label pointing to a certain nucleotide (e.g. start of exon) + an optional "firstlabel" as reference to count from + an optional strand (1 or -1) argument + if strand argument is not given, it will default to the object + argument. This argument is useful when a call is issued from a child + of a parent object containing the subseq method + +=cut + + +sub position { + my ($self,$label,$firstlabel,$strand)=@_; + unless (defined ($strand)) { # if optional [strand] argument not given + $strand=$self->strand; + } + unless (defined ($firstlabel)) { + $firstlabel=$self->coordinate_start; + } + unless ($self->valid($label)) { + $self->warn("label not valid"); + return (0); + } + if ($firstlabel == $label) { + return (1); + } + my ($coordpos,$position0,$position); + $position0=$self->{'seq'}->down_get_pos_of_label($label); + $coordpos=$self->{'seq'}->down_get_pos_of_label($firstlabel); + $position=$position0-$coordpos+1; + if ($position <= 0) { + $position--; + } + if ($strand == -1) { + #print "\n----------DEBUGSEQPOS label $label firstlabel $firstlabel strand $strand: position=",1-$position; + return (1-$position); + } else { + #print "\n----------DEBUGSEQPOS label $label firstlabel $firstlabel strand $strand: position=",$position; + return ($position); + } +} + +=head2 follows + + Title : follows + Usage : $seq->follows($firstlabel,$secondlabel) + : $seq->follows($firstlabel,$secondlabel,$strand) + Function: checks if SECONDlabel follows FIRSTlabel, undependent of the strand + i.e. it checks downstream for forward strand and + upstream for reverse strand + Returns : 1 or 0 + Errorcode -1 + Args : two labels + an optional strand (1 or -1) argument + if strand argument is not given, it will default to the object + argument. This argument is useful when a call is issued from a child + of a parent object containing the subseq method + +=cut + +#' +# wraparound to is_downstream and is_upstream that chooses the correct one +# depending on the strand +sub follows { + my ($self,$firstlabel,$secondlabel,$strand)=@_; + unless (defined ($strand)) { # if optional [strand] argument not given + $strand=$self->strand; + } + if ($strand == 1) { + return ($self->{'seq'}->is_downstream($firstlabel,$secondlabel)); + } else { + return ($self->{'seq'}->is_upstream($firstlabel,$secondlabel)); + } +} +# +#=head2 translate +# +# Title : translate +# Usage : $protein_seq = $obj->translate +# Function: Provides the translation of the DNA sequence +# using full IUPAC ambiguities in DNA/RNA and amino acid codes. +# +# The resulting translation is identical to EMBL/TREMBL database +# translations. +# +# Returns : a string +# Args : character for terminator (optional) defaults to '*' +# character for unknown amino acid (optional) defaults to 'X' +# frame (optional) valid values 0, 1, 3, defaults to 0 +# codon table id (optional) defaults to 1 +# +#=cut +# +#sub translate { +# my ($self) = shift; +# return ($self->translate_string($self->seq,@_)); +#} +# +#=head2 translate_string +# +# Title : translate_string +# Usage : $protein_seq = $obj->translate_string("attcgtgttgatcgatta"); +# Function: Like translate, but can be used to translate subsequences after +# having retrieved them as string. +# Args : 1st argument is a string. Optional following arguments: like in +# the translate method +# +#=cut +# +# +#sub translate_string { +# my($self) = shift; +# my($seq) = shift; +# my($stop, $unknown, $frame, $tableid) = @_; +# my($i, $len, $output) = (0,0,''); +# my($codon) = ""; +# my $aa; +# +# +# ## User can pass in symbol for stop and unknown codons +# unless(defined($stop) and $stop ne '') { $stop = "*"; } +# unless(defined($unknown) and $unknown ne '') { $unknown = "X"; } +# unless(defined($frame) and $frame ne '') { $frame = 0; } +# +# ## the codon table ID +# if ($self->translation_table) { +# $tableid = $self->translation_table; +# } +# unless(defined($tableid) and $tableid ne '') { $tableid = 1; } +# +# ##Error if monomer is "Amino" +# $self->warn("Can't translate an amino acid sequence.") +# if (defined $self->alphabet && $self->alphabet eq 'protein'); +# +# ##Error if frame is not 0, 1 or 2 +# $self->warn("Valid values for frame are 0, 1, 2, not [$frame].") +# unless ($frame == 0 or $frame == 1 or $frame == 2); +# +# #thows a warning if ID is invalid +# my $codonTable = Bio::Tools::CodonTable->new( -id => $tableid); +# +# # deal with frame offset. +# if( $frame ) { +# $seq = substr ($seq,$frame); +# } +# +# for $codon ( grep { CORE::length == 3 } split(/(.{3})/, $seq) ) { +# my $aa = $codonTable->translate($codon); +# if ($aa eq '*') { +# $output .= $stop; +# } +# elsif ($aa eq 'X') { +# $output .= $unknown; +# } +# else { +# $output .= $aa ; +# } +# } +# #if( substr($output,-1,1) eq $stop ) { +# # chop $output; +# #} +# +# return ($output); +#} + +=head2 gene + + Title : gene + Usage : my $gene=$obj->gene; + Function: Gets or sets the reference to the LiveSeq::Gene object. + Objects that are features of a LiveSeq Gene will have this + attribute set automatically. + + Returns : reference to an object of class Gene + Note : if Gene object is not set, this method will return 0; + Args : none or reference to object of class Bio::LiveSeq::Gene + +=cut + +sub gene { + my ($self,$value) = @_; + if (defined $value) { + $self->{'gene'} = $value; + } + unless (exists $self->{'gene'}) { + return (0); + } else { + return $self->{'gene'}; + } +} + +=head2 obj_valid + + Title : obj_valid + Usage : if ($obj->obj_valid) {do something;} + Function: Checks if start and end labels are still valid for the ojbect, + i.e. tests if the LiveSeq object is still valid + Returns : boolean + Args : none + +=cut + +sub obj_valid { + my $self=shift; + unless (($self->{'seq'}->valid($self->start()))&&($self->{'seq'}->valid($self->end()))) { + return (0); + } + return (1); +} + +=head2 name + + Title : name + Usage : $name = $obj->name; + : $name = $obj->name("ABCD"); + Function: Returns or sets the name of the object. + If there is no name, it will return "unknown"; + Returns : A string + Args : None + +=cut + +sub name { + my ($self,$value) = @_; + if (defined $value) { + $self->{'name'} = $value; + } + unless (exists $self->{'name'}) { + return "unknown"; + } else { + return $self->{'name'}; + } +} + +=head2 desc + + Title : desc + Usage : $desc = $obj->desc; + : $desc = $obj->desc("ABCD"); + Function: Returns or sets the description of the object. + If there is no description, it will return "unknown"; + Returns : A string + Args : None + +=cut + +sub desc { + my ($self,$value) = @_; + if (defined $value) { + $self->{'desc'} = $value; + } + unless (exists $self->{'desc'}) { + return "unknown"; + } else { + return $self->{'desc'}; + } +} + +=head2 source + + Title : source + Usage : $name = $obj->source; + : $name = $obj->source("Homo sapiens"); + Function: Returns or sets the organism that is source of the object. + If there is no source, it will return "unknown"; + Returns : A string + Args : None + +=cut + +sub source { + my ($self,$value) = @_; + if (defined $value) { + $self->{'source'} = $value; + } + unless (exists $self->{'source'}) { + return "unknown"; + } else { + return $self->{'source'}; + } +} + +sub delete_Obj { + my $self = shift; + my @values= values %{$self}; + my @keys= keys %{$self}; + + foreach my $key ( @keys ) { + delete $self->{$key}; + } + foreach my $value ( @values ) { + if (index(ref($value),"LiveSeq") != -1) { # object case + eval { + # delete $self->{$value}; + $value->delete_Obj; + }; + } elsif (index(ref($value),"ARRAY") != -1) { # array case + my @array=@{$value}; + my $element; + foreach $element (@array) { + eval { + $element->delete_Obj; + }; + } + } elsif (index(ref($value),"HASH") != -1) { # object case + my %hash=%{$value}; + my $element; + foreach $element (%hash) { + eval { + $element->delete_Obj; + }; + } + } + } + return(1); +} + +1;