Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/Seq/SequenceTrace.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Seq/SequenceTrace.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,747 @@ +# $Id: SequenceTrace.pm,v 1.1.2.1 2003/03/25 12:32:16 heikki Exp $ +# +# BioPerl module for Bio::Seq::SeqWithQuality +# +# Cared for by Chad Matsalla <bioinformatics@dieselwurks.com +# +# Copyright Chad Matsalla +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Seq::SequenceTrace - Bioperl object packaging a sequence with its trace + +=head1 SYNOPSIS + + # example code here + +=head1 DESCRIPTION + +This object stores a sequence with its trace. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to one +of the Bioperl mailing lists. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bio.perl.org/MailList.html - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +the bugs and their resolution. Bug reports can be submitted via email +or the web: + + bioperl-bugs@bio.perl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Chad Matsalla + +Email bioinformatics@dieselwurks.com + +=head1 CONTRIBUTORS + +Jason Stajich, jason@bioperl.org + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. +Internal methods are usually preceded with a _ + +=cut + + +package Bio::Seq::SequenceTrace; + +use vars qw(@ISA); + +use strict; +use Bio::Root::Root; +use Bio::Seq::QualI; +use Bio::PrimarySeqI; +use Bio::PrimarySeq; +use Bio::Seq::PrimaryQual; +use Bio::Seq::TraceI; + +@ISA = qw(Bio::Root::Root Bio::Seq::SeqWithQuality Bio::Seq::TraceI); + +=head2 new() + + Title : new() + Usage : $st = Bio::Seq::SequenceTrace->new + ( -sequencewithquality => Bio::Seq::SequenceWithQuality, + -trace_a => \@trace_values_for_a_channel, + -trace_t => \@trace_values_for_t_channel, + -trace_g => \@trace_values_for_g_channel, + -trace_c => \@trace_values_for_c_channel, + -trace_indices => '0 5 10 15 20 25 30 35' + ); + Function: Returns a new Bio::Seq::SequenceTrace object from basic + constructors. + Returns : a new Bio::Seq::SequenceTrace object +Arguments: I think that these are all describes in the usage above. + +=cut + +sub new { + my ($class, @args) = @_; + my $self = $class->SUPER::new(@args); + # default: turn OFF the warnings + $self->{supress_warnings} = 1; + my($sequence_with_quality,$trace_indices,$trace_a,$trace_t, + $trace_g,$trace_c) = + $self->_rearrange([qw( + SEQUENCEWITHQUALITY + TRACE_INDICES + TRACE_A + TRACE_T + TRACE_G)], @args); + # first, deal with the sequence and quality information + if ($sequence_with_quality && ref($sequence_with_quality) eq "Bio::Seq::SeqWithQuality") { + $self->{swq} = $sequence_with_quality; + } + else { + $self->throw("A Bio::Seq::SequenceTrace object must be created with a + Bio::Seq::SeqWithQuality object."); + } + $self->{trace_a} = $trace_a ? $trace_a : undef; + $self->{trace_t} = $trace_t ? $trace_t : undef; + $self->{trace_g} = $trace_g ? $trace_g : undef; + $self->{trace_c} = $trace_c ? $trace_c : undef; + $self->{trace_indices} = $trace_indices ? $trace_indices : undef; + return $self; +} + +=head2 trace($base,\@new_values) + + Title : trace($base,\@new_values) + Usage : @trace_Values = @{$obj->trace($base,\@new_values)}; + Function: Returns the trace values as a reference to an array containing the + trace values. The individual elements of the trace array are not validated + and can be any numeric value. + Returns : A reference to an array. + Status : +Arguments: $base : which color channel would you like the trace values for? + - $base must be one of "A","T","G","C" + \@new_values : a reference to an array of values containing trace + data for this base + +=cut + +sub trace { + my ($self,$base_channel,$values) = @_; + $base_channel =~ tr/A-Z/a-z/; + if (length($base_channel) > 1 && $base_channel !~ /a|t|g|c/) { + $self->throw("The base channel must be a, t, g, or c"); + } + if ( $values && ref($values) eq "ARRAY") { + $self->{trace_$base_channel} = $values; + } + elsif ($values) { + $self->warn("You tried to change the traces for the $base_channel but + the values you wave were not a reference to an array."); + } + return $self->{trace_$base_channel}; +} + + +=head2 trace_indices($new_indices) + + Title : trace_indices($new_indices) + Usage : $indices = $obj->trace_indices($new_indices); + Function: Return the trace iindex points for this object. + Returns : A scalar + Args : If used, the trace indices will be set to the provided value. + +=cut + +sub trace_indices { + my ($self,$trace_indices)= @_; + if ($trace_indices) { $self->{trace_indices} = $trace_indices; } + return $self->{trace_indices}; +} + + + + + + + + + + + + + +=head2 _common_id() + + Title : _common_id() + Usage : $common_id = $self->_common_id(); + Function: Compare the display_id of {qual_ref} and {seq_ref}. + Returns : Nothing if they don't match. If they do return + {seq_ref}->display_id() + Args : None. + +=cut + +#' +sub _common_id { + my $self = shift; + return if (!$self->{seq_ref} || !$self->{qual_ref}); + my $sid = $self->{seq_ref}->display_id(); + return if (!$sid); + return if (!$self->{qual_ref}->display_id()); + return $sid if ($sid eq $self->{qual_ref}->display_id()); + # should this become a warning? + # print("ids $sid and $self->{qual_ref}->display_id() do not match. Bummer.\n"); +} + +=head2 _common_display_id() + + Title : _common_id() + Usage : $common_id = $self->_common_display_id(); + Function: Compare the display_id of {qual_ref} and {seq_ref}. + Returns : Nothing if they don't match. If they do return + {seq_ref}->display_id() + Args : None. + +=cut + +#' +sub _common_display_id { + my $self = shift; + $self->common_id(); +} + +=head2 _common_accession_number() + + Title : _common_accession_number() + Usage : $common_id = $self->_common_accession_number(); + Function: Compare the accession_number() of {qual_ref} and {seq_ref}. + Returns : Nothing if they don't match. If they do return + {seq_ref}->accession_number() + Args : None. + +=cut + +#' +sub _common_accession_number { + my $self = shift; + return if ($self->{seq_ref} || $self->{qual_ref}); + my $acc = $self->{seq_ref}->accession_number(); + # if (!$acc) { print("the seqref has no acc.\n"); } + return if (!$acc); + # if ($acc eq $self->{qual_ref}->accession_number()) { print("$acc matches ".$self->{qual_ref}->accession_number()."\n"); } + return $acc if ($acc eq $self->{qual_ref}->accession_number()); + # should this become a warning? + # print("accession numbers $acc and $self->{qual_ref}->accession_number() do not match. Bummer.\n"); +} + +=head2 _common_primary_id() + + Title : _common_primary_id() + Usage : $common_primard_id = $self->_common_primary_id(); + Function: Compare the primary_id of {qual_ref} and {seq_ref}. + Returns : Nothing if they don't match. If they do return + {seq_ref}->primary_id() + Args : None. + +=cut + +#' +sub _common_primary_id { + my $self = shift; + return if ($self->{seq_ref} || $self->{qual_ref}); + my $pid = $self->{seq_ref}->primary_id(); + return if (!$pid); + return $pid if ($pid eq $self->{qual_ref}->primary_id()); + # should this become a warning? + # print("primary_ids $pid and $self->{qual_ref}->primary_id() do not match. Bummer.\n"); + +} + +=head2 _common_desc() + + Title : _common_desc() + Usage : $common_desc = $self->_common_desc(); + Function: Compare the desc of {qual_ref} and {seq_ref}. + Returns : Nothing if they don't match. If they do return + {seq_ref}->desc() + Args : None. + +=cut + +#' +sub _common_desc { + my $self = shift; + return if ($self->{seq_ref} || $self->{qual_ref}); + my $des = $self->{seq_ref}->desc(); + return if (!$des); + return $des if ($des eq $self->{qual_ref}->desc()); + # should this become a warning? + # print("descriptions $des and $self->{qual_ref}->desc() do not match. Bummer.\n"); + +} + +=head2 set_common_descriptors() + + Title : set_common_descriptors() + Usage : $self->set_common_descriptors(); + Function: Compare the descriptors (id,accession_number,display_id, + primary_id, desc) for the PrimarySeq and PrimaryQual objects + within the SeqWithQuality object. If they match, make that + descriptor the descriptor for the SeqWithQuality object. + Returns : Nothing. + Args : None. + +=cut + +sub set_common_descriptors { + my $self = shift; + return if ($self->{seq_ref} || $self->{qual_ref}); + &_common_id(); + &_common_display_id(); + &_common_accession_number(); + &_common_primary_id(); + &_common_desc(); +} + +=head2 alphabet() + + Title : alphabet(); + Usage : $molecule_type = $obj->alphabet(); + Function: Get the molecule type from the PrimarySeq object. + Returns : What what PrimarySeq says the type of the sequence is. + Args : None. + +=cut + +sub alphabet { + my $self = shift; + return $self->{seq_ref}->alphabet(); +} + +=head2 display_id() + + Title : display_id() + Usage : $id_string = $obj->display_id(); + Function: Returns the display id, aka the common name of the Quality + object. + The semantics of this is that it is the most likely string to be + used as an identifier of the quality sequence, and likely to have + "human" readability. The id is equivalent to the ID field of the + GenBank/EMBL databanks and the id field of the Swissprot/sptrembl + database. In fasta format, the >(\S+) is presumed to be the id, + though some people overload the id to embed other information. + Bioperl does not use any embedded information in the ID field, + and people are encouraged to use other mechanisms (accession + field for example, or extending the sequence object) to solve + this. Notice that $seq->id() maps to this function, mainly for + legacy/convience issues. + This method sets the display_id for the SeqWithQuality object. + Returns : A string + Args : If a scalar is provided, it is set as the new display_id for + the SeqWithQuality object. + Status : Virtual + +=cut + +sub display_id { + my ($obj,$value) = @_; + if( defined $value) { + $obj->{'display_id'} = $value; + } + return $obj->{'display_id'}; + +} + +=head2 accession_number() + + Title : accession_number() + Usage : $unique_biological_key = $obj->accession_number(); + Function: Returns the unique biological id for a sequence, commonly + called the accession_number. For sequences from established + databases, the implementors should try to use the correct + accession number. Notice that primary_id() provides the unique id + for the implemetation, allowing multiple objects to have the same + accession number in a particular implementation. For sequences + with no accession number, this method should return "unknown". + This method sets the accession_number for the SeqWithQuality + object. + Returns : A string (the value of accession_number) + Args : If a scalar is provided, it is set as the new accession_number + for the SeqWithQuality object. + Status : Virtual + + +=cut + +sub accession_number { + my( $obj, $acc ) = @_; + + if (defined $acc) { + $obj->{'accession_number'} = $acc; + } else { + $acc = $obj->{'accession_number'}; + $acc = 'unknown' unless defined $acc; + } + return $acc; +} + +=head2 primary_id() + + Title : primary_id() + Usage : $unique_implementation_key = $obj->primary_id(); + Function: Returns the unique id for this object in this implementation. + This allows implementations to manage their own object ids in a + way the implementaiton can control clients can expect one id to + map to one object. For sequences with no accession number, this + method should return a stringified memory location. + This method sets the primary_id for the SeqWithQuality + object. + Returns : A string. (the value of primary_id) + Args : If a scalar is provided, it is set as the new primary_id for + the SeqWithQuality object. + +=cut + +sub primary_id { + my ($obj,$value) = @_; + if ($value) { + $obj->{'primary_id'} = $value; + } + return $obj->{'primary_id'}; + +} + +=head2 desc() + + Title : desc() + Usage : $qual->desc($newval); _or_ + $description = $qual->desc(); + Function: Get/set description text for this SeqWithQuality object. + Returns : A string. (the value of desc) + Args : If a scalar is provided, it is set as the new desc for the + SeqWithQuality object. + +=cut + +sub desc { + # a mechanism to set the disc for the SeqWithQuality object. + # probably will be used most often by set_common_features() + my ($obj,$value) = @_; + if( defined $value) { + $obj->{'desc'} = $value; + } + return $obj->{'desc'}; +} + +=head2 id() + + Title : id() + Usage : $id = $qual->id(); + Function: Return the ID of the quality. This should normally be (and + actually is in the implementation provided here) just a synonym + for display_id(). + Returns : A string. (the value of id) + Args : If a scalar is provided, it is set as the new id for the + SeqWithQuality object. + +=cut + +sub id { + my ($self,$value) = @_; + if (!$self) { $self->throw("no value for self in $value"); } + if( defined $value ) { + return $self->display_id($value); + } + return $self->display_id(); +} + +=head2 seq + + Title : seq() + Usage : $string = $obj->seq(); _or_ + $obj->seq("atctatcatca"); + Function: Returns the sequence that is contained in the imbedded in the + PrimarySeq object within the SeqWithQuality object + Returns : A scalar (the seq() value for the imbedded PrimarySeq object.) + Args : If a scalar is provided, the SeqWithQuality object will + attempt to set that as the sequence for the imbedded PrimarySeq + object. Otherwise, the value of seq() for the PrimarySeq object + is returned. + Notes : This is probably not a good idea because you then should call + length() to make sure that the sequence and quality are of the + same length. Even then, how can you make sure that this sequence + belongs with that quality? I provided this to give you rope to + hang yourself with. Tie it to a strong device and use a good + knot. + +=cut + +sub seq { + my ($self,$value) = @_; + if( defined $value) { + $self->{seq_ref}->seq($value); + $self->length(); + } + return $self->{seq_ref}->seq(); +} + +=head2 qual() + + Title : qual() + Usage : @quality_values = @{$obj->qual()}; _or_ + $obj->qual("10 10 20 40 50"); + Function: Returns the quality as imbedded in the PrimaryQual object + within the SeqWithQuality object. + Returns : A reference to an array containing the quality values in the + PrimaryQual object. + Args : If a scalar is provided, the SeqWithQuality object will + attempt to set that as the quality for the imbedded PrimaryQual + object. Otherwise, the value of qual() for the PrimaryQual + object is returned. + Notes : This is probably not a good idea because you then should call + length() to make sure that the sequence and quality are of the + same length. Even then, how can you make sure that this sequence + belongs with that quality? I provided this to give you a strong + board with which to flagellate yourself. + +=cut + +sub qual { + my ($self,$value) = @_; + + if( defined $value) { + $self->{qual_ref}->qual($value); + # update the lengths + $self->length(); + } + return $self->{qual_ref}->qual(); +} + + + + +=head2 length() + + Title : length() + Usage : $length = $seqWqual->length(); + Function: Get the length of the SeqWithQuality sequence/quality. + Returns : Returns the length of the sequence and quality if they are + both the same. Returns "DIFFERENT" if they differ. + Args : None. + +=cut + +sub length { + my $self = shift; + # what do I return here? Whew. Ambiguity... + ######## + +} + + +=head2 qual_obj + + Title : qual_obj($different_obj) + Usage : $qualobj = $seqWqual->qual_obj(); _or_ + $qualobj = $seqWqual->qual_obj($ref_to_primaryqual_obj); + Function: Get the PrimaryQual object that is imbedded in the + SeqWithQuality object or if a reference to a PrimaryQual object + is provided, set this as the PrimaryQual object imbedded in the + SeqWithQuality object. + Returns : A reference to a Bio::Seq::SeqWithQuality object. + +=cut + +sub qual_obj { + my ($self,$value) = @_; + return $self->{swq}->qual_obj($value); +} + + +=head2 seq_obj + + Title : seq_obj() + Usage : $seqobj = $seqWqual->qual_obj(); _or_ + $seqobj = $seqWqual->seq_obj($ref_to_primary_seq_obj); + Function: Get the PrimarySeq object that is imbedded in the + SeqWithQuality object or if a reference to a PrimarySeq object is + provided, set this as the PrimarySeq object imbedded in the + SeqWithQuality object. + Returns : A reference to a Bio::PrimarySeq object. + +=cut + +sub seq_obj { + my ($self,$value) = @_; + return $self->{swq}->seq_obj($value); +} + +=head2 _set_descriptors + + Title : _set_descriptors() + Usage : $seqWqual->_qual_obj($qual,$seq,$id,$acc,$pid,$desc,$given_id, + $alphabet); + Function: Set the descriptors for the SeqWithQuality object. Try to + match the descriptors in the PrimarySeq object and in the + PrimaryQual object if descriptors were not provided with + construction. + Returns : Nothing. + Args : $qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet as found + in the new() method. + Notes : Really only intended to be called by the new() method. If + you want to invoke a similar function try + set_common_descriptors(). + +=cut + + +sub _set_descriptors { + my ($self,$qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet) = @_; + $self->{swq}->_seq_descriptors($qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet); +} + +=head2 subseq($start,$end) + + Title : subseq($start,$end) + Usage : $subsequence = $obj->subseq($start,$end); + Function: Returns the subseq from start to end, where the first base + is 1 and the number is inclusive, ie 1-2 are the first two + bases of the sequence. + Returns : A string. + Args : Two positions. + +=cut + +sub subseq { + my ($self,@args) = @_; + # does a single value work? + return $self->{swq}->subseq(@args); +} + +=head2 baseat($position) + + Title : baseat($position) + Usage : $base_at_position_6 = $obj->baseat("6"); + Function: Returns a single base at the given position, where the first + base is 1 and the number is inclusive, ie 1-2 are the first two + bases of the sequence. + Returns : A scalar. + Args : A position. + +=cut + +sub baseat { + my ($self,$val) = @_; + return $self->{swq}->subseq($val,$val); +} + +=head2 subqual($start,$end) + + Title : subqual($start,$end) + Usage : @qualities = @{$obj->subqual(10,20); + Function: returns the quality values from $start to $end, where the + first value is 1 and the number is inclusive, ie 1-2 are the + first two bases of the sequence. Start cannot be larger than + end but can be equal. + Returns : A reference to an array. + Args : a start position and an end position + +=cut + +sub subqual { + my ($self,@args) = @_; + return $self->{swq}->subqual(@args); +} + +=head2 qualat($position) + + Title : qualat($position) + Usage : $quality = $obj->qualat(10); + Function: Return the quality value at the given location, where the + first value is 1 and the number is inclusive, ie 1-2 are the + first two bases of the sequence. Start cannot be larger than + end but can be equal. + Returns : A scalar. + Args : A position. + +=cut + +sub qualat { + my ($self,$val) = @_; + return $self->{swq}->qualat($val); +} + +=head2 sub_trace_index($start,$end) + + Title : sub_trace_index($start,$end) + Usage : @trace_indices = @{$obj->sub_trace_index(10,20); + Function: returns the trace index values from $start to $end, where the + first value is 1 and the number is inclusive, ie 1-2 are the + first two bases of the sequence. Start cannot be larger than + end but can be e_trace_index. + Returns : A reference to an array. + Args : a start position and an end position + +=cut + +sub sub_trace_index { + my ($self,$start,$end) = @_; + + if( $start > $end ){ + $self->throw("in sub_trace_index, start [$start] has to be greater than end [$end]"); + } + + if( $start <= 0 || $end > $self->length ) { + $self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length.""); + } + + # remove one from start, and then length is end-start + + $start--; + $end--; + my @sub_trace_index_array = @{$self->{trace_indices}}[$start..$end]; + + # return substr $self->seq(), $start, ($end-$start); + return \@sub_trace_index_array; + +} + + + + +=head2 trace_index_at($position) + + Title : trace_index_at($position) + Usage : $trace_index = $obj->trace_index_at(10); + Function: Return the trace_index value at the given location, where the + first value is 1 and the number is inclusive, ie 1-2 are the + first two bases of the sequence. Start cannot be larger than + end but can be etrace_index_. + Returns : A scalar. + Args : A position. + +=cut + +sub trace_index_at { + my ($self,$val) = @_; + my @trace_index_at = @{$self->sub_trace_index($val,$val)}; + if (scalar(@trace_index_at) == 1) { + return $trace_index_at[0]; + } + else { + $self->throw("AAAH! trace_index_at provided more then one quality."); + } +} + + +1;