diff variant_effect_predictor/Bio/Seq/PrimaryQual.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Seq/PrimaryQual.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,459 @@
+# $Id: PrimaryQual.pm,v 1.17 2002/10/22 07:38:40 lapp Exp $
+#
+# bioperl module for Bio::PrimaryQual
+#
+# Cared for by Chad Matsalla <bioinformatics@dieselwurks.com>
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::PrimaryQual - Bioperl lightweight Quality Object
+
+=head1 SYNOPSIS
+
+ use Bio::Seq::PrimaryQual;
+
+ # you can use either a space-delimited string for quality
+
+ my $string_quals = "10 20 30 40 50 40 30 20 10";
+ my $qualobj = Bio::Seq::PrimaryQual->new
+ 	      ( '-qual' => $string_quals,
+ 		'-id'  => 'QualityFragment-12',
+ 		'-accession_number' => 'X78121',
+ 		);
+
+ # _or_ you can use an array of quality values
+
+ my @q2 = split/ /,$string_quals;
+ $qualobj = Bio::Seq::PrimaryQual->new( '-qual' => \@q2,
+       '-primary_id'     =>      'chads primary_id',
+       '-desc'           =>      'chads desc',
+       '-accession_number' => 'chads accession_number',
+      '-id'             =>      'chads id'
+      );
+
+ # to get the quality values out:
+
+ my @quals = @{$qualobj->qual()};
+
+ # to give _new_ quality values
+
+ my $newqualstring = "50 90 1000 20 12 0 0";
+ $qualobj->qual($newqualstring);
+
+
+=head1 DESCRIPTION
+
+This module provides a mechanism for storing quality
+values. Much more useful as part of
+Bio::Seq::SeqWithQuality where these quality values
+are associated with the sequence information.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org             - General discussion
+  http://bio.perl.org/MailList.html - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via email
+or the web:
+
+  bioperl-bugs@bio.perl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+Email bioinformatics@dieselwurks.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Seq::PrimaryQual;
+use vars qw(@ISA %valid_type);
+use strict;
+
+use Bio::Root::Root;
+use Bio::Seq::QualI;
+
+@ISA = qw(Bio::Root::Root Bio::Seq::QualI);
+
+
+=head2 new()
+
+ Title   : new()
+ Usage   : $qual = Bio::Seq::PrimaryQual->new
+	( -qual => '10 20 30 40 50 50 20 10',
+	  -id  => 'human_id',
+	  -accession_number => 'AL000012',
+	);
+
+ Function: Returns a new Bio::Seq::PrimaryQual object from basic 
+	constructors, being a string _or_ a reference to an array for the
+	sequence and strings for id and accession_number. Note that you
+	can provide an empty quality string.
+ Returns : a new Bio::Seq::PrimaryQual object
+
+=cut
+
+
+
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # default: turn ON the warnings (duh)
+    my($qual,$id,$acc,$pid,$desc,$given_id) =
+        $self->_rearrange([qw(QUAL
+                              DISPLAY_ID
+                              ACCESSION_NUMBER
+                              PRIMARY_ID
+                              DESC
+                              ID
+                              )],
+                          @args);
+    if( defined $id && defined $given_id ) {
+        if( $id ne $given_id ) {
+            $self->throw("Provided both id and display_id constructor functions. [$id] [$given_id]");   
+        }
+    }
+    if( defined $given_id ) { $id = $given_id; }
+    
+    # note: the sequence string may be empty
+    $self->qual($qual ? $qual : []);
+    $id      && $self->display_id($id);
+    $acc     && $self->accession_number($acc);
+    $pid     && $self->primary_id($pid);
+    $desc    && $self->desc($desc);
+
+    return $self;
+}
+
+=head2 qual()
+
+ Title   : qual()
+ Usage   : @quality_values  = @{$obj->qual()};
+ Function: Returns the quality as a reference to an array containing the
+           quality values. The individual elements of the quality array are
+           not validated and can be any numeric value.
+ Returns : A reference to an array.
+
+=cut
+
+sub qual {
+    my ($self,$value) = @_;
+    
+    if( ! defined $value || length($value) == 0 ) { 
+	$self->{'qual'} ||= [];
+    } elsif( ref($value) =~ /ARRAY/i ) {
+	# if the user passed in a reference to an array
+	$self->{'qual'} = $value;
+    } elsif(! $self->validate_qual($value)){
+	$self->throw("Attempting to set the quality to [$value] which does not look healthy");	    
+    } else {
+	$self->{'qual'} = [split(/\s+/,$value)];
+    }
+    
+    return $self->{'qual'};
+}
+
+=head2 validate_qual($qualstring)
+
+ Title	 : validate_qual($qualstring)
+ Usage	 : print("Valid.") if { &validate_qual($self,$qualities); }
+ Function: Make sure that the quality, if it has length > 0, contains at
+	least one digit. Note that quality strings are parsed into arrays
+	using split/\d+/,$quality_string, so make sure that your quality
+	scalar looks like this if you want it to be parsed properly.
+ Returns : 1 for a valid sequence (WHY? Shouldn\'t it return 0? <boggle>)
+ Args    : a scalar (any scalar, why PrimarySeq author?) and a scalar
+	containing the string to validate.
+
+=cut
+
+sub validate_qual {
+    # how do I validate quality values?
+    # \d+\s+\d+..., I suppose
+    my ($self,$qualstr) = @_;
+    # why the CORE?? -- (Because Bio::PrimarySeqI namespace has a 
+    #                    length method, you have to qualify 
+    #                    which length to use)
+    return 0 if (!defined $qualstr || CORE::length($qualstr) <= 0);   
+    return 1 if( $qualstr =~ /\d/);
+    
+    return 0;
+}
+
+=head2 subqual($start,$end)
+
+ Title   : subqual($start,$end)
+ Usage   : @subset_of_quality_values = @{$obj->subqual(10,40)};
+ Function: returns the quality values from $start to $end, where the
+        first value is 1 and the number is inclusive, ie 1-2 are the
+	first two bases of the sequence. Start cannot be larger than
+	end but can be equal.
+ Returns : A reference to an array.
+ Args    : a start position and an end position
+
+=cut
+
+
+sub subqual {
+   my ($self,$start,$end) = @_;
+
+   if( $start > $end ){
+       $self->throw("in subqual, start [$start] has to be greater than end [$end]");
+   }
+
+   if( $start <= 0 || $end > $self->length ) {
+       $self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length."");
+   }
+
+   # remove one from start, and then length is end-start
+
+   $start--;
+	$end--;
+	my @sub_qual_array = @{$self->{qual}}[$start..$end];
+
+ 	#   return substr $self->seq(), $start, ($end-$start);
+	return \@sub_qual_array;
+
+}
+
+=head2 display_id()
+
+ Title   : display_id()
+ Usage   : $id_string = $obj->display_id();
+ Function: returns the display id, aka the common name of the Quality
+        object.
+        The semantics of this is that it is the most likely string to be
+        used as an identifier of the quality sequence, and likely to have
+        "human" readability.  The id is equivalent to the ID field of the
+        GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
+        database. In fasta format, the >(\S+) is presumed to be the id,
+        though some people overload the id to embed other information.
+        Bioperl does not use any embedded information in the ID field,
+        and people are encouraged to use other mechanisms (accession
+	field for example, or extending the sequence object) to solve
+	this. Notice that $seq->id() maps to this function, mainly for
+        legacy/convience issues
+ Returns : A string
+ Args    : None
+
+=cut
+
+sub display_id {
+   my ($obj,$value) = @_;
+   if( defined $value) {
+      $obj->{'display_id'} = $value;
+    }
+    return $obj->{'display_id'};
+
+}
+
+=head2 accession_number()
+
+ Title   : accession_number()
+ Usage   : $unique_biological_key = $obj->accession_number();
+ Function: Returns the unique biological id for a sequence, commonly
+        called the accession_number. For sequences from established
+        databases, the implementors should try to use the correct
+        accession number. Notice that primary_id() provides the unique id
+        for the implemetation, allowing multiple objects to have the same
+        accession number in a particular implementation. For sequences
+        with no accession number, this method should return "unknown".
+ Returns : A string
+ Args    : None
+
+=cut
+
+sub accession_number {
+    my( $obj, $acc ) = @_;
+
+    if (defined $acc) {
+        $obj->{'accession_number'} = $acc;
+    } else {
+        $acc = $obj->{'accession_number'};
+        $acc = 'unknown' unless defined $acc;
+    }
+    return $acc;
+}
+
+=head2 primary_id()
+
+ Title   : primary_id()
+ Usage   : $unique_implementation_key = $obj->primary_id();
+ Function: Returns the unique id for this object in this implementation.
+        This allows implementations to manage their own object ids in a
+        way the implementaiton can control clients can expect one id to
+        map to one object. For sequences with no accession number, this
+        method should return a stringified memory location.
+ Returns : A string
+ Args    : None
+
+=cut
+
+sub primary_id {
+   my ($obj,$value) = @_;
+   if( defined $value) {
+      $obj->{'primary_id'} = $value;
+    }
+   return $obj->{'primary_id'};
+
+}
+
+=head2 desc()
+
+ Title   : desc()
+ Usage   : $qual->desc($newval);
+           $description = $qual->desc();
+ Function: Get/set description text for a qual object
+ Example :
+ Returns : Value of desc
+ Args    : newvalue (optional)
+
+=cut
+
+sub desc {
+   my ($obj,$value) = @_;
+   if( defined $value) {
+      $obj->{'desc'} = $value;
+    }
+    return $obj->{'desc'};
+
+}
+
+=head2 id()
+
+ Title   : id()
+ Usage   : $id = $qual->id();
+ Function: Return the ID of the quality. This should normally be (and
+	actually is in the implementation provided here) just a synonym
+	for display_id().
+ Returns : A string.
+ Args    : None.
+
+=cut
+
+sub id {
+   my ($self,$value) = @_;
+   if( defined $value ) {
+        return $self->display_id($value);
+   }
+   return $self->display_id();
+}
+
+=head2 length()
+
+ Title	 : length()
+ Usage	 : $length = $qual->length();
+ Function: Return the length of the array holding the quality values.
+	Under most circumstances, this should match the number of quality
+	values but no validation is done when the PrimaryQual object is
+	constructed and non-digits could be put into this array. Is this
+	a bug? Just enough rope...
+ Returns : A scalar (the number of elements in the quality array).
+ Args    : None.
+
+=cut
+
+sub length {
+    my $self = shift;
+    if (ref($self->{qual}) ne "ARRAY") {
+	$self->warn("{qual} is not an array here. Why? It appears to be ".ref($self->{qual})."(".$self->{qual}."). Good thing this can _never_ happen.");
+    }
+    return scalar(@{$self->{qual}});
+}
+
+=head2 qualat($position)
+
+ Title   : qualat($position)
+ Usage   : $quality = $obj->qualat(10);
+ Function: Return the quality value at the given location, where the
+        first value is 1 and the number is inclusive, ie 1-2 are the first
+        two bases of the sequence. Start cannot be larger than end but can
+        be equal.
+ Returns : A scalar.
+ Args    : A position.
+
+=cut
+
+sub qualat {
+    my ($self,$val) = @_;
+    my @qualat = @{$self->subqual($val,$val)};
+    if (scalar(@qualat) == 1) {
+	return $qualat[0];
+    }
+    else {
+	$self->throw("AAAH! qualat provided more then one quality.");
+    }
+} 
+
+=head2 to_string()
+
+ Title   : to_string()
+ Usage   : $quality = $obj->to_string();
+ Function: Return a textual representation of what the object contains.
+	For this module, this function will return:
+		qual
+		display_id
+		accession_number
+		primary_id
+		desc
+		id
+		length
+ Returns : A scalar.
+ Args    : None.
+
+=cut
+
+sub to_string {
+    my ($self,$out,$result) = shift;
+    $out = "qual: ".join(',',@{$self->qual()});
+    foreach (qw(display_id accession_number primary_id desc id)) {
+	$result = $self->$_();
+	if (!$result) { $result = "<unset>"; }
+	$out .= "$_: $result\n";
+    }
+    return $out;
+}
+
+
+sub to_string_automatic {
+    my ($self,$sub_result,$out) = shift;
+    foreach (sort keys %$self) {
+	print("Working on $_\n");
+	eval { $self->$_(); };
+	if ($@) { $sub_result = ref($_); }
+	elsif (!($sub_result = $self->$_())) {
+	    $sub_result = "<unset>";
+	}
+	if (ref($sub_result) eq "ARRAY") {
+	    print("This thing ($_) is an array!\n");
+	    $sub_result = join(',',@$sub_result);	
+	}
+	$out .= "$_: ".$sub_result."\n";
+    }
+    return $out;
+} 
+
+1;