Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/Tools/Primer3.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Tools/Primer3.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,566 @@ +# +# Copyright (c) 1997-2001 bioperl, Chad Matsalla. All Rights Reserved. +# This module is free software; you can redistribute it and/or +# modify it under the same terms as Perl itself. +# +# Copyright Chad Matsalla +# +# You may distribute this module under the same terms as perl itself +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Tools::Primer3 - Create input for and work with the output from the +program primer3 + +=head1 SYNOPSIS + +Chad will put synopses here by the end of the second week of october, 2002. + +=head1 DESCRIPTION + +Bio::Tools::Primer3 creates the input files needed to design primers using +primer3 and provides mechanisms to access data in the primer3 output files. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to one +of the Bioperl mailing lists. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://www.bioperl.org/MailList.html - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +the bugs and their resolution. Bug reports can be submitted via email +or the web: + + bioperl-bugs@bio.perl.org + http://bugzilla.bioperl.org/ + + +=head1 AUTHOR - Chad Matsalla + +bioinformatics1@dieselwurks.com + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. +Internal methods are usually preceded with a _ + +=cut + +# Let the code begin... + +package Bio::Tools::Primer3; + +use vars qw(@ISA); +use strict; +use Bio::Seq; +use Bio::SeqFeature::Primer; +use Bio::Seq::PrimedSeq; +use Bio::Seq::SeqFactory; + +use Bio::Root::Root; +use Bio::Root::IO; + +use Dumpvalue; + +@ISA = qw(Bio::Root::Root Bio::Root::IO); + + # Chad likes to use this to debug large hashes. +my $dumper = new Dumpvalue; + + # this was a bunch of the seqio things, now deprecated. delete it soon. + # sub _initialize { + # my($self,@args) = @_; + # $self->SUPER::_initialize(@args); + # if( ! defined $self->sequence_factory ) { + # $self->sequence_factory(new Bio::Seq::SeqFactory + # (-verbose => $self->verbose(), + # -type => 'Bio::Seq')); + # } + # } + + +=head2 new() + + Title : new() + Usage : + Function: + Returns : + Args : + Notes : + +=cut + +sub new { + my($class,@args) = @_; + my $self = $class->SUPER::new(@args); + my($filename) = $self->_rearrange([qw(FILE)],@args); + if (!$filename) { + print("Ahh grasshopper, you are planning to create a primer3 infile\n"); + return $self; + } + $self->{filename} = $filename; + # check to see that the file exists + # I think that catfile should be used here. + if (!-f $filename) { + print("That file doesn't exist. Bah.\n"); + } + $self->_initialize_io( -file => $filename ); + return $self; +} + + + + +=head2 null + + Title : + Usage : + Function: + Returns : + Args : + Notes : + +=cut + + + + + + + +=head2 next_primer() + + Title : next_primer() + Usage : $primer3 = $stream->next_primer() + Function: returns the next primer in the stream + Returns : Bio::Seq::PrimedSeq containing: + - 2 Bio::SeqFeature::Primer representing the primers + - 1 Bio::Seq representing the target sequence + - 1 Bio::Seq representing the amplified region + Args : NONE + Notes : + +=cut + +sub next_primer { + my $self = shift; + my $fh = $self->_fh(); + my ($line,%primer); + # first, read in the next set of primers + while ($line = $self->_readline()) { + chomp ($line); + last if ($line =~ /^=/); + $line =~ m/(^.*)\=(.*$)/; + $primer{$1} = $2; + } + # then, get the primers as SeqFeature::Primer objects + + my ($left,$right) = &_create_primer_features(\%primer); + # then, create the sequence to place them on + my $sequence = Bio::Seq->new(-seq => $primer{SEQUENCE}, + -id => $primer{PRIMER_SEQUENCE_ID}); + # print("Sequence is ".$primer{SEQUENCE}." and id is ".$primer{PRIMER_SEQUENCE_ID}."\n"); + my $primedseq = new Bio::Seq::PrimedSeq( + -target_sequence => $sequence, + -left_primer => $left, + -right_primer => $right, + -primer_sequence_id => $primer{PRIMER_SEQUENCE_ID}, + -primer_comment => $primer{PRIMER_COMMENT}, + -target => $primer{TARGET}, + -primer_product_size_range => $primer{PRIMER_PRODUCT_SIZE_RANGE}, + -primer_file_flag => $primer{PRIMER_FILE_FLAG}, + -primer_liberal_base => $primer{PRIMER_LIBERAL_BASE}, + -primer_num_return => $primer{PRIMER_NUM_RETURN}, + -primer_first_base_index => $primer{PRIMER_FIRST_BASE_INDEX}, + -primer_explain_flag => $primer{PRIMER_EXPLAIN_FLAG}, + -primer_pair_compl_any => $primer{PRIMER_PAIR_COMPL_ANY}, + -primer_pair_compl_end => $primer{PRIMER_PAIR_COMPL_END}, + -primer_product_size => $primer{PRIMER_PRODUCT_SIZE} + ); + return $primedseq; +} + + +=head2 _create_primer_features() + + Title : _create_primer_features() + Usage : &_create_primer_features() + Function: This is an internal method used by next_seq() to create the + Bio::SeqFeature::Primer objects necessary to represent the primers + themselves. + Returns : An array of 2 Bio::SeqFeature::Primer objects. + Args : None. + Notes : This is an internal method. Do not call this method. + +=cut + + +sub _create_primer_features { + my $rdat = shift; + my (%left,%right,$updir,$downdir,$var,$trunc); + my @variables = qw( + PRIMER_DIRECTION + PRIMER_DIRECTION_END_STABILITY + PRIMER_DIRECTION_EXPLAIN + PRIMER_DIRECTION_GC_PERCENT + PRIMER_DIRECTION_PENALTY + PRIMER_DIRECTION_SELF_ANY + PRIMER_DIRECTION_SELF_END + PRIMER_DIRECTION_SEQUENCE + PRIMER_DIRECTION_TM + PRIMER_FIRST_BASE_INDEX + ); + # create the hash to pass into the creation routine + # I do it this way because the primer3 outfile variables are exactly the same for each of + # left and right. I create two hashes- one for the left and one for the right primer. + foreach $updir (qw(LEFT RIGHT)) { + my %dat; + foreach (@variables) { + ($var = $_) =~ s/DIRECTION/$updir/e; + # should you truncate the name of each variable? + # for example, should the value be: PRIMER_RIGHT_PENALTY or PENALTY? + # i think it should be the second one + if (/^PRIMER_DIRECTION$/) { + $trunc = "PRIMER"; + } + elsif (/^PRIMER_FIRST_BASE_INDEX/) { + $trunc = "FIRST_BASE_INDEX"; + } + else { + ($trunc = $_) =~ s/PRIMER_DIRECTION_//; + } + $dat{"-$trunc"} = $rdat->{$var}; + } + if ($updir eq "LEFT") { + %left = %dat; + $left{-id} = $rdat->{PRIMER_SEQUENCE_ID}."-left"; + } + else { + %right = %dat; + $right{-id} = $rdat->{PRIMER_SEQUENCE_ID}."-right"; + } + } + my $primer_left = new Bio::SeqFeature::Primer(%left); + my $primer_right = new Bio::SeqFeature::Primer(%right); + return($primer_left,$primer_right); +} + + + + + + + + + +=head2 get_amplified_region() + + Title : get_amplified_region() + Usage : $primer->get_amplified_region() + Function: Returns a Bio::Seq object representing the sequence amplified + Returns : (I think) A Bio::Seq object + Args : None. + Notes : This is not implemented at this time. + Note to chad: implement this simple getter. + Developer notes: There obviously isn't a way for a single primer to know about + its amplified region unless it is paired with another primer. At this time + these object will generally be created with another so I will put in this + method. If there is no sequence null is returned. + + THIS DOES NOT BELONG HERE. Put this into something else. + + +=cut + +sub get_amplified_region { + my ($self) = @_; +} # end get_amplified_region + +=head2 get_amplification_error() + + Title : get_amplification_error() + Usage : + Function: + Returns : + Args : + Notes : +Developer Notes: + THIS DOES NOT BELONG HERE. Put this into something else. + +=cut + +sub get_amplification_error { + my $primer = $_[1]; + my $error = $Primer3::primers{$primer}{PRIMER_ERROR}; + if ($error) { return $error; } + else { return "Some error that primer3 didn't define.\n"; } +} + +=head2 _set_target() + + Title : _set_target() + Usage : &_set_target($self); + Function: + Returns : + Args : + Notes : +Developer Notes: Really I have no idea why I put this in here. + It can is referenced by new_deprecated and by run_primer3 + + +=cut + +sub _set_target { + my $self = shift; + my ($sequence,$primer,$primer_left,$primer_right,$position_left,$position_right,$boggle); + $boggle = 1; + foreach $primer (sort keys %{$self->{primers}}) { + $sequence = $self->{primers}{$primer}{SEQUENCE}; + $primer_left = $self->{primers}{$primer}{PRIMER_LEFT}; + $primer_right = $self->{primers}{$primer}{PRIMER_RIGHT}; + if (!$primer_left) { + $self->{primers}{$primer}{design_failed} = "1"; + } + else { + $primer_left =~ m/(.*)\,(.*)/; + $position_left = $1+$2-1; + $primer_right =~ m/(.*)\,(.*)/; + $position_right = $1-$2; + $self->{primers}{$primer}{left} = $position_left; + $self->{primers}{$primer}{right} = $position_right; + $self->{primers}{$primer}{amplified} = substr($sequence,$position_left,$position_right-$position_left); + } + } +} + +=head2 _read_file($self,$filename) + + Title : _read_file($self,$filename) + Usage : + Function: + Returns : A scalar containing the contents of $filename + Args : $self and the name of a file to parse. + Notes : +Developer notes: Honestly, I have no idea what this is for. + + +=cut + +sub _read_file { + # my ($self,$filename) = @_; + # set this to keep track of things.... + # $self->{outfilename} = $filename; + # to make this better for bioperl, chad should really be using catfile and things. + # + # my $fh = new FileHandle; + # open($fh,$filename) or die "I can't open the primer report ($filename) : $!\n"; + # # _parse_report(); + # # my %Primer3::primers; + # my ($output,$line); + # while ($line=<$fh>) { + # # print("Adding $line\n"); + # $output .= $line; + # } # end while + # # print("\$output is $output\n"); + # return $output; +} + + + + + +=head2 _parse_report() + + Title : _parse_report() + Usage : &_parse_report($self,$filename); + Function: Parse a primer3 outfile and place everything into an object under + {primers} with PRIMER_SEQUENCE_ID being the name of the keys for the + {primers} hash. + Returns : Nothing. + Args : $self and the name of a file to parse. + Notes : + +=cut + +sub _parse_report { + # old + # my ($self,$filename) = @_; + my ($self,$outputs) = @_; + # print("\$self is $self, \$outputs are $outputs\n"); + # print("in _parse_report, \$self is $self\n"); + # set this to keep track of things.... + my ($sequence_name,$line,$counter,$variable_name,$variable_value); + my @output = split/\n/,$outputs; + foreach $line (@output) { + # print("Reading line $line\n"); + next if ($line =~ /^\=/); + if ($line =~ m/^PRIMER_SEQUENCE_ID/) { + $line =~ m/(\S+)=(.*$)/; + $variable_name = $1; + $sequence_name = $2; + $variable_value = $2; + } + else { + $line =~ m/(\S+)=(.*$)/; + $variable_name = $1; + $variable_value = $2; + } + # print("$sequence_name\t$variable_name\t$variable_value\n"); + $self->{primers}{$sequence_name}{$variable_name} = $variable_value; + } # end while <> +} # end parse_report + +=head2 _construct_empty() + + Title : _construct_empty() + Usage : &_construct_empty($self); + Function: Construct an empty object that will be used to construct a primer3 + input "file" so that it can be run. + Returns : + Args : + Notes : + +=cut + +sub _construct_empty { + my $self = shift; + $self->{inputs} = {}; + return; +} + +=head2 add_target(%stuff) + + Title : add_target(%stuff) + Usage : $o_primer->add_target(%stuff); + Function: Add an target to the infile constructor. + Returns : + Args : A hash. Looks something like this: + $o_primer2->add_target( + -PRIMER_SEQUENCE_ID => "sN11902", + -PRIMER_COMMENT => "3831", + -SEQUENCE => "some_sequence", + -TARGET => "513,26", + -PRIMER_PRODUCT_SIZE_RANGE => "100-500", + -PRIMER_FILE_FLAG => "0", + -PRIMER_LIBERAL_BASE => "1", + -PRIMER_NUM_RETURN => "1", + -PRIMER_FIRST_BASE_INDEX => "1", + -PRIMER_EXPLAIN_FLAG => "1"); + The add_target() method does not validate the things you put into + this parameter hash. Read the docs for Primer3 to see which fields + do what and how they should be used. + Notes : To design primers, first create a new CSM::Primer3 object with the + -construct_infile parameter. Then, add targets using this method + (add_target()) with the target hash as above in the Args: section. + Be careful. No validation will be done here. All of those parameters + will be fed straight into primer3. + Once you are done adding targets, invoke the function run_primer3(). + Then retrieve the results using something like a loop around the array + from get_primer_sequence_IDs(); + +=cut + + +sub add_target { + my ($self,%args) = @_; + my ($currkey,$renamed,$sequence_id,$value); + if (!$args{-PRIMER_SEQUENCE_ID}) { + print("You cannot add an element to the primer3 infile without specifying the PRIMER_SEQUENCE_ID. Sorry.\n"); + } + else { + $sequence_id = $args{-PRIMER_SEQUENCE_ID}; + foreach $currkey (keys %args) { + # print("\$currkey is $currkey\n"); + next if ($currkey eq "-PRIMER_SEQUENCE_ID"); + ($renamed = $currkey) =~ s/-//; + # print("Adding $renamed to the hash under $sequence_id\n"); + $value = $args{$currkey}; + # print("\$value is $value\n"); + if ($renamed eq "SEQUENCE") { $value =~ s/\n//g; } + $self->{infile}{$sequence_id}{$renamed} = $value; + } + } +} + +=head2 get_primer_sequence_IDs() + + Title : get_primer_sequence_IDs() + Usage : $o_phred->get_primer_sequence_IDs(); + Function: Return the primer sequence ID's. These normally correspond to + the name of a sequence in a database but can be whatever was used when + the primer3 infile was constructed. + Returns : An array containing the names of the primer sequence ID's + Args : None. + Notes : This would be used as the basis for an iterator to loop around each + primer that was designed. + +=cut + +sub get_primer_sequence_IDs { + my $self = shift; + return sort keys %{$self->{primers}}; +} # end get keys + +=head2 dump_hash() + + Title : dump_hash() + Usage : $o_primer->dump_hash(); + Function: Dump out the CSM::Primer3 object. + Returns : Nothing. + Args : None. + Notes : Used extensively in debugging. + +=cut + +sub dump_hash { + my $self = shift; + my $dumper = new Dumpvalue; + $dumper->dumpValue($self); +} # end dump_hash + +=head2 dump_infile_hash() + + Title : dump_infile_hash() + Usage : $o_primer->dump_infile_hash(); + Function: Dump out the contents of the infile hash. + Returns : Nothing. + Args : None. + Notes : Used for debugging the construction of the infile. + +=cut + +sub dump_infile_hash { + my $self = shift; + my $dumper = new Dumpvalue; + $dumper->dumpValue($self->{infile}); +} + + + +1; +__END__ + +=head2 placeholder + + Title : This is a place holder so chad can cut and paste + Usage : + Function: + Returns : + Args : + Notes : + +=cut + +=head1 SEE ALSO + +perl(1). + +=cut