comparison variant_effect_predictor/Bio/Tools/Primer3.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 #
2 # Copyright (c) 1997-2001 bioperl, Chad Matsalla. All Rights Reserved.
3 # This module is free software; you can redistribute it and/or
4 # modify it under the same terms as Perl itself.
5 #
6 # Copyright Chad Matsalla
7 #
8 # You may distribute this module under the same terms as perl itself
9 # POD documentation - main docs before the code
10
11 =head1 NAME
12
13 Bio::Tools::Primer3 - Create input for and work with the output from the
14 program primer3
15
16 =head1 SYNOPSIS
17
18 Chad will put synopses here by the end of the second week of october, 2002.
19
20 =head1 DESCRIPTION
21
22 Bio::Tools::Primer3 creates the input files needed to design primers using
23 primer3 and provides mechanisms to access data in the primer3 output files.
24
25 =head1 FEEDBACK
26
27 =head2 Mailing Lists
28
29 User feedback is an integral part of the evolution of this and other
30 Bioperl modules. Send your comments and suggestions preferably to one
31 of the Bioperl mailing lists. Your participation is much appreciated.
32
33 bioperl-l@bioperl.org - General discussion
34 http://www.bioperl.org/MailList.html - About the mailing lists
35
36 =head2 Reporting Bugs
37
38 Report bugs to the Bioperl bug tracking system to help us keep track
39 the bugs and their resolution. Bug reports can be submitted via email
40 or the web:
41
42 bioperl-bugs@bio.perl.org
43 http://bugzilla.bioperl.org/
44
45
46 =head1 AUTHOR - Chad Matsalla
47
48 bioinformatics1@dieselwurks.com
49
50 =head1 APPENDIX
51
52 The rest of the documentation details each of the object methods.
53 Internal methods are usually preceded with a _
54
55 =cut
56
57 # Let the code begin...
58
59 package Bio::Tools::Primer3;
60
61 use vars qw(@ISA);
62 use strict;
63 use Bio::Seq;
64 use Bio::SeqFeature::Primer;
65 use Bio::Seq::PrimedSeq;
66 use Bio::Seq::SeqFactory;
67
68 use Bio::Root::Root;
69 use Bio::Root::IO;
70
71 use Dumpvalue;
72
73 @ISA = qw(Bio::Root::Root Bio::Root::IO);
74
75 # Chad likes to use this to debug large hashes.
76 my $dumper = new Dumpvalue;
77
78 # this was a bunch of the seqio things, now deprecated. delete it soon.
79 # sub _initialize {
80 # my($self,@args) = @_;
81 # $self->SUPER::_initialize(@args);
82 # if( ! defined $self->sequence_factory ) {
83 # $self->sequence_factory(new Bio::Seq::SeqFactory
84 # (-verbose => $self->verbose(),
85 # -type => 'Bio::Seq'));
86 # }
87 # }
88
89
90 =head2 new()
91
92 Title : new()
93 Usage :
94 Function:
95 Returns :
96 Args :
97 Notes :
98
99 =cut
100
101 sub new {
102 my($class,@args) = @_;
103 my $self = $class->SUPER::new(@args);
104 my($filename) = $self->_rearrange([qw(FILE)],@args);
105 if (!$filename) {
106 print("Ahh grasshopper, you are planning to create a primer3 infile\n");
107 return $self;
108 }
109 $self->{filename} = $filename;
110 # check to see that the file exists
111 # I think that catfile should be used here.
112 if (!-f $filename) {
113 print("That file doesn't exist. Bah.\n");
114 }
115 $self->_initialize_io( -file => $filename );
116 return $self;
117 }
118
119
120
121
122 =head2 null
123
124 Title :
125 Usage :
126 Function:
127 Returns :
128 Args :
129 Notes :
130
131 =cut
132
133
134
135
136
137
138
139 =head2 next_primer()
140
141 Title : next_primer()
142 Usage : $primer3 = $stream->next_primer()
143 Function: returns the next primer in the stream
144 Returns : Bio::Seq::PrimedSeq containing:
145 - 2 Bio::SeqFeature::Primer representing the primers
146 - 1 Bio::Seq representing the target sequence
147 - 1 Bio::Seq representing the amplified region
148 Args : NONE
149 Notes :
150
151 =cut
152
153 sub next_primer {
154 my $self = shift;
155 my $fh = $self->_fh();
156 my ($line,%primer);
157 # first, read in the next set of primers
158 while ($line = $self->_readline()) {
159 chomp ($line);
160 last if ($line =~ /^=/);
161 $line =~ m/(^.*)\=(.*$)/;
162 $primer{$1} = $2;
163 }
164 # then, get the primers as SeqFeature::Primer objects
165
166 my ($left,$right) = &_create_primer_features(\%primer);
167 # then, create the sequence to place them on
168 my $sequence = Bio::Seq->new(-seq => $primer{SEQUENCE},
169 -id => $primer{PRIMER_SEQUENCE_ID});
170 # print("Sequence is ".$primer{SEQUENCE}." and id is ".$primer{PRIMER_SEQUENCE_ID}."\n");
171 my $primedseq = new Bio::Seq::PrimedSeq(
172 -target_sequence => $sequence,
173 -left_primer => $left,
174 -right_primer => $right,
175 -primer_sequence_id => $primer{PRIMER_SEQUENCE_ID},
176 -primer_comment => $primer{PRIMER_COMMENT},
177 -target => $primer{TARGET},
178 -primer_product_size_range => $primer{PRIMER_PRODUCT_SIZE_RANGE},
179 -primer_file_flag => $primer{PRIMER_FILE_FLAG},
180 -primer_liberal_base => $primer{PRIMER_LIBERAL_BASE},
181 -primer_num_return => $primer{PRIMER_NUM_RETURN},
182 -primer_first_base_index => $primer{PRIMER_FIRST_BASE_INDEX},
183 -primer_explain_flag => $primer{PRIMER_EXPLAIN_FLAG},
184 -primer_pair_compl_any => $primer{PRIMER_PAIR_COMPL_ANY},
185 -primer_pair_compl_end => $primer{PRIMER_PAIR_COMPL_END},
186 -primer_product_size => $primer{PRIMER_PRODUCT_SIZE}
187 );
188 return $primedseq;
189 }
190
191
192 =head2 _create_primer_features()
193
194 Title : _create_primer_features()
195 Usage : &_create_primer_features()
196 Function: This is an internal method used by next_seq() to create the
197 Bio::SeqFeature::Primer objects necessary to represent the primers
198 themselves.
199 Returns : An array of 2 Bio::SeqFeature::Primer objects.
200 Args : None.
201 Notes : This is an internal method. Do not call this method.
202
203 =cut
204
205
206 sub _create_primer_features {
207 my $rdat = shift;
208 my (%left,%right,$updir,$downdir,$var,$trunc);
209 my @variables = qw(
210 PRIMER_DIRECTION
211 PRIMER_DIRECTION_END_STABILITY
212 PRIMER_DIRECTION_EXPLAIN
213 PRIMER_DIRECTION_GC_PERCENT
214 PRIMER_DIRECTION_PENALTY
215 PRIMER_DIRECTION_SELF_ANY
216 PRIMER_DIRECTION_SELF_END
217 PRIMER_DIRECTION_SEQUENCE
218 PRIMER_DIRECTION_TM
219 PRIMER_FIRST_BASE_INDEX
220 );
221 # create the hash to pass into the creation routine
222 # I do it this way because the primer3 outfile variables are exactly the same for each of
223 # left and right. I create two hashes- one for the left and one for the right primer.
224 foreach $updir (qw(LEFT RIGHT)) {
225 my %dat;
226 foreach (@variables) {
227 ($var = $_) =~ s/DIRECTION/$updir/e;
228 # should you truncate the name of each variable?
229 # for example, should the value be: PRIMER_RIGHT_PENALTY or PENALTY?
230 # i think it should be the second one
231 if (/^PRIMER_DIRECTION$/) {
232 $trunc = "PRIMER";
233 }
234 elsif (/^PRIMER_FIRST_BASE_INDEX/) {
235 $trunc = "FIRST_BASE_INDEX";
236 }
237 else {
238 ($trunc = $_) =~ s/PRIMER_DIRECTION_//;
239 }
240 $dat{"-$trunc"} = $rdat->{$var};
241 }
242 if ($updir eq "LEFT") {
243 %left = %dat;
244 $left{-id} = $rdat->{PRIMER_SEQUENCE_ID}."-left";
245 }
246 else {
247 %right = %dat;
248 $right{-id} = $rdat->{PRIMER_SEQUENCE_ID}."-right";
249 }
250 }
251 my $primer_left = new Bio::SeqFeature::Primer(%left);
252 my $primer_right = new Bio::SeqFeature::Primer(%right);
253 return($primer_left,$primer_right);
254 }
255
256
257
258
259
260
261
262
263
264 =head2 get_amplified_region()
265
266 Title : get_amplified_region()
267 Usage : $primer->get_amplified_region()
268 Function: Returns a Bio::Seq object representing the sequence amplified
269 Returns : (I think) A Bio::Seq object
270 Args : None.
271 Notes : This is not implemented at this time.
272 Note to chad: implement this simple getter.
273 Developer notes: There obviously isn't a way for a single primer to know about
274 its amplified region unless it is paired with another primer. At this time
275 these object will generally be created with another so I will put in this
276 method. If there is no sequence null is returned.
277
278 THIS DOES NOT BELONG HERE. Put this into something else.
279
280
281 =cut
282
283 sub get_amplified_region {
284 my ($self) = @_;
285 } # end get_amplified_region
286
287 =head2 get_amplification_error()
288
289 Title : get_amplification_error()
290 Usage :
291 Function:
292 Returns :
293 Args :
294 Notes :
295 Developer Notes:
296 THIS DOES NOT BELONG HERE. Put this into something else.
297
298 =cut
299
300 sub get_amplification_error {
301 my $primer = $_[1];
302 my $error = $Primer3::primers{$primer}{PRIMER_ERROR};
303 if ($error) { return $error; }
304 else { return "Some error that primer3 didn't define.\n"; }
305 }
306
307 =head2 _set_target()
308
309 Title : _set_target()
310 Usage : &_set_target($self);
311 Function:
312 Returns :
313 Args :
314 Notes :
315 Developer Notes: Really I have no idea why I put this in here.
316 It can is referenced by new_deprecated and by run_primer3
317
318
319 =cut
320
321 sub _set_target {
322 my $self = shift;
323 my ($sequence,$primer,$primer_left,$primer_right,$position_left,$position_right,$boggle);
324 $boggle = 1;
325 foreach $primer (sort keys %{$self->{primers}}) {
326 $sequence = $self->{primers}{$primer}{SEQUENCE};
327 $primer_left = $self->{primers}{$primer}{PRIMER_LEFT};
328 $primer_right = $self->{primers}{$primer}{PRIMER_RIGHT};
329 if (!$primer_left) {
330 $self->{primers}{$primer}{design_failed} = "1";
331 }
332 else {
333 $primer_left =~ m/(.*)\,(.*)/;
334 $position_left = $1+$2-1;
335 $primer_right =~ m/(.*)\,(.*)/;
336 $position_right = $1-$2;
337 $self->{primers}{$primer}{left} = $position_left;
338 $self->{primers}{$primer}{right} = $position_right;
339 $self->{primers}{$primer}{amplified} = substr($sequence,$position_left,$position_right-$position_left);
340 }
341 }
342 }
343
344 =head2 _read_file($self,$filename)
345
346 Title : _read_file($self,$filename)
347 Usage :
348 Function:
349 Returns : A scalar containing the contents of $filename
350 Args : $self and the name of a file to parse.
351 Notes :
352 Developer notes: Honestly, I have no idea what this is for.
353
354
355 =cut
356
357 sub _read_file {
358 # my ($self,$filename) = @_;
359 # set this to keep track of things....
360 # $self->{outfilename} = $filename;
361 # to make this better for bioperl, chad should really be using catfile and things.
362 #
363 # my $fh = new FileHandle;
364 # open($fh,$filename) or die "I can't open the primer report ($filename) : $!\n";
365 # # _parse_report();
366 # # my %Primer3::primers;
367 # my ($output,$line);
368 # while ($line=<$fh>) {
369 # # print("Adding $line\n");
370 # $output .= $line;
371 # } # end while
372 # # print("\$output is $output\n");
373 # return $output;
374 }
375
376
377
378
379
380 =head2 _parse_report()
381
382 Title : _parse_report()
383 Usage : &_parse_report($self,$filename);
384 Function: Parse a primer3 outfile and place everything into an object under
385 {primers} with PRIMER_SEQUENCE_ID being the name of the keys for the
386 {primers} hash.
387 Returns : Nothing.
388 Args : $self and the name of a file to parse.
389 Notes :
390
391 =cut
392
393 sub _parse_report {
394 # old
395 # my ($self,$filename) = @_;
396 my ($self,$outputs) = @_;
397 # print("\$self is $self, \$outputs are $outputs\n");
398 # print("in _parse_report, \$self is $self\n");
399 # set this to keep track of things....
400 my ($sequence_name,$line,$counter,$variable_name,$variable_value);
401 my @output = split/\n/,$outputs;
402 foreach $line (@output) {
403 # print("Reading line $line\n");
404 next if ($line =~ /^\=/);
405 if ($line =~ m/^PRIMER_SEQUENCE_ID/) {
406 $line =~ m/(\S+)=(.*$)/;
407 $variable_name = $1;
408 $sequence_name = $2;
409 $variable_value = $2;
410 }
411 else {
412 $line =~ m/(\S+)=(.*$)/;
413 $variable_name = $1;
414 $variable_value = $2;
415 }
416 # print("$sequence_name\t$variable_name\t$variable_value\n");
417 $self->{primers}{$sequence_name}{$variable_name} = $variable_value;
418 } # end while <>
419 } # end parse_report
420
421 =head2 _construct_empty()
422
423 Title : _construct_empty()
424 Usage : &_construct_empty($self);
425 Function: Construct an empty object that will be used to construct a primer3
426 input "file" so that it can be run.
427 Returns :
428 Args :
429 Notes :
430
431 =cut
432
433 sub _construct_empty {
434 my $self = shift;
435 $self->{inputs} = {};
436 return;
437 }
438
439 =head2 add_target(%stuff)
440
441 Title : add_target(%stuff)
442 Usage : $o_primer->add_target(%stuff);
443 Function: Add an target to the infile constructor.
444 Returns :
445 Args : A hash. Looks something like this:
446 $o_primer2->add_target(
447 -PRIMER_SEQUENCE_ID => "sN11902",
448 -PRIMER_COMMENT => "3831",
449 -SEQUENCE => "some_sequence",
450 -TARGET => "513,26",
451 -PRIMER_PRODUCT_SIZE_RANGE => "100-500",
452 -PRIMER_FILE_FLAG => "0",
453 -PRIMER_LIBERAL_BASE => "1",
454 -PRIMER_NUM_RETURN => "1",
455 -PRIMER_FIRST_BASE_INDEX => "1",
456 -PRIMER_EXPLAIN_FLAG => "1");
457 The add_target() method does not validate the things you put into
458 this parameter hash. Read the docs for Primer3 to see which fields
459 do what and how they should be used.
460 Notes : To design primers, first create a new CSM::Primer3 object with the
461 -construct_infile parameter. Then, add targets using this method
462 (add_target()) with the target hash as above in the Args: section.
463 Be careful. No validation will be done here. All of those parameters
464 will be fed straight into primer3.
465 Once you are done adding targets, invoke the function run_primer3().
466 Then retrieve the results using something like a loop around the array
467 from get_primer_sequence_IDs();
468
469 =cut
470
471
472 sub add_target {
473 my ($self,%args) = @_;
474 my ($currkey,$renamed,$sequence_id,$value);
475 if (!$args{-PRIMER_SEQUENCE_ID}) {
476 print("You cannot add an element to the primer3 infile without specifying the PRIMER_SEQUENCE_ID. Sorry.\n");
477 }
478 else {
479 $sequence_id = $args{-PRIMER_SEQUENCE_ID};
480 foreach $currkey (keys %args) {
481 # print("\$currkey is $currkey\n");
482 next if ($currkey eq "-PRIMER_SEQUENCE_ID");
483 ($renamed = $currkey) =~ s/-//;
484 # print("Adding $renamed to the hash under $sequence_id\n");
485 $value = $args{$currkey};
486 # print("\$value is $value\n");
487 if ($renamed eq "SEQUENCE") { $value =~ s/\n//g; }
488 $self->{infile}{$sequence_id}{$renamed} = $value;
489 }
490 }
491 }
492
493 =head2 get_primer_sequence_IDs()
494
495 Title : get_primer_sequence_IDs()
496 Usage : $o_phred->get_primer_sequence_IDs();
497 Function: Return the primer sequence ID's. These normally correspond to
498 the name of a sequence in a database but can be whatever was used when
499 the primer3 infile was constructed.
500 Returns : An array containing the names of the primer sequence ID's
501 Args : None.
502 Notes : This would be used as the basis for an iterator to loop around each
503 primer that was designed.
504
505 =cut
506
507 sub get_primer_sequence_IDs {
508 my $self = shift;
509 return sort keys %{$self->{primers}};
510 } # end get keys
511
512 =head2 dump_hash()
513
514 Title : dump_hash()
515 Usage : $o_primer->dump_hash();
516 Function: Dump out the CSM::Primer3 object.
517 Returns : Nothing.
518 Args : None.
519 Notes : Used extensively in debugging.
520
521 =cut
522
523 sub dump_hash {
524 my $self = shift;
525 my $dumper = new Dumpvalue;
526 $dumper->dumpValue($self);
527 } # end dump_hash
528
529 =head2 dump_infile_hash()
530
531 Title : dump_infile_hash()
532 Usage : $o_primer->dump_infile_hash();
533 Function: Dump out the contents of the infile hash.
534 Returns : Nothing.
535 Args : None.
536 Notes : Used for debugging the construction of the infile.
537
538 =cut
539
540 sub dump_infile_hash {
541 my $self = shift;
542 my $dumper = new Dumpvalue;
543 $dumper->dumpValue($self->{infile});
544 }
545
546
547
548 1;
549 __END__
550
551 =head2 placeholder
552
553 Title : This is a place holder so chad can cut and paste
554 Usage :
555 Function:
556 Returns :
557 Args :
558 Notes :
559
560 =cut
561
562 =head1 SEE ALSO
563
564 perl(1).
565
566 =cut