0
|
1 #
|
|
2 # Copyright (c) 1997-2001 bioperl, Chad Matsalla. All Rights Reserved.
|
|
3 # This module is free software; you can redistribute it and/or
|
|
4 # modify it under the same terms as Perl itself.
|
|
5 #
|
|
6 # Copyright Chad Matsalla
|
|
7 #
|
|
8 # You may distribute this module under the same terms as perl itself
|
|
9 # POD documentation - main docs before the code
|
|
10
|
|
11 =head1 NAME
|
|
12
|
|
13 Bio::Tools::Primer3 - Create input for and work with the output from the
|
|
14 program primer3
|
|
15
|
|
16 =head1 SYNOPSIS
|
|
17
|
|
18 Chad will put synopses here by the end of the second week of october, 2002.
|
|
19
|
|
20 =head1 DESCRIPTION
|
|
21
|
|
22 Bio::Tools::Primer3 creates the input files needed to design primers using
|
|
23 primer3 and provides mechanisms to access data in the primer3 output files.
|
|
24
|
|
25 =head1 FEEDBACK
|
|
26
|
|
27 =head2 Mailing Lists
|
|
28
|
|
29 User feedback is an integral part of the evolution of this and other
|
|
30 Bioperl modules. Send your comments and suggestions preferably to one
|
|
31 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
32
|
|
33 bioperl-l@bioperl.org - General discussion
|
|
34 http://www.bioperl.org/MailList.html - About the mailing lists
|
|
35
|
|
36 =head2 Reporting Bugs
|
|
37
|
|
38 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
39 the bugs and their resolution. Bug reports can be submitted via email
|
|
40 or the web:
|
|
41
|
|
42 bioperl-bugs@bio.perl.org
|
|
43 http://bugzilla.bioperl.org/
|
|
44
|
|
45
|
|
46 =head1 AUTHOR - Chad Matsalla
|
|
47
|
|
48 bioinformatics1@dieselwurks.com
|
|
49
|
|
50 =head1 APPENDIX
|
|
51
|
|
52 The rest of the documentation details each of the object methods.
|
|
53 Internal methods are usually preceded with a _
|
|
54
|
|
55 =cut
|
|
56
|
|
57 # Let the code begin...
|
|
58
|
|
59 package Bio::Tools::Primer3;
|
|
60
|
|
61 use vars qw(@ISA);
|
|
62 use strict;
|
|
63 use Bio::Seq;
|
|
64 use Bio::SeqFeature::Primer;
|
|
65 use Bio::Seq::PrimedSeq;
|
|
66 use Bio::Seq::SeqFactory;
|
|
67
|
|
68 use Bio::Root::Root;
|
|
69 use Bio::Root::IO;
|
|
70
|
|
71 use Dumpvalue;
|
|
72
|
|
73 @ISA = qw(Bio::Root::Root Bio::Root::IO);
|
|
74
|
|
75 # Chad likes to use this to debug large hashes.
|
|
76 my $dumper = new Dumpvalue;
|
|
77
|
|
78 # this was a bunch of the seqio things, now deprecated. delete it soon.
|
|
79 # sub _initialize {
|
|
80 # my($self,@args) = @_;
|
|
81 # $self->SUPER::_initialize(@args);
|
|
82 # if( ! defined $self->sequence_factory ) {
|
|
83 # $self->sequence_factory(new Bio::Seq::SeqFactory
|
|
84 # (-verbose => $self->verbose(),
|
|
85 # -type => 'Bio::Seq'));
|
|
86 # }
|
|
87 # }
|
|
88
|
|
89
|
|
90 =head2 new()
|
|
91
|
|
92 Title : new()
|
|
93 Usage :
|
|
94 Function:
|
|
95 Returns :
|
|
96 Args :
|
|
97 Notes :
|
|
98
|
|
99 =cut
|
|
100
|
|
101 sub new {
|
|
102 my($class,@args) = @_;
|
|
103 my $self = $class->SUPER::new(@args);
|
|
104 my($filename) = $self->_rearrange([qw(FILE)],@args);
|
|
105 if (!$filename) {
|
|
106 print("Ahh grasshopper, you are planning to create a primer3 infile\n");
|
|
107 return $self;
|
|
108 }
|
|
109 $self->{filename} = $filename;
|
|
110 # check to see that the file exists
|
|
111 # I think that catfile should be used here.
|
|
112 if (!-f $filename) {
|
|
113 print("That file doesn't exist. Bah.\n");
|
|
114 }
|
|
115 $self->_initialize_io( -file => $filename );
|
|
116 return $self;
|
|
117 }
|
|
118
|
|
119
|
|
120
|
|
121
|
|
122 =head2 null
|
|
123
|
|
124 Title :
|
|
125 Usage :
|
|
126 Function:
|
|
127 Returns :
|
|
128 Args :
|
|
129 Notes :
|
|
130
|
|
131 =cut
|
|
132
|
|
133
|
|
134
|
|
135
|
|
136
|
|
137
|
|
138
|
|
139 =head2 next_primer()
|
|
140
|
|
141 Title : next_primer()
|
|
142 Usage : $primer3 = $stream->next_primer()
|
|
143 Function: returns the next primer in the stream
|
|
144 Returns : Bio::Seq::PrimedSeq containing:
|
|
145 - 2 Bio::SeqFeature::Primer representing the primers
|
|
146 - 1 Bio::Seq representing the target sequence
|
|
147 - 1 Bio::Seq representing the amplified region
|
|
148 Args : NONE
|
|
149 Notes :
|
|
150
|
|
151 =cut
|
|
152
|
|
153 sub next_primer {
|
|
154 my $self = shift;
|
|
155 my $fh = $self->_fh();
|
|
156 my ($line,%primer);
|
|
157 # first, read in the next set of primers
|
|
158 while ($line = $self->_readline()) {
|
|
159 chomp ($line);
|
|
160 last if ($line =~ /^=/);
|
|
161 $line =~ m/(^.*)\=(.*$)/;
|
|
162 $primer{$1} = $2;
|
|
163 }
|
|
164 # then, get the primers as SeqFeature::Primer objects
|
|
165
|
|
166 my ($left,$right) = &_create_primer_features(\%primer);
|
|
167 # then, create the sequence to place them on
|
|
168 my $sequence = Bio::Seq->new(-seq => $primer{SEQUENCE},
|
|
169 -id => $primer{PRIMER_SEQUENCE_ID});
|
|
170 # print("Sequence is ".$primer{SEQUENCE}." and id is ".$primer{PRIMER_SEQUENCE_ID}."\n");
|
|
171 my $primedseq = new Bio::Seq::PrimedSeq(
|
|
172 -target_sequence => $sequence,
|
|
173 -left_primer => $left,
|
|
174 -right_primer => $right,
|
|
175 -primer_sequence_id => $primer{PRIMER_SEQUENCE_ID},
|
|
176 -primer_comment => $primer{PRIMER_COMMENT},
|
|
177 -target => $primer{TARGET},
|
|
178 -primer_product_size_range => $primer{PRIMER_PRODUCT_SIZE_RANGE},
|
|
179 -primer_file_flag => $primer{PRIMER_FILE_FLAG},
|
|
180 -primer_liberal_base => $primer{PRIMER_LIBERAL_BASE},
|
|
181 -primer_num_return => $primer{PRIMER_NUM_RETURN},
|
|
182 -primer_first_base_index => $primer{PRIMER_FIRST_BASE_INDEX},
|
|
183 -primer_explain_flag => $primer{PRIMER_EXPLAIN_FLAG},
|
|
184 -primer_pair_compl_any => $primer{PRIMER_PAIR_COMPL_ANY},
|
|
185 -primer_pair_compl_end => $primer{PRIMER_PAIR_COMPL_END},
|
|
186 -primer_product_size => $primer{PRIMER_PRODUCT_SIZE}
|
|
187 );
|
|
188 return $primedseq;
|
|
189 }
|
|
190
|
|
191
|
|
192 =head2 _create_primer_features()
|
|
193
|
|
194 Title : _create_primer_features()
|
|
195 Usage : &_create_primer_features()
|
|
196 Function: This is an internal method used by next_seq() to create the
|
|
197 Bio::SeqFeature::Primer objects necessary to represent the primers
|
|
198 themselves.
|
|
199 Returns : An array of 2 Bio::SeqFeature::Primer objects.
|
|
200 Args : None.
|
|
201 Notes : This is an internal method. Do not call this method.
|
|
202
|
|
203 =cut
|
|
204
|
|
205
|
|
206 sub _create_primer_features {
|
|
207 my $rdat = shift;
|
|
208 my (%left,%right,$updir,$downdir,$var,$trunc);
|
|
209 my @variables = qw(
|
|
210 PRIMER_DIRECTION
|
|
211 PRIMER_DIRECTION_END_STABILITY
|
|
212 PRIMER_DIRECTION_EXPLAIN
|
|
213 PRIMER_DIRECTION_GC_PERCENT
|
|
214 PRIMER_DIRECTION_PENALTY
|
|
215 PRIMER_DIRECTION_SELF_ANY
|
|
216 PRIMER_DIRECTION_SELF_END
|
|
217 PRIMER_DIRECTION_SEQUENCE
|
|
218 PRIMER_DIRECTION_TM
|
|
219 PRIMER_FIRST_BASE_INDEX
|
|
220 );
|
|
221 # create the hash to pass into the creation routine
|
|
222 # I do it this way because the primer3 outfile variables are exactly the same for each of
|
|
223 # left and right. I create two hashes- one for the left and one for the right primer.
|
|
224 foreach $updir (qw(LEFT RIGHT)) {
|
|
225 my %dat;
|
|
226 foreach (@variables) {
|
|
227 ($var = $_) =~ s/DIRECTION/$updir/e;
|
|
228 # should you truncate the name of each variable?
|
|
229 # for example, should the value be: PRIMER_RIGHT_PENALTY or PENALTY?
|
|
230 # i think it should be the second one
|
|
231 if (/^PRIMER_DIRECTION$/) {
|
|
232 $trunc = "PRIMER";
|
|
233 }
|
|
234 elsif (/^PRIMER_FIRST_BASE_INDEX/) {
|
|
235 $trunc = "FIRST_BASE_INDEX";
|
|
236 }
|
|
237 else {
|
|
238 ($trunc = $_) =~ s/PRIMER_DIRECTION_//;
|
|
239 }
|
|
240 $dat{"-$trunc"} = $rdat->{$var};
|
|
241 }
|
|
242 if ($updir eq "LEFT") {
|
|
243 %left = %dat;
|
|
244 $left{-id} = $rdat->{PRIMER_SEQUENCE_ID}."-left";
|
|
245 }
|
|
246 else {
|
|
247 %right = %dat;
|
|
248 $right{-id} = $rdat->{PRIMER_SEQUENCE_ID}."-right";
|
|
249 }
|
|
250 }
|
|
251 my $primer_left = new Bio::SeqFeature::Primer(%left);
|
|
252 my $primer_right = new Bio::SeqFeature::Primer(%right);
|
|
253 return($primer_left,$primer_right);
|
|
254 }
|
|
255
|
|
256
|
|
257
|
|
258
|
|
259
|
|
260
|
|
261
|
|
262
|
|
263
|
|
264 =head2 get_amplified_region()
|
|
265
|
|
266 Title : get_amplified_region()
|
|
267 Usage : $primer->get_amplified_region()
|
|
268 Function: Returns a Bio::Seq object representing the sequence amplified
|
|
269 Returns : (I think) A Bio::Seq object
|
|
270 Args : None.
|
|
271 Notes : This is not implemented at this time.
|
|
272 Note to chad: implement this simple getter.
|
|
273 Developer notes: There obviously isn't a way for a single primer to know about
|
|
274 its amplified region unless it is paired with another primer. At this time
|
|
275 these object will generally be created with another so I will put in this
|
|
276 method. If there is no sequence null is returned.
|
|
277
|
|
278 THIS DOES NOT BELONG HERE. Put this into something else.
|
|
279
|
|
280
|
|
281 =cut
|
|
282
|
|
283 sub get_amplified_region {
|
|
284 my ($self) = @_;
|
|
285 } # end get_amplified_region
|
|
286
|
|
287 =head2 get_amplification_error()
|
|
288
|
|
289 Title : get_amplification_error()
|
|
290 Usage :
|
|
291 Function:
|
|
292 Returns :
|
|
293 Args :
|
|
294 Notes :
|
|
295 Developer Notes:
|
|
296 THIS DOES NOT BELONG HERE. Put this into something else.
|
|
297
|
|
298 =cut
|
|
299
|
|
300 sub get_amplification_error {
|
|
301 my $primer = $_[1];
|
|
302 my $error = $Primer3::primers{$primer}{PRIMER_ERROR};
|
|
303 if ($error) { return $error; }
|
|
304 else { return "Some error that primer3 didn't define.\n"; }
|
|
305 }
|
|
306
|
|
307 =head2 _set_target()
|
|
308
|
|
309 Title : _set_target()
|
|
310 Usage : &_set_target($self);
|
|
311 Function:
|
|
312 Returns :
|
|
313 Args :
|
|
314 Notes :
|
|
315 Developer Notes: Really I have no idea why I put this in here.
|
|
316 It can is referenced by new_deprecated and by run_primer3
|
|
317
|
|
318
|
|
319 =cut
|
|
320
|
|
321 sub _set_target {
|
|
322 my $self = shift;
|
|
323 my ($sequence,$primer,$primer_left,$primer_right,$position_left,$position_right,$boggle);
|
|
324 $boggle = 1;
|
|
325 foreach $primer (sort keys %{$self->{primers}}) {
|
|
326 $sequence = $self->{primers}{$primer}{SEQUENCE};
|
|
327 $primer_left = $self->{primers}{$primer}{PRIMER_LEFT};
|
|
328 $primer_right = $self->{primers}{$primer}{PRIMER_RIGHT};
|
|
329 if (!$primer_left) {
|
|
330 $self->{primers}{$primer}{design_failed} = "1";
|
|
331 }
|
|
332 else {
|
|
333 $primer_left =~ m/(.*)\,(.*)/;
|
|
334 $position_left = $1+$2-1;
|
|
335 $primer_right =~ m/(.*)\,(.*)/;
|
|
336 $position_right = $1-$2;
|
|
337 $self->{primers}{$primer}{left} = $position_left;
|
|
338 $self->{primers}{$primer}{right} = $position_right;
|
|
339 $self->{primers}{$primer}{amplified} = substr($sequence,$position_left,$position_right-$position_left);
|
|
340 }
|
|
341 }
|
|
342 }
|
|
343
|
|
344 =head2 _read_file($self,$filename)
|
|
345
|
|
346 Title : _read_file($self,$filename)
|
|
347 Usage :
|
|
348 Function:
|
|
349 Returns : A scalar containing the contents of $filename
|
|
350 Args : $self and the name of a file to parse.
|
|
351 Notes :
|
|
352 Developer notes: Honestly, I have no idea what this is for.
|
|
353
|
|
354
|
|
355 =cut
|
|
356
|
|
357 sub _read_file {
|
|
358 # my ($self,$filename) = @_;
|
|
359 # set this to keep track of things....
|
|
360 # $self->{outfilename} = $filename;
|
|
361 # to make this better for bioperl, chad should really be using catfile and things.
|
|
362 #
|
|
363 # my $fh = new FileHandle;
|
|
364 # open($fh,$filename) or die "I can't open the primer report ($filename) : $!\n";
|
|
365 # # _parse_report();
|
|
366 # # my %Primer3::primers;
|
|
367 # my ($output,$line);
|
|
368 # while ($line=<$fh>) {
|
|
369 # # print("Adding $line\n");
|
|
370 # $output .= $line;
|
|
371 # } # end while
|
|
372 # # print("\$output is $output\n");
|
|
373 # return $output;
|
|
374 }
|
|
375
|
|
376
|
|
377
|
|
378
|
|
379
|
|
380 =head2 _parse_report()
|
|
381
|
|
382 Title : _parse_report()
|
|
383 Usage : &_parse_report($self,$filename);
|
|
384 Function: Parse a primer3 outfile and place everything into an object under
|
|
385 {primers} with PRIMER_SEQUENCE_ID being the name of the keys for the
|
|
386 {primers} hash.
|
|
387 Returns : Nothing.
|
|
388 Args : $self and the name of a file to parse.
|
|
389 Notes :
|
|
390
|
|
391 =cut
|
|
392
|
|
393 sub _parse_report {
|
|
394 # old
|
|
395 # my ($self,$filename) = @_;
|
|
396 my ($self,$outputs) = @_;
|
|
397 # print("\$self is $self, \$outputs are $outputs\n");
|
|
398 # print("in _parse_report, \$self is $self\n");
|
|
399 # set this to keep track of things....
|
|
400 my ($sequence_name,$line,$counter,$variable_name,$variable_value);
|
|
401 my @output = split/\n/,$outputs;
|
|
402 foreach $line (@output) {
|
|
403 # print("Reading line $line\n");
|
|
404 next if ($line =~ /^\=/);
|
|
405 if ($line =~ m/^PRIMER_SEQUENCE_ID/) {
|
|
406 $line =~ m/(\S+)=(.*$)/;
|
|
407 $variable_name = $1;
|
|
408 $sequence_name = $2;
|
|
409 $variable_value = $2;
|
|
410 }
|
|
411 else {
|
|
412 $line =~ m/(\S+)=(.*$)/;
|
|
413 $variable_name = $1;
|
|
414 $variable_value = $2;
|
|
415 }
|
|
416 # print("$sequence_name\t$variable_name\t$variable_value\n");
|
|
417 $self->{primers}{$sequence_name}{$variable_name} = $variable_value;
|
|
418 } # end while <>
|
|
419 } # end parse_report
|
|
420
|
|
421 =head2 _construct_empty()
|
|
422
|
|
423 Title : _construct_empty()
|
|
424 Usage : &_construct_empty($self);
|
|
425 Function: Construct an empty object that will be used to construct a primer3
|
|
426 input "file" so that it can be run.
|
|
427 Returns :
|
|
428 Args :
|
|
429 Notes :
|
|
430
|
|
431 =cut
|
|
432
|
|
433 sub _construct_empty {
|
|
434 my $self = shift;
|
|
435 $self->{inputs} = {};
|
|
436 return;
|
|
437 }
|
|
438
|
|
439 =head2 add_target(%stuff)
|
|
440
|
|
441 Title : add_target(%stuff)
|
|
442 Usage : $o_primer->add_target(%stuff);
|
|
443 Function: Add an target to the infile constructor.
|
|
444 Returns :
|
|
445 Args : A hash. Looks something like this:
|
|
446 $o_primer2->add_target(
|
|
447 -PRIMER_SEQUENCE_ID => "sN11902",
|
|
448 -PRIMER_COMMENT => "3831",
|
|
449 -SEQUENCE => "some_sequence",
|
|
450 -TARGET => "513,26",
|
|
451 -PRIMER_PRODUCT_SIZE_RANGE => "100-500",
|
|
452 -PRIMER_FILE_FLAG => "0",
|
|
453 -PRIMER_LIBERAL_BASE => "1",
|
|
454 -PRIMER_NUM_RETURN => "1",
|
|
455 -PRIMER_FIRST_BASE_INDEX => "1",
|
|
456 -PRIMER_EXPLAIN_FLAG => "1");
|
|
457 The add_target() method does not validate the things you put into
|
|
458 this parameter hash. Read the docs for Primer3 to see which fields
|
|
459 do what and how they should be used.
|
|
460 Notes : To design primers, first create a new CSM::Primer3 object with the
|
|
461 -construct_infile parameter. Then, add targets using this method
|
|
462 (add_target()) with the target hash as above in the Args: section.
|
|
463 Be careful. No validation will be done here. All of those parameters
|
|
464 will be fed straight into primer3.
|
|
465 Once you are done adding targets, invoke the function run_primer3().
|
|
466 Then retrieve the results using something like a loop around the array
|
|
467 from get_primer_sequence_IDs();
|
|
468
|
|
469 =cut
|
|
470
|
|
471
|
|
472 sub add_target {
|
|
473 my ($self,%args) = @_;
|
|
474 my ($currkey,$renamed,$sequence_id,$value);
|
|
475 if (!$args{-PRIMER_SEQUENCE_ID}) {
|
|
476 print("You cannot add an element to the primer3 infile without specifying the PRIMER_SEQUENCE_ID. Sorry.\n");
|
|
477 }
|
|
478 else {
|
|
479 $sequence_id = $args{-PRIMER_SEQUENCE_ID};
|
|
480 foreach $currkey (keys %args) {
|
|
481 # print("\$currkey is $currkey\n");
|
|
482 next if ($currkey eq "-PRIMER_SEQUENCE_ID");
|
|
483 ($renamed = $currkey) =~ s/-//;
|
|
484 # print("Adding $renamed to the hash under $sequence_id\n");
|
|
485 $value = $args{$currkey};
|
|
486 # print("\$value is $value\n");
|
|
487 if ($renamed eq "SEQUENCE") { $value =~ s/\n//g; }
|
|
488 $self->{infile}{$sequence_id}{$renamed} = $value;
|
|
489 }
|
|
490 }
|
|
491 }
|
|
492
|
|
493 =head2 get_primer_sequence_IDs()
|
|
494
|
|
495 Title : get_primer_sequence_IDs()
|
|
496 Usage : $o_phred->get_primer_sequence_IDs();
|
|
497 Function: Return the primer sequence ID's. These normally correspond to
|
|
498 the name of a sequence in a database but can be whatever was used when
|
|
499 the primer3 infile was constructed.
|
|
500 Returns : An array containing the names of the primer sequence ID's
|
|
501 Args : None.
|
|
502 Notes : This would be used as the basis for an iterator to loop around each
|
|
503 primer that was designed.
|
|
504
|
|
505 =cut
|
|
506
|
|
507 sub get_primer_sequence_IDs {
|
|
508 my $self = shift;
|
|
509 return sort keys %{$self->{primers}};
|
|
510 } # end get keys
|
|
511
|
|
512 =head2 dump_hash()
|
|
513
|
|
514 Title : dump_hash()
|
|
515 Usage : $o_primer->dump_hash();
|
|
516 Function: Dump out the CSM::Primer3 object.
|
|
517 Returns : Nothing.
|
|
518 Args : None.
|
|
519 Notes : Used extensively in debugging.
|
|
520
|
|
521 =cut
|
|
522
|
|
523 sub dump_hash {
|
|
524 my $self = shift;
|
|
525 my $dumper = new Dumpvalue;
|
|
526 $dumper->dumpValue($self);
|
|
527 } # end dump_hash
|
|
528
|
|
529 =head2 dump_infile_hash()
|
|
530
|
|
531 Title : dump_infile_hash()
|
|
532 Usage : $o_primer->dump_infile_hash();
|
|
533 Function: Dump out the contents of the infile hash.
|
|
534 Returns : Nothing.
|
|
535 Args : None.
|
|
536 Notes : Used for debugging the construction of the infile.
|
|
537
|
|
538 =cut
|
|
539
|
|
540 sub dump_infile_hash {
|
|
541 my $self = shift;
|
|
542 my $dumper = new Dumpvalue;
|
|
543 $dumper->dumpValue($self->{infile});
|
|
544 }
|
|
545
|
|
546
|
|
547
|
|
548 1;
|
|
549 __END__
|
|
550
|
|
551 =head2 placeholder
|
|
552
|
|
553 Title : This is a place holder so chad can cut and paste
|
|
554 Usage :
|
|
555 Function:
|
|
556 Returns :
|
|
557 Args :
|
|
558 Notes :
|
|
559
|
|
560 =cut
|
|
561
|
|
562 =head1 SEE ALSO
|
|
563
|
|
564 perl(1).
|
|
565
|
|
566 =cut
|