Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/Primer3.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # | |
2 # Copyright (c) 1997-2001 bioperl, Chad Matsalla. All Rights Reserved. | |
3 # This module is free software; you can redistribute it and/or | |
4 # modify it under the same terms as Perl itself. | |
5 # | |
6 # Copyright Chad Matsalla | |
7 # | |
8 # You may distribute this module under the same terms as perl itself | |
9 # POD documentation - main docs before the code | |
10 | |
11 =head1 NAME | |
12 | |
13 Bio::Tools::Primer3 - Create input for and work with the output from the | |
14 program primer3 | |
15 | |
16 =head1 SYNOPSIS | |
17 | |
18 Chad will put synopses here by the end of the second week of october, 2002. | |
19 | |
20 =head1 DESCRIPTION | |
21 | |
22 Bio::Tools::Primer3 creates the input files needed to design primers using | |
23 primer3 and provides mechanisms to access data in the primer3 output files. | |
24 | |
25 =head1 FEEDBACK | |
26 | |
27 =head2 Mailing Lists | |
28 | |
29 User feedback is an integral part of the evolution of this and other | |
30 Bioperl modules. Send your comments and suggestions preferably to one | |
31 of the Bioperl mailing lists. Your participation is much appreciated. | |
32 | |
33 bioperl-l@bioperl.org - General discussion | |
34 http://www.bioperl.org/MailList.html - About the mailing lists | |
35 | |
36 =head2 Reporting Bugs | |
37 | |
38 Report bugs to the Bioperl bug tracking system to help us keep track | |
39 the bugs and their resolution. Bug reports can be submitted via email | |
40 or the web: | |
41 | |
42 bioperl-bugs@bio.perl.org | |
43 http://bugzilla.bioperl.org/ | |
44 | |
45 | |
46 =head1 AUTHOR - Chad Matsalla | |
47 | |
48 bioinformatics1@dieselwurks.com | |
49 | |
50 =head1 APPENDIX | |
51 | |
52 The rest of the documentation details each of the object methods. | |
53 Internal methods are usually preceded with a _ | |
54 | |
55 =cut | |
56 | |
57 # Let the code begin... | |
58 | |
59 package Bio::Tools::Primer3; | |
60 | |
61 use vars qw(@ISA); | |
62 use strict; | |
63 use Bio::Seq; | |
64 use Bio::SeqFeature::Primer; | |
65 use Bio::Seq::PrimedSeq; | |
66 use Bio::Seq::SeqFactory; | |
67 | |
68 use Bio::Root::Root; | |
69 use Bio::Root::IO; | |
70 | |
71 use Dumpvalue; | |
72 | |
73 @ISA = qw(Bio::Root::Root Bio::Root::IO); | |
74 | |
75 # Chad likes to use this to debug large hashes. | |
76 my $dumper = new Dumpvalue; | |
77 | |
78 # this was a bunch of the seqio things, now deprecated. delete it soon. | |
79 # sub _initialize { | |
80 # my($self,@args) = @_; | |
81 # $self->SUPER::_initialize(@args); | |
82 # if( ! defined $self->sequence_factory ) { | |
83 # $self->sequence_factory(new Bio::Seq::SeqFactory | |
84 # (-verbose => $self->verbose(), | |
85 # -type => 'Bio::Seq')); | |
86 # } | |
87 # } | |
88 | |
89 | |
90 =head2 new() | |
91 | |
92 Title : new() | |
93 Usage : | |
94 Function: | |
95 Returns : | |
96 Args : | |
97 Notes : | |
98 | |
99 =cut | |
100 | |
101 sub new { | |
102 my($class,@args) = @_; | |
103 my $self = $class->SUPER::new(@args); | |
104 my($filename) = $self->_rearrange([qw(FILE)],@args); | |
105 if (!$filename) { | |
106 print("Ahh grasshopper, you are planning to create a primer3 infile\n"); | |
107 return $self; | |
108 } | |
109 $self->{filename} = $filename; | |
110 # check to see that the file exists | |
111 # I think that catfile should be used here. | |
112 if (!-f $filename) { | |
113 print("That file doesn't exist. Bah.\n"); | |
114 } | |
115 $self->_initialize_io( -file => $filename ); | |
116 return $self; | |
117 } | |
118 | |
119 | |
120 | |
121 | |
122 =head2 null | |
123 | |
124 Title : | |
125 Usage : | |
126 Function: | |
127 Returns : | |
128 Args : | |
129 Notes : | |
130 | |
131 =cut | |
132 | |
133 | |
134 | |
135 | |
136 | |
137 | |
138 | |
139 =head2 next_primer() | |
140 | |
141 Title : next_primer() | |
142 Usage : $primer3 = $stream->next_primer() | |
143 Function: returns the next primer in the stream | |
144 Returns : Bio::Seq::PrimedSeq containing: | |
145 - 2 Bio::SeqFeature::Primer representing the primers | |
146 - 1 Bio::Seq representing the target sequence | |
147 - 1 Bio::Seq representing the amplified region | |
148 Args : NONE | |
149 Notes : | |
150 | |
151 =cut | |
152 | |
153 sub next_primer { | |
154 my $self = shift; | |
155 my $fh = $self->_fh(); | |
156 my ($line,%primer); | |
157 # first, read in the next set of primers | |
158 while ($line = $self->_readline()) { | |
159 chomp ($line); | |
160 last if ($line =~ /^=/); | |
161 $line =~ m/(^.*)\=(.*$)/; | |
162 $primer{$1} = $2; | |
163 } | |
164 # then, get the primers as SeqFeature::Primer objects | |
165 | |
166 my ($left,$right) = &_create_primer_features(\%primer); | |
167 # then, create the sequence to place them on | |
168 my $sequence = Bio::Seq->new(-seq => $primer{SEQUENCE}, | |
169 -id => $primer{PRIMER_SEQUENCE_ID}); | |
170 # print("Sequence is ".$primer{SEQUENCE}." and id is ".$primer{PRIMER_SEQUENCE_ID}."\n"); | |
171 my $primedseq = new Bio::Seq::PrimedSeq( | |
172 -target_sequence => $sequence, | |
173 -left_primer => $left, | |
174 -right_primer => $right, | |
175 -primer_sequence_id => $primer{PRIMER_SEQUENCE_ID}, | |
176 -primer_comment => $primer{PRIMER_COMMENT}, | |
177 -target => $primer{TARGET}, | |
178 -primer_product_size_range => $primer{PRIMER_PRODUCT_SIZE_RANGE}, | |
179 -primer_file_flag => $primer{PRIMER_FILE_FLAG}, | |
180 -primer_liberal_base => $primer{PRIMER_LIBERAL_BASE}, | |
181 -primer_num_return => $primer{PRIMER_NUM_RETURN}, | |
182 -primer_first_base_index => $primer{PRIMER_FIRST_BASE_INDEX}, | |
183 -primer_explain_flag => $primer{PRIMER_EXPLAIN_FLAG}, | |
184 -primer_pair_compl_any => $primer{PRIMER_PAIR_COMPL_ANY}, | |
185 -primer_pair_compl_end => $primer{PRIMER_PAIR_COMPL_END}, | |
186 -primer_product_size => $primer{PRIMER_PRODUCT_SIZE} | |
187 ); | |
188 return $primedseq; | |
189 } | |
190 | |
191 | |
192 =head2 _create_primer_features() | |
193 | |
194 Title : _create_primer_features() | |
195 Usage : &_create_primer_features() | |
196 Function: This is an internal method used by next_seq() to create the | |
197 Bio::SeqFeature::Primer objects necessary to represent the primers | |
198 themselves. | |
199 Returns : An array of 2 Bio::SeqFeature::Primer objects. | |
200 Args : None. | |
201 Notes : This is an internal method. Do not call this method. | |
202 | |
203 =cut | |
204 | |
205 | |
206 sub _create_primer_features { | |
207 my $rdat = shift; | |
208 my (%left,%right,$updir,$downdir,$var,$trunc); | |
209 my @variables = qw( | |
210 PRIMER_DIRECTION | |
211 PRIMER_DIRECTION_END_STABILITY | |
212 PRIMER_DIRECTION_EXPLAIN | |
213 PRIMER_DIRECTION_GC_PERCENT | |
214 PRIMER_DIRECTION_PENALTY | |
215 PRIMER_DIRECTION_SELF_ANY | |
216 PRIMER_DIRECTION_SELF_END | |
217 PRIMER_DIRECTION_SEQUENCE | |
218 PRIMER_DIRECTION_TM | |
219 PRIMER_FIRST_BASE_INDEX | |
220 ); | |
221 # create the hash to pass into the creation routine | |
222 # I do it this way because the primer3 outfile variables are exactly the same for each of | |
223 # left and right. I create two hashes- one for the left and one for the right primer. | |
224 foreach $updir (qw(LEFT RIGHT)) { | |
225 my %dat; | |
226 foreach (@variables) { | |
227 ($var = $_) =~ s/DIRECTION/$updir/e; | |
228 # should you truncate the name of each variable? | |
229 # for example, should the value be: PRIMER_RIGHT_PENALTY or PENALTY? | |
230 # i think it should be the second one | |
231 if (/^PRIMER_DIRECTION$/) { | |
232 $trunc = "PRIMER"; | |
233 } | |
234 elsif (/^PRIMER_FIRST_BASE_INDEX/) { | |
235 $trunc = "FIRST_BASE_INDEX"; | |
236 } | |
237 else { | |
238 ($trunc = $_) =~ s/PRIMER_DIRECTION_//; | |
239 } | |
240 $dat{"-$trunc"} = $rdat->{$var}; | |
241 } | |
242 if ($updir eq "LEFT") { | |
243 %left = %dat; | |
244 $left{-id} = $rdat->{PRIMER_SEQUENCE_ID}."-left"; | |
245 } | |
246 else { | |
247 %right = %dat; | |
248 $right{-id} = $rdat->{PRIMER_SEQUENCE_ID}."-right"; | |
249 } | |
250 } | |
251 my $primer_left = new Bio::SeqFeature::Primer(%left); | |
252 my $primer_right = new Bio::SeqFeature::Primer(%right); | |
253 return($primer_left,$primer_right); | |
254 } | |
255 | |
256 | |
257 | |
258 | |
259 | |
260 | |
261 | |
262 | |
263 | |
264 =head2 get_amplified_region() | |
265 | |
266 Title : get_amplified_region() | |
267 Usage : $primer->get_amplified_region() | |
268 Function: Returns a Bio::Seq object representing the sequence amplified | |
269 Returns : (I think) A Bio::Seq object | |
270 Args : None. | |
271 Notes : This is not implemented at this time. | |
272 Note to chad: implement this simple getter. | |
273 Developer notes: There obviously isn't a way for a single primer to know about | |
274 its amplified region unless it is paired with another primer. At this time | |
275 these object will generally be created with another so I will put in this | |
276 method. If there is no sequence null is returned. | |
277 | |
278 THIS DOES NOT BELONG HERE. Put this into something else. | |
279 | |
280 | |
281 =cut | |
282 | |
283 sub get_amplified_region { | |
284 my ($self) = @_; | |
285 } # end get_amplified_region | |
286 | |
287 =head2 get_amplification_error() | |
288 | |
289 Title : get_amplification_error() | |
290 Usage : | |
291 Function: | |
292 Returns : | |
293 Args : | |
294 Notes : | |
295 Developer Notes: | |
296 THIS DOES NOT BELONG HERE. Put this into something else. | |
297 | |
298 =cut | |
299 | |
300 sub get_amplification_error { | |
301 my $primer = $_[1]; | |
302 my $error = $Primer3::primers{$primer}{PRIMER_ERROR}; | |
303 if ($error) { return $error; } | |
304 else { return "Some error that primer3 didn't define.\n"; } | |
305 } | |
306 | |
307 =head2 _set_target() | |
308 | |
309 Title : _set_target() | |
310 Usage : &_set_target($self); | |
311 Function: | |
312 Returns : | |
313 Args : | |
314 Notes : | |
315 Developer Notes: Really I have no idea why I put this in here. | |
316 It can is referenced by new_deprecated and by run_primer3 | |
317 | |
318 | |
319 =cut | |
320 | |
321 sub _set_target { | |
322 my $self = shift; | |
323 my ($sequence,$primer,$primer_left,$primer_right,$position_left,$position_right,$boggle); | |
324 $boggle = 1; | |
325 foreach $primer (sort keys %{$self->{primers}}) { | |
326 $sequence = $self->{primers}{$primer}{SEQUENCE}; | |
327 $primer_left = $self->{primers}{$primer}{PRIMER_LEFT}; | |
328 $primer_right = $self->{primers}{$primer}{PRIMER_RIGHT}; | |
329 if (!$primer_left) { | |
330 $self->{primers}{$primer}{design_failed} = "1"; | |
331 } | |
332 else { | |
333 $primer_left =~ m/(.*)\,(.*)/; | |
334 $position_left = $1+$2-1; | |
335 $primer_right =~ m/(.*)\,(.*)/; | |
336 $position_right = $1-$2; | |
337 $self->{primers}{$primer}{left} = $position_left; | |
338 $self->{primers}{$primer}{right} = $position_right; | |
339 $self->{primers}{$primer}{amplified} = substr($sequence,$position_left,$position_right-$position_left); | |
340 } | |
341 } | |
342 } | |
343 | |
344 =head2 _read_file($self,$filename) | |
345 | |
346 Title : _read_file($self,$filename) | |
347 Usage : | |
348 Function: | |
349 Returns : A scalar containing the contents of $filename | |
350 Args : $self and the name of a file to parse. | |
351 Notes : | |
352 Developer notes: Honestly, I have no idea what this is for. | |
353 | |
354 | |
355 =cut | |
356 | |
357 sub _read_file { | |
358 # my ($self,$filename) = @_; | |
359 # set this to keep track of things.... | |
360 # $self->{outfilename} = $filename; | |
361 # to make this better for bioperl, chad should really be using catfile and things. | |
362 # | |
363 # my $fh = new FileHandle; | |
364 # open($fh,$filename) or die "I can't open the primer report ($filename) : $!\n"; | |
365 # # _parse_report(); | |
366 # # my %Primer3::primers; | |
367 # my ($output,$line); | |
368 # while ($line=<$fh>) { | |
369 # # print("Adding $line\n"); | |
370 # $output .= $line; | |
371 # } # end while | |
372 # # print("\$output is $output\n"); | |
373 # return $output; | |
374 } | |
375 | |
376 | |
377 | |
378 | |
379 | |
380 =head2 _parse_report() | |
381 | |
382 Title : _parse_report() | |
383 Usage : &_parse_report($self,$filename); | |
384 Function: Parse a primer3 outfile and place everything into an object under | |
385 {primers} with PRIMER_SEQUENCE_ID being the name of the keys for the | |
386 {primers} hash. | |
387 Returns : Nothing. | |
388 Args : $self and the name of a file to parse. | |
389 Notes : | |
390 | |
391 =cut | |
392 | |
393 sub _parse_report { | |
394 # old | |
395 # my ($self,$filename) = @_; | |
396 my ($self,$outputs) = @_; | |
397 # print("\$self is $self, \$outputs are $outputs\n"); | |
398 # print("in _parse_report, \$self is $self\n"); | |
399 # set this to keep track of things.... | |
400 my ($sequence_name,$line,$counter,$variable_name,$variable_value); | |
401 my @output = split/\n/,$outputs; | |
402 foreach $line (@output) { | |
403 # print("Reading line $line\n"); | |
404 next if ($line =~ /^\=/); | |
405 if ($line =~ m/^PRIMER_SEQUENCE_ID/) { | |
406 $line =~ m/(\S+)=(.*$)/; | |
407 $variable_name = $1; | |
408 $sequence_name = $2; | |
409 $variable_value = $2; | |
410 } | |
411 else { | |
412 $line =~ m/(\S+)=(.*$)/; | |
413 $variable_name = $1; | |
414 $variable_value = $2; | |
415 } | |
416 # print("$sequence_name\t$variable_name\t$variable_value\n"); | |
417 $self->{primers}{$sequence_name}{$variable_name} = $variable_value; | |
418 } # end while <> | |
419 } # end parse_report | |
420 | |
421 =head2 _construct_empty() | |
422 | |
423 Title : _construct_empty() | |
424 Usage : &_construct_empty($self); | |
425 Function: Construct an empty object that will be used to construct a primer3 | |
426 input "file" so that it can be run. | |
427 Returns : | |
428 Args : | |
429 Notes : | |
430 | |
431 =cut | |
432 | |
433 sub _construct_empty { | |
434 my $self = shift; | |
435 $self->{inputs} = {}; | |
436 return; | |
437 } | |
438 | |
439 =head2 add_target(%stuff) | |
440 | |
441 Title : add_target(%stuff) | |
442 Usage : $o_primer->add_target(%stuff); | |
443 Function: Add an target to the infile constructor. | |
444 Returns : | |
445 Args : A hash. Looks something like this: | |
446 $o_primer2->add_target( | |
447 -PRIMER_SEQUENCE_ID => "sN11902", | |
448 -PRIMER_COMMENT => "3831", | |
449 -SEQUENCE => "some_sequence", | |
450 -TARGET => "513,26", | |
451 -PRIMER_PRODUCT_SIZE_RANGE => "100-500", | |
452 -PRIMER_FILE_FLAG => "0", | |
453 -PRIMER_LIBERAL_BASE => "1", | |
454 -PRIMER_NUM_RETURN => "1", | |
455 -PRIMER_FIRST_BASE_INDEX => "1", | |
456 -PRIMER_EXPLAIN_FLAG => "1"); | |
457 The add_target() method does not validate the things you put into | |
458 this parameter hash. Read the docs for Primer3 to see which fields | |
459 do what and how they should be used. | |
460 Notes : To design primers, first create a new CSM::Primer3 object with the | |
461 -construct_infile parameter. Then, add targets using this method | |
462 (add_target()) with the target hash as above in the Args: section. | |
463 Be careful. No validation will be done here. All of those parameters | |
464 will be fed straight into primer3. | |
465 Once you are done adding targets, invoke the function run_primer3(). | |
466 Then retrieve the results using something like a loop around the array | |
467 from get_primer_sequence_IDs(); | |
468 | |
469 =cut | |
470 | |
471 | |
472 sub add_target { | |
473 my ($self,%args) = @_; | |
474 my ($currkey,$renamed,$sequence_id,$value); | |
475 if (!$args{-PRIMER_SEQUENCE_ID}) { | |
476 print("You cannot add an element to the primer3 infile without specifying the PRIMER_SEQUENCE_ID. Sorry.\n"); | |
477 } | |
478 else { | |
479 $sequence_id = $args{-PRIMER_SEQUENCE_ID}; | |
480 foreach $currkey (keys %args) { | |
481 # print("\$currkey is $currkey\n"); | |
482 next if ($currkey eq "-PRIMER_SEQUENCE_ID"); | |
483 ($renamed = $currkey) =~ s/-//; | |
484 # print("Adding $renamed to the hash under $sequence_id\n"); | |
485 $value = $args{$currkey}; | |
486 # print("\$value is $value\n"); | |
487 if ($renamed eq "SEQUENCE") { $value =~ s/\n//g; } | |
488 $self->{infile}{$sequence_id}{$renamed} = $value; | |
489 } | |
490 } | |
491 } | |
492 | |
493 =head2 get_primer_sequence_IDs() | |
494 | |
495 Title : get_primer_sequence_IDs() | |
496 Usage : $o_phred->get_primer_sequence_IDs(); | |
497 Function: Return the primer sequence ID's. These normally correspond to | |
498 the name of a sequence in a database but can be whatever was used when | |
499 the primer3 infile was constructed. | |
500 Returns : An array containing the names of the primer sequence ID's | |
501 Args : None. | |
502 Notes : This would be used as the basis for an iterator to loop around each | |
503 primer that was designed. | |
504 | |
505 =cut | |
506 | |
507 sub get_primer_sequence_IDs { | |
508 my $self = shift; | |
509 return sort keys %{$self->{primers}}; | |
510 } # end get keys | |
511 | |
512 =head2 dump_hash() | |
513 | |
514 Title : dump_hash() | |
515 Usage : $o_primer->dump_hash(); | |
516 Function: Dump out the CSM::Primer3 object. | |
517 Returns : Nothing. | |
518 Args : None. | |
519 Notes : Used extensively in debugging. | |
520 | |
521 =cut | |
522 | |
523 sub dump_hash { | |
524 my $self = shift; | |
525 my $dumper = new Dumpvalue; | |
526 $dumper->dumpValue($self); | |
527 } # end dump_hash | |
528 | |
529 =head2 dump_infile_hash() | |
530 | |
531 Title : dump_infile_hash() | |
532 Usage : $o_primer->dump_infile_hash(); | |
533 Function: Dump out the contents of the infile hash. | |
534 Returns : Nothing. | |
535 Args : None. | |
536 Notes : Used for debugging the construction of the infile. | |
537 | |
538 =cut | |
539 | |
540 sub dump_infile_hash { | |
541 my $self = shift; | |
542 my $dumper = new Dumpvalue; | |
543 $dumper->dumpValue($self->{infile}); | |
544 } | |
545 | |
546 | |
547 | |
548 1; | |
549 __END__ | |
550 | |
551 =head2 placeholder | |
552 | |
553 Title : This is a place holder so chad can cut and paste | |
554 Usage : | |
555 Function: | |
556 Returns : | |
557 Args : | |
558 Notes : | |
559 | |
560 =cut | |
561 | |
562 =head1 SEE ALSO | |
563 | |
564 perl(1). | |
565 | |
566 =cut |