Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/Primer3.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm | 
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:1f6dce3d34e0 | 
|---|---|
| 1 # | |
| 2 # Copyright (c) 1997-2001 bioperl, Chad Matsalla. All Rights Reserved. | |
| 3 # This module is free software; you can redistribute it and/or | |
| 4 # modify it under the same terms as Perl itself. | |
| 5 # | |
| 6 # Copyright Chad Matsalla | |
| 7 # | |
| 8 # You may distribute this module under the same terms as perl itself | |
| 9 # POD documentation - main docs before the code | |
| 10 | |
| 11 =head1 NAME | |
| 12 | |
| 13 Bio::Tools::Primer3 - Create input for and work with the output from the | |
| 14 program primer3 | |
| 15 | |
| 16 =head1 SYNOPSIS | |
| 17 | |
| 18 Chad will put synopses here by the end of the second week of october, 2002. | |
| 19 | |
| 20 =head1 DESCRIPTION | |
| 21 | |
| 22 Bio::Tools::Primer3 creates the input files needed to design primers using | |
| 23 primer3 and provides mechanisms to access data in the primer3 output files. | |
| 24 | |
| 25 =head1 FEEDBACK | |
| 26 | |
| 27 =head2 Mailing Lists | |
| 28 | |
| 29 User feedback is an integral part of the evolution of this and other | |
| 30 Bioperl modules. Send your comments and suggestions preferably to one | |
| 31 of the Bioperl mailing lists. Your participation is much appreciated. | |
| 32 | |
| 33 bioperl-l@bioperl.org - General discussion | |
| 34 http://www.bioperl.org/MailList.html - About the mailing lists | |
| 35 | |
| 36 =head2 Reporting Bugs | |
| 37 | |
| 38 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 39 the bugs and their resolution. Bug reports can be submitted via email | |
| 40 or the web: | |
| 41 | |
| 42 bioperl-bugs@bio.perl.org | |
| 43 http://bugzilla.bioperl.org/ | |
| 44 | |
| 45 | |
| 46 =head1 AUTHOR - Chad Matsalla | |
| 47 | |
| 48 bioinformatics1@dieselwurks.com | |
| 49 | |
| 50 =head1 APPENDIX | |
| 51 | |
| 52 The rest of the documentation details each of the object methods. | |
| 53 Internal methods are usually preceded with a _ | |
| 54 | |
| 55 =cut | |
| 56 | |
| 57 # Let the code begin... | |
| 58 | |
| 59 package Bio::Tools::Primer3; | |
| 60 | |
| 61 use vars qw(@ISA); | |
| 62 use strict; | |
| 63 use Bio::Seq; | |
| 64 use Bio::SeqFeature::Primer; | |
| 65 use Bio::Seq::PrimedSeq; | |
| 66 use Bio::Seq::SeqFactory; | |
| 67 | |
| 68 use Bio::Root::Root; | |
| 69 use Bio::Root::IO; | |
| 70 | |
| 71 use Dumpvalue; | |
| 72 | |
| 73 @ISA = qw(Bio::Root::Root Bio::Root::IO); | |
| 74 | |
| 75 # Chad likes to use this to debug large hashes. | |
| 76 my $dumper = new Dumpvalue; | |
| 77 | |
| 78 # this was a bunch of the seqio things, now deprecated. delete it soon. | |
| 79 # sub _initialize { | |
| 80 # my($self,@args) = @_; | |
| 81 # $self->SUPER::_initialize(@args); | |
| 82 # if( ! defined $self->sequence_factory ) { | |
| 83 # $self->sequence_factory(new Bio::Seq::SeqFactory | |
| 84 # (-verbose => $self->verbose(), | |
| 85 # -type => 'Bio::Seq')); | |
| 86 # } | |
| 87 # } | |
| 88 | |
| 89 | |
| 90 =head2 new() | |
| 91 | |
| 92 Title : new() | |
| 93 Usage : | |
| 94 Function: | |
| 95 Returns : | |
| 96 Args : | |
| 97 Notes : | |
| 98 | |
| 99 =cut | |
| 100 | |
| 101 sub new { | |
| 102 my($class,@args) = @_; | |
| 103 my $self = $class->SUPER::new(@args); | |
| 104 my($filename) = $self->_rearrange([qw(FILE)],@args); | |
| 105 if (!$filename) { | |
| 106 print("Ahh grasshopper, you are planning to create a primer3 infile\n"); | |
| 107 return $self; | |
| 108 } | |
| 109 $self->{filename} = $filename; | |
| 110 # check to see that the file exists | |
| 111 # I think that catfile should be used here. | |
| 112 if (!-f $filename) { | |
| 113 print("That file doesn't exist. Bah.\n"); | |
| 114 } | |
| 115 $self->_initialize_io( -file => $filename ); | |
| 116 return $self; | |
| 117 } | |
| 118 | |
| 119 | |
| 120 | |
| 121 | |
| 122 =head2 null | |
| 123 | |
| 124 Title : | |
| 125 Usage : | |
| 126 Function: | |
| 127 Returns : | |
| 128 Args : | |
| 129 Notes : | |
| 130 | |
| 131 =cut | |
| 132 | |
| 133 | |
| 134 | |
| 135 | |
| 136 | |
| 137 | |
| 138 | |
| 139 =head2 next_primer() | |
| 140 | |
| 141 Title : next_primer() | |
| 142 Usage : $primer3 = $stream->next_primer() | |
| 143 Function: returns the next primer in the stream | |
| 144 Returns : Bio::Seq::PrimedSeq containing: | |
| 145 - 2 Bio::SeqFeature::Primer representing the primers | |
| 146 - 1 Bio::Seq representing the target sequence | |
| 147 - 1 Bio::Seq representing the amplified region | |
| 148 Args : NONE | |
| 149 Notes : | |
| 150 | |
| 151 =cut | |
| 152 | |
| 153 sub next_primer { | |
| 154 my $self = shift; | |
| 155 my $fh = $self->_fh(); | |
| 156 my ($line,%primer); | |
| 157 # first, read in the next set of primers | |
| 158 while ($line = $self->_readline()) { | |
| 159 chomp ($line); | |
| 160 last if ($line =~ /^=/); | |
| 161 $line =~ m/(^.*)\=(.*$)/; | |
| 162 $primer{$1} = $2; | |
| 163 } | |
| 164 # then, get the primers as SeqFeature::Primer objects | |
| 165 | |
| 166 my ($left,$right) = &_create_primer_features(\%primer); | |
| 167 # then, create the sequence to place them on | |
| 168 my $sequence = Bio::Seq->new(-seq => $primer{SEQUENCE}, | |
| 169 -id => $primer{PRIMER_SEQUENCE_ID}); | |
| 170 # print("Sequence is ".$primer{SEQUENCE}." and id is ".$primer{PRIMER_SEQUENCE_ID}."\n"); | |
| 171 my $primedseq = new Bio::Seq::PrimedSeq( | |
| 172 -target_sequence => $sequence, | |
| 173 -left_primer => $left, | |
| 174 -right_primer => $right, | |
| 175 -primer_sequence_id => $primer{PRIMER_SEQUENCE_ID}, | |
| 176 -primer_comment => $primer{PRIMER_COMMENT}, | |
| 177 -target => $primer{TARGET}, | |
| 178 -primer_product_size_range => $primer{PRIMER_PRODUCT_SIZE_RANGE}, | |
| 179 -primer_file_flag => $primer{PRIMER_FILE_FLAG}, | |
| 180 -primer_liberal_base => $primer{PRIMER_LIBERAL_BASE}, | |
| 181 -primer_num_return => $primer{PRIMER_NUM_RETURN}, | |
| 182 -primer_first_base_index => $primer{PRIMER_FIRST_BASE_INDEX}, | |
| 183 -primer_explain_flag => $primer{PRIMER_EXPLAIN_FLAG}, | |
| 184 -primer_pair_compl_any => $primer{PRIMER_PAIR_COMPL_ANY}, | |
| 185 -primer_pair_compl_end => $primer{PRIMER_PAIR_COMPL_END}, | |
| 186 -primer_product_size => $primer{PRIMER_PRODUCT_SIZE} | |
| 187 ); | |
| 188 return $primedseq; | |
| 189 } | |
| 190 | |
| 191 | |
| 192 =head2 _create_primer_features() | |
| 193 | |
| 194 Title : _create_primer_features() | |
| 195 Usage : &_create_primer_features() | |
| 196 Function: This is an internal method used by next_seq() to create the | |
| 197 Bio::SeqFeature::Primer objects necessary to represent the primers | |
| 198 themselves. | |
| 199 Returns : An array of 2 Bio::SeqFeature::Primer objects. | |
| 200 Args : None. | |
| 201 Notes : This is an internal method. Do not call this method. | |
| 202 | |
| 203 =cut | |
| 204 | |
| 205 | |
| 206 sub _create_primer_features { | |
| 207 my $rdat = shift; | |
| 208 my (%left,%right,$updir,$downdir,$var,$trunc); | |
| 209 my @variables = qw( | |
| 210 PRIMER_DIRECTION | |
| 211 PRIMER_DIRECTION_END_STABILITY | |
| 212 PRIMER_DIRECTION_EXPLAIN | |
| 213 PRIMER_DIRECTION_GC_PERCENT | |
| 214 PRIMER_DIRECTION_PENALTY | |
| 215 PRIMER_DIRECTION_SELF_ANY | |
| 216 PRIMER_DIRECTION_SELF_END | |
| 217 PRIMER_DIRECTION_SEQUENCE | |
| 218 PRIMER_DIRECTION_TM | |
| 219 PRIMER_FIRST_BASE_INDEX | |
| 220 ); | |
| 221 # create the hash to pass into the creation routine | |
| 222 # I do it this way because the primer3 outfile variables are exactly the same for each of | |
| 223 # left and right. I create two hashes- one for the left and one for the right primer. | |
| 224 foreach $updir (qw(LEFT RIGHT)) { | |
| 225 my %dat; | |
| 226 foreach (@variables) { | |
| 227 ($var = $_) =~ s/DIRECTION/$updir/e; | |
| 228 # should you truncate the name of each variable? | |
| 229 # for example, should the value be: PRIMER_RIGHT_PENALTY or PENALTY? | |
| 230 # i think it should be the second one | |
| 231 if (/^PRIMER_DIRECTION$/) { | |
| 232 $trunc = "PRIMER"; | |
| 233 } | |
| 234 elsif (/^PRIMER_FIRST_BASE_INDEX/) { | |
| 235 $trunc = "FIRST_BASE_INDEX"; | |
| 236 } | |
| 237 else { | |
| 238 ($trunc = $_) =~ s/PRIMER_DIRECTION_//; | |
| 239 } | |
| 240 $dat{"-$trunc"} = $rdat->{$var}; | |
| 241 } | |
| 242 if ($updir eq "LEFT") { | |
| 243 %left = %dat; | |
| 244 $left{-id} = $rdat->{PRIMER_SEQUENCE_ID}."-left"; | |
| 245 } | |
| 246 else { | |
| 247 %right = %dat; | |
| 248 $right{-id} = $rdat->{PRIMER_SEQUENCE_ID}."-right"; | |
| 249 } | |
| 250 } | |
| 251 my $primer_left = new Bio::SeqFeature::Primer(%left); | |
| 252 my $primer_right = new Bio::SeqFeature::Primer(%right); | |
| 253 return($primer_left,$primer_right); | |
| 254 } | |
| 255 | |
| 256 | |
| 257 | |
| 258 | |
| 259 | |
| 260 | |
| 261 | |
| 262 | |
| 263 | |
| 264 =head2 get_amplified_region() | |
| 265 | |
| 266 Title : get_amplified_region() | |
| 267 Usage : $primer->get_amplified_region() | |
| 268 Function: Returns a Bio::Seq object representing the sequence amplified | |
| 269 Returns : (I think) A Bio::Seq object | |
| 270 Args : None. | |
| 271 Notes : This is not implemented at this time. | |
| 272 Note to chad: implement this simple getter. | |
| 273 Developer notes: There obviously isn't a way for a single primer to know about | |
| 274 its amplified region unless it is paired with another primer. At this time | |
| 275 these object will generally be created with another so I will put in this | |
| 276 method. If there is no sequence null is returned. | |
| 277 | |
| 278 THIS DOES NOT BELONG HERE. Put this into something else. | |
| 279 | |
| 280 | |
| 281 =cut | |
| 282 | |
| 283 sub get_amplified_region { | |
| 284 my ($self) = @_; | |
| 285 } # end get_amplified_region | |
| 286 | |
| 287 =head2 get_amplification_error() | |
| 288 | |
| 289 Title : get_amplification_error() | |
| 290 Usage : | |
| 291 Function: | |
| 292 Returns : | |
| 293 Args : | |
| 294 Notes : | |
| 295 Developer Notes: | |
| 296 THIS DOES NOT BELONG HERE. Put this into something else. | |
| 297 | |
| 298 =cut | |
| 299 | |
| 300 sub get_amplification_error { | |
| 301 my $primer = $_[1]; | |
| 302 my $error = $Primer3::primers{$primer}{PRIMER_ERROR}; | |
| 303 if ($error) { return $error; } | |
| 304 else { return "Some error that primer3 didn't define.\n"; } | |
| 305 } | |
| 306 | |
| 307 =head2 _set_target() | |
| 308 | |
| 309 Title : _set_target() | |
| 310 Usage : &_set_target($self); | |
| 311 Function: | |
| 312 Returns : | |
| 313 Args : | |
| 314 Notes : | |
| 315 Developer Notes: Really I have no idea why I put this in here. | |
| 316 It can is referenced by new_deprecated and by run_primer3 | |
| 317 | |
| 318 | |
| 319 =cut | |
| 320 | |
| 321 sub _set_target { | |
| 322 my $self = shift; | |
| 323 my ($sequence,$primer,$primer_left,$primer_right,$position_left,$position_right,$boggle); | |
| 324 $boggle = 1; | |
| 325 foreach $primer (sort keys %{$self->{primers}}) { | |
| 326 $sequence = $self->{primers}{$primer}{SEQUENCE}; | |
| 327 $primer_left = $self->{primers}{$primer}{PRIMER_LEFT}; | |
| 328 $primer_right = $self->{primers}{$primer}{PRIMER_RIGHT}; | |
| 329 if (!$primer_left) { | |
| 330 $self->{primers}{$primer}{design_failed} = "1"; | |
| 331 } | |
| 332 else { | |
| 333 $primer_left =~ m/(.*)\,(.*)/; | |
| 334 $position_left = $1+$2-1; | |
| 335 $primer_right =~ m/(.*)\,(.*)/; | |
| 336 $position_right = $1-$2; | |
| 337 $self->{primers}{$primer}{left} = $position_left; | |
| 338 $self->{primers}{$primer}{right} = $position_right; | |
| 339 $self->{primers}{$primer}{amplified} = substr($sequence,$position_left,$position_right-$position_left); | |
| 340 } | |
| 341 } | |
| 342 } | |
| 343 | |
| 344 =head2 _read_file($self,$filename) | |
| 345 | |
| 346 Title : _read_file($self,$filename) | |
| 347 Usage : | |
| 348 Function: | |
| 349 Returns : A scalar containing the contents of $filename | |
| 350 Args : $self and the name of a file to parse. | |
| 351 Notes : | |
| 352 Developer notes: Honestly, I have no idea what this is for. | |
| 353 | |
| 354 | |
| 355 =cut | |
| 356 | |
| 357 sub _read_file { | |
| 358 # my ($self,$filename) = @_; | |
| 359 # set this to keep track of things.... | |
| 360 # $self->{outfilename} = $filename; | |
| 361 # to make this better for bioperl, chad should really be using catfile and things. | |
| 362 # | |
| 363 # my $fh = new FileHandle; | |
| 364 # open($fh,$filename) or die "I can't open the primer report ($filename) : $!\n"; | |
| 365 # # _parse_report(); | |
| 366 # # my %Primer3::primers; | |
| 367 # my ($output,$line); | |
| 368 # while ($line=<$fh>) { | |
| 369 # # print("Adding $line\n"); | |
| 370 # $output .= $line; | |
| 371 # } # end while | |
| 372 # # print("\$output is $output\n"); | |
| 373 # return $output; | |
| 374 } | |
| 375 | |
| 376 | |
| 377 | |
| 378 | |
| 379 | |
| 380 =head2 _parse_report() | |
| 381 | |
| 382 Title : _parse_report() | |
| 383 Usage : &_parse_report($self,$filename); | |
| 384 Function: Parse a primer3 outfile and place everything into an object under | |
| 385 {primers} with PRIMER_SEQUENCE_ID being the name of the keys for the | |
| 386 {primers} hash. | |
| 387 Returns : Nothing. | |
| 388 Args : $self and the name of a file to parse. | |
| 389 Notes : | |
| 390 | |
| 391 =cut | |
| 392 | |
| 393 sub _parse_report { | |
| 394 # old | |
| 395 # my ($self,$filename) = @_; | |
| 396 my ($self,$outputs) = @_; | |
| 397 # print("\$self is $self, \$outputs are $outputs\n"); | |
| 398 # print("in _parse_report, \$self is $self\n"); | |
| 399 # set this to keep track of things.... | |
| 400 my ($sequence_name,$line,$counter,$variable_name,$variable_value); | |
| 401 my @output = split/\n/,$outputs; | |
| 402 foreach $line (@output) { | |
| 403 # print("Reading line $line\n"); | |
| 404 next if ($line =~ /^\=/); | |
| 405 if ($line =~ m/^PRIMER_SEQUENCE_ID/) { | |
| 406 $line =~ m/(\S+)=(.*$)/; | |
| 407 $variable_name = $1; | |
| 408 $sequence_name = $2; | |
| 409 $variable_value = $2; | |
| 410 } | |
| 411 else { | |
| 412 $line =~ m/(\S+)=(.*$)/; | |
| 413 $variable_name = $1; | |
| 414 $variable_value = $2; | |
| 415 } | |
| 416 # print("$sequence_name\t$variable_name\t$variable_value\n"); | |
| 417 $self->{primers}{$sequence_name}{$variable_name} = $variable_value; | |
| 418 } # end while <> | |
| 419 } # end parse_report | |
| 420 | |
| 421 =head2 _construct_empty() | |
| 422 | |
| 423 Title : _construct_empty() | |
| 424 Usage : &_construct_empty($self); | |
| 425 Function: Construct an empty object that will be used to construct a primer3 | |
| 426 input "file" so that it can be run. | |
| 427 Returns : | |
| 428 Args : | |
| 429 Notes : | |
| 430 | |
| 431 =cut | |
| 432 | |
| 433 sub _construct_empty { | |
| 434 my $self = shift; | |
| 435 $self->{inputs} = {}; | |
| 436 return; | |
| 437 } | |
| 438 | |
| 439 =head2 add_target(%stuff) | |
| 440 | |
| 441 Title : add_target(%stuff) | |
| 442 Usage : $o_primer->add_target(%stuff); | |
| 443 Function: Add an target to the infile constructor. | |
| 444 Returns : | |
| 445 Args : A hash. Looks something like this: | |
| 446 $o_primer2->add_target( | |
| 447 -PRIMER_SEQUENCE_ID => "sN11902", | |
| 448 -PRIMER_COMMENT => "3831", | |
| 449 -SEQUENCE => "some_sequence", | |
| 450 -TARGET => "513,26", | |
| 451 -PRIMER_PRODUCT_SIZE_RANGE => "100-500", | |
| 452 -PRIMER_FILE_FLAG => "0", | |
| 453 -PRIMER_LIBERAL_BASE => "1", | |
| 454 -PRIMER_NUM_RETURN => "1", | |
| 455 -PRIMER_FIRST_BASE_INDEX => "1", | |
| 456 -PRIMER_EXPLAIN_FLAG => "1"); | |
| 457 The add_target() method does not validate the things you put into | |
| 458 this parameter hash. Read the docs for Primer3 to see which fields | |
| 459 do what and how they should be used. | |
| 460 Notes : To design primers, first create a new CSM::Primer3 object with the | |
| 461 -construct_infile parameter. Then, add targets using this method | |
| 462 (add_target()) with the target hash as above in the Args: section. | |
| 463 Be careful. No validation will be done here. All of those parameters | |
| 464 will be fed straight into primer3. | |
| 465 Once you are done adding targets, invoke the function run_primer3(). | |
| 466 Then retrieve the results using something like a loop around the array | |
| 467 from get_primer_sequence_IDs(); | |
| 468 | |
| 469 =cut | |
| 470 | |
| 471 | |
| 472 sub add_target { | |
| 473 my ($self,%args) = @_; | |
| 474 my ($currkey,$renamed,$sequence_id,$value); | |
| 475 if (!$args{-PRIMER_SEQUENCE_ID}) { | |
| 476 print("You cannot add an element to the primer3 infile without specifying the PRIMER_SEQUENCE_ID. Sorry.\n"); | |
| 477 } | |
| 478 else { | |
| 479 $sequence_id = $args{-PRIMER_SEQUENCE_ID}; | |
| 480 foreach $currkey (keys %args) { | |
| 481 # print("\$currkey is $currkey\n"); | |
| 482 next if ($currkey eq "-PRIMER_SEQUENCE_ID"); | |
| 483 ($renamed = $currkey) =~ s/-//; | |
| 484 # print("Adding $renamed to the hash under $sequence_id\n"); | |
| 485 $value = $args{$currkey}; | |
| 486 # print("\$value is $value\n"); | |
| 487 if ($renamed eq "SEQUENCE") { $value =~ s/\n//g; } | |
| 488 $self->{infile}{$sequence_id}{$renamed} = $value; | |
| 489 } | |
| 490 } | |
| 491 } | |
| 492 | |
| 493 =head2 get_primer_sequence_IDs() | |
| 494 | |
| 495 Title : get_primer_sequence_IDs() | |
| 496 Usage : $o_phred->get_primer_sequence_IDs(); | |
| 497 Function: Return the primer sequence ID's. These normally correspond to | |
| 498 the name of a sequence in a database but can be whatever was used when | |
| 499 the primer3 infile was constructed. | |
| 500 Returns : An array containing the names of the primer sequence ID's | |
| 501 Args : None. | |
| 502 Notes : This would be used as the basis for an iterator to loop around each | |
| 503 primer that was designed. | |
| 504 | |
| 505 =cut | |
| 506 | |
| 507 sub get_primer_sequence_IDs { | |
| 508 my $self = shift; | |
| 509 return sort keys %{$self->{primers}}; | |
| 510 } # end get keys | |
| 511 | |
| 512 =head2 dump_hash() | |
| 513 | |
| 514 Title : dump_hash() | |
| 515 Usage : $o_primer->dump_hash(); | |
| 516 Function: Dump out the CSM::Primer3 object. | |
| 517 Returns : Nothing. | |
| 518 Args : None. | |
| 519 Notes : Used extensively in debugging. | |
| 520 | |
| 521 =cut | |
| 522 | |
| 523 sub dump_hash { | |
| 524 my $self = shift; | |
| 525 my $dumper = new Dumpvalue; | |
| 526 $dumper->dumpValue($self); | |
| 527 } # end dump_hash | |
| 528 | |
| 529 =head2 dump_infile_hash() | |
| 530 | |
| 531 Title : dump_infile_hash() | |
| 532 Usage : $o_primer->dump_infile_hash(); | |
| 533 Function: Dump out the contents of the infile hash. | |
| 534 Returns : Nothing. | |
| 535 Args : None. | |
| 536 Notes : Used for debugging the construction of the infile. | |
| 537 | |
| 538 =cut | |
| 539 | |
| 540 sub dump_infile_hash { | |
| 541 my $self = shift; | |
| 542 my $dumper = new Dumpvalue; | |
| 543 $dumper->dumpValue($self->{infile}); | |
| 544 } | |
| 545 | |
| 546 | |
| 547 | |
| 548 1; | |
| 549 __END__ | |
| 550 | |
| 551 =head2 placeholder | |
| 552 | |
| 553 Title : This is a place holder so chad can cut and paste | |
| 554 Usage : | |
| 555 Function: | |
| 556 Returns : | |
| 557 Args : | |
| 558 Notes : | |
| 559 | |
| 560 =cut | |
| 561 | |
| 562 =head1 SEE ALSO | |
| 563 | |
| 564 perl(1). | |
| 565 | |
| 566 =cut | 
