Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/RestrictionEnzyme.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 #------------------------------------------------------------------ | |
| 2 # $Id: RestrictionEnzyme.pm,v 1.25.2.1 2003/06/29 00:53:20 jason Exp $ | |
| 3 # | |
| 4 # BioPerl module Bio::Tools::RestrictionEnzyme | |
| 5 # | |
| 6 # Cared for by Steve Chervitz <sac@bioperl.org> | |
| 7 # | |
| 8 # You may distribute this module under the same terms as perl itself | |
| 9 #------------------------------------------------------------------ | |
| 10 | |
| 11 ## POD Documentation: | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::Tools::RestrictionEnzyme - Bioperl object for a restriction endonuclease | |
| 16 (cuts DNA at specific locations) | |
| 17 | |
| 18 =head1 SYNOPSIS | |
| 19 | |
| 20 =head2 Object Creation | |
| 21 | |
| 22 require Bio::Tools::RestrictionEnzyme; | |
| 23 | |
| 24 ## Create a new object by name. | |
| 25 | |
| 26 $re1 = new Bio::Tools::RestrictionEnzyme(-NAME =>'EcoRI'); | |
| 27 | |
| 28 ## Create a new object using special syntax | |
| 29 ## which specifies the enzyme name, recognition site, and cut position. | |
| 30 ## Used for enzymes not known to this module. | |
| 31 | |
| 32 $re2 = new Bio::Tools::RestrictionEnzyme(-NAME =>'EcoRV--GAT^ATC', | |
| 33 -MAKE =>'custom'); | |
| 34 | |
| 35 ## Get a list of the resulting fragments when a sequence is cut with | |
| 36 ## the given enzyme. The method expects a Bio::Seq object. | |
| 37 | |
| 38 @fragments = $re2->cut_seq($seqobj); | |
| 39 | |
| 40 ## Get a list of names of all available restriction enzymes | |
| 41 ## known to this module. | |
| 42 | |
| 43 @all = $re->available_list(); | |
| 44 | |
| 45 ## Get the names of restriction enzymes that have 6 bp | |
| 46 ## recognition sequences. | |
| 47 | |
| 48 @sixcutters = $re->available_list(6); | |
| 49 | |
| 50 | |
| 51 =head1 INSTALLATION | |
| 52 | |
| 53 This module is included with the central Bioperl distribution: | |
| 54 | |
| 55 http://bio.perl.org/Core/Latest | |
| 56 ftp://bio.perl.org/pub/DIST | |
| 57 | |
| 58 Follow the installation instructions included in the README file. | |
| 59 | |
| 60 =head1 DESCRIPTION | |
| 61 | |
| 62 The Bio::Tools::RestrictionEnzyme.pm module encapsulates generic data and | |
| 63 methods for using restriction endonucleases for in silico restriction | |
| 64 analysis of DNA sequences. | |
| 65 | |
| 66 =head2 Considerations | |
| 67 | |
| 68 This module is a precursor for a more full featured version that may do such | |
| 69 things as download data from online databases such as REBase http://www.neb.com/rebase/. | |
| 70 Thus, there is currently no functionality for obtaining data about commercial | |
| 71 availability for a restriction enzyme. | |
| 72 | |
| 73 At some point in the future, it may make sense to derive RestrictionEnzymes | |
| 74 from a class such as Bio::Enzyme or Bio::Prot::Protein (neither of which now | |
| 75 exist) so that more data about the enzyme and related information can be | |
| 76 easily obtained. | |
| 77 | |
| 78 This module is currently in use at | |
| 79 | |
| 80 http://genome-www.stanford.edu/Sacch3D/analysis/ | |
| 81 | |
| 82 | |
| 83 =head2 Digesting on Runs of N | |
| 84 | |
| 85 To digest a sequence at runs of N's, create a custom enzyme whose recognition site is a run of N's: | |
| 86 | |
| 87 $re_n = new Bio::Tools::RestrictionEnzyme(-name=>'N--NNNNN', | |
| 88 -make=>'custom'); | |
| 89 | |
| 90 Specify the number of N's you want to match in the -name parameter. | |
| 91 So the above example will recognize and cut at runs of 5 Ns. | |
| 92 If you wanted to cut at runs of 10 N's, you would use | |
| 93 | |
| 94 -name => 'N--NNNNNNNNNN' | |
| 95 | |
| 96 Note that you must use a specific number of N's; you cannot use a regexp to | |
| 97 digest at N+, for example, because the actual number of N's at each site is | |
| 98 not recorded when the sequence is analyzed, so cut_locations() wouldn't be | |
| 99 correct. | |
| 100 | |
| 101 =head1 EXAMPLES | |
| 102 | |
| 103 See the script examples/restriction.pl in the Bioperl distribution. | |
| 104 | |
| 105 =head1 DEPENDENCIES | |
| 106 | |
| 107 Bio::Tools::RestrictionEnzyme.pm is a concrete class that inherits from | |
| 108 B<Bio::Root::Root> and uses by delegation B<Bio::PrimarySeq>. | |
| 109 | |
| 110 =head1 FEEDBACK | |
| 111 | |
| 112 =head2 Mailing Lists | |
| 113 | |
| 114 User feedback is an integral part of the evolution of this and other Bioperl | |
| 115 modules. Send your comments and suggestions preferably to one of the Bioperl | |
| 116 mailing lists. Your participation is much appreciated. | |
| 117 | |
| 118 bioperl-l@bioperl.org - General discussion | |
| 119 http://bioperl.org/MailList.shtml - About the mailing lists | |
| 120 | |
| 121 =head2 Reporting Bugs | |
| 122 | |
| 123 Report bugs to the Bioperl bug tracking system to help us keep track of the bugs | |
| 124 and their resolution. Bug reports can be submitted via email or the web: | |
| 125 | |
| 126 bioperl-bugs@bio.perl.org | |
| 127 http://bugzilla.bioperl.org/ | |
| 128 | |
| 129 =head1 AUTHOR | |
| 130 | |
| 131 Steve Chervitz, E<lt>sac@bioperl.orgE<gt> | |
| 132 | |
| 133 =head1 COPYRIGHT | |
| 134 | |
| 135 Copyright (c) 1997-2002 Steve A. Chervitz. All Rights Reserved. | |
| 136 This module is free software; you can redistribute it and/or | |
| 137 modify it under the same terms as Perl itself. | |
| 138 | |
| 139 =head1 SEE ALSO | |
| 140 | |
| 141 Bio::Root::Root - Base class. | |
| 142 Bio::PrimarySeq - Lightweight sequence object. | |
| 143 | |
| 144 http://bio.perl.org/ - Bioperl Project Homepage | |
| 145 | |
| 146 =cut | |
| 147 | |
| 148 # | |
| 149 ## | |
| 150 ### | |
| 151 #### END of main POD documentation. | |
| 152 ### | |
| 153 ## | |
| 154 #' | |
| 155 | |
| 156 | |
| 157 =head1 APPENDIX | |
| 158 | |
| 159 Methods beginning with a leading underscore are considered private | |
| 160 and are intended for internal use by this module. They are | |
| 161 B<not> considered part of the public interface and are described here | |
| 162 for documentation purposes only. | |
| 163 | |
| 164 =cut | |
| 165 | |
| 166 | |
| 167 package Bio::Tools::RestrictionEnzyme; | |
| 168 use strict; | |
| 169 | |
| 170 use Bio::Root::Root; | |
| 171 use Exporter; | |
| 172 | |
| 173 use vars qw (@ISA @EXPORT_OK %EXPORT_TAGS $ID $version @RE_available $Revision); | |
| 174 | |
| 175 @ISA = qw(Bio::Root::Root Exporter); | |
| 176 @EXPORT_OK = qw(@RE_available); | |
| 177 %EXPORT_TAGS = ( std => [qw(@RE_available)] ); | |
| 178 | |
| 179 $ID = 'Bio::Tools::RestrictionEnzyme'; | |
| 180 $version = 0.04; | |
| 181 $Revision = '$Id: RestrictionEnzyme.pm,v 1.25.2.1 2003/06/29 00:53:20 jason Exp $'; #' | |
| 182 | |
| 183 # Generated from REBASE version 208 (strider format), dated Aug 1 2002 | |
| 184 # using scripts/contributed/rebase2list.pl | |
| 185 # Syntax: RE-name => 'SITE CUTS-AT' where SITE and CUTS-AT are separated | |
| 186 # by a space. CUTS-AT is the cleavage position relative to the 5' end of SITE. | |
| 187 | |
| 188 my %RE = ( | |
| 189 'AasI' => 'GACNNNNNNGTC 7', | |
| 190 'AatI' => 'AGGCCT 3', | |
| 191 'AatII' => 'GACGTC 5', | |
| 192 'AauI' => 'TGTACA 1', | |
| 193 'AccI' => 'GTMKAC 2', | |
| 194 'AccII' => 'CGCG 2', | |
| 195 'AccIII' => 'TCCGGA 1', | |
| 196 'Acc16I' => 'TGCGCA 3', | |
| 197 'Acc65I' => 'GGTACC 1', | |
| 198 'Acc113I' => 'AGTACT 3', | |
| 199 'AccB1I' => 'GGYRCC 1', | |
| 200 'AccB7I' => 'CCANNNNNTGG 7', | |
| 201 'AclI' => 'AACGTT 2', | |
| 202 'AcsI' => 'RAATTY 1', | |
| 203 'AcvI' => 'CACGTG 3', | |
| 204 'AcyI' => 'GRCGYC 2', | |
| 205 'AdeI' => 'CACNNNGTG 6', | |
| 206 'AfaI' => 'GTAC 2', | |
| 207 'AfeI' => 'AGCGCT 3', | |
| 208 'AflI' => 'GGWCC 1', | |
| 209 'AflII' => 'CTTAAG 1', | |
| 210 'AflIII' => 'ACRYGT 1', | |
| 211 'AgeI' => 'ACCGGT 1', | |
| 212 'AhaIII' => 'TTTAAA 3', | |
| 213 'AhdI' => 'GACNNNNNGTC 6', | |
| 214 'AhlI' => 'ACTAGT 1', | |
| 215 'AleI' => 'CACNNNNGTG 5', | |
| 216 'AluI' => 'AGCT 2', | |
| 217 'Alw21I' => 'GWGCWC 5', | |
| 218 'Alw44I' => 'GTGCAC 1', | |
| 219 'AlwNI' => 'CAGNNNCTG 6', | |
| 220 'Ama87I' => 'CYCGRG 1', | |
| 221 'AocI' => 'CCTNAGG 2', | |
| 222 'Aor51HI' => 'AGCGCT 3', | |
| 223 'ApaI' => 'GGGCCC 5', | |
| 224 'ApaBI' => 'GCANNNNNTGC 8', | |
| 225 'ApaLI' => 'GTGCAC 1', | |
| 226 'ApoI' => 'RAATTY 1', | |
| 227 'AscI' => 'GGCGCGCC 2', | |
| 228 'AseI' => 'ATTAAT 2', | |
| 229 'AsiAI' => 'ACCGGT 1', | |
| 230 'AsiSI' => 'GCGATCGC 5', | |
| 231 'AsnI' => 'ATTAAT 2', | |
| 232 'AspI' => 'GACNNNGTC 4', | |
| 233 'Asp700I' => 'GAANNNNTTC 5', | |
| 234 'Asp718I' => 'GGTACC 1', | |
| 235 'AspEI' => 'GACNNNNNGTC 6', | |
| 236 'AspHI' => 'GWGCWC 5', | |
| 237 'AspLEI' => 'GCGC 3', | |
| 238 'AspS9I' => 'GGNCC 1', | |
| 239 'AsuI' => 'GGNCC 1', | |
| 240 'AsuII' => 'TTCGAA 2', | |
| 241 'AsuC2I' => 'CCSGG 2', | |
| 242 'AsuNHI' => 'GCTAGC 1', | |
| 243 'AvaI' => 'CYCGRG 1', | |
| 244 'AvaII' => 'GGWCC 1', | |
| 245 'AviII' => 'TGCGCA 3', | |
| 246 'AvrII' => 'CCTAGG 1', | |
| 247 'AxyI' => 'CCTNAGG 2', | |
| 248 'BalI' => 'TGGCCA 3', | |
| 249 'BamHI' => 'GGATCC 1', | |
| 250 'BanI' => 'GGYRCC 1', | |
| 251 'BanII' => 'GRGCYC 5', | |
| 252 'BanIII' => 'ATCGAT 2', | |
| 253 'BbeI' => 'GGCGCC 5', | |
| 254 'BbrPI' => 'CACGTG 3', | |
| 255 'BbuI' => 'GCATGC 5', | |
| 256 'Bbv12I' => 'GWGCWC 5', | |
| 257 'BclI' => 'TGATCA 1', | |
| 258 'BcnI' => 'CCSGG 2', | |
| 259 'BcoI' => 'CYCGRG 1', | |
| 260 'BcuI' => 'ACTAGT 1', | |
| 261 'BetI' => 'WCCGGW 1', | |
| 262 'BfaI' => 'CTAG 1', | |
| 263 'BfmI' => 'CTRYAG 1', | |
| 264 'BfrI' => 'CTTAAG 1', | |
| 265 'BfrBI' => 'ATGCAT 3', | |
| 266 'BfuCI' => 'GATC 0', | |
| 267 'BglI' => 'GCCNNNNNGGC 7', | |
| 268 'BglII' => 'AGATCT 1', | |
| 269 'BlnI' => 'CCTAGG 1', | |
| 270 'BloHII' => 'CTGCAG 5', | |
| 271 'BlpI' => 'GCTNAGC 2', | |
| 272 'Bme18I' => 'GGWCC 1', | |
| 273 'Bme1390I' => 'CCNGG 2', | |
| 274 'Bme1580I' => 'GKGCMC 5', | |
| 275 'BmtI' => 'GCTAGC 5', | |
| 276 'BmyI' => 'GDGCHC 5', | |
| 277 'BoxI' => 'GACNNNNGTC 5', | |
| 278 'Bpu14I' => 'TTCGAA 2', | |
| 279 'Bpu1102I' => 'GCTNAGC 2', | |
| 280 'Bsa29I' => 'ATCGAT 2', | |
| 281 'BsaAI' => 'YACGTR 3', | |
| 282 'BsaBI' => 'GATNNNNATC 5', | |
| 283 'BsaHI' => 'GRCGYC 2', | |
| 284 'BsaJI' => 'CCNNGG 1', | |
| 285 'BsaOI' => 'CGRYCG 4', | |
| 286 'BsaWI' => 'WCCGGW 1', | |
| 287 'BscI' => 'ATCGAT 2', | |
| 288 'Bsc4I' => 'CCNNNNNNNGG 7', | |
| 289 'BscBI' => 'GGNNCC 3', | |
| 290 'BscFI' => 'GATC 0', | |
| 291 'Bse8I' => 'GATNNNNATC 5', | |
| 292 'Bse21I' => 'CCTNAGG 2', | |
| 293 'Bse118I' => 'RCCGGY 1', | |
| 294 'BseAI' => 'TCCGGA 1', | |
| 295 'BseBI' => 'CCWGG 2', | |
| 296 'BseCI' => 'ATCGAT 2', | |
| 297 'BseDI' => 'CCNNGG 1', | |
| 298 'BseJI' => 'GATNNNNATC 5', | |
| 299 'BseLI' => 'CCNNNNNNNGG 7', | |
| 300 'BsePI' => 'GCGCGC 1', | |
| 301 'BseSI' => 'GKGCMC 5', | |
| 302 'BseX3I' => 'CGGCCG 1', | |
| 303 'BshI' => 'GGCC 2', | |
| 304 'Bsh1236I' => 'CGCG 2', | |
| 305 'Bsh1285I' => 'CGRYCG 4', | |
| 306 'BshFI' => 'GGCC 2', | |
| 307 'BshNI' => 'GGYRCC 1', | |
| 308 'BshTI' => 'ACCGGT 1', | |
| 309 'BsiBI' => 'GATNNNNATC 5', | |
| 310 'BsiCI' => 'TTCGAA 2', | |
| 311 'BsiEI' => 'CGRYCG 4', | |
| 312 'BsiHKAI' => 'GWGCWC 5', | |
| 313 'BsiHKCI' => 'CYCGRG 1', | |
| 314 'BsiLI' => 'CCWGG 2', | |
| 315 'BsiMI' => 'TCCGGA 1', | |
| 316 'BsiQI' => 'TGATCA 1', | |
| 317 'BsiSI' => 'CCGG 1', | |
| 318 'BsiWI' => 'CGTACG 1', | |
| 319 'BsiXI' => 'ATCGAT 2', | |
| 320 'BsiYI' => 'CCNNNNNNNGG 7', | |
| 321 'BsiZI' => 'GGNCC 1', | |
| 322 'BslI' => 'CCNNNNNNNGG 7', | |
| 323 'BsoBI' => 'CYCGRG 1', | |
| 324 'Bsp13I' => 'TCCGGA 1', | |
| 325 'Bsp19I' => 'CCATGG 1', | |
| 326 'Bsp68I' => 'TCGCGA 3', | |
| 327 'Bsp106I' => 'ATCGAT 2', | |
| 328 'Bsp119I' => 'TTCGAA 2', | |
| 329 'Bsp120I' => 'GGGCCC 1', | |
| 330 'Bsp143I' => 'GATC 0', | |
| 331 'Bsp143II' => 'RGCGCY 5', | |
| 332 'Bsp1286I' => 'GDGCHC 5', | |
| 333 'Bsp1407I' => 'TGTACA 1', | |
| 334 'Bsp1720I' => 'GCTNAGC 2', | |
| 335 'BspA2I' => 'CCTAGG 1', | |
| 336 'BspCI' => 'CGATCG 4', | |
| 337 'BspDI' => 'ATCGAT 2', | |
| 338 'BspEI' => 'TCCGGA 1', | |
| 339 'BspHI' => 'TCATGA 1', | |
| 340 'BspLI' => 'GGNNCC 3', | |
| 341 'BspLU11I' => 'ACATGT 1', | |
| 342 'BspMII' => 'TCCGGA 1', | |
| 343 'BspTI' => 'CTTAAG 1', | |
| 344 'BspT104I' => 'TTCGAA 2', | |
| 345 'BspT107I' => 'GGYRCC 1', | |
| 346 'BspXI' => 'ATCGAT 2', | |
| 347 'BsrBRI' => 'GATNNNNATC 5', | |
| 348 'BsrFI' => 'RCCGGY 1', | |
| 349 'BsrGI' => 'TGTACA 1', | |
| 350 'BssAI' => 'RCCGGY 1', | |
| 351 'BssECI' => 'CCNNGG 1', | |
| 352 'BssHI' => 'CTCGAG 1', | |
| 353 'BssHII' => 'GCGCGC 1', | |
| 354 'BssKI' => 'CCNGG 0', | |
| 355 'BssNAI' => 'GTATAC 3', | |
| 356 'BssT1I' => 'CCWWGG 1', | |
| 357 'Bst98I' => 'CTTAAG 1', | |
| 358 'Bst1107I' => 'GTATAC 3', | |
| 359 'BstACI' => 'GRCGYC 2', | |
| 360 'BstAPI' => 'GCANNNNNTGC 7', | |
| 361 'BstBI' => 'TTCGAA 2', | |
| 362 'BstBAI' => 'YACGTR 3', | |
| 363 'Bst4CI' => 'ACNGT 3', | |
| 364 'BstC8I' => 'GCNNGC 3', | |
| 365 'BstDEI' => 'CTNAG 1', | |
| 366 'BstDSI' => 'CCRYGG 1', | |
| 367 'BstEII' => 'GGTNACC 1', | |
| 368 'BstENI' => 'CCTNNNNNAGG 5', | |
| 369 'BstENII' => 'GATC 0', | |
| 370 'BstFNI' => 'CGCG 2', | |
| 371 'BstH2I' => 'RGCGCY 5', | |
| 372 'BstHHI' => 'GCGC 3', | |
| 373 'BstHPI' => 'GTTAAC 3', | |
| 374 'BstKTI' => 'GATC 3', | |
| 375 'BstMAI' => 'CTGCAG 5', | |
| 376 'BstMCI' => 'CGRYCG 4', | |
| 377 'BstMWI' => 'GCNNNNNNNGC 7', | |
| 378 'BstNI' => 'CCWGG 2', | |
| 379 'BstNSI' => 'RCATGY 5', | |
| 380 'BstOI' => 'CCWGG 2', | |
| 381 'BstPI' => 'GGTNACC 1', | |
| 382 'BstPAI' => 'GACNNNNGTC 5', | |
| 383 'BstSCI' => 'CCNGG 0', | |
| 384 'BstSFI' => 'CTRYAG 1', | |
| 385 'BstSNI' => 'TACGTA 3', | |
| 386 'BstUI' => 'CGCG 2', | |
| 387 'Bst2UI' => 'CCWGG 2', | |
| 388 'BstXI' => 'CCANNNNNNTGG 8', | |
| 389 'BstX2I' => 'RGATCY 1', | |
| 390 'BstYI' => 'RGATCY 1', | |
| 391 'BstZI' => 'CGGCCG 1', | |
| 392 'BstZ17I' => 'GTATAC 3', | |
| 393 'Bsu15I' => 'ATCGAT 2', | |
| 394 'Bsu36I' => 'CCTNAGG 2', | |
| 395 'BsuRI' => 'GGCC 2', | |
| 396 'BsuTUI' => 'ATCGAT 2', | |
| 397 'BtgI' => 'CCRYGG 1', | |
| 398 'BthCI' => 'GCNGC 4', | |
| 399 'Cac8I' => 'GCNNGC 3', | |
| 400 'CaiI' => 'CAGNNNCTG 6', | |
| 401 'CauII' => 'CCSGG 2', | |
| 402 'CciNI' => 'GCGGCCGC 2', | |
| 403 'CelII' => 'GCTNAGC 2', | |
| 404 'CfoI' => 'GCGC 3', | |
| 405 'CfrI' => 'YGGCCR 1', | |
| 406 'Cfr9I' => 'CCCGGG 1', | |
| 407 'Cfr10I' => 'RCCGGY 1', | |
| 408 'Cfr13I' => 'GGNCC 1', | |
| 409 'Cfr42I' => 'CCGCGG 4', | |
| 410 'ChaI' => 'GATC 4', | |
| 411 'ClaI' => 'ATCGAT 2', | |
| 412 'CpoI' => 'CGGWCCG 2', | |
| 413 'CspI' => 'CGGWCCG 2', | |
| 414 'Csp6I' => 'GTAC 1', | |
| 415 'Csp45I' => 'TTCGAA 2', | |
| 416 'CspAI' => 'ACCGGT 1', | |
| 417 'CviAII' => 'CATG 1', | |
| 418 'CviJI' => 'RGCY 2', | |
| 419 'CviRI' => 'TGCA 2', | |
| 420 'CviTI' => 'RGCY 2', | |
| 421 'CvnI' => 'CCTNAGG 2', | |
| 422 'DdeI' => 'CTNAG 1', | |
| 423 'DpnI' => 'GATC 2', | |
| 424 'DpnII' => 'GATC 0', | |
| 425 'DraI' => 'TTTAAA 3', | |
| 426 'DraII' => 'RGGNCCY 2', | |
| 427 'DraIII' => 'CACNNNGTG 6', | |
| 428 'DrdI' => 'GACNNNNNNGTC 7', | |
| 429 'DsaI' => 'CCRYGG 1', | |
| 430 'DseDI' => 'GACNNNNNNGTC 7', | |
| 431 'EaeI' => 'YGGCCR 1', | |
| 432 'EagI' => 'CGGCCG 1', | |
| 433 'Eam1105I' => 'GACNNNNNGTC 6', | |
| 434 'Ecl136II' => 'GAGCTC 3', | |
| 435 'EclHKI' => 'GACNNNNNGTC 6', | |
| 436 'EclXI' => 'CGGCCG 1', | |
| 437 'Eco24I' => 'GRGCYC 5', | |
| 438 'Eco32I' => 'GATATC 3', | |
| 439 'Eco47I' => 'GGWCC 1', | |
| 440 'Eco47III' => 'AGCGCT 3', | |
| 441 'Eco52I' => 'CGGCCG 1', | |
| 442 'Eco72I' => 'CACGTG 3', | |
| 443 'Eco81I' => 'CCTNAGG 2', | |
| 444 'Eco88I' => 'CYCGRG 1', | |
| 445 'Eco91I' => 'GGTNACC 1', | |
| 446 'Eco105I' => 'TACGTA 3', | |
| 447 'Eco130I' => 'CCWWGG 1', | |
| 448 'Eco147I' => 'AGGCCT 3', | |
| 449 'EcoHI' => 'CCSGG 0', | |
| 450 'EcoICRI' => 'GAGCTC 3', | |
| 451 'EcoNI' => 'CCTNNNNNAGG 5', | |
| 452 'EcoO65I' => 'GGTNACC 1', | |
| 453 'EcoO109I' => 'RGGNCCY 2', | |
| 454 'EcoRI' => 'GAATTC 1', | |
| 455 'EcoRII' => 'CCWGG 0', | |
| 456 'EcoRV' => 'GATATC 3', | |
| 457 'EcoT14I' => 'CCWWGG 1', | |
| 458 'EcoT22I' => 'ATGCAT 5', | |
| 459 'EcoT38I' => 'GRGCYC 5', | |
| 460 'EgeI' => 'GGCGCC 3', | |
| 461 'EheI' => 'GGCGCC 3', | |
| 462 'ErhI' => 'CCWWGG 1', | |
| 463 'EsaBC3I' => 'TCGA 2', | |
| 464 'EspI' => 'GCTNAGC 2', | |
| 465 'FatI' => 'CATG 0', | |
| 466 'FauNDI' => 'CATATG 2', | |
| 467 'FbaI' => 'TGATCA 1', | |
| 468 'FblI' => 'GTMKAC 2', | |
| 469 'FmuI' => 'GGNCC 4', | |
| 470 'FnuDII' => 'CGCG 2', | |
| 471 'Fnu4HI' => 'GCNGC 2', | |
| 472 'FriOI' => 'GRGCYC 5', | |
| 473 'FseI' => 'GGCCGGCC 6', | |
| 474 'FspI' => 'TGCGCA 3', | |
| 475 'FspAI' => 'RTGCGCAY 4', | |
| 476 'Fsp4HI' => 'GCNGC 2', | |
| 477 'FunI' => 'AGCGCT 3', | |
| 478 'FunII' => 'GAATTC 1', | |
| 479 'HaeI' => 'WGGCCW 3', | |
| 480 'HaeII' => 'RGCGCY 5', | |
| 481 'HaeIII' => 'GGCC 2', | |
| 482 'HapII' => 'CCGG 1', | |
| 483 'HgiAI' => 'GWGCWC 5', | |
| 484 'HgiCI' => 'GGYRCC 1', | |
| 485 'HgiJII' => 'GRGCYC 5', | |
| 486 'HhaI' => 'GCGC 3', | |
| 487 'Hin1I' => 'GRCGYC 2', | |
| 488 'Hin6I' => 'GCGC 1', | |
| 489 'HinP1I' => 'GCGC 1', | |
| 490 'HincII' => 'GTYRAC 3', | |
| 491 'HindII' => 'GTYRAC 3', | |
| 492 'HindIII' => 'AAGCTT 1', | |
| 493 'HinfI' => 'GANTC 1', | |
| 494 'HpaI' => 'GTTAAC 3', | |
| 495 'HpaII' => 'CCGG 1', | |
| 496 'Hpy8I' => 'GTNNAC 3', | |
| 497 'Hpy99I' => 'CGWCG 5', | |
| 498 'Hpy178III' => 'TCNNGA 2', | |
| 499 'Hpy188I' => 'TCNGA 3', | |
| 500 'Hpy188III' => 'TCNNGA 2', | |
| 501 'HpyCH4I' => 'CATG 3', | |
| 502 'HpyCH4III' => 'ACNGT 3', | |
| 503 'HpyCH4IV' => 'ACGT 1', | |
| 504 'HpyCH4V' => 'TGCA 2', | |
| 505 'HpyF10VI' => 'GCNNNNNNNGC 8', | |
| 506 'Hsp92I' => 'GRCGYC 2', | |
| 507 'Hsp92II' => 'CATG 4', | |
| 508 'HspAI' => 'GCGC 1', | |
| 509 'ItaI' => 'GCNGC 2', | |
| 510 'KasI' => 'GGCGCC 1', | |
| 511 'KpnI' => 'GGTACC 5', | |
| 512 'Kpn2I' => 'TCCGGA 1', | |
| 513 'KspI' => 'CCGCGG 4', | |
| 514 'Ksp22I' => 'TGATCA 1', | |
| 515 'KspAI' => 'GTTAAC 3', | |
| 516 'Kzo9I' => 'GATC 0', | |
| 517 'LpnI' => 'RGCGCY 3', | |
| 518 'LspI' => 'TTCGAA 2', | |
| 519 'MabI' => 'ACCWGGT 1', | |
| 520 'MaeI' => 'CTAG 1', | |
| 521 'MaeII' => 'ACGT 1', | |
| 522 'MaeIII' => 'GTNAC 0', | |
| 523 'MamI' => 'GATNNNNATC 5', | |
| 524 'MboI' => 'GATC 0', | |
| 525 'McrI' => 'CGRYCG 4', | |
| 526 'MfeI' => 'CAATTG 1', | |
| 527 'MflI' => 'RGATCY 1', | |
| 528 'MhlI' => 'GDGCHC 5', | |
| 529 'MlsI' => 'TGGCCA 3', | |
| 530 'MluI' => 'ACGCGT 1', | |
| 531 'MluNI' => 'TGGCCA 3', | |
| 532 'Mly113I' => 'GGCGCC 2', | |
| 533 'Mph1103I' => 'ATGCAT 5', | |
| 534 'MroI' => 'TCCGGA 1', | |
| 535 'MroNI' => 'GCCGGC 1', | |
| 536 'MroXI' => 'GAANNNNTTC 5', | |
| 537 'MscI' => 'TGGCCA 3', | |
| 538 'MseI' => 'TTAA 1', | |
| 539 'MslI' => 'CAYNNNNRTG 5', | |
| 540 'MspI' => 'CCGG 1', | |
| 541 'Msp20I' => 'TGGCCA 3', | |
| 542 'MspA1I' => 'CMGCKG 3', | |
| 543 'MspCI' => 'CTTAAG 1', | |
| 544 'MspR9I' => 'CCNGG 2', | |
| 545 'MssI' => 'GTTTAAAC 4', | |
| 546 'MstI' => 'TGCGCA 3', | |
| 547 'MunI' => 'CAATTG 1', | |
| 548 'MvaI' => 'CCWGG 2', | |
| 549 'MvnI' => 'CGCG 2', | |
| 550 'MwoI' => 'GCNNNNNNNGC 7', | |
| 551 'NaeI' => 'GCCGGC 3', | |
| 552 'NarI' => 'GGCGCC 2', | |
| 553 'NciI' => 'CCSGG 2', | |
| 554 'NcoI' => 'CCATGG 1', | |
| 555 'NdeI' => 'CATATG 2', | |
| 556 'NdeII' => 'GATC 0', | |
| 557 'NgoAIV' => 'GCCGGC 1', | |
| 558 'NgoMIV' => 'GCCGGC 1', | |
| 559 'NheI' => 'GCTAGC 1', | |
| 560 'NlaIII' => 'CATG 4', | |
| 561 'NlaIV' => 'GGNNCC 3', | |
| 562 'Nli3877I' => 'CYCGRG 5', | |
| 563 'NmuCI' => 'GTSAC 0', | |
| 564 'NotI' => 'GCGGCCGC 2', | |
| 565 'NruI' => 'TCGCGA 3', | |
| 566 'NruGI' => 'GACNNNNNGTC 6', | |
| 567 'NsbI' => 'TGCGCA 3', | |
| 568 'NsiI' => 'ATGCAT 5', | |
| 569 'NspI' => 'RCATGY 5', | |
| 570 'NspIII' => 'CYCGRG 1', | |
| 571 'NspV' => 'TTCGAA 2', | |
| 572 'NspBII' => 'CMGCKG 3', | |
| 573 'OliI' => 'CACNNNNGTG 5', | |
| 574 'PacI' => 'TTAATTAA 5', | |
| 575 'PaeI' => 'GCATGC 5', | |
| 576 'PaeR7I' => 'CTCGAG 1', | |
| 577 'PagI' => 'TCATGA 1', | |
| 578 'PalI' => 'GGCC 2', | |
| 579 'PauI' => 'GCGCGC 1', | |
| 580 'PceI' => 'AGGCCT 3', | |
| 581 'PciI' => 'ACATGT 1', | |
| 582 'PdiI' => 'GCCGGC 3', | |
| 583 'PdmI' => 'GAANNNNTTC 5', | |
| 584 'Pfl23II' => 'CGTACG 1', | |
| 585 'PflBI' => 'CCANNNNNTGG 7', | |
| 586 'PflFI' => 'GACNNNGTC 4', | |
| 587 'PflMI' => 'CCANNNNNTGG 7', | |
| 588 'PfoI' => 'TCCNGGA 1', | |
| 589 'PinAI' => 'ACCGGT 1', | |
| 590 'Ple19I' => 'CGATCG 4', | |
| 591 'PmaCI' => 'CACGTG 3', | |
| 592 'PmeI' => 'GTTTAAAC 4', | |
| 593 'PmlI' => 'CACGTG 3', | |
| 594 'Ppu10I' => 'ATGCAT 1', | |
| 595 'PpuMI' => 'RGGWCCY 2', | |
| 596 'PpuXI' => 'RGGWCCY 2', | |
| 597 'PshAI' => 'GACNNNNGTC 5', | |
| 598 'PshBI' => 'ATTAAT 2', | |
| 599 'PsiI' => 'TTATAA 3', | |
| 600 'Psp03I' => 'GGWCC 4', | |
| 601 'Psp5II' => 'RGGWCCY 2', | |
| 602 'Psp6I' => 'CCWGG 0', | |
| 603 'Psp1406I' => 'AACGTT 2', | |
| 604 'PspAI' => 'CCCGGG 1', | |
| 605 'Psp124BI' => 'GAGCTC 5', | |
| 606 'PspEI' => 'GGTNACC 1', | |
| 607 'PspGI' => 'CCWGG 0', | |
| 608 'PspLI' => 'CGTACG 1', | |
| 609 'PspN4I' => 'GGNNCC 3', | |
| 610 'PspOMI' => 'GGGCCC 1', | |
| 611 'PspPI' => 'GGNCC 1', | |
| 612 'PspPPI' => 'RGGWCCY 2', | |
| 613 'PssI' => 'RGGNCCY 5', | |
| 614 'PstI' => 'CTGCAG 5', | |
| 615 'PsuI' => 'RGATCY 1', | |
| 616 'PsyI' => 'GACNNNGTC 4', | |
| 617 'PvuI' => 'CGATCG 4', | |
| 618 'PvuII' => 'CAGCTG 3', | |
| 619 'RcaI' => 'TCATGA 1', | |
| 620 'RsaI' => 'GTAC 2', | |
| 621 'RsrII' => 'CGGWCCG 2', | |
| 622 'Rsr2I' => 'CGGWCCG 2', | |
| 623 'SacI' => 'GAGCTC 5', | |
| 624 'SacII' => 'CCGCGG 4', | |
| 625 'SalI' => 'GTCGAC 1', | |
| 626 'SanDI' => 'GGGWCCC 2', | |
| 627 'SatI' => 'GCNGC 2', | |
| 628 'SauI' => 'CCTNAGG 2', | |
| 629 'Sau96I' => 'GGNCC 1', | |
| 630 'Sau3AI' => 'GATC 0', | |
| 631 'SbfI' => 'CCTGCAGG 6', | |
| 632 'ScaI' => 'AGTACT 3', | |
| 633 'SciI' => 'CTCGAG 3', | |
| 634 'ScrFI' => 'CCNGG 2', | |
| 635 'SdaI' => 'CCTGCAGG 6', | |
| 636 'SduI' => 'GDGCHC 5', | |
| 637 'SecI' => 'CCNNGG 1', | |
| 638 'SelI' => 'CGCG 0', | |
| 639 'SexAI' => 'ACCWGGT 1', | |
| 640 'SfcI' => 'CTRYAG 1', | |
| 641 'SfeI' => 'CTRYAG 1', | |
| 642 'SfiI' => 'GGCCNNNNNGGCC 8', | |
| 643 'SfoI' => 'GGCGCC 3', | |
| 644 'Sfr274I' => 'CTCGAG 1', | |
| 645 'Sfr303I' => 'CCGCGG 4', | |
| 646 'SfuI' => 'TTCGAA 2', | |
| 647 'SgfI' => 'GCGATCGC 5', | |
| 648 'SgrAI' => 'CRCCGGYG 2', | |
| 649 'SgrBI' => 'CCGCGG 4', | |
| 650 'SinI' => 'GGWCC 1', | |
| 651 'SlaI' => 'CTCGAG 1', | |
| 652 'SmaI' => 'CCCGGG 3', | |
| 653 'SmiI' => 'ATTTAAAT 4', | |
| 654 'SmiMI' => 'CAYNNNNRTG 5', | |
| 655 'SmlI' => 'CTYRAG 1', | |
| 656 'SnaBI' => 'TACGTA 3', | |
| 657 'SpaHI' => 'GCATGC 5', | |
| 658 'SpeI' => 'ACTAGT 1', | |
| 659 'SphI' => 'GCATGC 5', | |
| 660 'SplI' => 'CGTACG 1', | |
| 661 'SrfI' => 'GCCCGGGC 4', | |
| 662 'Sse9I' => 'AATT 0', | |
| 663 'Sse232I' => 'CGCCGGCG 2', | |
| 664 'Sse8387I' => 'CCTGCAGG 6', | |
| 665 'Sse8647I' => 'AGGWCCT 2', | |
| 666 'SseBI' => 'AGGCCT 3', | |
| 667 'SspI' => 'AATATT 3', | |
| 668 'SspBI' => 'TGTACA 1', | |
| 669 'SstI' => 'GAGCTC 5', | |
| 670 'SstII' => 'CCGCGG 4', | |
| 671 'StuI' => 'AGGCCT 3', | |
| 672 'StyI' => 'CCWWGG 1', | |
| 673 'SunI' => 'CGTACG 1', | |
| 674 'SwaI' => 'ATTTAAAT 4', | |
| 675 'TaaI' => 'ACNGT 3', | |
| 676 'TaiI' => 'ACGT 4', | |
| 677 'TaqI' => 'TCGA 1', | |
| 678 'TasI' => 'AATT 0', | |
| 679 'TatI' => 'WGTACW 1', | |
| 680 'TauI' => 'GCSGC 4', | |
| 681 'TelI' => 'GACNNNGTC 4', | |
| 682 'TfiI' => 'GAWTC 1', | |
| 683 'ThaI' => 'CGCG 2', | |
| 684 'TliI' => 'CTCGAG 1', | |
| 685 'Tru1I' => 'TTAA 1', | |
| 686 'Tru9I' => 'TTAA 1', | |
| 687 'TscI' => 'ACGT 4', | |
| 688 'TseI' => 'GCWGC 1', | |
| 689 'Tsp45I' => 'GTSAC 0', | |
| 690 'Tsp509I' => 'AATT 0', | |
| 691 'Tsp4CI' => 'ACNGT 3', | |
| 692 'TspEI' => 'AATT 0', | |
| 693 'Tth111I' => 'GACNNNGTC 4', | |
| 694 'TthHB8I' => 'TCGA 1', | |
| 695 'UnbI' => 'GGNCC 0', | |
| 696 'Van91I' => 'CCANNNNNTGG 7', | |
| 697 'Vha464I' => 'CTTAAG 1', | |
| 698 'VneI' => 'GTGCAC 1', | |
| 699 'VpaK11AI' => 'GGWCC 0', | |
| 700 'VpaK11BI' => 'GGWCC 1', | |
| 701 'VspI' => 'ATTAAT 2', | |
| 702 'XagI' => 'CCTNNNNNAGG 5', | |
| 703 'XapI' => 'RAATTY 1', | |
| 704 'XbaI' => 'TCTAGA 1', | |
| 705 'XceI' => 'RCATGY 5', | |
| 706 'XcmI' => 'CCANNNNNNNNNTGG 8', | |
| 707 'XhoI' => 'CTCGAG 1', | |
| 708 'XhoII' => 'RGATCY 1', | |
| 709 'XmaI' => 'CCCGGG 1', | |
| 710 'XmaIII' => 'CGGCCG 1', | |
| 711 'XmaCI' => 'CCCGGG 1', | |
| 712 'XmaJI' => 'CCTAGG 1', | |
| 713 'XmiI' => 'GTMKAC 2', | |
| 714 'XmnI' => 'GAANNNNTTC 5', | |
| 715 'XspI' => 'CTAG 1', | |
| 716 'ZhoI' => 'ATCGAT 2', | |
| 717 'ZraI' => 'GACGTC 3', | |
| 718 'Zsp2I' => 'ATGCAT 5', | |
| 719 ); | |
| 720 | |
| 721 @RE_available = sort keys %RE; | |
| 722 | |
| 723 | |
| 724 =head1 new | |
| 725 | |
| 726 Title : new | |
| 727 Purpose : Calls the superclass constructor (Bio::Root::Root) and then | |
| 728 : initializes the RestrictionEnzyme object. | |
| 729 Returns : The new Bio::Tools::RestrictionEnzyme object | |
| 730 Argument : Parameters passed to new() | |
| 731 Comments : A RestrictionEnzyme object manages its recognition sequence | |
| 732 : as a Bio::PrimarySeq object. | |
| 733 | |
| 734 See Also : L<_make_custom>(), L<_make_standard>(), B<Bio::PrimarySeq.pm::_initialize()> | |
| 735 | |
| 736 =cut | |
| 737 | |
| 738 # Constructor. Accepts -NAME (an enzyme name, or 'Name--SITE' when -MAKE is 'custom') and -MAKE. | |
| 739 sub new { | |
| 740 # Returns the new object; the recognition site is kept as a Bio::PrimarySeq in $self->{'_seq'}. | |
| 741 my($class, @args) = @_; | |
| 742 require Bio::PrimarySeq; # no visible 'use' line loads it; make sure it is available before constructing below | |
| 743 my $self = $class->SUPER::new(@args); | |
| 744 my ($name,$make) = $self->_rearrange([qw(NAME MAKE)],@args); | |
| 745 $name && $self->name($name); | |
| 746 my %data; | |
| 747 if(defined $make && $make eq 'custom') { | |
| 748 %data = $self->_make_custom($name); | |
| 749 } else { | |
| 750 %data = $self->_make_standard($name); | |
| 751 } | |
| 752 $self->{'_seq'} = Bio::PrimarySeq->new(%data, | |
| 753 -VERBOSE =>$self->verbose, | |
| 754 -alphabet => 'dna', | |
| 755 ); | |
| 756 return $self; | |
| 757 } | |
| 758 | |
| 759 | |
| 760 #=head1 _make_standard | |
| 761 # | |
| 762 # Title : _make_standard | |
| 763 # Usage : n/a; automatically called by new() | |
| 764 # Purpose : Permits standard RE object construction from an enzyme name | |
| 765 # : known to this module, such as 'EcoRI'. | |
| 766 # Returns : Hash containing named parameters for Bio::PrimarySeq.pm constructor. | |
| 767 # Argument : String containing the enzyme name. | |
| 768 # Throws : Exception if the requested enzyme name is unavailable. | |
| 769 # : NOTE: Case sensitive. | |
| 770 # | |
| 771 #See Also : L<Bio::PrimarySeq::_initialize()|Bio::PrimarySeq>, L<_make_custom()|_make_custom> | |
| 772 # | |
| 773 #=cut | |
| 774 | |
| 775 # Looks up $name (leading/trailing whitespace removed) in %RE; throws if is_available() rejects it. | |
| 776 sub _make_standard { | |
| 777 # Each %RE value is 'SITE CUTS-AT': SITE is returned as -SEQ, CUTS-AT is stored in $self->{'_cuts_after'}. | |
| 778 my($self, $name) = @_; | |
| 779 | |
| 780 $name =~ s/^\s+|\s+$//g; | |
| 781 | |
| 782 $self->is_available($name) || | |
| 783 $self->throw("Unavailable or undefined enzyme: $name (Note: CASE SENSITIVE)\n" . | |
| 784 "Currently available enzymes: \n@RE_available\n"); | |
| 785 | |
| 786 my @data = split( ' ', $RE{$name}); | |
| 787 my (%dat); | |
| 788 $dat{-SEQ} = $data[0]; | |
| 789 $dat{-NAME} = $dat{-ID}= $name; | |
| 790 $self->{'_cuts_after'} = $data[1]; | |
| 791 | |
| 792 return %dat; | |
| 793 } | |
| 794 | |
| 795 | |
| 796 #=head1 _make_custom | |
| 797 # | |
| 798 # Title : _make_custom | |
| 799 # Usage : n/a; automatically called by new() | |
| 800 # Purpose : Permits custom RE object construction from strings | |
| 801 # : such as 'EcoRI--G^AATTC' as the name of the enzyme. | |
| 802 # Returns : Hash containing named parameters for Bio::PrimarySeq.pm constructor. | |
| 803 # Argument : String containing string with special syntax. | |
| 804 # Throws : Exception if the string has bad syntax. | |
| 805 # : Warning if the string did not specify cut position. | |
| 806 # : Places cut site after 5'-most position. | |
| 807 # | |
| 808 #See Also : L<Bio::PrimarySeq::_initialize()|Bio::PrimarySeq> | |
| 809 # | |
| 810 #=cut | |
| 811 | |
| 812 #' | |
| 813 # Parses a custom enzyme spec of the form 'Name--SITE' (e.g. 'EcoRV--GAT^ATC'); all whitespace is removed first. | |
| 814 sub _make_custom { | |
| 815 # Returns a hash with -NAME, -ID and -SEQ; also resets the object's name and sets $self->{'_cuts_after'}. | |
| 816 my($self, $name) = @_; | |
| 817 | |
| 818 $name =~ s/\s+//g; | |
| 819 my @parts = split '--', $name; | |
| 820 my (%dat); | |
| 821 $dat{-NAME} = $dat{-ID} = $parts[0]; | |
| 822 $self->name($parts[0]); ## Reset name | |
| 823 | |
| 824 $parts[1] || return $self->throw("Undefined recognition site for $parts[0]. " . | |
| 825 "Use this syntax: EcoRV--GAT^ATC"); ## one concatenated message so the syntax hint is part of the error text | |
| 826 ## Determine the cuts_after point: the index of '^' within the site, or 0 (with a warning) when absent. | |
| 827 my $cut_index = index $parts[1], '^'; | |
| 828 if( $cut_index <0) { $cut_index = 0; | |
| 829 $self->warn("Unknown cut position for $parts[0]. Assuming position 0\n" . | |
| 830 "Use carat to specify cut position (e.g., G^AATTC)"); } | |
| 831 $self->{'_cuts_after'} = $cut_index; | |
| 832 | |
| 833 ## Save the recognition sequence after removing the '^' | |
| 834 $parts[1] =~ s/\^//g; | |
| 835 $dat{-SEQ} = $parts[1]; | |
| 836 return %dat; | |
| 837 } | |
| 838 | |
| 839 | |
| 840 =head1 cuts_after | |
| 841 | |
| 842 Title : cuts_after | |
| 843 Usage : $num = $re->cuts_after(); | |
| 844 Purpose : Sets/Gets an integer indicating the position of cleavage | |
| 845 : relative to the 5' end of the recognition sequence. | |
| 846 Returns : Integer | |
| 847 Argument : Integer (optional) | |
| 848 Throws : Exception if argument is non-numeric. | |
| 849 Access : Public | |
| 850 Comments : This method is only needed to change the cut | |
| 851 : position. This data is automatically set during | |
| 852 : construction. | |
| 853 | |
| 854 See Also : L<_make_standard()|_make_standard>, L<_make_custom()|_make_custom> | |
| 855 | |
| 856 =cut | |
| 857 | |
| 858 #' | |
| 859 # Get/set the cut position. With an argument, validates it as a whole number and stores it. | |
| 860 sub cuts_after { | |
| 861 # Always returns the current value of $self->{'_cuts_after'}; throws if the argument is not an integer. | |
| 862 my $self = shift; | |
| 863 if(@_) { my $num = shift; | |
| 864 if(!defined $num or $num !~ /^\s*[-+]?\d+\s*$/) { # reject undef and anything that is not a whole number (e.g. 'abc', '3x', 1.5) | |
| 865 $self->throw("The cuts_after position must be an integer ($num)"); | |
| 866 } | |
| 867 $self->{'_cuts_after'} = $num; | |
| 868 } | |
| 869 $self->{'_cuts_after'}; | |
| 870 } | |
| 871 | |
| 872 | |
| 873 | |
| 874 =head1 site | |
| 875 | |
| 876 Title : site | |
| 877 Usage : $re->site(); | |
| 878 Purpose : Gets the recognition sequence for the enzyme. | |
| 879 Example : $seq_string = $re->site(); | |
| 880 Returns : String containing recognition sequence indicating | |
| 881 : cleavage site as in 'G^AATTC'. | |
| 882 Argument : n/a | |
| 883 Throws : n/a | |
| 884 Comments : If you want a simple string representing the site without | |
| 885 any '^', use the string() method. | |
| 886 | |
| 887 See Also : L<string()|string> | |
| 888 | |
| 889 =cut | |
| 890 | |
| 891 # Returns the recognition sequence string with '^' inserted after the cuts_after position. | |
| 892 sub site { | |
| 893 # cuts_after <= 0 yields the bare sequence (no '^'); cuts_after >= the site length appends '^' at the end. | |
| 894 my $self = shift; | |
| 895 my $seq = $self->seq; | |
| 896 my $cuts_after = $self->cuts_after; | |
| 897 if($cuts_after > 0) { | |
| 898 if( $cuts_after >= $seq->length) { | |
| 899 return $seq->seq.'^'; | |
| 900 } else { | |
| 901 return $seq->subseq(1, $self->cuts_after).'^'.$seq->subseq($self->cuts_after+1, $seq->length); | |
| 902 } | |
| 903 } else { | |
| 904 return $seq->seq; | |
| 905 } | |
| 906 } | |
| 907 | |
| 908 | |
| 909 =head1 seq | |
| 910 | |
| 911 Title : seq | |
| 912 Usage : $re->seq(); | |
| 913 Purpose : Get the Bio::PrimarySeq.pm-derived object representing | |
| 914 : the recognition sequence | |
| 915 Returns : Bio::PrimarySeq object (use string() to get the sequence as a string) | |
| 916 Argument : n/a | |
| 917 Throws : n/a | |
| 918 | |
| 919 See Also : L<string()|string>, L<revcom()|revcom> | |
| 920 | |
| 921 =cut | |
| 922 | |
| 923 # Accessor: returns the object stored in $self->{'_seq'} by new(). | |
| 924 sub seq { my $self = shift; $self->{'_seq'}; } | |
| 925 #--------- | |
| 926 | |
| 927 | |
| 928 | |
=head1 string

 Title     : string
 Usage     : $re->string();
 Purpose   : Get a string representing the recognition sequence.
 Returns   : String. Does NOT contain a '^' representing the cut location
             as returned by the site() method.
 Argument  : n/a
 Throws    : n/a
 Comments  : Delegates to the seq() method of the stored
             Bio::PrimarySeq-derived object.

See Also   : L<seq()|seq>, L<site()|site>, L<revcom()|revcom>

=cut

#-----------
sub string {
#-----------
    my ($self)   = @_;
    my $site_obj = $self->{'_seq'};
    return $site_obj->seq;
}
| 947 | |
| 948 | |
| 949 | |
=head1 revcom

 Title     : revcom
 Usage     : $re->revcom();
 Purpose   : Get a string representing the reverse complement of
           : the recognition sequence.
 Returns   : String
 Argument  : n/a
 Throws    : n/a
 Comments  : Delegates to the Bio::PrimarySeq.pm-derived object. Its
             revcom() yields another sequence object, so the string is
             extracted from that object with seq().

See Also   : L<seq()|seq>, L<string()|string>

=cut

#-----------
sub revcom {
#-----------
    my ($self) = @_;
    my $rc_obj = $self->{'_seq'}->revcom;
    return $rc_obj->seq();
}
| 970 | |
| 971 | |
| 972 | |
=head1 cut_seq

 Title     : cut_seq
 Usage     : $re->cut_seq(<sequence object>);
 Purpose   : Conceptually cut or "digest" a DNA sequence with the given enzyme.
 Example   : @fragments = $re->cut_seq(<sequence object>);
 Returns   : List of strings containing the resulting fragments, in
           : sequence order. Joining the fragments reproduces the
           : input sequence.
 Argument  : Reference to a Bio::PrimarySeqI-derived object.
 Throws    : Exception if the argument is not a Bio::PrimarySeqI object.
           : Exception if the cut position lies outside the recognition
           : sequence.
 Comments  : Strategy relies on Perl's built-in split() function.
           : The recognition pattern is captured by split(), and each
           : matched site is divided at the cuts_after position and
           : re-attached to the neighbouring fragments. Fragments
           : therefore carry the bases actually present in the input,
           : also for ambiguous sites (e.g., containing N) and for
           : matches of the reverse complement of a non-palindromic site.
           : Matches of the reverse complement are divided at the same
           : offset from their left end as forward matches.
           : A site at the very end of the input is cut like any other;
           : an empty trailing fragment is not returned.
           :
           : There is currently no support for partial digestions.
           : There is currently no support for circular sequences.
           : (This should just involve merging the first and last frag
           : if $seqObj->is_circular returns true).

=cut

#'
#-------------
sub cut_seq {
#-------------
    my( $self, $seqObj) = @_;

    # isa() is called inside eval so that an unblessed reference is
    # reported through throw() instead of dying inside the method call.
    my $is_seq = 0;
    if( ref($seqObj) ) {
        local $@;
        $is_seq = eval { $seqObj->isa('Bio::PrimarySeqI') } ? 1 : 0;
    }
    if( !$is_seq ) {
        $self->throw( "Can't cut sequence. Missing or invalid object. ".
                      "seqObj: ".(defined $seqObj ? $seqObj : 'undef'));
    }

    my $cuts_after = $self->{'_cuts_after'} || 0;
    my $reSeq      = $self->seq;
    my $site_str   = $reSeq->seq();

    # Each matched site is divided with substr() below; an out-of-range
    # position would silently mis-split it, so reject it up front.
    if( $cuts_after < 0 or $cuts_after > $reSeq->length ) {
        $self->throw( "Can't cut sequence. Cut position ($cuts_after) ".
                      "lies outside the recognition sequence ($site_str)");
    }

    $self->debug("3' site: ".substr($site_str, 0, $cuts_after).
                 "\n5' site: ".substr($site_str, $cuts_after)."\n");

    # Build the search pattern; a non-palindromic site must also be
    # searched for as its reverse complement.
    my $pattern = uc $self->_expanded_string;

    if(!$self->palindromic and $self->name ne 'N') {
        my $revseq = $self->_expanded_string( $reSeq->revcom->seq );
        $pattern .= '|'.uc($revseq);
    }
    $self->debug(sprintf("$ID: site seq: %s\n\n", $pattern));
    $self->debug(sprintf("$ID: splitting %s\n\n",$reSeq->seq));

    # The capture group makes split() return the matched sites between
    # the fragments; the -1 limit keeps a trailing empty field so that a
    # site at the very end of the sequence is not lost.
    my @pieces = split(/($pattern)/i, $seqObj->seq, -1);

    my @re_frags;
    if( @pieces ) {
        @re_frags = ( shift @pieces );
        while( @pieces ) {
            my( $matched, $next_frag ) = splice(@pieces, 0, 2);
            $next_frag = '' unless defined $next_frag;
            # Left part of the site stays with the preceding fragment,
            # the remainder starts the following fragment.
            $re_frags[-1] .= substr($matched, 0, $cuts_after);
            push @re_frags, substr($matched, $cuts_after).$next_frag;
        }
        # A cut exactly at the end of the input leaves nothing behind it.
        pop @re_frags if @re_frags > 1 and $re_frags[-1] eq '';
    }

    $self->debug("$ID: cut_seq, ".scalar @re_frags. " fragments.\n");

    return @re_frags;
}
| 1047 | |
=head1 cut_locations

 Title     : cut_locations
 Usage     : my $locations = $re->cut_locations(<sequence_object>);
 Purpose   : Report the location of the recognition site(s) within
           : an input sequence.
 Example   : my $locations = $re->cut_locations($seqObj);
 Returns   : Arrayref of 0-based offsets, one per match, giving the
           : number of bases that precede the start of each recognition
           : site found in the input sequence. Matching is
           : case-insensitive and uses the recognition sequence as
           : returned by string(); the reverse complement is not searched.
 Argument  : Reference to a Bio::PrimarySeqI-derived sequence object.
 Throws    : n/a
 Comments  :

=cut

#-----------------
sub cut_locations {
#-----------------
    my($self, $seqobj) = @_;

    my $site = $self->_expanded_string;
    my $seq  = $seqobj->seq;
    my @locations;
    while( $seq =~ /$site/ig ) {
        # $-[0] is the offset where the current match starts, i.e. the
        # length of the text preceding it. It is used instead of $`
        # because mentioning $` anywhere can slow down every regex in
        # the program on older perls.
        push @locations, $-[0];
    }
    return \@locations;
}
| 1077 | |
# Purpose : Expand nucleotide ambiguity codes into the regular-expression
#           fragments that match the bases they stand for.
# Argument: (optional) the string to be expanded. If not supplied (or
#           false), the string returned by $self->string() is used.
# Returns : String. Returned unexpanded when the enzyme name is 'N'.
sub _expanded_string {
    my ($self, $str) = @_;

    $str ||= $self->string;

    # The enzyme named 'N' keeps its site literally.
    return $str if $self->name eq 'N';

    # Ambiguity code => regex fragment. The fragments contain only
    # A, C, G, T and regex punctuation, so a single pass is sufficient.
    my %regex_for = (
        'N' => '.',     'X' => '.',
        'R' => '[AG]',  'Y' => '[CT]',
        'S' => '[GC]',  'W' => '[AT]',
        'M' => '[AC]',  'K' => '[TG]',
        'B' => '[CGT]', 'D' => '[AGT]',
        'H' => '[ACT]', 'V' => '[ACG]',
    );
    $str =~ s/([NXRYSWMKBDHV])/$regex_for{$1}/g;

    return $str;
}
| 1102 | |
| 1103 | |
=head1 annotate_seq

 Title     : annotate_seq
 Usage     : $re->annotate_seq(<sequence_object>);
 Purpose   : Identify the location of the recognition site(s) within
           : an input sequence. Uses HTML.
 Example   : $annot_seq = $re->annotate_seq($seqObj);
 Returns   : String containing the sequence with every occurrence of
           : the recognition site wrapped in <b>...</b>. The bases of
           : the input sequence are left unchanged.
 Argument  : Reference to a Bio::PrimarySeq.pm-derived sequence object.
 Throws    : n/a
 Comments  : The annotated sequence must be viewed with a web
           : browser to see the location(s) of the recognition site(s).
           : Matching is case-insensitive, as in cut_seq() and
           : cut_locations().

=cut

#-----------------
sub annotate_seq {
#-----------------
    my($self, $seqObj) = @_;

    my $site = $self->_expanded_string;
    my $seq  = $seqObj->seq;

    # Re-insert the matched text ($1), not the pattern: for ambiguous
    # sites $site holds regex syntax such as '.' or '[AG]', which would
    # otherwise overwrite the real bases.
    $seq =~ s{($site)}{<b>$1</b>}gi;
    return $seq;
}
| 1130 | |
| 1131 | |
=head1 palindromic

 Title     : palindromic
 Usage     : $re->palindromic();
 Purpose   : Determines if the recognition sequence is palindromic
           : for the current restriction enzyme, i.e. whether it is
           : identical to its own reverse complement.
 Returns   : Boolean
 Argument  : n/a
 Throws    : n/a
 Access    : Public
 Comments  : A palindromic site (EcoRI): 5-GAATTC-3
           :                             3-CTTAAG-5

=cut

#----------------
sub palindromic {
#----------------
    my ($self)  = @_;
    my $forward = $self->string;
    my $reverse = $self->revcom;
    return $forward eq $reverse;
}
| 1153 | |
| 1154 | |
| 1155 | |
=head1 is_available

 Title     : is_available
 Usage     : $re->is_available(<string containing name of enzyme>);
 Purpose   : Determine if an enzyme is available (to this module),
           : i.e. whether its name is a key of the package lexical %RE.
 Example   : $re->is_available('EcoRI');
           : &Bio::Tools::RestrictionEnzyme::is_available($object,'EcoRI');
 Returns   : Boolean
 Argument  : String
 Throws    : n/a
 Comments  : This method does NOT give information about
           : commercial availability (yet).
           : Enzyme names are CASE SENSITIVE.

See Also   : L<available_list()|available_list>

=cut

#----------------
sub is_available {
#----------------
    # The invocant is not used; only the enzyme name is looked up.
    my $name = $_[1];
    return exists $RE{$name};
}
| 1181 | |
#--------------
sub available {
#--------------
    # Deprecated alias for is_available(): prints a deprecation notice
    # to STDERR on every call and then returns is_available($name).
    my($self,$name) = @_;
    # ${ID} is written with braces on purpose: in a double-quoted string
    # "$ID::" is read as the package-qualified variable $ID::, not as
    # $ID followed by '::', which left the module name out of the notice.
    print STDERR "\nDeprecated method: ${ID}::available(); ".
                 "use is_available() instead.\n";
    return $self->is_available($name);
}
| 1190 | |
| 1191 | |
=head2 name

 Title   : name
 Usage   : $obj->name($newval)
 Function: Get or set the value stored under the object's 'name' key.
 Example : $obj->name('EcoRI'); $enzyme_name = $obj->name();
 Returns : value of name
 Args    : newvalue (optional); an undefined value leaves the stored
           name unchanged


=cut

sub name {
    my ($obj, $value) = @_;

    # Only a defined argument overwrites the stored name.
    $obj->{'name'} = $value if defined $value;

    return $obj->{'name'};
}
| 1212 | |
=head1 available_list

 Title     : available_list
 Usage     : $re->available_list([<integer>]);
 Purpose   : Retrieve a list of currently available enzymes.
 Example   : @all = $re->available_list();  ## All enzymes
           : @six_cutters = $re->available_list(6);  ## All 6-cutters
 Returns   : List of strings (enzyme names). Without an argument, or with
           : a false argument or 'all', every name in @RE_available is
           : returned. Otherwise only the names whose %RE entry starts
           : with a whitespace-delimited field of the given length.
 Argument  : Integer (optional)
 Throws    : n/a
 Comments  : This method may be more appropriate for a REData.pm class.

See Also   : L<is_available()|is_available>

=cut

#-------------------
sub available_list {
#-------------------
    my($self,$size) = @_;
    $size ||= 'all';

    return @RE_available if $size eq 'all';

    # Keep the enzymes whose first whitespace-separated field in %RE
    # has the requested length.
    return grep {
        my $first_field = (split /\s/, $RE{$_})[0];
        length($first_field) == $size;
    } @RE_available;
}
| 1246 | |
| 1247 1; |
