| 
0
 | 
     1 #------------------------------------------------------------------
 | 
| 
 | 
     2 # $Id: RestrictionEnzyme.pm,v 1.25.2.1 2003/06/29 00:53:20 jason Exp $
 | 
| 
 | 
     3 #
 | 
| 
 | 
     4 # BioPerl module Bio::Tools::RestrictionEnzyme
 | 
| 
 | 
     5 #
 | 
| 
 | 
     6 # Cared for by Steve Chervitz <sac@bioperl.org>
 | 
| 
 | 
     7 #
 | 
| 
 | 
     8 # You may distribute this module under the same terms as perl itself
 | 
| 
 | 
     9 #------------------------------------------------------------------
 | 
| 
 | 
    10 
 | 
| 
 | 
    11 ## POD Documentation:
 | 
| 
 | 
    12 
 | 
| 
 | 
    13 =head1 NAME
 | 
| 
 | 
    14 
 | 
| 
 | 
    15 Bio::Tools::RestrictionEnzyme - Bioperl object for a restriction endonuclease
 | 
| 
 | 
    16 (cuts DNA at specific locations)
 | 
| 
 | 
    17 
 | 
| 
 | 
    18 =head1 SYNOPSIS
 | 
| 
 | 
    19 
 | 
| 
 | 
    20 =head2 Object Creation
 | 
| 
 | 
    21 
 | 
| 
 | 
    22     require Bio::Tools::RestrictionEnzyme;
 | 
| 
 | 
    23 
 | 
| 
 | 
    24     ## Create a new object by name.
 | 
| 
 | 
    25 
 | 
| 
 | 
    26     $re1 = new Bio::Tools::RestrictionEnzyme(-NAME =>'EcoRI');
 | 
| 
 | 
    27 
 | 
| 
 | 
    28     ## Create a new object using special syntax
 | 
| 
 | 
    29     ## which specifies the enzyme name, recognition site, and cut position.
 | 
| 
 | 
    30     ## Used for enzymes not known to this module.
 | 
| 
 | 
    31 
 | 
| 
 | 
    32     $re2 = new Bio::Tools::RestrictionEnzyme(-NAME =>'EcoRV--GAT^ATC', 
 | 
| 
 | 
    33 				  	     -MAKE =>'custom');
 | 
| 
 | 
    34 
 | 
| 
 | 
    35     ## Get a list of the resulting fragments when a sequence is cut with
 | 
| 
 | 
    36     ## the given enzyme. The method expects a Bio::Seq object.
 | 
| 
 | 
    37 
 | 
| 
 | 
    38     @fragments = $re2->cut_seq($seqobj);
 | 
| 
 | 
    39 
 | 
| 
 | 
    40     ## Get a list of names of all available restriction enzymes 
 | 
| 
 | 
    41     ## known to this module.
 | 
| 
 | 
    42 
 | 
| 
 | 
    43     @all = $re1->available_list();
 | 
| 
 | 
    44 
 | 
| 
 | 
    45     ## Get the names of restriction enzymes that have 6 bp 
 | 
| 
 | 
    46     ## recognition sequences.
 | 
| 
 | 
    47 
 | 
| 
 | 
    48     @sixcutters = $re1->available_list(6);
 | 
| 
 | 
    49 
 | 
| 
 | 
    50 
 | 
| 
 | 
    51 =head1 INSTALLATION
 | 
| 
 | 
    52 
 | 
| 
 | 
    53 This module is included with the central Bioperl distribution:
 | 
| 
 | 
    54 
 | 
| 
 | 
    55    http://bio.perl.org/Core/Latest
 | 
| 
 | 
    56    ftp://bio.perl.org/pub/DIST
 | 
| 
 | 
    57 
 | 
| 
 | 
    58 Follow the installation instructions included in the README file.
 | 
| 
 | 
    59 
 | 
| 
 | 
    60 =head1 DESCRIPTION
 | 
| 
 | 
    61 
 | 
| 
 | 
    62 The Bio::Tools::RestrictionEnzyme.pm module encapsulates generic data and 
 | 
| 
 | 
    63 methods for using restriction endonucleases for in silico restriction
 | 
| 
 | 
    64 analysis of DNA sequences.
 | 
| 
 | 
    65 
 | 
| 
 | 
    66 =head2 Considerations
 | 
| 
 | 
    67 
 | 
| 
 | 
    68 This module is a precursor for a more full featured version that may do such
 | 
| 
 | 
    69 things as download data from online databases such as REBase http://www.neb.com/rebase/.
 | 
| 
 | 
    70 Thus, there is currently no functionality for obtaining data about commercial
 | 
| 
 | 
    71 availability for a restriction enzyme.
 | 
| 
 | 
    72 
 | 
| 
 | 
    73 At some point in the future, it may make sense to derive RestrictionEnzymes
 | 
| 
 | 
    74 from a class such as Bio::Enzyme or Bio::Prot::Protein (neither of which now
 | 
| 
 | 
    75 exist) so that more data about the enzyme and related information can be 
 | 
| 
 | 
    76 easily obtained.
 | 
| 
 | 
    77 
 | 
| 
 | 
    78 This module is currently in use at 
 | 
| 
 | 
    79 
 | 
| 
 | 
    80  http://genome-www.stanford.edu/Sacch3D/analysis/
 | 
| 
 | 
    81 
 | 
| 
 | 
    82 
 | 
| 
 | 
    83 =head2 Digesting on Runs of N
 | 
| 
 | 
    84 
 | 
| 
 | 
    85 To digest a sequence at runs of N's, here's what you can do:
 | 
| 
 | 
    86 
 | 
| 
 | 
    87     $re_n  = new Bio::Tools::RestrictionEnzyme(-name=>'N--NNNNN',
 | 
| 
 | 
    88                                                -make=>'custom');
 | 
| 
 | 
    89 
 | 
| 
 | 
    90 Specify the number of N's you want to match in the -name parameter. 
 | 
| 
 | 
    91 So the above example will recognize and cut at runs of 5 Ns.
 | 
| 
 | 
    92  If you wanted to cut at runs of 10 N's, you would use 
 | 
| 
 | 
    93 
 | 
| 
 | 
    94      -name => 'N--NNNNNNNNNN'
 | 
| 
 | 
    95 
 | 
| 
 | 
    96 Note that you must use a specific number of N's, you cannot use a regexp to
 | 
| 
 | 
    97 digest at N+ for example, because the actual number of N's at each site are
 | 
| 
 | 
    98 not recorded when the sequence is analyzed. So cut_locations( ) wouldn't be 
 | 
| 
 | 
    99 correct. 
 | 
| 
 | 
   100 
 | 
| 
 | 
   101 =head1 EXAMPLES
 | 
| 
 | 
   102 
 | 
| 
 | 
   103 See the script examples/restriction.pl in the Bioperl distribution.
 | 
| 
 | 
   104 
 | 
| 
 | 
   105 =head1 DEPENDENCIES 
 | 
| 
 | 
   106 
 | 
| 
 | 
   107 Bio::Tools::RestrictionEnzyme.pm is a concrete class that inherits from 
 | 
| 
 | 
   108 B<Bio::Root::Root> and uses by delegation B<Bio::PrimarySeq>.
 | 
| 
 | 
   109 
 | 
| 
 | 
   110 =head1 FEEDBACK
 | 
| 
 | 
   111 
 | 
| 
 | 
   112 =head2 Mailing Lists 
 | 
| 
 | 
   113 
 | 
| 
 | 
   114 User feedback is an integral part of the evolution of this and other Bioperl
 | 
| 
 | 
   115 modules. Send your comments and suggestions preferably to one of the Bioperl
 | 
| 
 | 
   116 mailing lists. Your participation is much appreciated.
 | 
| 
 | 
   117 
 | 
| 
 | 
   118    bioperl-l@bioperl.org             - General discussion
 | 
| 
 | 
   119    http://bioperl.org/MailList.shtml - About the mailing lists
 | 
| 
 | 
   120 
 | 
| 
 | 
   121 =head2 Reporting Bugs
 | 
| 
 | 
   122 
 | 
| 
 | 
   123 Report bugs to the Bioperl bug tracking system to help us keep track of the bugs
 | 
| 
 | 
   124 and their resolution. Bug reports can be submitted via email or the web:
 | 
| 
 | 
   125 
 | 
| 
 | 
   126     bioperl-bugs@bio.perl.org
 | 
| 
 | 
   127     http://bugzilla.bioperl.org/
 | 
| 
 | 
   128 
 | 
| 
 | 
   129 =head1 AUTHOR
 | 
| 
 | 
   130 
 | 
| 
 | 
   131 Steve Chervitz, E<lt>sac@bioperl.orgE<gt>
 | 
| 
 | 
   132 
 | 
| 
 | 
   133 =head1 COPYRIGHT
 | 
| 
 | 
   134 
 | 
| 
 | 
   135 Copyright (c) 1997-2002 Steve A. Chervitz. All Rights Reserved.
 | 
| 
 | 
   136 This module is free software; you can redistribute it and/or 
 | 
| 
 | 
   137 modify it under the same terms as Perl itself.
 | 
| 
 | 
   138 
 | 
| 
 | 
   139 =head1 SEE ALSO
 | 
| 
 | 
   140 
 | 
| 
 | 
   141   Bio::Root::Root    - Base class.
 | 
| 
 | 
   142   Bio::PrimarySeq    - Lightweight sequence object.
 | 
| 
 | 
   143 
 | 
| 
 | 
   144   http://bio.perl.org/  - Bioperl Project Homepage
 | 
| 
 | 
   145 
 | 
| 
 | 
   146 =cut
 | 
| 
 | 
   147 
 | 
| 
 | 
   148 #
 | 
| 
 | 
   149 ##
 | 
| 
 | 
   150 ###
 | 
| 
 | 
   151 #### END of main POD documentation.
 | 
| 
 | 
   152 ###
 | 
| 
 | 
   153 ##
 | 
| 
 | 
   154 #'
 | 
| 
 | 
   155 
 | 
| 
 | 
   156 
 | 
| 
 | 
   157 =head1 APPENDIX
 | 
| 
 | 
   158 
 | 
| 
 | 
   159 Methods beginning with a leading underscore are considered private
 | 
| 
 | 
   160 and are intended for internal use by this module. They are
 | 
| 
 | 
   161 B<not> considered part of the public interface and are described here
 | 
| 
 | 
   162 for documentation purposes only.
 | 
| 
 | 
   163 
 | 
| 
 | 
   164 =cut
 | 
| 
 | 
   165 
 | 
| 
 | 
   166 
 | 
| 
 | 
   167 package Bio::Tools::RestrictionEnzyme;
 | 
| 
 | 
   168 use strict;
 | 
| 
 | 
   169 
 | 
| 
 | 
   170 use Bio::Root::Root;
 | 
| 
 | 
   171 use Exporter;
 | 
| 
 | 
   172 
 | 
| 
 | 
   173 use vars qw (@ISA @EXPORT_OK %EXPORT_TAGS $ID $version @RE_available $Revision);
 | 
| 
 | 
   174 
 | 
| 
 | 
   175 @ISA         = qw(Bio::Root::Root Exporter);
 | 
| 
 | 
   176 @EXPORT_OK   = qw(@RE_available);
 | 
| 
 | 
   177 %EXPORT_TAGS = ( std => [qw(@RE_available)] );
 | 
| 
 | 
   178 
 | 
| 
 | 
   179 $ID = 'Bio::Tools::RestrictionEnzyme';
 | 
| 
 | 
   180 $version = 0.04;
 | 
| 
 | 
   181 $Revision = '$Id: RestrictionEnzyme.pm,v 1.25.2.1 2003/06/29 00:53:20 jason Exp $';  #'
 | 
| 
 | 
   182 
 | 
| 
 | 
   183 # Generated from REBASE version 208 (strider format), dated Aug 1 2002
 | 
| 
 | 
   184 # using scripts/contributed/rebase2list.pl
 | 
| 
 | 
   185 # Syntax: RE-name => 'SITE CUTS-AT' where SITE and CUTS-AT are separated 
 | 
| 
 | 
   186 # by a space.
 | 
| 
 | 
   187 
 | 
| 
 | 
   188 my %RE = (
 | 
| 
 | 
   189  'AasI'	=> 'GACNNNNNNGTC 7',
 | 
| 
 | 
   190  'AatI'	=> 'AGGCCT 3',
 | 
| 
 | 
   191  'AatII'	=> 'GACGTC 5',
 | 
| 
 | 
   192  'AauI'	=> 'TGTACA 1',
 | 
| 
 | 
   193  'AccI'	=> 'GTMKAC 2',
 | 
| 
 | 
   194  'AccII'	=> 'CGCG 2',
 | 
| 
 | 
   195  'AccIII'	=> 'TCCGGA 1',
 | 
| 
 | 
   196  'Acc16I'	=> 'TGCGCA 3',
 | 
| 
 | 
   197  'Acc65I'	=> 'GGTACC 1',
 | 
| 
 | 
   198  'Acc113I'	=> 'AGTACT 3',
 | 
| 
 | 
   199  'AccB1I'	=> 'GGYRCC 1',
 | 
| 
 | 
   200  'AccB7I'	=> 'CCANNNNNTGG 7',
 | 
| 
 | 
   201  'AclI'	=> 'AACGTT 2',
 | 
| 
 | 
   202  'AcsI'	=> 'RAATTY 1',
 | 
| 
 | 
   203  'AcvI'	=> 'CACGTG 3',
 | 
| 
 | 
   204  'AcyI'	=> 'GRCGYC 2',
 | 
| 
 | 
   205  'AdeI'	=> 'CACNNNGTG 6',
 | 
| 
 | 
   206  'AfaI'	=> 'GTAC 2',
 | 
| 
 | 
   207  'AfeI'	=> 'AGCGCT 3',
 | 
| 
 | 
   208  'AflI'	=> 'GGWCC 1',
 | 
| 
 | 
   209  'AflII'	=> 'CTTAAG 1',
 | 
| 
 | 
   210  'AflIII'	=> 'ACRYGT 1',
 | 
| 
 | 
   211  'AgeI'	=> 'ACCGGT 1',
 | 
| 
 | 
   212  'AhaIII'	=> 'TTTAAA 3',
 | 
| 
 | 
   213  'AhdI'	=> 'GACNNNNNGTC 6',
 | 
| 
 | 
   214  'AhlI'	=> 'ACTAGT 1',
 | 
| 
 | 
   215  'AleI'	=> 'CACNNNNGTG 5',
 | 
| 
 | 
   216  'AluI'	=> 'AGCT 2',
 | 
| 
 | 
   217  'Alw21I'	=> 'GWGCWC 5',
 | 
| 
 | 
   218  'Alw44I'	=> 'GTGCAC 1',
 | 
| 
 | 
   219  'AlwNI'	=> 'CAGNNNCTG 6',
 | 
| 
 | 
   220  'Ama87I'	=> 'CYCGRG 1',
 | 
| 
 | 
   221  'AocI'	=> 'CCTNAGG 2',
 | 
| 
 | 
   222  'Aor51HI'	=> 'AGCGCT 3',
 | 
| 
 | 
   223  'ApaI'	=> 'GGGCCC 5',
 | 
| 
 | 
   224  'ApaBI'	=> 'GCANNNNNTGC 8',
 | 
| 
 | 
   225  'ApaLI'	=> 'GTGCAC 1',
 | 
| 
 | 
   226  'ApoI'	=> 'RAATTY 1',
 | 
| 
 | 
   227  'AscI'	=> 'GGCGCGCC 2',
 | 
| 
 | 
   228  'AseI'	=> 'ATTAAT 2',
 | 
| 
 | 
   229  'AsiAI'	=> 'ACCGGT 1',
 | 
| 
 | 
   230  'AsiSI'	=> 'GCGATCGC 5',
 | 
| 
 | 
   231  'AsnI'	=> 'ATTAAT 2',
 | 
| 
 | 
   232  'AspI'	=> 'GACNNNGTC 4',
 | 
| 
 | 
   233  'Asp700I'	=> 'GAANNNNTTC 5',
 | 
| 
 | 
   234  'Asp718I'	=> 'GGTACC 1',
 | 
| 
 | 
   235  'AspEI'	=> 'GACNNNNNGTC 6',
 | 
| 
 | 
   236  'AspHI'	=> 'GWGCWC 5',
 | 
| 
 | 
   237  'AspLEI'	=> 'GCGC 3',
 | 
| 
 | 
   238  'AspS9I'	=> 'GGNCC 1',
 | 
| 
 | 
   239  'AsuI'	=> 'GGNCC 1',
 | 
| 
 | 
   240  'AsuII'	=> 'TTCGAA 2',
 | 
| 
 | 
   241  'AsuC2I'	=> 'CCSGG 2',
 | 
| 
 | 
   242  'AsuNHI'	=> 'GCTAGC 1',
 | 
| 
 | 
   243  'AvaI'	=> 'CYCGRG 1',
 | 
| 
 | 
   244  'AvaII'	=> 'GGWCC 1',
 | 
| 
 | 
   245  'AviII'	=> 'TGCGCA 3',
 | 
| 
 | 
   246  'AvrII'	=> 'CCTAGG 1',
 | 
| 
 | 
   247  'AxyI'	=> 'CCTNAGG 2',
 | 
| 
 | 
   248  'BalI'	=> 'TGGCCA 3',
 | 
| 
 | 
   249  'BamHI'	=> 'GGATCC 1',
 | 
| 
 | 
   250  'BanI'	=> 'GGYRCC 1',
 | 
| 
 | 
   251  'BanII'	=> 'GRGCYC 5',
 | 
| 
 | 
   252  'BanIII'	=> 'ATCGAT 2',
 | 
| 
 | 
   253  'BbeI'	=> 'GGCGCC 5',
 | 
| 
 | 
   254  'BbrPI'	=> 'CACGTG 3',
 | 
| 
 | 
   255  'BbuI'	=> 'GCATGC 5',
 | 
| 
 | 
   256  'Bbv12I'	=> 'GWGCWC 5',
 | 
| 
 | 
   257  'BclI'	=> 'TGATCA 1',
 | 
| 
 | 
   258  'BcnI'	=> 'CCSGG 2',
 | 
| 
 | 
   259  'BcoI'	=> 'CYCGRG 1',
 | 
| 
 | 
   260  'BcuI'	=> 'ACTAGT 1',
 | 
| 
 | 
   261  'BetI'	=> 'WCCGGW 1',
 | 
| 
 | 
   262  'BfaI'	=> 'CTAG 1',
 | 
| 
 | 
   263  'BfmI'	=> 'CTRYAG 1',
 | 
| 
 | 
   264  'BfrI'	=> 'CTTAAG 1',
 | 
| 
 | 
   265  'BfrBI'	=> 'ATGCAT 3',
 | 
| 
 | 
   266  'BfuCI'	=> 'GATC 0',
 | 
| 
 | 
   267  'BglI'	=> 'GCCNNNNNGGC 7',
 | 
| 
 | 
   268  'BglII'	=> 'AGATCT 1',
 | 
| 
 | 
   269  'BlnI'	=> 'CCTAGG 1',
 | 
| 
 | 
   270  'BloHII'	=> 'CTGCAG 5',
 | 
| 
 | 
   271  'BlpI'	=> 'GCTNAGC 2',
 | 
| 
 | 
   272  'Bme18I'	=> 'GGWCC 1',
 | 
| 
 | 
   273  'Bme1390I'	=> 'CCNGG 2',
 | 
| 
 | 
   274  'Bme1580I'	=> 'GKGCMC 5',
 | 
| 
 | 
   275  'BmtI'	=> 'GCTAGC 5',
 | 
| 
 | 
   276  'BmyI'	=> 'GDGCHC 5',
 | 
| 
 | 
   277  'BoxI'	=> 'GACNNNNGTC 5',
 | 
| 
 | 
   278  'Bpu14I'	=> 'TTCGAA 2',
 | 
| 
 | 
   279  'Bpu1102I'	=> 'GCTNAGC 2',
 | 
| 
 | 
   280  'Bsa29I'	=> 'ATCGAT 2',
 | 
| 
 | 
   281  'BsaAI'	=> 'YACGTR 3',
 | 
| 
 | 
   282  'BsaBI'	=> 'GATNNNNATC 5',
 | 
| 
 | 
   283  'BsaHI'	=> 'GRCGYC 2',
 | 
| 
 | 
   284  'BsaJI'	=> 'CCNNGG 1',
 | 
| 
 | 
   285  'BsaOI'	=> 'CGRYCG 4',
 | 
| 
 | 
   286  'BsaWI'	=> 'WCCGGW 1',
 | 
| 
 | 
   287  'BscI'	=> 'ATCGAT 2',
 | 
| 
 | 
   288  'Bsc4I'	=> 'CCNNNNNNNGG 7',
 | 
| 
 | 
   289  'BscBI'	=> 'GGNNCC 3',
 | 
| 
 | 
   290  'BscFI'	=> 'GATC 0',
 | 
| 
 | 
   291  'Bse8I'	=> 'GATNNNNATC 5',
 | 
| 
 | 
   292  'Bse21I'	=> 'CCTNAGG 2',
 | 
| 
 | 
   293  'Bse118I'	=> 'RCCGGY 1',
 | 
| 
 | 
   294  'BseAI'	=> 'TCCGGA 1',
 | 
| 
 | 
   295  'BseBI'	=> 'CCWGG 2',
 | 
| 
 | 
   296  'BseCI'	=> 'ATCGAT 2',
 | 
| 
 | 
   297  'BseDI'	=> 'CCNNGG 1',
 | 
| 
 | 
   298  'BseJI'	=> 'GATNNNNATC 5',
 | 
| 
 | 
   299  'BseLI'	=> 'CCNNNNNNNGG 7',
 | 
| 
 | 
   300  'BsePI'	=> 'GCGCGC 1',
 | 
| 
 | 
   301  'BseSI'	=> 'GKGCMC 5',
 | 
| 
 | 
   302  'BseX3I'	=> 'CGGCCG 1',
 | 
| 
 | 
   303  'BshI'	=> 'GGCC 2',
 | 
| 
 | 
   304  'Bsh1236I'	=> 'CGCG 2',
 | 
| 
 | 
   305  'Bsh1285I'	=> 'CGRYCG 4',
 | 
| 
 | 
   306  'BshFI'	=> 'GGCC 2',
 | 
| 
 | 
   307  'BshNI'	=> 'GGYRCC 1',
 | 
| 
 | 
   308  'BshTI'	=> 'ACCGGT 1',
 | 
| 
 | 
   309  'BsiBI'	=> 'GATNNNNATC 5',
 | 
| 
 | 
   310  'BsiCI'	=> 'TTCGAA 2',
 | 
| 
 | 
   311  'BsiEI'	=> 'CGRYCG 4',
 | 
| 
 | 
   312  'BsiHKAI'	=> 'GWGCWC 5',
 | 
| 
 | 
   313  'BsiHKCI'	=> 'CYCGRG 1',
 | 
| 
 | 
   314  'BsiLI'	=> 'CCWGG 2',
 | 
| 
 | 
   315  'BsiMI'	=> 'TCCGGA 1',
 | 
| 
 | 
   316  'BsiQI'	=> 'TGATCA 1',
 | 
| 
 | 
   317  'BsiSI'	=> 'CCGG 1',
 | 
| 
 | 
   318  'BsiWI'	=> 'CGTACG 1',
 | 
| 
 | 
   319  'BsiXI'	=> 'ATCGAT 2',
 | 
| 
 | 
   320  'BsiYI'	=> 'CCNNNNNNNGG 7',
 | 
| 
 | 
   321  'BsiZI'	=> 'GGNCC 1',
 | 
| 
 | 
   322  'BslI'	=> 'CCNNNNNNNGG 7',
 | 
| 
 | 
   323  'BsoBI'	=> 'CYCGRG 1',
 | 
| 
 | 
   324  'Bsp13I'	=> 'TCCGGA 1',
 | 
| 
 | 
   325  'Bsp19I'	=> 'CCATGG 1',
 | 
| 
 | 
   326  'Bsp68I'	=> 'TCGCGA 3',
 | 
| 
 | 
   327  'Bsp106I'	=> 'ATCGAT 2',
 | 
| 
 | 
   328  'Bsp119I'	=> 'TTCGAA 2',
 | 
| 
 | 
   329  'Bsp120I'	=> 'GGGCCC 1',
 | 
| 
 | 
   330  'Bsp143I'	=> 'GATC 0',
 | 
| 
 | 
   331  'Bsp143II'	=> 'RGCGCY 5',
 | 
| 
 | 
   332  'Bsp1286I'	=> 'GDGCHC 5',
 | 
| 
 | 
   333  'Bsp1407I'	=> 'TGTACA 1',
 | 
| 
 | 
   334  'Bsp1720I'	=> 'GCTNAGC 2',
 | 
| 
 | 
   335  'BspA2I'	=> 'CCTAGG 1',
 | 
| 
 | 
   336  'BspCI'	=> 'CGATCG 4',
 | 
| 
 | 
   337  'BspDI'	=> 'ATCGAT 2',
 | 
| 
 | 
   338  'BspEI'	=> 'TCCGGA 1',
 | 
| 
 | 
   339  'BspHI'	=> 'TCATGA 1',
 | 
| 
 | 
   340  'BspLI'	=> 'GGNNCC 3',
 | 
| 
 | 
   341  'BspLU11I'	=> 'ACATGT 1',
 | 
| 
 | 
   342  'BspMII'	=> 'TCCGGA 1',
 | 
| 
 | 
   343  'BspTI'	=> 'CTTAAG 1',
 | 
| 
 | 
   344  'BspT104I'	=> 'TTCGAA 2',
 | 
| 
 | 
   345  'BspT107I'	=> 'GGYRCC 1',
 | 
| 
 | 
   346  'BspXI'	=> 'ATCGAT 2',
 | 
| 
 | 
   347  'BsrBRI'	=> 'GATNNNNATC 5',
 | 
| 
 | 
   348  'BsrFI'	=> 'RCCGGY 1',
 | 
| 
 | 
   349  'BsrGI'	=> 'TGTACA 1',
 | 
| 
 | 
   350  'BssAI'	=> 'RCCGGY 1',
 | 
| 
 | 
   351  'BssECI'	=> 'CCNNGG 1',
 | 
| 
 | 
   352  'BssHI'	=> 'CTCGAG 1',
 | 
| 
 | 
   353  'BssHII'	=> 'GCGCGC 1',
 | 
| 
 | 
   354  'BssKI'	=> 'CCNGG 0',
 | 
| 
 | 
   355  'BssNAI'	=> 'GTATAC 3',
 | 
| 
 | 
   356  'BssT1I'	=> 'CCWWGG 1',
 | 
| 
 | 
   357  'Bst98I'	=> 'CTTAAG 1',
 | 
| 
 | 
   358  'Bst1107I'	=> 'GTATAC 3',
 | 
| 
 | 
   359  'BstACI'	=> 'GRCGYC 2',
 | 
| 
 | 
   360  'BstAPI'	=> 'GCANNNNNTGC 7',
 | 
| 
 | 
   361  'BstBI'	=> 'TTCGAA 2',
 | 
| 
 | 
   362  'BstBAI'	=> 'YACGTR 3',
 | 
| 
 | 
   363  'Bst4CI'	=> 'ACNGT 3',
 | 
| 
 | 
   364  'BstC8I'	=> 'GCNNGC 3',
 | 
| 
 | 
   365  'BstDEI'	=> 'CTNAG 1',
 | 
| 
 | 
   366  'BstDSI'	=> 'CCRYGG 1',
 | 
| 
 | 
   367  'BstEII'	=> 'GGTNACC 1',
 | 
| 
 | 
   368  'BstENI'	=> 'CCTNNNNNAGG 5',
 | 
| 
 | 
   369  'BstENII'	=> 'GATC 0',
 | 
| 
 | 
   370  'BstFNI'	=> 'CGCG 2',
 | 
| 
 | 
   371  'BstH2I'	=> 'RGCGCY 5',
 | 
| 
 | 
   372  'BstHHI'	=> 'GCGC 3',
 | 
| 
 | 
   373  'BstHPI'	=> 'GTTAAC 3',
 | 
| 
 | 
   374  'BstKTI'	=> 'GATC 3',
 | 
| 
 | 
   375  'BstMAI'	=> 'CTGCAG 5',
 | 
| 
 | 
   376  'BstMCI'	=> 'CGRYCG 4',
 | 
| 
 | 
   377  'BstMWI'	=> 'GCNNNNNNNGC 7',
 | 
| 
 | 
   378  'BstNI'	=> 'CCWGG 2',
 | 
| 
 | 
   379  'BstNSI'	=> 'RCATGY 5',
 | 
| 
 | 
   380  'BstOI'	=> 'CCWGG 2',
 | 
| 
 | 
   381  'BstPI'	=> 'GGTNACC 1',
 | 
| 
 | 
   382  'BstPAI'	=> 'GACNNNNGTC 5',
 | 
| 
 | 
   383  'BstSCI'	=> 'CCNGG 0',
 | 
| 
 | 
   384  'BstSFI'	=> 'CTRYAG 1',
 | 
| 
 | 
   385  'BstSNI'	=> 'TACGTA 3',
 | 
| 
 | 
   386  'BstUI'	=> 'CGCG 2',
 | 
| 
 | 
   387  'Bst2UI'	=> 'CCWGG 2',
 | 
| 
 | 
   388  'BstXI'	=> 'CCANNNNNNTGG 8',
 | 
| 
 | 
   389  'BstX2I'	=> 'RGATCY 1',
 | 
| 
 | 
   390  'BstYI'	=> 'RGATCY 1',
 | 
| 
 | 
   391  'BstZI'	=> 'CGGCCG 1',
 | 
| 
 | 
   392  'BstZ17I'	=> 'GTATAC 3',
 | 
| 
 | 
   393  'Bsu15I'	=> 'ATCGAT 2',
 | 
| 
 | 
   394  'Bsu36I'	=> 'CCTNAGG 2',
 | 
| 
 | 
   395  'BsuRI'	=> 'GGCC 2',
 | 
| 
 | 
   396  'BsuTUI'	=> 'ATCGAT 2',
 | 
| 
 | 
   397  'BtgI'	=> 'CCRYGG 1',
 | 
| 
 | 
   398  'BthCI'	=> 'GCNGC 4',
 | 
| 
 | 
   399  'Cac8I'	=> 'GCNNGC 3',
 | 
| 
 | 
   400  'CaiI'	=> 'CAGNNNCTG 6',
 | 
| 
 | 
   401  'CauII'	=> 'CCSGG 2',
 | 
| 
 | 
   402  'CciNI'	=> 'GCGGCCGC 2',
 | 
| 
 | 
   403  'CelII'	=> 'GCTNAGC 2',
 | 
| 
 | 
   404  'CfoI'	=> 'GCGC 3',
 | 
| 
 | 
   405  'CfrI'	=> 'YGGCCR 1',
 | 
| 
 | 
   406  'Cfr9I'	=> 'CCCGGG 1',
 | 
| 
 | 
   407  'Cfr10I'	=> 'RCCGGY 1',
 | 
| 
 | 
   408  'Cfr13I'	=> 'GGNCC 1',
 | 
| 
 | 
   409  'Cfr42I'	=> 'CCGCGG 4',
 | 
| 
 | 
   410  'ChaI'	=> 'GATC 4',
 | 
| 
 | 
   411  'ClaI'	=> 'ATCGAT 2',
 | 
| 
 | 
   412  'CpoI'	=> 'CGGWCCG 2',
 | 
| 
 | 
   413  'CspI'	=> 'CGGWCCG 2',
 | 
| 
 | 
   414  'Csp6I'	=> 'GTAC 1',
 | 
| 
 | 
   415  'Csp45I'	=> 'TTCGAA 2',
 | 
| 
 | 
   416  'CspAI'	=> 'ACCGGT 1',
 | 
| 
 | 
   417  'CviAII'	=> 'CATG 1',
 | 
| 
 | 
   418  'CviJI'	=> 'RGCY 2',
 | 
| 
 | 
   419  'CviRI'	=> 'TGCA 2',
 | 
| 
 | 
   420  'CviTI'	=> 'RGCY 2',
 | 
| 
 | 
   421  'CvnI'	=> 'CCTNAGG 2',
 | 
| 
 | 
   422  'DdeI'	=> 'CTNAG 1',
 | 
| 
 | 
   423  'DpnI'	=> 'GATC 2',
 | 
| 
 | 
   424  'DpnII'	=> 'GATC 0',
 | 
| 
 | 
   425  'DraI'	=> 'TTTAAA 3',
 | 
| 
 | 
   426  'DraII'	=> 'RGGNCCY 2',
 | 
| 
 | 
   427  'DraIII'	=> 'CACNNNGTG 6',
 | 
| 
 | 
   428  'DrdI'	=> 'GACNNNNNNGTC 7',
 | 
| 
 | 
   429  'DsaI'	=> 'CCRYGG 1',
 | 
| 
 | 
   430  'DseDI'	=> 'GACNNNNNNGTC 7',
 | 
| 
 | 
   431  'EaeI'	=> 'YGGCCR 1',
 | 
| 
 | 
   432  'EagI'	=> 'CGGCCG 1',
 | 
| 
 | 
   433  'Eam1105I'	=> 'GACNNNNNGTC 6',
 | 
| 
 | 
   434  'Ecl136II'	=> 'GAGCTC 3',
 | 
| 
 | 
   435  'EclHKI'	=> 'GACNNNNNGTC 6',
 | 
| 
 | 
   436  'EclXI'	=> 'CGGCCG 1',
 | 
| 
 | 
   437  'Eco24I'	=> 'GRGCYC 5',
 | 
| 
 | 
   438  'Eco32I'	=> 'GATATC 3',
 | 
| 
 | 
   439  'Eco47I'	=> 'GGWCC 1',
 | 
| 
 | 
   440  'Eco47III'	=> 'AGCGCT 3',
 | 
| 
 | 
   441  'Eco52I'	=> 'CGGCCG 1',
 | 
| 
 | 
   442  'Eco72I'	=> 'CACGTG 3',
 | 
| 
 | 
   443  'Eco81I'	=> 'CCTNAGG 2',
 | 
| 
 | 
   444  'Eco88I'	=> 'CYCGRG 1',
 | 
| 
 | 
   445  'Eco91I'	=> 'GGTNACC 1',
 | 
| 
 | 
   446  'Eco105I'	=> 'TACGTA 3',
 | 
| 
 | 
   447  'Eco130I'	=> 'CCWWGG 1',
 | 
| 
 | 
   448  'Eco147I'	=> 'AGGCCT 3',
 | 
| 
 | 
   449  'EcoHI'	=> 'CCSGG 0',
 | 
| 
 | 
   450  'EcoICRI'	=> 'GAGCTC 3',
 | 
| 
 | 
   451  'EcoNI'	=> 'CCTNNNNNAGG 5',
 | 
| 
 | 
   452  'EcoO65I'	=> 'GGTNACC 1',
 | 
| 
 | 
   453  'EcoO109I'	=> 'RGGNCCY 2',
 | 
| 
 | 
   454  'EcoRI'	=> 'GAATTC 1',
 | 
| 
 | 
   455  'EcoRII'	=> 'CCWGG 0',
 | 
| 
 | 
   456  'EcoRV'	=> 'GATATC 3',
 | 
| 
 | 
   457  'EcoT14I'	=> 'CCWWGG 1',
 | 
| 
 | 
   458  'EcoT22I'	=> 'ATGCAT 5',
 | 
| 
 | 
   459  'EcoT38I'	=> 'GRGCYC 5',
 | 
| 
 | 
   460  'EgeI'	=> 'GGCGCC 3',
 | 
| 
 | 
   461  'EheI'	=> 'GGCGCC 3',
 | 
| 
 | 
   462  'ErhI'	=> 'CCWWGG 1',
 | 
| 
 | 
   463  'EsaBC3I'	=> 'TCGA 2',
 | 
| 
 | 
   464  'EspI'	=> 'GCTNAGC 2',
 | 
| 
 | 
   465  'FatI'	=> 'CATG 0',
 | 
| 
 | 
   466  'FauNDI'	=> 'CATATG 2',
 | 
| 
 | 
   467  'FbaI'	=> 'TGATCA 1',
 | 
| 
 | 
   468  'FblI'	=> 'GTMKAC 2',
 | 
| 
 | 
   469  'FmuI'	=> 'GGNCC 4',
 | 
| 
 | 
   470  'FnuDII'	=> 'CGCG 2',
 | 
| 
 | 
   471  'Fnu4HI'	=> 'GCNGC 2',
 | 
| 
 | 
   472  'FriOI'	=> 'GRGCYC 5',
 | 
| 
 | 
   473  'FseI'	=> 'GGCCGGCC 6',
 | 
| 
 | 
   474  'FspI'	=> 'TGCGCA 3',
 | 
| 
 | 
   475  'FspAI'	=> 'RTGCGCAY 4',
 | 
| 
 | 
   476  'Fsp4HI'	=> 'GCNGC 2',
 | 
| 
 | 
   477  'FunI'	=> 'AGCGCT 3',
 | 
| 
 | 
   478  'FunII'	=> 'GAATTC 1',
 | 
| 
 | 
   479  'HaeI'	=> 'WGGCCW 3',
 | 
| 
 | 
   480  'HaeII'	=> 'RGCGCY 5',
 | 
| 
 | 
   481  'HaeIII'	=> 'GGCC 2',
 | 
| 
 | 
   482  'HapII'	=> 'CCGG 1',
 | 
| 
 | 
   483  'HgiAI'	=> 'GWGCWC 5',
 | 
| 
 | 
   484  'HgiCI'	=> 'GGYRCC 1',
 | 
| 
 | 
   485  'HgiJII'	=> 'GRGCYC 5',
 | 
| 
 | 
   486  'HhaI'	=> 'GCGC 3',
 | 
| 
 | 
   487  'Hin1I'	=> 'GRCGYC 2',
 | 
| 
 | 
   488  'Hin6I'	=> 'GCGC 1',
 | 
| 
 | 
   489  'HinP1I'	=> 'GCGC 1',
 | 
| 
 | 
   490  'HincII'	=> 'GTYRAC 3',
 | 
| 
 | 
   491  'HindII'	=> 'GTYRAC 3',
 | 
| 
 | 
   492  'HindIII'	=> 'AAGCTT 1',
 | 
| 
 | 
   493  'HinfI'	=> 'GANTC 1',
 | 
| 
 | 
   494  'HpaI'	=> 'GTTAAC 3',
 | 
| 
 | 
   495  'HpaII'	=> 'CCGG 1',
 | 
| 
 | 
   496  'Hpy8I'	=> 'GTNNAC 3',
 | 
| 
 | 
   497  'Hpy99I'	=> 'CGWCG 5',
 | 
| 
 | 
   498  'Hpy178III'	=> 'TCNNGA 2',
 | 
| 
 | 
   499  'Hpy188I'	=> 'TCNGA 3',
 | 
| 
 | 
   500  'Hpy188III'	=> 'TCNNGA 2',
 | 
| 
 | 
   501  'HpyCH4I'	=> 'CATG 3',
 | 
| 
 | 
   502  'HpyCH4III'	=> 'ACNGT 3',
 | 
| 
 | 
   503  'HpyCH4IV'	=> 'ACGT 1',
 | 
| 
 | 
   504  'HpyCH4V'	=> 'TGCA 2',
 | 
| 
 | 
   505  'HpyF10VI'	=> 'GCNNNNNNNGC 8',
 | 
| 
 | 
   506  'Hsp92I'	=> 'GRCGYC 2',
 | 
| 
 | 
   507  'Hsp92II'	=> 'CATG 4',
 | 
| 
 | 
   508  'HspAI'	=> 'GCGC 1',
 | 
| 
 | 
   509  'ItaI'	=> 'GCNGC 2',
 | 
| 
 | 
   510  'KasI'	=> 'GGCGCC 1',
 | 
| 
 | 
   511  'KpnI'	=> 'GGTACC 5',
 | 
| 
 | 
   512  'Kpn2I'	=> 'TCCGGA 1',
 | 
| 
 | 
   513  'KspI'	=> 'CCGCGG 4',
 | 
| 
 | 
   514  'Ksp22I'	=> 'TGATCA 1',
 | 
| 
 | 
   515  'KspAI'	=> 'GTTAAC 3',
 | 
| 
 | 
   516  'Kzo9I'	=> 'GATC 0',
 | 
| 
 | 
   517  'LpnI'	=> 'RGCGCY 3',
 | 
| 
 | 
   518  'LspI'	=> 'TTCGAA 2',
 | 
| 
 | 
   519  'MabI'	=> 'ACCWGGT 1',
 | 
| 
 | 
   520  'MaeI'	=> 'CTAG 1',
 | 
| 
 | 
   521  'MaeII'	=> 'ACGT 1',
 | 
| 
 | 
   522  'MaeIII'	=> 'GTNAC 0',
 | 
| 
 | 
   523  'MamI'	=> 'GATNNNNATC 5',
 | 
| 
 | 
   524  'MboI'	=> 'GATC 0',
 | 
| 
 | 
   525  'McrI'	=> 'CGRYCG 4',
 | 
| 
 | 
   526  'MfeI'	=> 'CAATTG 1',
 | 
| 
 | 
   527  'MflI'	=> 'RGATCY 1',
 | 
| 
 | 
   528  'MhlI'	=> 'GDGCHC 5',
 | 
| 
 | 
   529  'MlsI'	=> 'TGGCCA 3',
 | 
| 
 | 
   530  'MluI'	=> 'ACGCGT 1',
 | 
| 
 | 
   531  'MluNI'	=> 'TGGCCA 3',
 | 
| 
 | 
   532  'Mly113I'	=> 'GGCGCC 2',
 | 
| 
 | 
   533  'Mph1103I'	=> 'ATGCAT 5',
 | 
| 
 | 
   534  'MroI'	=> 'TCCGGA 1',
 | 
| 
 | 
   535  'MroNI'	=> 'GCCGGC 1',
 | 
| 
 | 
   536  'MroXI'	=> 'GAANNNNTTC 5',
 | 
| 
 | 
   537  'MscI'	=> 'TGGCCA 3',
 | 
| 
 | 
   538  'MseI'	=> 'TTAA 1',
 | 
| 
 | 
   539  'MslI'	=> 'CAYNNNNRTG 5',
 | 
| 
 | 
   540  'MspI'	=> 'CCGG 1',
 | 
| 
 | 
   541  'Msp20I'	=> 'TGGCCA 3',
 | 
| 
 | 
   542  'MspA1I'	=> 'CMGCKG 3',
 | 
| 
 | 
   543  'MspCI'	=> 'CTTAAG 1',
 | 
| 
 | 
   544  'MspR9I'	=> 'CCNGG 2',
 | 
| 
 | 
   545  'MssI'	=> 'GTTTAAAC 4',
 | 
| 
 | 
   546  'MstI'	=> 'TGCGCA 3',
 | 
| 
 | 
   547  'MunI'	=> 'CAATTG 1',
 | 
| 
 | 
   548  'MvaI'	=> 'CCWGG 2',
 | 
| 
 | 
   549  'MvnI'	=> 'CGCG 2',
 | 
| 
 | 
   550  'MwoI'	=> 'GCNNNNNNNGC 7',
 | 
| 
 | 
   551  'NaeI'	=> 'GCCGGC 3',
 | 
| 
 | 
   552  'NarI'	=> 'GGCGCC 2',
 | 
| 
 | 
   553  'NciI'	=> 'CCSGG 2',
 | 
| 
 | 
   554  'NcoI'	=> 'CCATGG 1',
 | 
| 
 | 
   555  'NdeI'	=> 'CATATG 2',
 | 
| 
 | 
   556  'NdeII'	=> 'GATC 0',
 | 
| 
 | 
   557  'NgoAIV'	=> 'GCCGGC 1',
 | 
| 
 | 
   558  'NgoMIV'	=> 'GCCGGC 1',
 | 
| 
 | 
   559  'NheI'	=> 'GCTAGC 1',
 | 
| 
 | 
   560  'NlaIII'	=> 'CATG 4',
 | 
| 
 | 
   561  'NlaIV'	=> 'GGNNCC 3',
 | 
| 
 | 
   562  'Nli3877I'	=> 'CYCGRG 5',
 | 
| 
 | 
   563  'NmuCI'	=> 'GTSAC 0',
 | 
| 
 | 
   564  'NotI'	=> 'GCGGCCGC 2',
 | 
| 
 | 
   565  'NruI'	=> 'TCGCGA 3',
 | 
| 
 | 
   566  'NruGI'	=> 'GACNNNNNGTC 6',
 | 
| 
 | 
   567  'NsbI'	=> 'TGCGCA 3',
 | 
| 
 | 
   568  'NsiI'	=> 'ATGCAT 5',
 | 
| 
 | 
   569  'NspI'	=> 'RCATGY 5',
 | 
| 
 | 
   570  'NspIII'	=> 'CYCGRG 1',
 | 
| 
 | 
   571  'NspV'	=> 'TTCGAA 2',
 | 
| 
 | 
   572  'NspBII'	=> 'CMGCKG 3',
 | 
| 
 | 
   573  'OliI'	=> 'CACNNNNGTG 5',
 | 
| 
 | 
   574  'PacI'	=> 'TTAATTAA 5',
 | 
| 
 | 
   575  'PaeI'	=> 'GCATGC 5',
 | 
| 
 | 
   576  'PaeR7I'	=> 'CTCGAG 1',
 | 
| 
 | 
   577  'PagI'	=> 'TCATGA 1',
 | 
| 
 | 
   578  'PalI'	=> 'GGCC 2',
 | 
| 
 | 
   579  'PauI'	=> 'GCGCGC 1',
 | 
| 
 | 
   580  'PceI'	=> 'AGGCCT 3',
 | 
| 
 | 
   581  'PciI'	=> 'ACATGT 1',
 | 
| 
 | 
   582  'PdiI'	=> 'GCCGGC 3',
 | 
| 
 | 
   583  'PdmI'	=> 'GAANNNNTTC 5',
 | 
| 
 | 
   584  'Pfl23II'	=> 'CGTACG 1',
 | 
| 
 | 
   585  'PflBI'	=> 'CCANNNNNTGG 7',
 | 
| 
 | 
   586  'PflFI'	=> 'GACNNNGTC 4',
 | 
| 
 | 
   587  'PflMI'	=> 'CCANNNNNTGG 7',
 | 
| 
 | 
   588  'PfoI'	=> 'TCCNGGA 1',
 | 
| 
 | 
   589  'PinAI'	=> 'ACCGGT 1',
 | 
| 
 | 
   590  'Ple19I'	=> 'CGATCG 4',
 | 
| 
 | 
   591  'PmaCI'	=> 'CACGTG 3',
 | 
| 
 | 
   592  'PmeI'	=> 'GTTTAAAC 4',
 | 
| 
 | 
   593  'PmlI'	=> 'CACGTG 3',
 | 
| 
 | 
   594  'Ppu10I'	=> 'ATGCAT 1',
 | 
| 
 | 
   595  'PpuMI'	=> 'RGGWCCY 2',
 | 
| 
 | 
   596  'PpuXI'	=> 'RGGWCCY 2',
 | 
| 
 | 
   597  'PshAI'	=> 'GACNNNNGTC 5',
 | 
| 
 | 
   598  'PshBI'	=> 'ATTAAT 2',
 | 
| 
 | 
   599  'PsiI'	=> 'TTATAA 3',
 | 
| 
 | 
   600  'Psp03I'	=> 'GGWCC 4',
 | 
| 
 | 
   601  'Psp5II'	=> 'RGGWCCY 2',
 | 
| 
 | 
   602  'Psp6I'	=> 'CCWGG 0',
 | 
| 
 | 
   603  'Psp1406I'	=> 'AACGTT 2',
 | 
| 
 | 
   604  'PspAI'	=> 'CCCGGG 1',
 | 
| 
 | 
   605  'Psp124BI'	=> 'GAGCTC 5',
 | 
| 
 | 
   606  'PspEI'	=> 'GGTNACC 1',
 | 
| 
 | 
   607  'PspGI'	=> 'CCWGG 0',
 | 
| 
 | 
   608  'PspLI'	=> 'CGTACG 1',
 | 
| 
 | 
   609  'PspN4I'	=> 'GGNNCC 3',
 | 
| 
 | 
   610  'PspOMI'	=> 'GGGCCC 1',
 | 
| 
 | 
   611  'PspPI'	=> 'GGNCC 1',
 | 
| 
 | 
   612  'PspPPI'	=> 'RGGWCCY 2',
 | 
| 
 | 
   613  'PssI'	=> 'RGGNCCY 5',
 | 
| 
 | 
   614  'PstI'	=> 'CTGCAG 5',
 | 
| 
 | 
   615  'PsuI'	=> 'RGATCY 1',
 | 
| 
 | 
   616  'PsyI'	=> 'GACNNNGTC 4',
 | 
| 
 | 
   617  'PvuI'	=> 'CGATCG 4',
 | 
| 
 | 
   618  'PvuII'	=> 'CAGCTG 3',
 | 
| 
 | 
   619  'RcaI'	=> 'TCATGA 1',
 | 
| 
 | 
   620  'RsaI'	=> 'GTAC 2',
 | 
| 
 | 
   621  'RsrII'	=> 'CGGWCCG 2',
 | 
| 
 | 
   622  'Rsr2I'	=> 'CGGWCCG 2',
 | 
| 
 | 
   623  'SacI'	=> 'GAGCTC 5',
 | 
| 
 | 
   624  'SacII'	=> 'CCGCGG 4',
 | 
| 
 | 
   625  'SalI'	=> 'GTCGAC 1',
 | 
| 
 | 
   626  'SanDI'	=> 'GGGWCCC 2',
 | 
| 
 | 
   627  'SatI'	=> 'GCNGC 2',
 | 
| 
 | 
   628  'SauI'	=> 'CCTNAGG 2',
 | 
| 
 | 
   629  'Sau96I'	=> 'GGNCC 1',
 | 
| 
 | 
   630  'Sau3AI'	=> 'GATC 0',
 | 
| 
 | 
   631  'SbfI'	=> 'CCTGCAGG 6',
 | 
| 
 | 
   632  'ScaI'	=> 'AGTACT 3',
 | 
| 
 | 
   633  'SciI'	=> 'CTCGAG 3',
 | 
| 
 | 
   634  'ScrFI'	=> 'CCNGG 2',
 | 
| 
 | 
   635  'SdaI'	=> 'CCTGCAGG 6',
 | 
| 
 | 
   636  'SduI'	=> 'GDGCHC 5',
 | 
| 
 | 
   637  'SecI'	=> 'CCNNGG 1',
 | 
| 
 | 
   638  'SelI'	=> 'CGCG 0',
 | 
| 
 | 
   639  'SexAI'	=> 'ACCWGGT 1',
 | 
| 
 | 
   640  'SfcI'	=> 'CTRYAG 1',
 | 
| 
 | 
   641  'SfeI'	=> 'CTRYAG 1',
 | 
| 
 | 
   642  'SfiI'	=> 'GGCCNNNNNGGCC 8',
 | 
| 
 | 
   643  'SfoI'	=> 'GGCGCC 3',
 | 
| 
 | 
   644  'Sfr274I'	=> 'CTCGAG 1',
 | 
| 
 | 
   645  'Sfr303I'	=> 'CCGCGG 4',
 | 
| 
 | 
   646  'SfuI'	=> 'TTCGAA 2',
 | 
| 
 | 
   647  'SgfI'	=> 'GCGATCGC 5',
 | 
| 
 | 
   648  'SgrAI'	=> 'CRCCGGYG 2',
 | 
| 
 | 
   649  'SgrBI'	=> 'CCGCGG 4',
 | 
| 
 | 
   650  'SinI'	=> 'GGWCC 1',
 | 
| 
 | 
   651  'SlaI'	=> 'CTCGAG 1',
 | 
| 
 | 
   652  'SmaI'	=> 'CCCGGG 3',
 | 
| 
 | 
   653  'SmiI'	=> 'ATTTAAAT 4',
 | 
| 
 | 
   654  'SmiMI'	=> 'CAYNNNNRTG 5',
 | 
| 
 | 
   655  'SmlI'	=> 'CTYRAG 1',
 | 
| 
 | 
   656  'SnaBI'	=> 'TACGTA 3',
 | 
| 
 | 
   657  'SpaHI'	=> 'GCATGC 5',
 | 
| 
 | 
   658  'SpeI'	=> 'ACTAGT 1',
 | 
| 
 | 
   659  'SphI'	=> 'GCATGC 5',
 | 
| 
 | 
   660  'SplI'	=> 'CGTACG 1',
 | 
| 
 | 
   661  'SrfI'	=> 'GCCCGGGC 4',
 | 
| 
 | 
   662  'Sse9I'	=> 'AATT 0',
 | 
| 
 | 
   663  'Sse232I'	=> 'CGCCGGCG 2',
 | 
| 
 | 
   664  'Sse8387I'	=> 'CCTGCAGG 6',
 | 
| 
 | 
   665  'Sse8647I'	=> 'AGGWCCT 2',
 | 
| 
 | 
   666  'SseBI'	=> 'AGGCCT 3',
 | 
| 
 | 
   667  'SspI'	=> 'AATATT 3',
 | 
| 
 | 
   668  'SspBI'	=> 'TGTACA 1',
 | 
| 
 | 
   669  'SstI'	=> 'GAGCTC 5',
 | 
| 
 | 
   670  'SstII'	=> 'CCGCGG 4',
 | 
| 
 | 
   671  'StuI'	=> 'AGGCCT 3',
 | 
| 
 | 
   672  'StyI'	=> 'CCWWGG 1',
 | 
| 
 | 
   673  'SunI'	=> 'CGTACG 1',
 | 
| 
 | 
   674  'SwaI'	=> 'ATTTAAAT 4',
 | 
| 
 | 
   675  'TaaI'	=> 'ACNGT 3',
 | 
| 
 | 
   676  'TaiI'	=> 'ACGT 4',
 | 
| 
 | 
   677  'TaqI'	=> 'TCGA 1',
 | 
| 
 | 
   678  'TasI'	=> 'AATT 0',
 | 
| 
 | 
   679  'TatI'	=> 'WGTACW 1',
 | 
| 
 | 
   680  'TauI'	=> 'GCSGC 4',
 | 
| 
 | 
   681  'TelI'	=> 'GACNNNGTC 4',
 | 
| 
 | 
   682  'TfiI'	=> 'GAWTC 1',
 | 
| 
 | 
   683  'ThaI'	=> 'CGCG 2',
 | 
| 
 | 
   684  'TliI'	=> 'CTCGAG 1',
 | 
| 
 | 
   685  'Tru1I'	=> 'TTAA 1',
 | 
| 
 | 
   686  'Tru9I'	=> 'TTAA 1',
 | 
| 
 | 
   687  'TscI'	=> 'ACGT 4',
 | 
| 
 | 
   688  'TseI'	=> 'GCWGC 1',
 | 
| 
 | 
   689  'Tsp45I'	=> 'GTSAC 0',
 | 
| 
 | 
   690  'Tsp509I'	=> 'AATT 0',
 | 
| 
 | 
   691  'Tsp4CI'	=> 'ACNGT 3',
 | 
| 
 | 
   692  'TspEI'	=> 'AATT 0',
 | 
| 
 | 
   693  'Tth111I'	=> 'GACNNNGTC 4',
 | 
| 
 | 
   694  'TthHB8I'	=> 'TCGA 1',
 | 
| 
 | 
   695  'UnbI'	=> 'GGNCC 0',
 | 
| 
 | 
   696  'Van91I'	=> 'CCANNNNNTGG 7',
 | 
| 
 | 
   697  'Vha464I'	=> 'CTTAAG 1',
 | 
| 
 | 
   698  'VneI'	=> 'GTGCAC 1',
 | 
| 
 | 
   699  'VpaK11AI'	=> 'GGWCC 0',
 | 
| 
 | 
   700  'VpaK11BI'	=> 'GGWCC 1',
 | 
| 
 | 
   701  'VspI'	=> 'ATTAAT 2',
 | 
| 
 | 
   702  'XagI'	=> 'CCTNNNNNAGG 5',
 | 
| 
 | 
   703  'XapI'	=> 'RAATTY 1',
 | 
| 
 | 
   704  'XbaI'	=> 'TCTAGA 1',
 | 
| 
 | 
   705  'XceI'	=> 'RCATGY 5',
 | 
| 
 | 
   706  'XcmI'	=> 'CCANNNNNNNNNTGG 8',
 | 
| 
 | 
   707  'XhoI'	=> 'CTCGAG 1',
 | 
| 
 | 
   708  'XhoII'	=> 'RGATCY 1',
 | 
| 
 | 
   709  'XmaI'	=> 'CCCGGG 1',
 | 
| 
 | 
   710  'XmaIII'	=> 'CGGCCG 1',
 | 
| 
 | 
   711  'XmaCI'	=> 'CCCGGG 1',
 | 
| 
 | 
   712  'XmaJI'	=> 'CCTAGG 1',
 | 
| 
 | 
   713  'XmiI'	=> 'GTMKAC 2',
 | 
| 
 | 
   714  'XmnI'	=> 'GAANNNNTTC 5',
 | 
| 
 | 
   715  'XspI'	=> 'CTAG 1',
 | 
| 
 | 
   716  'ZhoI'	=> 'ATCGAT 2',
 | 
| 
 | 
   717  'ZraI'	=> 'GACGTC 3',
 | 
| 
 | 
   718  'Zsp2I'	=> 'ATGCAT 5',
 | 
| 
 | 
   719 );
 | 
| 
 | 
   720 
 | 
| 
 | 
   721 @RE_available = sort keys %RE;
 | 
| 
 | 
   722 
 | 
| 
 | 
   723 
 | 
| 
 | 
   724 =head1 new
 | 
| 
 | 
   725 
 | 
| 
 | 
   726  Title     : new
 | 
| 
 | 
   727  Purpose   : Initializes the RestrictionEnzyme object and calls
 | 
| 
 | 
   728            : the superclass constructor (Bio::Root::Root) first.
 | 
| 
 | 
   729  Returns   : The newly constructed RestrictionEnzyme object.
 | 
| 
 | 
   730  Argument  : Parameters passed to new()
 | 
| 
 | 
   731  Comments  : A RestrictionEnzyme object manages its recognition sequence
 | 
| 
 | 
   732            : as a Bio::PrimarySeq object.
 | 
| 
 | 
   733 
 | 
| 
 | 
   734 See Also   : L<_make_custom>(), L<_make_standard>(), B<Bio::PrimarySeq.pm::_initialize()>
 | 
| 
 | 
   735 
 | 
| 
 | 
   736 =cut
 | 
| 
 | 
   737 
 | 
| 
 | 
   738 #---------------
 | 
| 
 | 
   739 sub new {
 | 
| 
 | 
   740 #---------------
 | 
| 
 | 
   741     my($class, @args) = @_;
 | 
| 
 | 
   742 
 | 
| 
 | 
   743     my $self = $class->SUPER::new(@args);
 | 
| 
 | 
   744     my ($name,$make) = $self->_rearrange([qw(NAME MAKE)],@args);
 | 
| 
 | 
   745     $name && $self->name($name);
 | 
| 
 | 
   746     my %data;
 | 
| 
 | 
   747     if(defined $make && $make eq 'custom') {
 | 
| 
 | 
   748 	%data = $self->_make_custom($name); 
 | 
| 
 | 
   749     } else {
 | 
| 
 | 
   750 	%data = $self->_make_standard($name);
 | 
| 
 | 
   751     }
 | 
| 
 | 
   752     $self->{'_seq'} = new Bio::PrimarySeq(%data, 
 | 
| 
 | 
   753 				   -VERBOSE =>$self->verbose,
 | 
| 
 | 
   754  				   -alphabet => 'dna',
 | 
| 
 | 
   755 				   );
 | 
| 
 | 
   756     return $self;
 | 
| 
 | 
   757 }
 | 
| 
 | 
   758 
 | 
| 
 | 
   759 
 | 
| 
 | 
   760 #=head1 _make_standard
 | 
| 
 | 
   761 #
 | 
| 
 | 
   762 # Title     : _make_standard
 | 
| 
 | 
   763 # Usage     : n/a; automatically called by _initialize()
 | 
| 
 | 
    764 # Purpose   : Permits standard RE object construction from a known name, e.g.,
 | 
| 
 | 
   765 #	    : 'EcoRI'.
 | 
| 
 | 
   766 # Returns   : Hash containing named parameters for Bio::PrimarySeq.pm constructor.
 | 
| 
 | 
    767 # Argument  : String containing the enzyme name.
 | 
| 
 | 
   768 # Throws    : Exception if the requested enzyme name is unavailable.
 | 
| 
 | 
   769 #	    : NOTE: Case sensitive.
 | 
| 
 | 
   770 #
 | 
| 
 | 
   771 #See Also   : L<Bio::PrimarySeq::_initialize()|Bio::PrimarySeq>, L<_make_custom()|_make_custom>
 | 
| 
 | 
   772 #
 | 
| 
 | 
   773 #=cut
 | 
| 
 | 
   774 
 | 
| 
 | 
   775 #------------------
 | 
| 
 | 
   776 sub _make_standard {
 | 
| 
 | 
   777 #------------------
 | 
| 
 | 
   778     my($self, $name) = @_;
 | 
| 
 | 
   779 
 | 
| 
 | 
   780     $name =~ s/^\s+|\s+$//g;
 | 
| 
 | 
   781  
 | 
| 
 | 
   782     $self->is_available($name) || 
 | 
| 
 | 
   783 	$self->throw("Unavailable or undefined enzyme: $name (Note: CASE SENSITIVE)\n" .
 | 
| 
 | 
   784 		     "Currently available enzymes: \n@RE_available\n");
 | 
| 
 | 
   785 
 | 
| 
 | 
   786     my @data = split( ' ', $RE{$name});
 | 
| 
 | 
   787     my (%dat);
 | 
| 
 | 
   788     $dat{-SEQ} = $data[0];
 | 
| 
 | 
   789     $dat{-NAME} = $dat{-ID}= $name;    
 | 
| 
 | 
   790     $self->{'_cuts_after'} = $data[1];
 | 
| 
 | 
   791 
 | 
| 
 | 
   792     return %dat;
 | 
| 
 | 
   793 }
 | 
| 
 | 
   794 
 | 
| 
 | 
   795 
 | 
| 
 | 
   796 #=head1 _make_custom
 | 
| 
 | 
   797 #
 | 
| 
 | 
   798 # Title     : _make_custom
 | 
| 
 | 
   799 # Usage     : n/a; automatically called by _initialize()
 | 
| 
 | 
   800 # Purpose   : Permits custom RE object construction from strings 
 | 
| 
 | 
   801 #	    : such as 'EcoRI--G^AATTC' as the name of the enzyme.
 | 
| 
 | 
   802 # Returns   : Hash containing named parameters for Bio::PrimarySeq.pm constructor.
 | 
| 
 | 
   803 # Argument  : String containing string with special syntax.
 | 
| 
 | 
   804 # Throws    : Exception if the string has bad syntax.
 | 
| 
 | 
   805 #	    : Warning if the string did not specify cut position.
 | 
| 
 | 
   806 #	    :         Places cut site after 5'-most position.
 | 
| 
 | 
   807 #
 | 
| 
 | 
   808 #See Also   : L<Bio::PrimarySeq::_initialize()|Bio::PrimarySeq>
 | 
| 
 | 
   809 #
 | 
| 
 | 
   810 #=cut
 | 
| 
 | 
   811 
 | 
| 
 | 
   812 #'
 | 
| 
 | 
   813 #-----------------
 | 
| 
 | 
   814 sub _make_custom {
 | 
| 
 | 
   815 #-----------------
 | 
| 
 | 
   816     my($self, $name) = @_;
 | 
| 
 | 
   817 
 | 
| 
 | 
   818     $name =~ s/\s+//g;
 | 
| 
 | 
   819     my @parts  = split '--', $name;
 | 
| 
 | 
   820     my (%dat);
 | 
| 
 | 
   821     $dat{-NAME} = $dat{-ID} = $parts[0];
 | 
| 
 | 
   822     $self->name($parts[0]);  ## Reset name
 | 
| 
 | 
   823 
 | 
| 
 | 
   824     $parts[1] || return $self->throw("Undefined recognition site for $parts[0].",
 | 
| 
 | 
   825 				      "Use this syntax: EcoRV--GAT^ATC");
 | 
| 
 | 
   826     ## Determine the cuts_after point.
 | 
| 
 | 
   827     my $cut_index = index $parts[1], '^';
 | 
| 
 | 
   828     if( $cut_index <0) { $cut_index = 0;
 | 
| 
 | 
   829 			 $self->warn("Unknown cut position for $parts[0]. Assuming position 0\n" . 
 | 
| 
 | 
   830 				     "Use carat to specify cut position (e.g., G^AATTC)"); }
 | 
| 
 | 
   831     $self->{'_cuts_after'} =  $cut_index;
 | 
| 
 | 
   832 
 | 
| 
 | 
   833     ## Save the recognition sequence after removing the '^'
 | 
| 
 | 
   834     $parts[1] =~ s/\^//g;
 | 
| 
 | 
   835     $dat{-SEQ} = $parts[1];
 | 
| 
 | 
   836     return %dat;
 | 
| 
 | 
   837 }
 | 
| 
 | 
   838     
 | 
| 
 | 
   839 
 | 
| 
 | 
   840 =head1 cuts_after
 | 
| 
 | 
   841 
 | 
| 
 | 
   842  Title     : cuts_after
 | 
| 
 | 
   843  Usage     : $num = $re->cuts_after();
 | 
| 
 | 
   844  Purpose   : Sets/Gets an integer indicating the position of cleavage 
 | 
| 
 | 
   845            : relative to the 5' end of the recognition sequence.
 | 
| 
 | 
   846  Returns   : Integer
 | 
| 
 | 
   847  Argument  : Integer (optional)
 | 
| 
 | 
   848  Throws    : Exception if argument is non-numeric.
 | 
| 
 | 
   849  Access    : Public
 | 
| 
 | 
   850  Comments  : This method is only needed to change the cuts at
 | 
| 
 | 
   851            : position. This data is automatically set during
 | 
| 
 | 
   852            : construction.
 | 
| 
 | 
   853 
 | 
| 
 | 
   854 See Also   : L<_make_standard()|_make_standard>, L<_make_custom()|_make_custom>
 | 
| 
 | 
   855 
 | 
| 
 | 
   856 =cut
 | 
| 
 | 
   857 
 | 
| 
 | 
   858 #'
 | 
| 
 | 
   859 #---------------
 | 
| 
 | 
   860 sub cuts_after { 
 | 
| 
 | 
   861 #---------------
 | 
| 
 | 
   862     my $self = shift; 
 | 
| 
 | 
   863     if(@_) { my $num = shift;
 | 
| 
 | 
   864 	     if($num == 0 and $num ne '0') {
 | 
| 
 | 
   865 		 $self->throw("The cuts_after position be an integer ($num)");
 | 
| 
 | 
   866 	     }
 | 
| 
 | 
   867 	     $self->{'_cuts_after'} = $num;
 | 
| 
 | 
   868 	 }
 | 
| 
 | 
   869     $self->{'_cuts_after'}; 
 | 
| 
 | 
   870 }
 | 
| 
 | 
   871 
 | 
| 
 | 
   872 
 | 
| 
 | 
   873 
 | 
| 
 | 
   874 =head1 site
 | 
| 
 | 
   875 
 | 
| 
 | 
   876  Title     : site
 | 
| 
 | 
   877  Usage     : $re->site();
 | 
| 
 | 
   878  Purpose   : Gets the recognition sequence for the enzyme. 
 | 
| 
 | 
   879  Example   : $seq_string = $re->site();
 | 
| 
 | 
   880  Returns   : String containing recognition sequence indicating 
 | 
| 
 | 
   881            : cleavage site as in  'G^AATTC'.
 | 
| 
 | 
   882  Argument  : n/a
 | 
| 
 | 
   883  Throws    : n/a
 | 
| 
 | 
   884  Comments  : If you want a simple string representing the site without 
 | 
| 
 | 
   885              any '^', use the string() method.
 | 
| 
 | 
   886 
 | 
| 
 | 
   887 See Also   : L<string()|string>
 | 
| 
 | 
   888 
 | 
| 
 | 
   889 =cut
 | 
| 
 | 
   890 
 | 
| 
 | 
   891 #---------
 | 
| 
 | 
   892 sub site {
 | 
| 
 | 
   893 #---------
 | 
| 
 | 
   894     my $self = shift;
 | 
| 
 | 
   895     my $seq = $self->seq;
 | 
| 
 | 
   896     my $cuts_after = $self->cuts_after;
 | 
| 
 | 
   897     if($cuts_after > 0) {
 | 
| 
 | 
   898 	if( $cuts_after >= $seq->length) {
 | 
| 
 | 
   899 	    return $seq->seq.'^';
 | 
| 
 | 
   900 	} else { 
 | 
| 
 | 
   901 	    return $seq->subseq(1, $self->cuts_after).'^'.$seq->subseq($self->cuts_after+1, $seq->length); 
 | 
| 
 | 
   902 	}
 | 
| 
 | 
   903     } else {
 | 
| 
 | 
   904         return $seq->seq;
 | 
| 
 | 
   905     }
 | 
| 
 | 
   906 }
 | 
| 
 | 
   907 
 | 
| 
 | 
   908 
 | 
| 
 | 
   909 =head1 seq
 | 
| 
 | 
   910 
 | 
| 
 | 
   911  Title     : seq
 | 
| 
 | 
   912  Usage     : $re->seq();
 | 
| 
 | 
   913  Purpose   : Get the Bio::PrimarySeq.pm-derived object representing 
 | 
| 
 | 
   914            : the recognition sequence
 | 
| 
 | 
    915  Returns   : Bio::PrimarySeq object (use string() for a plain string)
 | 
| 
 | 
   916  Argument  : n/a
 | 
| 
 | 
   917  Throws    : n/a
 | 
| 
 | 
   918 
 | 
| 
 | 
   919 See Also   : L<string()|string>, L<revcom()|revcom>
 | 
| 
 | 
   920 
 | 
| 
 | 
   921 =cut
 | 
| 
 | 
   922 
 | 
| 
 | 
   923 #---------
 | 
| 
 | 
   924 sub seq    {  my $self = shift; $self->{'_seq'}; }
 | 
| 
 | 
   925 #---------
 | 
| 
 | 
   926 
 | 
| 
 | 
   927 
 | 
| 
 | 
   928 
 | 
| 
 | 
   929 =head1 string
 | 
| 
 | 
   930 
 | 
| 
 | 
   931  Title     : string
 | 
| 
 | 
   932  Usage     : $re->string();
 | 
| 
 | 
   933  Purpose   : Get a string representing the recognition sequence.
 | 
| 
 | 
   934  Returns   : String. Does NOT contain a  '^' representing the cut location
 | 
| 
 | 
   935              as returned by the site() method
 | 
| 
 | 
   936  Argument  : n/a
 | 
| 
 | 
   937  Throws    : n/a
 | 
| 
 | 
   938  Comments  : Delegates to the Bio::PrimarySeq-derived object.
 | 
| 
 | 
   939 
 | 
| 
 | 
   940 See Also   : L<seq()|seq>, L<site()|site>, L<revcom()|revcom>
 | 
| 
 | 
   941 
 | 
| 
 | 
   942 =cut
 | 
| 
 | 
   943 
 | 
| 
 | 
   944 #-----------
 | 
| 
 | 
   945 sub string {  my $self = shift; $self->{'_seq'}->seq; }
 | 
| 
 | 
   946 #-----------
 | 
| 
 | 
   947 
 | 
| 
 | 
   948 
 | 
| 
 | 
   949 
 | 
| 
 | 
   950 =head1 revcom
 | 
| 
 | 
   951 
 | 
| 
 | 
   952  Title     : revcom
 | 
| 
 | 
   953  Usage     : $re->revcom();
 | 
| 
 | 
   954  Purpose   : Get a string representing the reverse complement of
 | 
| 
 | 
   955            : the recognition sequence.
 | 
| 
 | 
   956  Returns   : String
 | 
| 
 | 
   957  Argument  : n/a
 | 
| 
 | 
   958  Throws    : n/a
 | 
| 
 | 
   959  Comments  : Delegates to the Bio::PrimarySeq.pm-derived object, but needs to
 | 
| 
 | 
   960              get out the string from it, as now Bio::PrimarySeq->revcom makes a
 | 
| 
 | 
   961              Bio::PrimarySeq object
 | 
| 
 | 
   962 
 | 
| 
 | 
   963 See Also   : L<seq()|seq>, L<string()|string>
 | 
| 
 | 
   964 
 | 
| 
 | 
   965 =cut
 | 
| 
 | 
   966 
 | 
| 
 | 
   967 #-----------
 | 
| 
 | 
   968 sub revcom {  my $self = shift; $self->{'_seq'}->revcom->seq(); }
 | 
| 
 | 
   969 #-----------
 | 
| 
 | 
   970 
 | 
| 
 | 
   971 
 | 
| 
 | 
   972 
 | 
| 
 | 
   973 =head1 cut_seq
 | 
| 
 | 
   974 
 | 
| 
 | 
   975  Title     : cut_seq
 | 
| 
 | 
   976  Usage     : $re->cut_seq(<sequence object>);
 | 
| 
 | 
   977  Purpose   : Conceptually cut or "digest" a DNA sequence with the given enzyme.
 | 
| 
 | 
   978  Example   : $string = $re->cut_seq(<sequence object>); 
 | 
| 
 | 
   979  Returns   : List of strings containing the resulting fragments.
 | 
| 
 | 
   980  Argument  : Reference to a Bio::PrimarySeq.pm-derived object.
 | 
| 
 | 
   981  Throws    : Exception if argument is not an object.
 | 
| 
 | 
   982            : (Does not yet verify that it is derived from Bio::PrimarySeq.pm.)
 | 
| 
 | 
   983  Comments  : Strategy relies on Perl's built-in split() function.
 | 
| 
 | 
   984            : Since split removes the recognition pattern, the resulting
 | 
| 
 | 
   985            : fragments are repaired after split()-ing.
 | 
| 
 | 
   986            : A side-effect of this is that for sites with ambiguous
 | 
| 
 | 
   987            : recognition sequence (i.e., containing N), the fragments
 | 
| 
 | 
   988            : will contain ambiguity characters instead of AGCT.
 | 
| 
 | 
   989            :
 | 
| 
 | 
   990            : There is currently no support for partial digestions.
 | 
| 
 | 
   991            : There is currently no support for circular sequences.
 | 
| 
 | 
   992            : (This should just involve merging the first and last frag
 | 
| 
 | 
   993            : if $seqObj->is_circular returns true).
 | 
| 
 | 
   994 
 | 
| 
 | 
   995 =cut
 | 
| 
 | 
   996 
 | 
| 
 | 
   997 #'
 | 
| 
 | 
   998 #-------------
 | 
| 
 | 
   999 sub cut_seq {
 | 
| 
 | 
  1000 #-------------
 | 
| 
 | 
  1001     my( $self, $seqObj) = @_;
 | 
| 
 | 
  1002     if( !ref($seqObj) || 
 | 
| 
 | 
  1003 	! $seqObj->isa('Bio::PrimarySeqI') ) {
 | 
| 
 | 
  1004 	$self->throw( "Can't cut sequence. Missing or invalid object".
 | 
| 
 | 
  1005 		      "seqObj: $seqObj");
 | 
| 
 | 
  1006     }
 | 
| 
 | 
  1007 
 | 
| 
 | 
  1008     my $cuts_after = $self->{'_cuts_after'};
 | 
| 
 | 
  1009     my ($site_3prime_seq, $site_5prime_seq);
 | 
| 
 | 
  1010     my $reSeq = $self->seq;
 | 
| 
 | 
  1011     if($cuts_after == 0) {
 | 
| 
 | 
  1012 	$site_3prime_seq = '';
 | 
| 
 | 
  1013 	$site_5prime_seq = $reSeq->seq();
 | 
| 
 | 
  1014     } elsif($cuts_after == $reSeq->length) {
 | 
| 
 | 
  1015 	$site_3prime_seq = $reSeq->seq();
 | 
| 
 | 
  1016 	$site_5prime_seq = '';
 | 
| 
 | 
  1017     } else {
 | 
| 
 | 
  1018 	$site_3prime_seq = $reSeq->subseq(1, $self->{'_cuts_after'});
 | 
| 
 | 
  1019 	$site_5prime_seq = $reSeq->subseq($self->{'_cuts_after'}+1, $reSeq->length);
 | 
| 
 | 
  1020     }
 | 
| 
 | 
  1021 
 | 
| 
 | 
  1022     $self->debug("3' site: $site_3prime_seq\n5' site: $site_5prime_seq\n");
 | 
| 
 | 
  1023 
 | 
| 
 | 
  1024     my(@re_frags);
 | 
| 
 | 
  1025     my $seq = uc $self->_expanded_string;
 | 
| 
 | 
  1026 
 | 
| 
 | 
  1027     if(!$self->palindromic and $self->name ne 'N') {
 | 
| 
 | 
  1028 	my $revseq = $self->_expanded_string( $reSeq->revcom->seq );
 | 
| 
 | 
  1029 	$seq .= '|'.uc($revseq);
 | 
| 
 | 
  1030     }
 | 
| 
 | 
  1031     $self->debug(sprintf("$ID: site seq: %s\n\n", $seq));
 | 
| 
 | 
  1032     $self->debug(sprintf("$ID: splitting %s\n\n",$reSeq->seq));
 | 
| 
 | 
  1033     @re_frags = split(/$seq/i, $seqObj->seq);
 | 
| 
 | 
  1034 
 | 
| 
 | 
  1035     $self->debug("$ID: cut_seq, ".scalar @re_frags. " fragments.\n");
 | 
| 
 | 
  1036 
 | 
| 
 | 
  1037     ## Re-attach the split recognition site back to the frags
 | 
| 
 | 
  1038     ## since perl zapped them in the split() call.
 | 
| 
 | 
  1039     my($i);
 | 
| 
 | 
  1040     my $numFrags = scalar @re_frags;
 | 
| 
 | 
  1041     for($i=0; $i<$numFrags; $i++) {
 | 
| 
 | 
  1042         $i < $#re_frags  and $re_frags[$i] = $re_frags[$i].$site_3prime_seq;
 | 
| 
 | 
  1043         $i > 0           and $re_frags[$i] = $site_5prime_seq.$re_frags[$i];
 | 
| 
 | 
  1044     }
 | 
| 
 | 
  1045     @re_frags;
 | 
| 
 | 
  1046 }
 | 
| 
 | 
  1047 
 | 
| 
 | 
  1048 =head1 cut_locations
 | 
| 
 | 
  1049 
 | 
| 
 | 
  1050  Title     : cut_locations
 | 
| 
 | 
  1051  Usage     : my $locations = $re->cut_locations(<sequence_object>);
 | 
| 
 | 
  1052  Purpose   : Report the location of the recognition site(s) within
 | 
| 
 | 
  1053            : an input sequence. 
 | 
| 
 | 
   1054  Example   : my $locations = $re->cut_locations($seqObj);
 | 
| 
 | 
   1055  Returns   : Arrayref of 0-based offsets at which the recognition site starts
 | 
| 
 | 
  1056  Argument  : Reference to a Bio::PrimarySeqI-derived sequence object.
 | 
| 
 | 
  1057  Throws    : n/a
 | 
| 
 | 
  1058  Comments  : 
 | 
| 
 | 
  1059 
 | 
| 
 | 
  1060 =cut
 | 
| 
 | 
  1061 
 | 
| 
 | 
  1062 #-----------------
 | 
| 
 | 
  1063 sub cut_locations {
 | 
| 
 | 
  1064 #-----------------
 | 
| 
 | 
  1065     my($self, $seqobj) = @_;
 | 
| 
 | 
  1066 
 | 
| 
 | 
  1067     my $site = $self->_expanded_string;
 | 
| 
 | 
  1068     my $seq = $seqobj->seq;
 | 
| 
 | 
  1069     study($seq);
 | 
| 
 | 
  1070     my @locations;
 | 
| 
 | 
  1071     while( $seq =~ /($site)/ig ) {
 | 
| 
 | 
  1072         # $` is preceding string before pattern so length returns position
 | 
| 
 | 
  1073 	push @locations, length($`); 	
 | 
| 
 | 
  1074     }
 | 
| 
 | 
  1075     return \@locations;
 | 
| 
 | 
  1076 }    
 | 
| 
 | 
  1077 
 | 
| 
 | 
  1078 # Purpose : Expand nucleotide ambiguity codes to their representative letters
 | 
| 
 | 
  1079 # Argument: (optional) the string to be expanded. If not supplied, used
 | 
| 
 | 
  1080 #           the string returned by $self->string().
 | 
| 
 | 
  1081 # Returns : String
 | 
| 
 | 
  1082 sub _expanded_string {
 | 
| 
 | 
  1083     my ($self, $str) = @_;
 | 
| 
 | 
  1084     
 | 
| 
 | 
  1085     $str ||= $self->string;
 | 
| 
 | 
  1086 
 | 
| 
 | 
  1087     if( $self->name ne 'N' ) {
 | 
| 
 | 
  1088         $str =~ s/N|X/\./g;
 | 
| 
 | 
  1089         $str =~ s/R/\[AG\]/g;
 | 
| 
 | 
  1090         $str =~ s/Y/\[CT\]/g;
 | 
| 
 | 
  1091         $str =~ s/S/\[GC\]/g;
 | 
| 
 | 
  1092         $str =~ s/W/\[AT\]/g;
 | 
| 
 | 
  1093         $str =~ s/M/\[AC\]/g;
 | 
| 
 | 
  1094         $str =~ s/K/\[TG\]/g;
 | 
| 
 | 
  1095         $str =~ s/B/\[CGT\]/g;
 | 
| 
 | 
  1096         $str =~ s/D/\[AGT\]/g;
 | 
| 
 | 
  1097         $str =~ s/H/\[ACT\]/g;
 | 
| 
 | 
  1098         $str =~ s/V/\[ACG\]/g;
 | 
| 
 | 
  1099     }
 | 
| 
 | 
  1100     return $str;
 | 
| 
 | 
  1101 }
 | 
| 
 | 
  1102 
 | 
| 
 | 
  1103 
 | 
| 
 | 
  1104 =head1 annotate_seq
 | 
| 
 | 
  1105 
 | 
| 
 | 
  1106  Title     : annotate_seq
 | 
| 
 | 
  1107  Usage     : $re->annotate_seq(<sequence_object>);
 | 
| 
 | 
  1108  Purpose   : Identify the location of the recognition site(s) within
 | 
| 
 | 
  1109            : an input sequence. Uses HTML.
 | 
| 
 | 
  1110  Example   : $annot_seq = $re->annotate_seq($seqObj);
 | 
| 
 | 
  1111  Returns   : String containing the annotated sequence.
 | 
| 
 | 
  1112  Argument  : Reference to a Bio::PrimarySeq.pm-derived sequence object.
 | 
| 
 | 
  1113  Throws    : n/a
 | 
| 
 | 
  1114  Comments  : The annotated sequence must be viewed with a web
 | 
| 
 | 
  1115            : browser to see the location(s) of the recognition site(s).
 | 
| 
 | 
  1116 
 | 
| 
 | 
  1117 =cut
 | 
| 
 | 
  1118 
 | 
| 
 | 
  1119 #-----------------
 | 
| 
 | 
  1120 sub annotate_seq {
 | 
| 
 | 
  1121 #-----------------
 | 
| 
 | 
  1122     my($self, $seqObj) = @_;
 | 
| 
 | 
  1123 
 | 
| 
 | 
  1124     my $site = $self->_expanded_string;
 | 
| 
 | 
  1125     my $seq = $seqObj->seq;
 | 
| 
 | 
  1126 
 | 
| 
 | 
  1127     $seq =~ s|$site|<b>$site</b>|g;
 | 
| 
 | 
  1128     return $seq;
 | 
| 
 | 
  1129 }    
 | 
| 
 | 
  1130 
 | 
| 
 | 
  1131 
 | 
| 
 | 
  1132 =head1 palindromic
 | 
| 
 | 
  1133 
 | 
| 
 | 
  1134  Title     : palindromic
 | 
| 
 | 
  1135  Usage     : $re->palindromic();
 | 
| 
 | 
  1136  Purpose   : Determines if the recognition sequence is palindromic
 | 
| 
 | 
  1137            : for the current restriction enzyme.
 | 
| 
 | 
  1138  Returns   : Boolean
 | 
| 
 | 
  1139  Argument  : n/a
 | 
| 
 | 
  1140  Throws    : n/a
 | 
| 
 | 
  1141  Access    : Public 
 | 
| 
 | 
  1142  Comments  : A palindromic site (EcoRI): 5-GAATTC-3
 | 
| 
 | 
  1143            :                             3-CTTAAG-5
 | 
| 
 | 
  1144 
 | 
| 
 | 
  1145 =cut
 | 
| 
 | 
  1146 
 | 
| 
 | 
  1147 #----------------
 | 
| 
 | 
  1148 sub palindromic {
 | 
| 
 | 
  1149 #----------------
 | 
| 
 | 
  1150     my $self = shift;
 | 
| 
 | 
  1151     $self->string eq $self->revcom;
 | 
| 
 | 
  1152 }
 | 
| 
 | 
  1153 
 | 
| 
 | 
  1154 
 | 
| 
 | 
  1155 
 | 
| 
 | 
  1156 =head1 is_available
 | 
| 
 | 
  1157 
 | 
| 
 | 
  1158  Title     : is_available
 | 
| 
 | 
  1159  Usage     : $re->is_available(<string containing name of enzyme>);
 | 
| 
 | 
  1160  Purpose   : Determine if an enzyme is available (to this module).
 | 
| 
 | 
  1161            : (see the package lexical %RE).
 | 
| 
 | 
  1162  Example   : $re->is_available('EcoRI');
 | 
| 
 | 
  1163            : &Bio::Tools::RestrictionEnzyme::is_available($object,'EcoRI');
 | 
| 
 | 
  1164  Returns   : Boolean
 | 
| 
 | 
  1165  Argument  : String
 | 
| 
 | 
  1166  Throws    : n/a
 | 
| 
 | 
  1167  Comments  : This method does NOT give information about
 | 
| 
 | 
  1168            : commercial availability (yet). 
 | 
| 
 | 
  1169            : Enzyme names are CASE SENSITIVE.
 | 
| 
 | 
  1170 
 | 
| 
 | 
  1171 See Also   : L<available_list()|available_list>
 | 
| 
 | 
  1172 
 | 
| 
 | 
  1173 =cut
 | 
| 
 | 
  1174 
 | 
| 
 | 
  1175 #----------------
 | 
| 
 | 
  1176 sub is_available {
 | 
| 
 | 
  1177 #----------------
 | 
| 
 | 
  1178     my($self,$name) = @_;
 | 
| 
 | 
  1179     exists $RE{$name};
 | 
| 
 | 
  1180 }
 | 
| 
 | 
  1181 
 | 
| 
 | 
  1182 #--------------
 | 
| 
 | 
  1183 sub available {
 | 
| 
 | 
  1184 #--------------
 | 
| 
 | 
  1185     my($self,$name) = @_;
 | 
| 
 | 
  1186     print STDERR "\nDeprecated method: $ID:: available(); ".
 | 
| 
 | 
  1187 	"use is_available() instead.\n";
 | 
| 
 | 
  1188     $self->is_available($name);
 | 
| 
 | 
  1189 }
 | 
| 
 | 
  1190 
 | 
| 
 | 
  1191 
 | 
| 
 | 
  1192 =head2 name
 | 
| 
 | 
  1193 
 | 
| 
 | 
  1194  Title   : name
 | 
| 
 | 
  1195  Usage   : $obj->name($newval)
 | 
| 
 | 
  1196  Function: 
 | 
| 
 | 
  1197  Example : 
 | 
| 
 | 
  1198  Returns : value of name
 | 
| 
 | 
  1199  Args    : newvalue (optional)
 | 
| 
 | 
  1200 
 | 
| 
 | 
  1201 
 | 
| 
 | 
  1202 =cut
 | 
| 
 | 
  1203 
 | 
| 
 | 
  1204 sub name{
 | 
| 
 | 
  1205    my ($obj,$value) = @_;
 | 
| 
 | 
  1206    if( defined $value) {
 | 
| 
 | 
  1207       $obj->{'name'} = $value;
 | 
| 
 | 
  1208     }
 | 
| 
 | 
  1209     return $obj->{'name'};
 | 
| 
 | 
  1210 
 | 
| 
 | 
  1211 }
 | 
| 
 | 
  1212 
 | 
| 
 | 
  1213 =head1 available_list
 | 
| 
 | 
  1214 
 | 
| 
 | 
  1215  Title     : available_list
 | 
| 
 | 
  1216  Usage     : $re->available_list([<integer>]);
 | 
| 
 | 
  1217  Purpose   : Retrieve a list of currently available enzymes.
 | 
| 
 | 
  1218  Example   : @all = $re->available_list();  ## All enzymes
 | 
| 
 | 
  1219            : @six_cutters = $re->available_list(6);  ## All 6-cutters
 | 
| 
 | 
  1220  Returns   : List of strings
 | 
| 
 | 
  1221  Argument  : Integer (optional)
 | 
| 
 | 
  1222  Throws    : n/a
 | 
| 
 | 
  1223  Comments  : This method may be more appropriate for a REData.pm class.
 | 
| 
 | 
  1224 
 | 
| 
 | 
  1225 See Also   : L<is_available()|is_available>
 | 
| 
 | 
  1226 
 | 
| 
 | 
  1227 =cut
 | 
| 
 | 
  1228 
 | 
| 
 | 
  1229 #-------------------
 | 
| 
 | 
  1230 sub available_list {
 | 
| 
 | 
  1231 #-------------------
 | 
| 
 | 
  1232     my($self,$size) = @_;
 | 
| 
 | 
  1233     $size ||= 'all';
 | 
| 
 | 
  1234 
 | 
| 
 | 
  1235     $size eq 'all' and return @RE_available;
 | 
| 
 | 
  1236 
 | 
| 
 | 
  1237     my(@data, @names);
 | 
| 
 | 
  1238     foreach (@RE_available) {
 | 
| 
 | 
  1239 	@data = split /\s/, $RE{$_};
 | 
| 
 | 
  1240 	if(length $data[0] == $size) {
 | 
| 
 | 
  1241 	    push @names, $_;
 | 
| 
 | 
  1242 	}
 | 
| 
 | 
  1243     }
 | 
| 
 | 
  1244     @names;
 | 
| 
 | 
  1245 }
 | 
| 
 | 
  1246 
 | 
| 
 | 
  1247 1;
 |